如何用特殊字符复制HTML web源代码到excel



所以我有这个代码,它将整个HTML源代码剥离到列中的下一个单元格。问题是,我用来提取HTML源代码的网页有一些波兰字母,如"ą","ś"等等......有办法粘贴那些波兰字母的代码吗?现在我得到了一些带问号的疯狂方块。有什么建议吗?

p。我有这个代码感谢@pizzettix https://stackoverflow.com/users/6254609/pizzettix

Sub audycje()

Dim strona As Object
Dim adres As String
Dim wb As Workbook
Dim a As Object
Dim str_var As Variant

Set wb = ThisWorkbook
adres = InputBox("Podaj adres strony")
If adres = "" Then
MsgBox ("Nie podano strony do zaladowania")
Exit Sub
End If

Set strona = CreateObject("htmlfile")   'Create HTMLFile Object
With CreateObject("msxml2.xmlhttp")  'Get the WebPage Content
.Open "GET", adres, False
.send
strona.Body.Innerhtml = .responseText
End With

'Split_with_delimiter_newline
split_var = Split(strona.Body.Innerhtml, Chr(10))

Application.ScreenUpdating = False

For i = 0 To UBound(split_var, 1)
Cells(2 + i, 2).Value2 = split_var(i)
Next i

Application.ScreenUpdating = True

End Sub

任何你想在Excel表格中使用HTML的东西。数据类型String不能处理UTF-8字符。但是您可以检测每个CHR(10),并且使用一些字符串操作可以将每一行HTML代码直接从responseText编写到当前单元格。甚至不需要HTML文档:

Sub audycje()

Dim adres As String
Dim currRow As Long
Dim posFrom As Long
Dim posTo As Long

currRow = 2
adres = InputBox("Podaj adres strony")
If adres = "" Then
MsgBox ("Nie podano strony do zaladowania")
Exit Sub
End If

With CreateObject("msxml2.xmlhttp.6.0")  'Get the WebPage Content
.Open "GET", adres, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:85.0) Gecko/20100101 Firefox/85.0"
.setRequestHeader "Content-Type", "text/plain"
.send

If .Status = 200 Then
posFrom = 1
Do
posTo = InStr(posFrom, .responseText, Chr(10))
If posTo > 0 Then
Cells(currRow, 2) = Mid(.responseText, posFrom, posTo - posFrom)
posFrom = posTo + 1
currRow = currRow + 1
Else
Exit Do
End If
Loop
Else
MsgBox "Page not loaded: HTTP Status " & .Status
End If
End With
End Sub

经过一个月的搜索,我终于找到了!下面的代码实现了这个功能:)

Sub audycje()
Dim strona As Object
Dim adres As String
Dim wb As Workbook
Dim str_var As Variant
Dim Mystring As String

Set wb = ThisWorkbook
adres = InputBox("Podaj adres strony")
If adres = "" Then
MsgBox ("Nie podano strony do zaladowania")
Exit Sub
End If

Set strona = CreateObject("htmlfile")   'Create HTMLFile Object
With CreateObject("msxml2.xmlhttp")  'Get the WebPage Content
.Open "GET", adres, False
.setRequestHeader "Content-Type", "text/plain;charset=UTF-8"
.send
strona.body.innerHTML = StrConv(.responseBody, vbUnicode)
End With

'Split_with_delimiter_newline
split_var = Split(strona.body.innerHTML, Chr(10))

Application.ScreenUpdating = False

For i = 0 To UBound(split_var, 1)
wb.Worksheets("Dane").Cells(2 + i, 2).Value2 = split_var(i)
Next i

Application.ScreenUpdating = True

最新更新