VBA:使用 <ul 和 <li 以及 <div 和 <span 进行网页抓取



我正在使用 VBA 从 HTML 中提取数据,<span代码在<Div下,在<li

下,在<ul

我正在尝试从HTML中提取"日期和事项"。 "日期"应该在Excel的A列中,"Matter"应该在B列中。

我的代码的缺点是,它将所有Datematter拉到单个单元格中。

Sub GetDat()
Dim IE As New InternetExplorer, html As HTMLDocument
Dim elem As Object, data As String
With IE
.Visible = True
.navigate "https://www.MyURL/sc/wo/Worders/index?id=76888564"
Do While .readyState <> READYSTATE_COMPLETE: Loop
Set html = .document
End With
data = ""
For Each elem In html.getElementsByClassName("simple-list")(0).getElementsByTagName("li")
data = data & " " & elem.innerText
Next elem
Range("A1").Value = data
IE.Quit
End Sub

我需要的输出如图所示:

.HTML:

你可以获取两个nodeLists,一个用于日期,一个用于事务,然后将那些写出的循环到工作表中。根据data-bind属性值匹配dates;mattersclassname

Dim dates As Object, matters As Object, i As Long, ws As Worksheet
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set dates = ie.document.querySelectorAll("[data-bind^='text:createdDate']") '.wo-notes-col-1 [data-bind^='text:createdDate']
Set matters = ie.document.querySelectorAll(".wo-notes")
With ws
For i = 0 To dates.Length - 1
.Cells(i + 1, 1) = dates.Item(i).innertext
.Cells(i + 1, 2) = matters.Item(i).innertext
Next
End With

从 C 列读取值的示例:

Option Explicit
Public Sub GetMatters()
Dim ws As Worksheet, lastRow As Long, urls(), results(), ie As SHDocVw.InternetExplorer, r As Long
Set ie = New SHDocVw.InternetExplorer
Set ws = ThisWorkbook.Worksheets("Sheet1")
lastRow = ws.Cells(ws.Rows.Count, "C").End(xlUp).Row
urls = Application.Transpose(ws.Range("C2:C" & lastRow).Value)
ReDim results(1 To 1000, 1 To 2)
With ie
.Visible = True
For i = LBound(urls) To UBound(urls)
.navigate2 "https://www.MyURL/sc/wo/Worders/index?id=" & urls(i)
While .Busy Or .readyState <> 4: DoEvents: Wend
Dim dates As Object, matters As Object, i As Long
Set dates = .document.querySelectorAll("[data-bind^='text:createdDate']") '.wo-notes-col-1 [data-bind^='text:createdDate']
Set matters = .document.querySelectorAll(".wo-notes")
For i = 0 To dates.Length - 1
r = r + 1
results(r, 1) = dates.Item(i).innertext
results(r, 2) = matters.Item(i).innertext
Next
Set dates = Nothing: Set matter = dates
Next
.Quit
End With
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub

引用:

  1. document.querySelectorAll
  2. CSS 选择器

相关内容

最新更新