我有以下代码可以工作(谢谢大家的帮助!),但它运行相对较慢。运行大约 20-25 个链接大约需要 1000 分钟。
有效利用这有点长(尽管我知道打开和抓取 1000 个列表需要时间) - 有没有办法缩短它?
理想情况下,我想从超过 10K 个链接中提取信息。
Public Sub ListingInfo()
Dim cell As Range
With ThisWorkbook.Worksheets("eBayListings")
For Each cell In .Range("A1", .Cells(.Rows.count, 1).End(xlUp))
Dim Document As MSHTML.HTMLDocument
Dim elem As MSHTML.IHTMLElement
Dim elem2 As MSHTML.IHTMLElement
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", cell.Value, False
.send
Set Document = New MSHTML.HTMLDocument
Document.body.innerHTML = .responseText
End With
Set elem2 = Document.getElementById("itemTitle")
If Not elem2 Is Nothing Then
cell.Offset(0, 1).Value = elem2.innerText
Else
End If
Set elem2 = Document.getElementById("vi-cdown_timeLeft")
If Not elem2 Is Nothing Then
cell.Offset(0, 2).Value = elem2.innerText
Else
End If
Set elem = Document.getElementById("prcIsum_bidPrice")
If Not elem Is Nothing Then
cell.Offset(0, 3).Value = elem.innerText
Else
End If
Set elem = Document.getElementById("prcIsum")
If Not elem Is Nothing Then
cell.Offset(0, 4).Value = elem.innerText
Else
End If
Set elem2 = Document.getElementById("mbgLink")
If Not elem2 Is Nothing Then
cell.Offset(0, 5).Value = elem2.innerText
Else
End If
Set elem2 = Document.getElementById("si-fb")
If Not elem2 Is Nothing Then
cell.Offset(0, 6).Value = elem2.innerText
Else
End If
Set elem2 = Document.getElementById("binBtn_btn")
If Not elem2 Is Nothing Then
cell.Offset(0, 7).Value = elem2.innerText
Else
End If
Set elem2 = Document.getElementById(".ds_div")
If Not elem2 Is Nothing Then
cell.Offset(0, 8).Value = elem2.innerText
Else
End If
If Not Document.querySelector(".viSNotesCnt") Is Nothing Then
cell.Offset(0, 9).Value = Document.querySelector(".viSNotesCnt").innerText
Else
'Try Something Else
End If
Next
End With
End Sub
关于限制的评论很重要。您可能需要添加一些等待时间。一种技术可以是维护访问的 url 计数,并且每个 x 个数字都会引入等待。
对于上述内容,您可以通过避免每次都点击工作表来访问值和写出来节省一些时间。相反,将 url 存储在数组中并循环。将每个运行循环的结果存储到数组中。最后一次性写出整个结果数组。
将 xmlhttp 对象创建移出循环。切换屏幕更新和您想要的任何其他应用程序/工作表优化。
可能会减少代码行数,如下所示。
您可能希望添加一个测试,以防工作表中仅存在一个 url,在这种情况下,您需要重新显示 urls 数组以防止错误,并简单地将填充的单元格直接分配给数组。
未经测试。
Option Explicit
Public Sub ListingInfo()
Dim Document As MSHTML.HTMLDocument, urls(), url As String, results()
Set Document = New MSHTML.HTMLDocument
Application.ScreenUpdating = False
With ThisWorkbook.Worksheets("eBayListings")
urls = .Range("A1", .Cells(.Rows.Count, 1).End(xlUp)).Value
ReDim results(1 To UBound(urls, 1), 1 To 9)
With CreateObject("MSXML2.XMLHTTP")
For url = LBound(urls, 1) To UBound(urls, 1)
.Open "GET", urls(url), False
.send
Document.body.innerHTML = .responseText
On Error Resume Next
With Document
results(url, 1) = .getElementById("itemTitle").innerText
results(url, 2) = .getElementById("vi-cdown_timeLeft").innerText
results(url, 3) = .getElementById("prcIsum_bidPrice").innerText
results(url, 4) = .getElementById("prcIsum").innerText
results(url, 5) = .getElementById("mbgLink").innerText
results(url, 6) = .getElementById("si-fb").innerText
results(url, 7) = .getElementById("binBtn_btn").innerText
results(url, 8) = .getElementById(".ds_div").innerText '<== is this id correct
results(url, 9) = .querySelector(".viSNotesCnt").innerText
'any tests on current row (url) for empty.......
End With
On Error GoTo 0
Next
End With
.Cells(1, 2).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
Application.ScreenUpdating = True
End Sub