为什么我的网络爬虫在逐个点击链接列表中的链接时运行得非常慢?(C#)



我想单击所有文本中包含"300"的链接。我的网页抓取代码单击每个链接的速度非常慢。我先将链接存储在列表中,然后逐个单击它们。

我先统计匹配链接的数量,然后使用 for (int pos = 0; pos < numberOfElementsFound; pos++) 按索引逐个单击。我在 https://www.w3schools.com/html/default.asp 上用 By.PartialLinkText("3600") 试过这段计数并单击的代码,响应速度非常快,但在另一个站点上却非常慢。

class Program
{
    private static IWebDriver driver = null;
    /// <summary>
    /// Entry point: starts an Internet Explorer session, maximizes the window,
    /// opens the target page and clicks every link whose text contains "300".
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        driver = new InternetExplorerDriver();
        try
        {
            driver.Manage().Window.Maximize();
            driver.Navigate().GoToUrl("https://arbitrary.com/");
            clickAllLinks("300");
        }
        finally
        {
            // Always end the session: without Quit() the browser window and the
            // driver server process are left running after the program exits,
            // including when a lookup or click above throws.
            driver.Quit();
            driver = null;
        }
    }
    /// <summary>
    /// Clicks links inside the <c>div</c> whose class is <c>data</c>, one position at a time.
    /// The number of clicks is the number of links in that container whose text
    /// contains <paramref name="tagName"/>, counted once before the loop starts.
    /// </summary>
    /// <param name="tagName">Partial link text used to count and locate the links.</param>
    public static void clickAllLinks(string tagName)
    {
        IWebElement container = driver.FindElement(By.XPath("//div[@class='data']"));
        int linkCount = container.FindElements(By.PartialLinkText(tagName)).Count();

        int index = 0;
        while (index < linkCount)
        {
            // The element is looked up again for every position rather than
            // reusing a reference obtained before the previous click.
            IWebElement link = getElementWithIndex(By.PartialLinkText(tagName), index);
            link.Click();
            //fetchdata();
            index++;
        }
    }
    /// <summary>
    /// Returns the element at position <paramref name="pos"/> among the elements
    /// matching <paramref name="by"/> inside the <c>div</c> whose class is <c>data</c>.
    /// </summary>
    /// <param name="by">Locator applied within the container.</param>
    /// <param name="pos">Zero-based position in the list of matches.</param>
    /// <returns>The matching element at the requested position.</returns>
    /// <exception cref="System.ArgumentOutOfRangeException">
    /// <paramref name="pos"/> is negative or not less than the number of matches.
    /// </exception>
    public static IWebElement getElementWithIndex(By by, int pos)
    {
        IWebElement container =
            driver.FindElement(By.XPath("//div[@class='data']"));
        // Honor the caller's locator. The previous version ignored `by` and always
        // searched for the literal text "300", so any other search text was wrong.
        IList<IWebElement> matches = container.FindElements(by);
        return matches.ElementAt(pos);
    }

    // Scrapes table data from the page the browser is currently showing:
    // reads the driver's current URL, prints it, downloads that URL's HTML with
    // HttpClient, parses it into an HtmlDocument and gathers the <td> children
    // of each <tr> under the table with class 'classname'.
    // NOTE(review): the method is `async void`, so callers cannot await it or
    // observe exceptions thrown after the first await.
    // NOTE(review): the page is fetched a second time over HTTP instead of being
    // read from the driver; presumably the site serves the same content without
    // the browser session - confirm.
    // NOTE(review): the listing is cut off before the method's closing brace.
    public static async void fetchdata()
    {
        string currentURL = driver.Url; // address of the page currently loaded in the browser
        Console.WriteLine("URL: " + currentURL);
        // NOTE(review): a new HttpClient is created on every call and never disposed.
        var httpclient = new HttpClient();
        var html = await httpclient.GetStringAsync(currentURL);
        var htmldoc = new HtmlDocument();
        htmldoc.LoadHtml(html); // parse the downloaded markup into a document tree
        // NOTE(review): SelectSingleNode's result is used without a null check - confirm
        // the table is always present.
      List<List<string>> Receipt = 
      htmldoc.DocumentNode.SelectSingleNode("//table[@class='classname']")
            // Walk TABLE -> TR -> TD, keeping only rows that have at least one <td>.
            // NOTE(review): the projection yields the <td> elements themselves, not
            // their InnerText, which does not look assignable to List<List<string>>;
            // a `.Select(td => td.InnerText)` step appears to be missing - confirm.
            .Descendants("tr")
            .Where(tr => tr.Elements("td").Count() > 0)
            .Select(tr => tr.Elements("td")
            .ToList())
            .ToList();

这是您的 clickAllLinks 方法的简化版本。它可以减少当前方法中的开销(先获取元素并把它们存储起来并非必要,而且可能会影响执行速度)。

/// <summary>
/// Clicks every anchor inside the <c>div</c> whose class is <c>data</c> whose text
/// contains <paramref name="tagName"/>, locating each one directly by XPath position.
/// </summary>
/// <param name="tagName">
/// Text the link must contain. It is embedded in a single-quoted XPath string
/// literal, so it must not itself contain a single quote.
/// </param>
public static void clickAllLinks(string tagName)
{
    // Quote the text so XPath treats it as a string literal; unquoted, only
    // numeric values behave as a text search.
    string linksXPath = "//div[@class='data']//a[contains(., '" + tagName + "')]";

    int elements = driver.FindElements(By.XPath(linksXPath)).Count;

    // XPath positions are 1-based, so the last match sits at index == elements.
    for (int pos = 1; pos <= elements; pos++)
    {
        // FindElement (singular) returns the one element to click; the
        // parenthesized expression makes [pos] index the whole result set.
        driver.FindElement(By.XPath("(" + linksXPath + ")[" + pos + "]")).Click();
        //fetchdata();
    }
}

最新更新