Selenium中获取滚动高度返回"None": [ arguments[0].scrollHeight ]



我正在开发一个使用Selenium的Python机器人,由于"arguments[0].scrollHeight"返回"None",对话框中的无限滚动无法工作。

# Question snippet: locate the second child <div> of the element with role="dialog".
dialogBx=driver.find_element_by_xpath("//div[@role='dialog']/div[2]")
print(dialogBx)  #<selenium.webdriver.remote.webelement.WebElement (session="fcec89cc11fa5fa5eaf29a8efa9989f9", element="31bfd470-de78-XXXX-XXXX-ac1ffa6224c4")>
print(type(dialogBx)) #<class 'selenium.webdriver.remote.webelement.WebElement'>
# Fixed five-second pause before the height is read.
sleep(5)
# NOTE(review): the JavaScript below has no `return` statement, which is presumably why
# execute_script hands back None; "return arguments[0].scrollHeight" should pass the value
# to Python -- confirm against the Selenium execute_script documentation.
last_height=driver.execute_script("arguments[0].scrollHeight",dialogBx);
print("Height : ",last_height) #None

我需要获取最后的高度(last_height)来进行比较,请提供解决方案。

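好的,回答你的问题:driver.execute_script("arguments[0].scrollHeight",dialogBx)返回None,是因为脚本中没有return语句——execute_script只会把JavaScript中显式return的值传回Python,例如driver.execute_script("return arguments[0].scrollHeight",dialogBx)。另外,因为你是在对话框中操作,我们应该专注于对话框本身;我认为你可能是在主页面或错误的div上执行了这段脚本(不是100%确定)。无论哪种方式,我都采取了不同的方法:我们选择对话框中当前可用的最后一个<li>项目,并向下滚动到它的位置,这将迫使对话框更新。下面这段代码摘自后面的完整代码: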

# Look up the <li> at index start_pos through its absolute XPath inside the dialog.
last_li_item = driver.find_element_by_xpath('/html/body/div[4]/div/div[2]/ul/div/li[{p}]'.format(p=start_pos))
# Reading this property is what scrolls the item into view; the value it returns is discarded.
last_li_item.location_once_scrolled_into_view

我们首先选择最后一个列表项,然后访问它的属性"location_once_scrolled_into_view"。读取此属性会把对话框向下滚动到最后一个项目,随后对话框会加载更多项目。start_pos只是该项在当前可用<li>元素列表中的位置。例如:在<div><li></li><li></li><li></li></div>中,start_pos=2,即从0开始计数的最后一个li项。我之所以用这个变量名,是因为它位于一个for循环中,该循环监视div中li项的变化;看到完整代码后你就会明白。

另一方面,要运行这段代码,只需修改顶部的参数并执行测试函数test()。如果你已经登录到Instagram,只需运行get_list_of_followers()。注意:使用此函数时,请使用此代码中的Follower类。如果你愿意,可以将其删除,但需要相应地修改函数。重要:

当你执行这个程序时,对话框中的项目会不断增加,直到没有更多的项目可以加载。因此有一个TODO:删除已经处理过的元素,否则我相信当数量变得很大时,性能会变慢!

如果你需要其他解释,请告诉我。现在的代码:

import time
from selenium import webdriver
from selenium.webdriver.remote.webelement import WebElement
# Root URL of Instagram; the profile path below is appended to it.
base_url = "https://www.instagram.com"
# --------------------- USER SETTINGS: EDIT BELOW ---------------------
# Profile path (with its surrounding slashes) whose followers are listed.
base_user = "/nasa/"
# Pause, in seconds, used after each step that needs the page to load.
sleep_time = 3
# Login route: True goes through Facebook, False through Instagram itself.
login_with_facebook = True
# Credentials typed into the login form.
username = "YOUR_USERNAME"
password = "YOUR_PASSWORD"
# Number of followers to retrieve: -1 means all, otherwise some n > 0.
get_users = 10
# ---------------------------------------------------------------------
# Container <div> holding the follower entries (not the dialog box itself).
dialog_box_xpath = '/html/body/div[4]/div/div[2]/ul/div'
# Profile header: follower counter and the link that opens the dialog.
total_followers_xpath = '/html/body/div[1]/section/main/div/header/section/ul/li[2]/a/span'
followers_button_xpath = '/html/body/div[1]/section/main/div/header/section/ul/li[2]/a'
# Instagram login form.
insta_username_xpath = '/html/body/div[5]/div/div[2]/div[2]/div/div/div[1]/div/form/div[2]/div/label/input'
insta_pwd_xpath = '/html/body/div[5]/div/div[2]/div[2]/div/div/div[1]/div/form/div[3]/div/label/input'
insta_login_button_xpath = '/html/body/div[5]/div/div[2]/div[2]/div/div/div[1]/div/form/div[4]/button'
insta_fb_login_button_xpath = '/html/body/div[5]/div/div[2]/div[2]/div/div/div[1]/div/form/div[6]/button'
# Facebook login form.
fb_username_xpath = '/html/body/div[1]/div[3]/div[1]/div/div/div[2]/div[1]/form/div/div[1]/input'
fb_pwd_xpath = '/html/body/div[1]/div[3]/div[1]/div/div/div[2]/div[1]/form/div/div[2]/input'
fb_login_button_xpath = '/html/body/div[1]/div[3]/div[1]/div/div/div[2]/div[1]/form/div/div[3]/button'
# Username field, password field and submit button of whichever login route is active.
u_path, p_path, lb_path = (
    (fb_username_xpath, fb_pwd_xpath, fb_login_button_xpath)
    if login_with_facebook
    else (insta_username_xpath, insta_pwd_xpath, insta_login_button_xpath)
)

# Minimal record type for one follower; kept mainly to make the example readable.
class Follower:
    """One follower entry: a user name plus the href that was stored with it."""

    def __init__(self, user_name, href):
        self.username, self.href = user_name, href

    def __repr__(self):
        # The representation is just the stored user name.
        return self.username

    @property
    def get_href(self):
        """The href given at construction (read as an attribute, not called)."""
        return self.href

    @property
    def get_username(self):
        """The user name given at construction (read as an attribute, not called)."""
        return self.username

def test():
    """
    Open the configured profile, log in, and print the requested followers.

    Uses the module-level settings (base_url, base_user, credentials, XPaths,
    sleep_time, get_users). The browser session is always shut down, even when
    a step fails half-way through.
    """
    # Imported locally so the function stands on its own. Locator-based lookups
    # work on Selenium 3 and 4, whereas find_element_by_* was removed in 4.3.
    from selenium.webdriver.common.by import By

    base_user_path = base_url + base_user
    driver = webdriver.Chrome()
    try:
        driver.get(base_user_path)
        # click the followers button; Instagram answers with a login prompt
        driver.find_element(By.XPATH, followers_button_xpath).click()
        time.sleep(sleep_time)
        # for a Facebook login we first have to switch to the Facebook form
        if login_with_facebook:
            driver.find_element(By.XPATH, insta_fb_login_button_xpath).click()
            time.sleep(sleep_time)
        # u_path / p_path / lb_path already point at the form of the chosen login route
        username_input = driver.find_element(By.XPATH, u_path)
        username_input.send_keys(username)
        password_input = driver.find_element(By.XPATH, p_path)
        password_input.send_keys(password)
        driver.find_element(By.XPATH, lb_path).click()
        # The page needs longer to load after logging in. Feel free to change this to your needs.
        time.sleep(10)
        # click the followers button again, this time the dialog opens
        driver.find_element(By.XPATH, followers_button_xpath).click()
        time.sleep(sleep_time)
        # read the followers from the dialog box; returns a list of Follower objects
        followers: list[Follower] = get_list_of_followers(driver, dialog_box_xpath, get_users)
    finally:
        # quit() ends the whole session (close() would only close the window),
        # and running it in finally avoids leaking a browser when a step raises.
        driver.quit()
    for follower in followers:
        print(follower, follower.get_href)

def get_list_of_followers(driver, d_xpath=dialog_box_xpath, get_items=10):
    """
    Get a list of followers from instagram

    :param driver: driver instance
    :param d_xpath: dialog box xpath. By default it gets the global parameter but you can change it
    :param get_items: how many items do you wish to obtain? -1 = Try to get all of them, 0 = none (empty list).
                      Any positive number will be the number of followers to obtain
    :return: list of follower objects
    :raises IndexError: if get_items is below -1 or greater than the total number of followers
    :raises ValueError: if the total number of followers cannot be read from the page
    """
    # Imported locally so the function stands on its own. Locator-based lookups
    # work on Selenium 3 and 4, whereas find_element_by_* was removed in 4.3.
    from selenium.webdriver.common.by import By

    # getting the dialog content element
    dialog_box: WebElement = driver.find_element(By.XPATH, d_xpath)
    # Get the total number of followers from the counter's "title" attribute.
    raw_total = driver.find_element(By.XPATH, total_followers_xpath).get_attribute("title")
    if not raw_total:
        raise ValueError("the follower counter has no usable title attribute: {t!r}".format(t=raw_total))
    # The counter is a string with locale dependent thousands separators ("1.234", "1,234", "1 234");
    # drop them before converting. Anything else unexpected still makes int() raise ValueError.
    thousands_separators = str.maketrans("", "", "., \u00a0\u202f")
    total_followers = int(raw_total.translate(thousands_separators))
    # We are trying to get n elements (n=get_items variable). -1 means "all of them"; any other value has to
    # lie between 0 and the total number of followers.
    if get_items == -1:
        get_items = total_followers
    elif not -1 < get_items <= total_followers:
        raise IndexError("get_items must be -1 or between 0 and {total}, got {n}"
                         .format(total=total_followers, n=get_items))
    # Nothing requested (or nothing available): do not start generating, a count of 0 would never
    # reach the "all requested items collected" stop condition.
    if get_items == 0:
        return []
    # You can start from greater than 0 but that will give you a shorter list of followers than what you wish if
    # there is not enough followers available. i.e: total_followers = 10, get_items=10, start_from=1. This will only
    # return 9 followers not 10 even if get_items is 10.
    return generate_followers(0, get_items, total_followers, dialog_box, driver)

def generate_followers(start_pos, get_items, total_followers, dialog_box_element: WebElement, driver):
    """
    Generate followers based on the parameters

    Reads the list items of the dialog starting at start_pos, scrolls the last processed item into view to make
    the dialog load more entries, and repeats until enough followers were collected.

    :param start_pos: index (0-based) of where to start getting the followers from
    :param get_items: total items to get; 0 returns an empty list
    :param total_followers: total number of followers
    :param dialog_box_element: element whose direct <li> children are the follower entries
    :param driver: driver object (unused here, the scroll target is taken from dialog_box_element;
                   kept so existing callers keep working)
    :return: followers list; shorter than requested if the dialog stops loading new entries
    :raises IndexError: if start_pos is not between 0 and total_followers - 1
    """
    # Imported locally so the function stands on its own. Locator-based lookups
    # work on Selenium 3 and 4, whereas find_element_by_* was removed in 4.3.
    from selenium.webdriver.common.by import By

    if not -1 < start_pos < total_followers:
        raise IndexError("start_pos must be between 0 and {last}, got {pos}"
                         .format(last=total_followers - 1, pos=start_pos))
    followers = []
    if get_items == 0:
        # nothing requested; counting down from 0 would never hit the stop condition below
        return followers
    # A loop instead of one recursive call per scroll: long follower lists need many scrolls and would
    # otherwise run into the interpreter's recursion limit.
    while True:
        # Only the items from our current position onwards, so after a scroll we continue where we stopped
        # instead of starting from the beginning (XPath positions are 1-based, hence the +1).
        li_items = dialog_box_element.find_elements(
            By.XPATH, "./li[position() >= {pos}]".format(pos=start_pos + 1))
        if not li_items:
            # The dialog did not load anything new; hand back what we have instead of failing.
            print("no more list items were loaded, stopping with {count} followers".format(count=len(followers)))
            return followers
        print("Generating followers from position position: {pos} with {li_count} list items"
              .format(pos=(start_pos+1), li_count=len(li_items)))
        for li_item in li_items:
            anchors = li_item.find_elements(By.TAG_NAME, "a")
            # with a single anchor use it, otherwise the second anchor is the one used for name and href
            anchor = anchors[0] if len(anchors) == 1 else anchors[1]
            follower = Follower(anchor.text, anchor.get_attribute("href"))
            followers.append(follower)
            get_items -= 1
            start_pos += 1
            print("Follower {f} added to the list".format(f=follower))
            # stop once every follower was visited or the requested amount was collected
            if start_pos >= total_followers or get_items == 0:
                print("finished")
                return followers
        print("finished loop, executing scroll down...")
        # Reading this property scrolls the last processed item into view. Using the element we already hold
        # keeps the scroll inside the dialog box that was passed in, whatever its XPath is.
        li_items[-1].location_once_scrolled_into_view
        time.sleep(sleep_time)

最新更新