在div内滚动时跳出while循环

抓取谷歌地图评论。当滚动到评论末尾时，我该如何跳出while循环？我哪里搞错了？

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
import csv

# Scrape the reviews of every Google Maps place URL listed in urls.txt and
# append one CSV row per review to Google_reviews_for_<title>.csv.
driver = webdriver.Chrome(executable_path='C:/chromedriver.exe')
header_added = False
with open('urls.txt') as f:
    for url in f:
        driver.get(url) #line
        driver.maximize_window()
        # Page Title: keep only the text before the first "-"
        title = driver.title
        ftitle = title.split("-")
        title = ftitle[0]
        old_reviews = set()
        time.sleep(3)
        last_count = 0
        while True:
            # Scroll the reviews pane to its bottom, then collect the review elements
            scroll_div = WebDriverWait(driver, 8).until(EC.presence_of_element_located(
                (By.XPATH, '//div[@class="section-layout section-scrollbox cYB2Ge-oHo7ed cYB2Ge-ti6hGc"]')))
            driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', scroll_div)  # Scroll
            reviews = WebDriverWait(driver, 5).until(
                EC.presence_of_all_elements_located((By.XPATH, '//div[@class="section-layout"]//div[@aria-label]')))
            ans = set(reviews) - set(old_reviews)  # Remove duplicate reviews
            new_count = len(reviews)
            # Reviews Div
            for div in ans:
                driver.execute_script("arguments[0].scrollIntoView();", div)
                name = div.get_attribute('aria-label')
                photo_url = div.find_element_by_tag_name('a').get_attribute('href')
                rating = div.find_element_by_xpath('.//span[@class="ODSEW-ShBeI-H1e3jb"]').get_attribute(
                    'aria-label')
                try:
                    image_links = []
                    image_div = div.find_element_by_xpath('.//div[@class="ODSEW-ShBeI-Jz7rA"]')
                    images = image_div.find_elements_by_xpath('button')
                    for img in images:
                        # Drop the first 5 and last 2 characters of the CSS value
                        # (presumably the url("...") wrapper)
                        im_link = img.value_of_css_property('background-image')
                        im_link = im_link[5:]
                        im_link = im_link[:-2]
                        image_links.append(im_link)
                except:
                    image_links = ''
                    pass
                try:
                    # Click the button inside the review, if there is one, before reading the text
                    div.find_element_by_xpath('.//jsl//button').click()
                    time.sleep(1)
                except:
                    pass
                comment = div.find_element_by_xpath('.//span[@class="ODSEW-ShBeI-text"]').text
                dict1 = {'Title': title, "Name": name, "Profile": photo_url, "Rating": rating,
                         "Comment": comment, "Images Posted": image_links}
                with open(f'Google_reviews_for_{title}.csv', 'a+', encoding='utf-8-sig', newline='') as f:
                    w = csv.DictWriter(f, dict1.keys())
                    if not header_added:
                        w.writeheader()
                        header_added = True
                    w.writerow(dict1)
                old_reviews = reviews
                # NOTE: this check sits inside `for div in ans`, so `break` only leaves
                # the for loop; the enclosing `while True` keeps running.
                if last_count == new_count:
                    break
                last_count = new_count

URL：https://www.google.com/maps/place/El+TabanKo/@42.848117,-2.6741402,19z/data=!4m15!1m7!3m6!1s0xd4fc26e313bc85:0xb10d327c782f87fa!2sCorrer%C3%ADa+Kalea,+45,+01001+Gasteiz,+Araba!3b1!8m2!3d42.8480012!4d-2.6737255!3m6!1s0xd4fc26e26c5be1:0x5f5e0ee05fe08041!8m2!3d42.8481171!4d-2.6735931!9m1!1b1

尝试使用return而不是break
由于您的break位于for循环内部，它只会让您跳出for循环，而您仍然处在while True:的无限循环中
要使用return，您的代码应该在方法中
UPD
您没有在内部for循环内更新new_count！！！

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
import csv

from selenium.common.exceptions import NoSuchElementException, WebDriverException

# Scrape the reviews of every Google Maps place URL listed in urls.txt and
# append one CSV row per review to Google_reviews_for_<title>.csv.

SCROLL_BOX_XPATH = '//div[@class="section-layout section-scrollbox cYB2Ge-oHo7ed cYB2Ge-ti6hGc"]'
REVIEW_XPATH = '//div[@class="section-layout"]//div[@aria-label]'

driver = webdriver.Chrome(executable_path='C:/chromedriver.exe')
with open('urls.txt') as url_file:
    for url in url_file:
        url = url.strip()  # drop the trailing newline read from the file
        if not url:
            continue  # tolerate blank lines in urls.txt
        driver.get(url)
        driver.maximize_window()
        # Page Title: keep only the text before the first "-"
        title = driver.title.split("-")[0]
        time.sleep(3)

        # The CSV file name depends on the title, so the header must be
        # tracked per URL rather than once for the whole run.
        header_added = False
        seen_reviews = set()
        last_count = 0
        while True:
            try:
                scroll_div = WebDriverWait(driver, 8).until(
                    EC.presence_of_element_located((By.XPATH, SCROLL_BOX_XPATH)))
                driver.execute_script(
                    'arguments[0].scrollTop = arguments[0].scrollHeight', scroll_div)  # Scroll
                time.sleep(2)  # give newly requested reviews time to be added to the page
                reviews = WebDriverWait(driver, 5).until(
                    EC.presence_of_all_elements_located((By.XPATH, REVIEW_XPATH)))
            except TimeoutException:
                # The scroll box or the reviews never showed up; move on to the next URL.
                print(f'Timed out waiting for reviews on {url}')
                break

            new_count = len(reviews)
            if new_count == last_count:
                # Scrolling produced no additional reviews, so the end of the list
                # is reached. The check lives at the `while` level (not inside the
                # `for` below) so that `break` really leaves the while loop.
                break
            last_count = new_count

            # Reviews Div: only handle elements not written on an earlier pass
            for div in set(reviews) - seen_reviews:
                driver.execute_script("arguments[0].scrollIntoView();", div)
                name = div.get_attribute('aria-label')
                photo_url = div.find_element(By.TAG_NAME, 'a').get_attribute('href')
                rating = div.find_element(
                    By.XPATH, './/span[@class="ODSEW-ShBeI-H1e3jb"]').get_attribute('aria-label')

                try:
                    image_div = div.find_element(By.XPATH, './/div[@class="ODSEW-ShBeI-Jz7rA"]')
                except NoSuchElementException:
                    image_links = ''  # this review has no image container
                else:
                    # Drop the first 5 and last 2 characters of the CSS value
                    # (presumably the url("...") wrapper).
                    image_links = [
                        button.value_of_css_property('background-image')[5:-2]
                        for button in image_div.find_elements(By.XPATH, 'button')
                    ]

                try:
                    # Click the button inside the review, if there is one, before
                    # reading the text (presumably it expands a truncated comment).
                    div.find_element(By.XPATH, './/jsl//button').click()
                    time.sleep(1)
                except WebDriverException:
                    pass  # no such button, or it could not be clicked: read the text as is

                comment = div.find_element(By.XPATH, './/span[@class="ODSEW-ShBeI-text"]').text

                row = {'Title': title, "Name": name, "Profile": photo_url, "Rating": rating,
                       "Comment": comment, "Images Posted": image_links}
                with open(f'Google_reviews_for_{title}.csv', 'a+', encoding='utf-8-sig',
                          newline='') as csv_file:
                    writer = csv.DictWriter(csv_file, row.keys())
                    if not header_added:
                        writer.writeheader()
                        header_added = True
                    writer.writerow(row)

            seen_reviews.update(reviews)

我猜您想退出while循环：

# Exit check as it appears in the script above; `break` leaves only the innermost enclosing loop.
if last_count == new_count:
break

您现在正在做的是退出for循环，而不是while循环。为了退出while循环，您需要设置一个条件，如下所示：

# Keep scrolling the reviews pane until a pass finds no additional reviews.
exit_condition = False
last_count = 0
old_reviews = set()
while not exit_condition:
    scroll_div = WebDriverWait(driver, 8).until(EC.presence_of_element_located(
        (By.XPATH, '//div[@class="section-layout section-scrollbox cYB2Ge-oHo7ed cYB2Ge-ti6hGc"]')))
    driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', scroll_div)  # Scroll
    reviews = WebDriverWait(driver, 5).until(
        EC.presence_of_all_elements_located((By.XPATH, '//div[@class="section-layout"]//div[@aria-label]')))
    ans = set(reviews) - set(old_reviews)  # Remove duplicate reviews
    new_count = len(reviews)
    # Reviews Div
    for div in ans:
        driver.execute_script("arguments[0].scrollIntoView();", div)
        name = div.get_attribute('aria-label')
    old_reviews = reviews
    # Evaluate the exit test once per scroll pass, outside the for loop:
    # when no new reviews were loaded `ans` is empty and the for body never
    # runs, so a flag set inside it would never be set.
    if last_count == new_count:
        exit_condition = True
    last_count = new_count

相关内容

最新更新

热门标签：