我的 Python Selenium 项目会随机出现错误。该项目运行在树莓派上,用来从一个网站抓取数据:日期、温度、风速和降雨量。脚本有时能正常运行,但有时会抛出下面的错误:
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document (Session info: chrome=84.0.4147.141)
有没有什么包装器(wrapper)可以用来避免这类错误?如果你能分享一个解决方案,我将不胜感激。
完整代码:
from selenium import webdriver
import pandas as pd
from datetime import datetime
import time
import schedule
def job():
    """Scrape the forecast page with Selenium and write the values to Excel.

    Opens https://pent.no/60.19401,11.09936 in Chrome, clicks every date
    element, reads the text of the date, hour, temperature, wind-speed and
    precipitation elements into one-column DataFrames, concatenates them side
    by side and saves the result as ``weather<day-of-month>.xlsx``.
    """
    driver = webdriver.Chrome()
    driver.get("https://pent.no/60.19401,11.09936")
    date = driver.find_elements_by_class_name("forecast-day-view-date-bar__date")
    # Click every date element in order. `i` tracks the loop position, so
    # date[i] is the same element as `klikk`, and the break fires on the
    # last element.
    i = 0
    for klikk in date:
        date[i].click()
        i = i+1
        if i==len(date):
            break
    # NOTE: inside job() this local name hides the imported `time` module.
    time = driver.find_elements_by_class_name("forecast-hour-view-hour-label")
    # Number of blank "Date" cells written after the first date label.
    # NOTE(review): 193 looks like 1 + 8 * 24, i.e. it presumes nine dates
    # with 24 hour rows for each date after the first - confirm.
    count = len(time)-193
    # For each widget class, even positions feed the "... Yr" columns and odd
    # positions feed the "... Storm" columns.
    temp = driver.find_elements_by_class_name("forecast-hour-view-weather-widget__temperature")
    temp2 = temp[::2]
    temp3 = temp[1::2]
    wind = driver.find_elements_by_class_name("forecast-hour-view-weather-widget__wind-speed")
    wind2 = wind[::2]
    wind3 = wind[1::2]
    rainfall = driver.find_elements_by_class_name("forecast-hour-view-weather-widget__precipitation")
    rainfall2 = rainfall[::2]
    rainfall3 = rainfall[1::2]
    # One list per output column: a=Date, b=Time, c/d=Temp, e/f=Wind,
    # g/h=Rainfall.
    a = []
    b = []
    c = []
    d = []
    e = []
    f = []
    g = []
    h = []
    # Date column: each label is followed by blank cells so that it lines up
    # with the hour rows (`count` blanks after the first date, 23 after each
    # later one).
    # NOTE: `date` was located before the click() calls above; reading
    # `.text` on those references can raise StaleElementReferenceException
    # when the page has replaced the elements in the meantime.
    k = 0
    for datoer in date:
        print("Dato:"+datoer.text)
        a.append(datoer.text)
        if k==0:
            a.extend([""]*count)
        else:
            a.extend([""]*23)
        k = k+1
    df1 = pd.DataFrame(a, columns= ["Date"])
    #
    for tider in time:
        print("Tid:"+tider.text)
        b.append(tider.text)
    df2 = pd.DataFrame(b, columns= ["Time"])
    #
    for tempyr in temp2:
        print("Temp yr:"+tempyr.text)
        c.append(tempyr.text)
    df3 = pd.DataFrame(c, columns= ["Temp Yr"])
    for tempstorm in temp3:
        print("Temp storm:"+tempstorm.text)
        d.append(tempstorm.text)
    df4 = pd.DataFrame(d, columns= ["Temp Storm"])
    #
    for windyr in wind2:
        print("Vind yr:"+windyr.text)
        e.append(windyr.text)
    df5 = pd.DataFrame(e, columns= ["Wind Yr"])
    for windstorm in wind3:
        print("Vind storm:"+windstorm.text)
        f.append(windstorm.text)
    df6 = pd.DataFrame(f, columns= ["Wind Storm"])
    #
    # Rainfall: the raw text is stored; an empty text is printed as "0.0 mm"
    # and replaced in the DataFrame afterwards.
    for rainfallyr in rainfall2:
        g.append(rainfallyr.text)
        if rainfallyr.text == "":
            print("Rein yr:"+"0.0 mm")
        else:
            print("Rein yr:"+rainfallyr.text)
    df7 = pd.DataFrame(g, columns= ["Rainfall Yr"])
    # NOTE(review): `^s*$` matches an empty string or a run of the letter
    # "s"; `^\s*$` (whitespace only) was probably intended - confirm.
    df7 = df7.replace(r'^s*$', "0.0 mm", regex=True)
    for rainfallstorm in rainfall3:
        h.append(rainfallstorm.text)
        if rainfallstorm.text == "":
            print("Rein storm:"+"0.0 mm")
        else:
            print("Rein storm:"+rainfallstorm.text)
    df8 = pd.DataFrame(h, columns= ["Rainfall Storm"])
    # NOTE(review): same `^s*$` pattern as above - confirm the intent.
    df8 = df8.replace(r'^s*$', "0.0 mm", regex=True)
    #
    # Put the eight single-column frames side by side and save them; the
    # file name carries only the day of the month.
    tabell = [df1, df2, df3, df4, df5, df6, df7, df8]
    result = pd.concat(tabell, axis=1)
    result.to_excel("weather" + str(int(datetime.now().day)) + ".xlsx")
    # Reached only when nothing above raised.
    driver.quit()
# Register job() to run every day at 00:00.
schedule.every().day.at("00:00").do(job)
# Poll the scheduler forever, checking for due jobs once every 60 seconds.
while 1:
    schedule.run_pending()
    time.sleep(60)
编辑:
Traceback (most recent call last):
File "/home/pi/Desktop/Data Scraper/test.py", line 108, in <module>
schedule.run_pending()
File "/home/pi/.local/lib/python3.7/site-packages/schedule/__init__.py", line 563, in run_pending
default_scheduler.run_pending()
File "/home/pi/.local/lib/python3.7/site-packages/schedule/__init__.py", line 94, in run_pending
self._run_job(job)
File "/home/pi/.local/lib/python3.7/site-packages/schedule/__init__.py", line 147, in _run_job
ret = job.run()
File "/home/pi/.local/lib/python3.7/site-packages/schedule/__init__.py", line 466, in run
ret = self.job_func()
File "/home/pi/Desktop/Data Scraper/test.py", line 47, in job
a.append(datoer.text)
File "/usr/local/lib/python3.7/dist-packages/selenium/webdriver/remote/webelement.py", line 76, in text
return self._execute(Command.GET_ELEMENT_TEXT)['value']
File "/usr/local/lib/python3.7/dist-packages/selenium/webdriver/remote/webelement.py", line 633, in _execute
return self._parent.execute(command, params)
File "/usr/local/lib/python3.7/dist-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "/usr/local/lib/python3.7/dist-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
(Session info: chrome=84.0.4147.141)
Selenium 返回的是对浏览器内存中当前页面对象的引用。当你调用 `click()`,或者页面运行 JavaScript 代码添加元素时,浏览器内存中的对象会改变位置,原来的引用就会指向错误的元素——于是出现错误 `stale element reference: element is not attached to the page document`。
你必须在 `click()` 之后重新获取 `date`,
或者在 `click()` 之前先把 `date` 的内容取成文本:
date = driver.find_elements_by_class_name("forecast-day-view-date-bar__date")
# Read every date label now, while the element references are still valid
date_text = [element.text for element in date]
# Only afterwards click each date element
for element in date:
    element.click()
之后应该使用这个文本列表:
for position, label in enumerate(date_text):
    print("Dato:", label)
    # The label is followed by blank cells: `count` of them after the first
    # date, 23 after every later date.
    padding = count if position == 0 else 23
    a.append(label)
    a.extend([""] * padding)
编辑:
这是我的版本,其中还做了一些其他改动——例如减少了 `DataFrame` 的使用。
我尽量把各段代码写得非常相似,以便之后为它们提取一个函数,让代码更短。
在 Linux 上,我会使用系统服务 `cron`,而不是 Python 模块 `schedule`。
当代码由某个调度程序或 `cron` 运行时,不需要显示文本,所以我会用一个变量来关闭输出:`if display: print(...)`。不显示文本时,运行应该会更快。
from selenium import webdriver
import pandas as pd
from datetime import datetime
import time
import schedule
def job():
    """Scrape the hourly forecast from pent.no and save it as an Excel file.

    The date labels are read as plain text *before* any ``click()``, because
    clicking changes the page and can invalidate element references obtained
    earlier (``StaleElementReferenceException``).  Every column is collected
    as a list of strings in one dictionary, which becomes a single
    ``DataFrame`` written to ``weather<day-of-month>.xlsx``.

    The browser is always closed, even when scraping or saving fails.
    """
    driver = webdriver.Chrome()
    try:
        driver.get("https://pent.no/60.19401,11.09936")

        date = driver.find_elements_by_class_name("forecast-day-view-date-bar__date")
        # get all dates as text (before `click()`)
        date_text = [item.text for item in date]
        for item in date:
            item.click()

        # Named `hours` rather than `time` so the imported `time` module is
        # not hidden inside this function.
        hours = driver.find_elements_by_class_name("forecast-hour-view-hour-label")
        # Blank "Date" cells below the first label: all hour rows, minus the
        # row holding the first label, minus 24 rows for every later day.
        # With nine days this equals the former hard-coded `len(...) - 193`.
        # Assumes every day after the first lists 24 hour rows.
        count = len(hours) - 1 - 24 * (len(date_text) - 1)

        # Even positions feed the "... Yr" columns, odd positions the
        # "... Storm" columns.
        temp = driver.find_elements_by_class_name("forecast-hour-view-weather-widget__temperature")
        temp2 = temp[::2]
        temp3 = temp[1::2]
        wind = driver.find_elements_by_class_name("forecast-hour-view-weather-widget__wind-speed")
        wind2 = wind[::2]
        wind3 = wind[1::2]
        rainfall = driver.find_elements_by_class_name("forecast-hour-view-weather-widget__precipitation")
        rainfall2 = rainfall[::2]
        rainfall3 = rainfall[1::2]

        # --- dictionary for all columns ---
        all_columns = dict()

        # --- Date ---
        rows = []
        for k, text in enumerate(date_text):
            print("Dato:", text)
            rows.append(text)
            if k == 0:
                rows.extend([""]*count)
            else:
                rows.extend([""]*23)
        all_columns["Date"] = rows

        # --- Time ---
        rows = []
        for item in hours:
            text = item.text.strip()
            print("Tid:", text)
            rows.append(text)
        all_columns["Time"] = rows

        # --- Temp Yr ---
        rows = []
        for item in temp2:
            text = item.text.strip()
            print("Temp yr:", text)
            rows.append(text)
        all_columns["Temp Yr"] = rows

        # --- Temp Storm ---
        rows = []
        for item in temp3:
            text = item.text.strip()
            print("Temp storm:", text)
            rows.append(text)
        all_columns["Temp Storm"] = rows

        # --- Vind Yr ---
        rows = []
        for item in wind2:
            text = item.text.strip()
            print("Vind yr:", text)
            rows.append(text)
        all_columns["Wind Yr"] = rows

        # --- Vind Storm ---
        rows = []
        for item in wind3:
            text = item.text.strip()
            print("Vind storm:", text)
            rows.append(text)
        all_columns["Wind Storm"] = rows

        # --- Rainfall Yr ---
        rows = []
        for item in rainfall2:
            text = item.text.strip()
            if text == "":
                text = "0.0 mm"
            print("Rein yr:", text)
            rows.append(text)
        all_columns["Rainfall Yr"] = rows
        # now I don't need to replace() empty string

        # --- Rainfall Storm ---
        rows = []
        for item in rainfall3:
            text = item.text.strip()
            if text == "":
                text = "0.0 mm"
            print("Rein storm:", text)
            rows.append(text)
        all_columns["Rainfall Storm"] = rows
        # now I don't need to replace() empty string

        # --- ---
        result = pd.DataFrame(all_columns)
        result.to_excel("weather{}.xlsx".format(datetime.now().day))
    finally:
        # Close Chrome even on failure so repeated runs do not pile up
        # orphaned browser processes.
        driver.quit()
# The scheduler loop is left disabled here; job() is called once directly.
#schedule.every().day.at("00:00").do(job)
#while True: # `True` instead of `1` is more readable, besides Python will run `while bool(1):`
#    schedule.run_pending()
#    time.sleep(60)
job()
编辑:
使用函数的版本:
def get_rows(items, description=None, replace=None):
    """Return the stripped ``.text`` of every element in ``items`` as a list.

    items       -- iterable of objects that have a ``.text`` string attribute
    description -- optional label printed in front of every value; printing
                   happens only when the module-level ``DISPLAY`` flag is true
    replace     -- optional substitute stored instead of an empty text
    """
    collected = []
    for element in items:
        value = element.text.strip()
        # Swap in the substitute only for values that are empty after strip()
        if value == "" and replace:
            value = replace
        collected.append(value)
        if DISPLAY and description:
            print(description, value)
    return collected
这样代码就更短了:
from selenium import webdriver
import pandas as pd
from datetime import datetime
import time
import schedule
# --- constants --- (PEP8: UPPER_CASE_NAMES)
# When true, the scraped values are printed while they are collected.
DISPLAY = True
# --- classes --- (PEP8: CamelCaseNames)
# empty
# --- functions --- (PEP8: lower_case_names)
def get_rows(items, description=None, replace=None):
    """Return the stripped ``.text`` of every element in ``items`` as a list.

    items       -- iterable of objects that have a ``.text`` string attribute
    description -- optional label printed in front of every value; printing
                   happens only when the module-level ``DISPLAY`` flag is true
    replace     -- optional substitute stored instead of an empty text
    """
    collected = []
    for element in items:
        value = element.text.strip()
        # Swap in the substitute only for values that are empty after strip()
        if value == "" and replace:
            value = replace
        collected.append(value)
        if DISPLAY and description:
            print(description, value)
    return collected
def job():
    """Scrape the hourly forecast from pent.no and save it as an Excel file.

    The date labels are read as plain text *before* any ``click()``, because
    clicking changes the page and can invalidate element references obtained
    earlier (``StaleElementReferenceException``).  All other columns are
    built with ``get_rows()``; values are printed only when ``DISPLAY`` is
    true.  The result is written to ``weather--<day-of-month>.xlsx``.

    The browser is always closed, even when scraping or saving fails.
    """
    driver = webdriver.Chrome()
    try:
        driver.get("https://pent.no/60.19401,11.09936")

        date = driver.find_elements_by_class_name("forecast-day-view-date-bar__date")
        # get all dates as text (before `click()`)
        date_text = [item.text for item in date]
        for item in date:
            item.click()

        # Named `hours` rather than `time` so the imported `time` module is
        # not hidden inside this function.
        hours = driver.find_elements_by_class_name("forecast-hour-view-hour-label")
        # Blank "Date" cells below the first label: all hour rows, minus the
        # row holding the first label, minus 24 rows for every later day.
        # With nine days this equals the former hard-coded `len(...) - 193`.
        # Assumes every day after the first lists 24 hour rows.
        count = len(hours) - 1 - 24 * (len(date_text) - 1)

        # Even positions feed the "... Yr" columns, odd positions the
        # "... Storm" columns.
        temp = driver.find_elements_by_class_name("forecast-hour-view-weather-widget__temperature")
        temp2 = temp[::2]
        temp3 = temp[1::2]
        wind = driver.find_elements_by_class_name("forecast-hour-view-weather-widget__wind-speed")
        wind2 = wind[::2]
        wind3 = wind[1::2]
        rainfall = driver.find_elements_by_class_name("forecast-hour-view-weather-widget__precipitation")
        rainfall2 = rainfall[::2]
        rainfall3 = rainfall[1::2]

        # - Date -
        rows_date = []
        for k, text in enumerate(date_text):
            if DISPLAY:
                print("Dato:", text)
            rows_date.append(text)
            if k == 0:
                rows_date.extend([""]*count)
            else:
                rows_date.extend([""]*23)

        # - other -
        result = pd.DataFrame({
            "Date": rows_date,
            "Time": get_rows(hours, "Tid:"),
            "Temp Yr": get_rows(temp2, "Temp yr:"),
            "Temp Storm": get_rows(temp3, "Temp storm:"),
            "Wind Yr": get_rows(wind2, "Vind yr:"),
            "Wind Storm": get_rows(wind3, "Vind storm:"),
            "Rainfall Yr": get_rows(rainfall2, "Rein yr:", "0.0 mm"),
            "Rainfall Storm": get_rows(rainfall3, "Rein storm:", "0.0 mm"),
        })

        # - save -
        result.to_excel("weather--{}.xlsx".format(datetime.now().day))
    finally:
        # Close Chrome even on failure so repeated runs do not pile up
        # orphaned browser processes.
        driver.quit()
# --- main --- (PEP8: lower_case_names)
# The scheduler loop is left disabled here; job() is called once directly.
#schedule.every().day.at("00:00").do(job)
#while True: # `True` instead of `1` is more readable, besides Python will run `while bool(1):`
#    schedule.run_pending()
#    time.sleep(60)
job()
PEP 8——Python代码风格指南