Python selenium webdriver:基于日期检索google评论



我使用Selenium webdriver从google play商店提取应用程序的谷歌评论。下面是我的代码:

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
import pandas as pd
import datetime as dt
# Launch Chrome and open the "all reviews" page of the target app.
# NOTE(review): 'path' looks like a placeholder for the chromedriver
# executable location - replace it with a real path before running.
driver = webdriver.Chrome('path')
baseurl = 'https://play.google.com/store/apps/details?id=com.mapmyrun.android2&showAllReviews=true'
driver.get(baseurl)

# Scroll to the bottom of the page repeatedly so that more reviews are
# loaded. `counter` counts consecutive scrolls during which no
# "Show More" button was found; after 10 such scrolls the loop stops.
counter = 0
while True:
    driver.execute_script("window.scrollTo(0,document.body.scrollHeight)")
    time.sleep(2)  # give the page time to load the next batch
    counter = counter + 1

    # Look the button up once and click the element that was actually
    # found, instead of searching a second time with a different XPath.
    show_more_buttons = driver.find_elements(By.XPATH, "//span[text()='Show More']")
    if show_more_buttons:
        show_more_buttons[0].click()
        counter = 0

    if counter == 10:
        element = driver.find_elements(By.XPATH, "//div[@class='LXrl4c']")
        break

# The find_element(s)_by_xpath helpers are gone from current Selenium
# releases; the By-based locators below are the supported equivalent.
names = driver.find_elements(By.XPATH, "//div[@class='bAhLNe kx8XBd']//span[@class='X43Kjb']")
person_info = driver.find_elements(By.XPATH, "//div[@class='d15Mdf bAhLNe']")

# Walk over every review container and read its fields. All lookups are
# relative to the current container (XPath starting with ".//").
for count, person in enumerate(person_info):
    # Defaults for reviews that have no developer reply.
    review_response_person = ''
    response_date = ''
    response_text = ''
    full_text = ''

    name = person.find_element(By.XPATH, ".//span[@class='X43Kjb']").text
    review = person.find_element(By.XPATH, ".//div[@class='UD7Dzf']/span").text
    review_date = person.find_element(By.XPATH, ".//span[@class='p2TkOb']").text
    rating = person.find_element(By.XPATH, ".//div[@class='pf5lIe']/div").get_attribute('aria-label')
    useful = person.find_element(
        By.XPATH,
        ".//div[@class='XlMhZe']//div[@aria-label='Number of times this review was rated helpful']",
    ).text

    # NOTE(review): this span presumably holds the untruncated review
    # text; when it is non-trivial it replaces the visible text.
    reviewText = person.find_element(By.CSS_SELECTOR, "span[jsname='fbQN7e']")
    full_text = reviewText.get_attribute("innerHTML")
    if len(full_text) > 1:
        review = full_text

    # Developer reply, if any: look the container up once and reuse it.
    response_blocks = person.find_elements(By.XPATH, ".//div[@class='LVQB0b']")
    if response_blocks:
        review_response_person = person.find_element(
            By.XPATH, ".//div[@class='LVQB0b']//div/span[@class='X43Kjb']"
        ).text
        response_date = person.find_element(
            By.XPATH, ".//div[@class='LVQB0b']//div/span[@class='p2TkOb']"
        ).text
        # The container text also includes the responder name and the
        # date; strip both so only the reply body remains.
        response_text = response_blocks[0].text
        response_text = response_text.replace(review_response_person, '')
        response_text = response_text.replace(response_date, '')

问题是我想按日期范围提取评论。例如,我只想提取今天或明天发布的评论。我尝试在Selenium webdriver中寻找相应的方法,但没有找到。请问能否根据日期检索评论?如果可以,谁能指导我该怎么做?

您可以使用一个条件来获取数据,如:

# Collect the reviewer name and review text of every review posted on or
# after the cut-off date. The result is a flat list:
# [name, review, name, review, ...].
start_date = dt.datetime(year=2021, month=1, day=23)
today_data = []
for count, person in enumerate(person_info):
    # NOTE(review): "%d %B %Y" expects text such as "23 january 2021" and
    # "%B" is parsed according to the current LC_TIME locale - confirm
    # that this matches the date format shown on the page.
    review_date = person.find_element(By.XPATH, ".//span[@class='p2TkOb']").text.lower()
    review_date = dt.datetime.strptime(review_date, "%d %B %Y")

    # Parsed dates carry no time of day, so ">=" is needed to keep the
    # reviews posted on the cut-off day itself. Skipping early also
    # avoids the extra element lookups for reviews that are discarded.
    if review_date < start_date:
        continue

    name = person.find_element(By.XPATH, ".//span[@class='X43Kjb']").text
    review = person.find_element(By.XPATH, ".//div[@class='UD7Dzf']/span").text
    today_data.append(name)
    today_data.append(review)

这样,您就得到了(例如)从2021年1月23日到今天的用户名和评论。

如果页面上的日期是以特定国家/地区的语言显示的,请将区域设置(locale,而不是时区)更改为对应的国家/地区,以免在解析"%B"(月份名称)时出现错误:
import locale

# Switch only the date/time category to French so that month names
# handled by "%B" are interpreted in that language.
locale.setlocale(locale.LC_TIME, locale='fr_FR.UTF-8')

最新更新