如何从<strong>标签中抓取文本?



如何从该元素中提取文本"01 Jul 00:00"?我正在使用Python的requests库和BeautifulSoup。

<div class="plc-product-date___1zgO_ gl-label gl-label--m gl-label--condensed"><strong>Thursday 01 Jul 00:00</strong></div>

编辑:当前代码如下:

import requests
from bs4 import BeautifulSoup
# Browser-like User-Agent header sent with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'}
# Release-dates listing page to scrape.
url = "https://www.adidas.com.sg/release-dates"
# Fetch the page once at import time, with a 15-second timeout.
productsource = requests.get(url,headers=headers,timeout=15)
# Parse the response body with the lxml parser; jdMonitor() reads this object.
productinfo = BeautifulSoup(productsource.text, "lxml")
def jdMonitor():
# Walk the product list found in the module-level `productinfo` soup and
# print the release date extracted for each entry.
#webscraper
# NOTE(review): find() is used here rather than find_all(), so the loop
# presumably iterates the children of one container element - confirm intent.
for item in productinfo.find('div', {'class': 'plc-product-list___1Lg2h'}):
pname = item.find("div", class_='plc-product-name___2cofu gl-product-card__name gl-label gl-label--m gl-label--condensed').get_text(strip=True)                #product title
price = item.find("div", class_="gl-price-item notranslate gl-label--m").get_text(strip=True)                      #product price
imagelink = item.find('img')['src']                           #product image link
plink = f"https://www.adidas.com.sg/{item.a['href']}"                                         #to get product page link
# NOTE(review): the trailing comma on the next line makes pdate a one-element
# tuple; the lookup also fails with a NoneType error when the date <div> is
# absent from an item.
pdate = item.find("div",class_='plc-product-date___1zgO_ gl-label gl-label--m gl-label--condensed').get_text(strip=True),
print(pdate)
jdMonitor()
from bs4 import BeautifulSoup

# Minimal demo: parse the sample markup and print the text of its <strong> tag.
markup = '<div class="plc-product-date___1zgO_ gl-label gl-label--m gl-label--condensed"><strong>Thursday 01 Jul 00:00</strong></div>'
document = BeautifulSoup(markup, 'lxml')
strong_tag = document.find("strong")
print(strong_tag.get_text())

输出:

Thursday 01 Jul 00:00

看到你的代码,我试图纠正你的错误:

import requests
from bs4 import BeautifulSoup
# Browser-like User-Agent header sent with the request; the value is split
# across two adjacent string literals.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
'84.0.4147.105 Safari/537.36'}
# Release-dates listing page to scrape.
url = "https://www.adidas.com.sg/release-dates"
# Fetch the page once at import time, with a 15-second timeout.
productsource = requests.get(url, headers=headers, timeout=15)
# Parse the response body with the lxml parser; jdMonitor() reads this object.
productinfo = BeautifulSoup(productsource.text, "lxml")

def jdMonitor():
    """Print name, price, image link, product link and release date per product.

    Reads the module-level ``productinfo`` soup, visits every
    ``div.gl-product-card`` element and prints one formatted record for each.
    A card without a release-date ``<div>`` (or without a ``<strong>`` inside
    it) is printed with an empty date instead of raising.
    """
    # webscraper
    all_items = productinfo.find_all(name="div", class_="gl-product-card")
    for item in all_items:
        pname = item.find(name="div", class_="plc-product-name___2cofu").text
        pprice = item.find(name="div", class_="gl-price-item").text
        imagelink = item.find(name="img")['src']
        # NOTE: the href is appended after the trailing "/" unchanged, so a
        # href that itself starts with "/" yields a double slash in the URL.
        plink = f"https://www.adidas.com.sg/{item.a['href']}"
        try:
            # Use find(), not find_all(): find_all() returns a ResultSet, which
            # has no .strong attribute, so the date would always be empty.
            pdate = item.find(name="div", class_="plc-product-date___1zgO_").strong.text
        except AttributeError:
            # find() returned None (no date <div>) or the <div> has no <strong>.
            pdate = ""
        print(f"""
Product Name: {pname}
Product Price: {pprice}
Image Link: {imagelink}
Product Link: {plink}
Product Date: {pdate}
""")

jdMonitor()

这应该给出如下输出(仅显示1项):

Product Name: X Speedflow Messi.1 Firm Ground Boots
Product Price: $400.00
Image Link: https://assets.adidas.com/images/w_512,h_512,f_auto,q_auto:sensitive,fl_lossy,c_fill,g_auto/fcb0489ff2f642c5ad86ad35009947ec_9366/X_Speedflow_Messi.1_Firm_Ground_Boots_Silver_GX0216_01_standard.jpg
Product Link: https://www.adidas.com.sg//x-speedflow-messi.1-firm-ground-boots/GX0216.html
Product Date: Wednesday 30 Jun 21:30

您试图使用item.find("div",class_='plc-product-date___1zgO_ gl-label gl-label--m gl-label--condensed').get_text(strip=True)提取<strong>标记内的文本;但"item"中不包含class_='plc-product-date___1zgO_ gl-label gl-label--m gl-label--condensed'的元素,此时find()返回None,所以您得到了NoneType错误。
我使用了一个简单的try/except块来克服错误,并对代码进行了一些小的更改。

获取包含类plc-product-date___1zgO_的元素内的所有文本:

# Minimal demo: print the stripped text of every <div> carrying the date class.
markup = '<div class="plc-product-date___1zgO_ gl-label gl-label--m gl-label--condensed"><strong>Thursday 01 Jul 00:00</strong></div>'
parsed = BeautifulSoup(markup, "html.parser")
date_divs = parsed.find_all("div", class_="plc-product-date___1zgO_")
for date_div in date_divs:
    print(date_div.get_text(strip=True))
...
>>> Thursday 01 Jul 00:00

最新更新