我试图刮这个网站的产品列表,我想在这里实现的是抓住每个产品的所有信息,例如:product_name,价格和它们的变体信息,以及像10公斤,20公斤,3公斤和他们的价格相应。我有搜索的html,他们不提供所有的信息,我正在寻找,但在脚本标签下,他们有一个json驻留这可能是有用的。下面是json脚本标签:
</script><script type="text/x-magento-init">
{
"[data-role=swatch-option-111105]": {
"Magento_Swatches/js/swatch-renderer": {
"selectorProduct": ".product-item-details",
"onlySwatches": true,
"enableControlLabel": false,
"numberToShow": 16,
"jsonConfig": {"attributes":{"1299":{"id":"1299","code":"size","label":"Size","options":[{"id":"6651","label":"10kg","products":["116724"]},{"id":"6780","label":"20kg","products":["108981"]},{"id":"6234","label":"3kg","products":["108987"]}],"position":"0"}},"template":"$<%- data.price %>","currencyFormat":"$%s","optionPrices":{"108987":{"baseOldPrice":{"amount":49.990908090909},"oldPrice":{"amount":54.99},"basePrice":{"amount":42.718180818182},"finalPrice":{"amount":46.99},"tierPrices":[],"msrpPrice":{"amount":0}},"108981":{"baseOldPrice":{"amount":172.71818081818},"oldPrice":{"amount":189.99},"basePrice":{"amount":128.17272627273},"finalPrice":{"amount":140.99},"tierPrices":[],"msrpPrice":{"amount":0}},"116724":{"baseOldPrice":{"amount":120.899999},"oldPrice":{"amount":132.99},"basePrice":{"amount":102.71818081818},"finalPrice":{"amount":112.99},"tierPrices":[],"msrpPrice":{"amount":0}}},"priceFormat":{"pattern":"$%s","precision":2,"requiredPrecision":2,"decimalSymbol":".","groupSymbol":",","groupLength":3,"integerRequired":false},"prices":{"baseOldPrice":{"amount":49.990908090909},"oldPrice":{"amount":54.99},"basePrice":{"amount":172.71818081818},"finalPrice":{"amount":189.99}},"productId":"111105","chooseText":"Choose an Option...","images":[],"index":{"108987":{"1299":"6234"},"108981":{"1299":"6780"},"116724":{"1299":"6651"}},"preSelectedGallery":[],"channel":"website","salesChannelCode":"base","sku":{"108987":"127956","108981":"127960","116724":"501600"},"labels":{"108987":{"sales_flag_label":"Great low price"},"108981":{"sales_flag_label":"Great low price"},"116724":{"sales_flag_label":"Great low price"}},"hasEndDate":{"108987":false,"108981":false,"116724":false},"dynamic":{"name":{"108987":{"value":"Black Hawk Fish And Potato Adult Dog Food - 3kg"},"108981":{"value":"Black Hawk Fish And Potato Adult Dog Food - 20kg"},"116724":{"value":"Black Hawk Fish & Potato Dog Food 10kg"}},"sku":{"108987":{"value":"127956"},"108981":{"value":"127960"},"116724":{"value":"501600"}},"gtin":{"108987":{"value":""},"108981":{"value":""},"116724":{"value":""}},"marketing_offer_short":{"108987":{"value":""},"108981":{"value":""},"116724":{"value":""}},"advice_care":{"108987":{"value":""},"108981":{"value":""},"116724":{"value":""}},"product_category":{"108987":{"value":"Dry Food"},"108981":{"value":"Dry Food"},"116724":{"value":"Dry Food"}},"benefits":{"108987":{"value":""},"108981":{"value":""},"116724":{"value":""}},"feeding_guide":{"108987":{"value":""},"108981":{"value":""},"116724":{"value":""}},"health_condition_dietary":{"108987":{"value":""},"108981":{"value":""},"116724":{"value":""}},"brand_filter":{"108987":{"value":"Black Hawk"},"108981":{"value":"Black Hawk"},"116724":{"value":"Black Hawk"}},"ingredients":{"108987":{"value":""},"108981":{"value":""},"116724":{"value":""}},"activity_level":{"108987":{"value":""},"108981":{"value":""},"116724":{"value":""}},"size":{"108987":{"value":"3kg"},"108981":{"value":"20kg"},"116724":{"value":"10kg"}},"food_type":{"108987":{"value":""},"108981":{"value":""},"116724":{"value":""}},"health_benefits":{"108987":{"value":"Total Wellbeing"},"108981":{"value":"Total Wellbeing"},"116724":{"value":"Total Wellbeing"}},"life_stage":{"108987":{"value":"Adult"},"108981":{"value":"Adult"},"116724":{"value":"Adult"}},"flavour":{"108987":{"value":"Fish"},"108981":{"value":"Fish"},"116724":{"value":"Fish"}},"nutritional_info":{"108987":{"value":""},"108981":{"value":""},"116724":{"value":""}},"breed":{"108987":{"value":""},"108981":{"value":""},"116724":{"value":""}},"nutritional_info_table":{"108987":{"value":""},"108981":{"value":""},"116724":{"value":""}},"australia_made":{"108987":{"value":"No"},"108981":{"value":"No"},"116724":{"value":"No"}},"nutrition_grade":{"108987":{"value":"Premium"},"108981":{"value":"Premium"},"116724":{"value":"Premium"}},"lifestyle":{"108987":{"value":""},"108981":{"value":""},"116724":{"value":""}},"weight_control":{"108987":{"value":"No"},"108981":{"value":"No"},"116724":{"value":"No"}},"frequent_feeder_price":{"108987":{"value":""},"108981":{"value":""},"116724":{"value":""}},"size_swatches":{"108987":{"value":""},"108981":{"value":""},"116724":{"value":""}}}},
"jsonSwatchConfig": {"1299":{"6651":{"type":"0","value":null,"label":"10kg"},"6780":{"type":"0","value":null,"label":"20kg"},"6234":{"type":"0","value":null,"label":"3kg"},"additional_data":"{"update_product_preview_image":"1","use_product_image_for_swatch":0,"text_swatch_as_multiple_select":"1","swatch_input_type":"text"}"}},
"mediaCallback": "httpsu003Au002Fu002Fwww.petbarn.com.auu002Fswatchesu002Fajaxu002Fmediau002F",
"jsonSwatchImageSizeConfig": {"swatchImage":{"width":30,"height":20},"swatchThumb":{"height":90,"width":110}},
"showTooltip": 1 }
}
}
</script>
我已经设法解析该脚本标签,并通过json.loads()将其转换为python字典,但无法找出提取信息并将其导出到csv的最佳方法。下面是我到目前为止的代码:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import json
from datetime import datetime
from datetime import date
now = datetime.now()
today = date.today()
class PetBarnProdScraper:
all_info = []
def fetch(self, url):
print(f"HTTP GET request to URL: {url}", end="")
res = requests.get(url)
print(f" | Status Code: {res.status_code}")
return res
def parse(self, response):
soup = BeautifulSoup(response.text, "html.parser")
product_urls = [a.get("href") for a in soup.select("a.product-item-link")]
product_ids = [
pid.get("id").split("-")[-1] for pid in soup.select("div.product-item-info")
]
titles = [
a.text.replace("n", "").strip() for a in soup.select("a.product-item-link")
]
old_price = [
p.select_one("span.price").text for p in soup.select("span.old-price")
]
ratings = [r.get("title") for r in soup.select("div.rating-result")]
no_of_reviews = [review.text for review in soup.select("a.action.view")]
data = (
soup.select('script[type="text/x-magento-init"]')[3]
.text.replace("n", "")
.strip()
)
data_json = json.loads(data)
data_j = json.loads(
data_json["*"]["Overdose_AdobeAnalytics/js/view/datalayer"]["datalayer"][0]
)
for idx in range(len(titles)):
try:
ratings_count = ratings[idx]
reviews_count = no_of_reviews[idx]
last_price = old_price[idx]
except:
ratings_count = "N/A"
reviews_count = "N/A"
last_price = "N/A"
d = {
"Scraped_Date": now.strftime("%m/%d/%Y, %H:%M:%S").split(",")[0],
"Scraped_Time": now.strftime("%m/%d/%Y, %H:%M:%S").split(",")[1],
"product_name": titles[idx],
"price": data_j["PLP"]["products"][idx]["productPrice"],
"old_price": last_price,
"ratings": ratings_count,
"number_of_reviews": reviews_count,
"productSKU": data_j["PLP"]["products"][idx]["productSKU"],
"productSize": data_j["PLP"]["products"][idx]["productSize"],
"priceWithoutTax": data_j["PLP"]["products"][idx][
"productPriceLessTax"
],
"lifeStage": data_j["PLP"]["products"][idx]["lifeStage"],
}
for prod_id in product_ids:
details = soup.select_one(
f"script:-soup-contains('[data-role=swatch-option-{prod_id}]')"
)
labels = []
if details:
json_details = json.loads(details.text.replace("n", "").strip())
json_endpoint = json_details[f"[data-role=swatch-option-{prod_id}]"]
label_option_list = json_endpoint[
"Magento_Swatches/js/swatch-renderer"
]["jsonConfig"]["attributes"]["1299"]["options"]
for lab in label_option_list:
labels.append(lab["label"])
d["label_options"] = labels
print(d)
self.all_info.append(d)
def to_csv(self):
df = pd.DataFrame(self.all_info).fillna("")
df.to_csv(f"{today}_petbarn.csv", index=False)
print('Stored results to "petbarn.csv"')
def run(self):
for i in range(1, 2): # total_number of pages
url = f"https://www.petbarn.com.au/dogs/dog-food/dry-dog-food?p={i}"
response = self.fetch(url)
self.parse(response)
self.to_csv()
if __name__ == "__main__":
scraper = PetBarnProdScraper()
scraper.run()
每次我运行该代码时,label_options列总是具有相同的值,这是我猜测的最后一个。下面是我得到的输出:
Scraped_Date,Scraped_Time,product_name,price,old_price,ratings,number_of_reviews,productSKU,productSize,priceWithoutTax,lifeStage,label_options
11/21/2022, 00:31:47,Black Hawk Fish And Potato Adult Dog Food,189.99,N/A,N/A,N/A,black-hawk-fish-&-potato-adult-dog-food,,172.72,Adult,"['10kg', '20kg', '3kg']"
11/21/2022, 00:31:47,SavourLife Ancient Grains Lean Chicken Adult Dog Food,159.99,N/A,N/A,N/A,savourlife-ancient-grains-lean-chicken-adult-dog-food,,145.45,Adult,"['10kg', '20kg', '3kg']"
预期输出:
Scraped_Date,Scraped_Time,product_name,price,old_price,ratings,number_of_reviews,productSKU,productSize,priceWithoutTax,lifeStage,label_options
11/21/2022, 00:31:47,Black Hawk Fish And Potato Adult Dog Food,189.99,N/A,N/A,N/A,black-hawk-fish-&-potato-adult-dog-food,,172.72,Adult,10kg
11/21/2022, 00:31:47,Black Hawk Fish And Potato Adult Dog Food,189.99,N/A,N/A,N/A,black-hawk-fish-&-potato-adult-dog-food,,172.72,Adult,20kg
11/21/2022, 00:31:47,Black Hawk Fish And Potato Adult Dog Food,189.99,N/A,N/A,N/A,black-hawk-fish-&-potato-adult-dog-food,,172.72,Adult,3kg
11/21/2022, 00:31:47,SavourLife Ancient Grains Lean Chicken Adult Dog Food,159.99,N/A,N/A,N/A,savourlife-ancient-grains-lean-chicken-adult-dog-food,,145.45,Adult,3kg
11/21/2022, 00:31:47,SavourLife Ancient Grains Lean Chicken Adult Dog Food,159.99,N/A,N/A,N/A,savourlife-ancient-grains-lean-chicken-adult-dog-food,,145.45,Adult,20kg
谁能帮我找出获得预期输出的最佳方法?谢谢!更新:根据Driftr95的答案更新了代码:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import json
import csv
from datetime import datetime
from datetime import date
now = datetime.now()
today = date.today()
class PetBarnProdScraper:
all_info = []
def fetch(self, url):
print(f"HTTP GET request to URL: {url}", end="")
res = requests.get(url)
print(f" | Status Code: {res.status_code}")
return res
def parse(self, response):
soup = BeautifulSoup(response.text, "html.parser")
product_urls = [a.get("href") for a in soup.select("a.product-item-link")]
product_ids = [
pid.get("id").split("-")[-1] for pid in soup.select("div.product-item-info")
]
titles = [
a.text.replace("n", "").strip() for a in soup.select("a.product-item-link")
]
old_price = [
p.select_one("span.price").text for p in soup.select("span.old-price")
]
ratings = [r.get("title") for r in soup.select("div.rating-result")]
no_of_reviews = [review.text for review in soup.select("a.action.view")]
data = (
soup.select('script[type="text/x-magento-init"]')[3]
.text.replace("n", "")
.strip()
)
data_json = json.loads(data)
data_j = json.loads(
data_json["*"]["Overdose_AdobeAnalytics/js/view/datalayer"]["datalayer"][0]
)
for idx in range(len(titles)):
try:
ratings_count = ratings[idx]
reviews_count = no_of_reviews[idx]
last_price = old_price[idx]
except:
ratings_count = "N/A"
reviews_count = "N/A"
last_price = "N/A"
d = {
"Scraped_Date": now.strftime("%m/%d/%Y, %H:%M:%S").split(",")[0],
"Scraped_Time": now.strftime("%m/%d/%Y, %H:%M:%S").split(",")[1],
"product_name": titles[idx],
"price": data_j["PLP"]["products"][idx]["productPrice"],
"old_price": last_price,
"ratings": ratings_count,
"number_of_reviews": reviews_count,
"productSKU": data_j["PLP"]["products"][idx]["productSKU"],
"productSize": data_j["PLP"]["products"][idx]["productSize"],
"priceWithoutTax": data_j["PLP"]["products"][idx][
"productPriceLessTax"
],
"lifeStage": data_j["PLP"]["products"][idx]["lifeStage"],
}
for prod_id in product_ids:
details = soup.select_one(
f"script:-soup-contains('[data-role=swatch-option-{prod_id}]')"
)
if details:
json_details = json.loads(details.text.replace("n", "").strip())
dataJC = json_details[f"[data-role=swatch-option-{prod_id}]"][
"Magento_Swatches/js/swatch-renderer"
]["jsonConfig"]
productId = dataJC["productId"]
jcInfs = [
{
"productId": productId,
"optionKey": k,
"sku": "?",
"index": v["1299"] if "1299" in v else None,
}
for k, v in dataJC["index"].items()
]
orInfs = [
("optionPrices", "amount", "reverseNest"),
("dynamic", "value", "nest1"),
("labels", "", "reverseNest"),
("hasEndDate", "", "noNesting"),
]
relevInfs = []
for kk, vk, nt in orInfs:
if kk not in dataJC:
continue
if nt == "noNesting":
relevInfs += [(kk, vk, dataJC[kk])]
continue
if nt == "nest1":
relevInfs += [(kk, vk, vd) for kk, vd in dataJC[kk].items()]
continue
if nt != "reverseNest":
## can put a default action here
continue
## nt == 'reverseNest'
orInf = {}
for pk, po in dataJC[kk].items():
for kpo, vpo in po.items():
if kpo not in orInf:
orInf[kpo] = {}
orInf[kpo][pk] = vpo
relevInfs += [(kk, vk, vi) for kk, vi in orInf.items()]
for i, j in enumerate(jcInfs):
for kk, vk, vd in relevInfs:
if j["optionKey"] not in vd:
continue
relevInf = vd[j["optionKey"]]
if type(relevInf) != dict:
j[kk] = relevInf
elif vk in relevInf and relevInf[vk]:
j[kk] = relevInf[vk]
# combine with main variation
jcInfs[i] = {
k: v
for k, v in (
list(d.items())
+ [(jk, jv) for jk, jv in j.items() if jk not in d]
)
}
for j in jcInfs:
self.all_info.append(j)
self.all_info.append(d)
def to_csv(self):
df = pd.DataFrame(self.all_info).fillna("")
df.to_csv(f"{today}_petbarn.csv", index=False)
print('Stored results to "petbarn.csv"')
def run(self):
for i in range(1, 2): # total_number of pages
url = f"https://www.petbarn.com.au/dogs/dog-food/dry-dog-food?p={i}"
response = self.fetch(url)
self.parse(response)
self.to_csv()
if __name__ == "__main__":
scraper = PetBarnProdScraper()
scraper.run()
代码和输出匹配期望,但没有配对,相反,product_name行不断重复自己,另一方面,name列只有3行,所以最好让这些行彼此完整。下面是输出:
Scraped_Date,Scraped_Time,product_name,price,old_price,ratings,number_of_reviews,productSKU,productSize,priceWithoutTax,lifeStage,productId,optionKey,sku,index,baseOldPrice,oldPrice,basePrice,finalPrice,tierPrices,name,product_category,brand_filter,size,health_benefits,life_stage,flavour,australia_made,nutrition_grade,weight_control,on_sale,hasEndDate,breed,new,marketing_offer_short,sales_flag_label,activity_level
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,111105,108987,127956,6234,49.990908090909,54.99,42.718180818182,46.99,[],Black Hawk Fish And Potato Adult Dog Food - 3kg,Dry Food,Black Hawk,3kg,Total Wellbeing,Adult,Fish,No,Premium,No,Sale,True,,,,,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,111105,108981,127960,6780,172.71818081818,189.99,128.17272627273,140.99,[],Black Hawk Fish And Potato Adult Dog Food - 20kg,Dry Food,Black Hawk,20kg,Total Wellbeing,Adult,Fish,No,Premium,No,Sale,True,,,,,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,111105,116724,501600,6651,120.899999,132.99,102.71818081818,112.99,[],Black Hawk Fish & Potato Dog Food 10kg,Dry Food,Black Hawk,10kg,Total Wellbeing,Adult,Fish,No,Premium,No,Sale,True,,,,,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,145209,143157,140846,6234,41.809089909091,45.99,41.809089909091,45.99,[],SavourLife Ancient Grains Lean Chicken Adult Dog Food 3kg,Dry Food,SavourLife Ancient Grains,3kg,Weight Management,Adult,Chicken,No,Essential,Yes,Sale,False,All,New,,,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,145209,143163,140847,6780,145.44545354545,159.99,99.090908090909,109.0,[],SavourLife Ancient Grains Lean Chicken Adult Dog Food 20kg,Dry Food,SavourLife Ancient Grains,20kg,Weight Management,Adult,Chicken,No,Essential,Yes,,False,All,New,Only $109,Great low price,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,144663,142980,141739,6234,48.172726272727,52.99,39.990908090909,43.99,[],Black Hawk Chicken & Rice Medium Puppy Food 3kg,Dry Food,Black Hawk,3kg,Healthy Development,Puppy,Chicken,No,Premium,No,Sale,True,Medium,,,,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,144663,142983,141740,6651,114.53636263636,125.99,101.80908990909,111.99,[],Black Hawk Chicken & Rice Medium Puppy Food 10kg,Dry Food,Black Hawk,10kg,,Puppy,Chicken,No,Premium,No,Sale,True,Medium,,,,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,144663,142959,141741,6780,169.99090809091,186.99,135.99090809091,149.59,[],Black Hawk Chicken & Rice Medium Puppy Food 20kg,Dry Food,Black Hawk,20kg,,Puppy,Chicken,No,Premium,No,Sale,True,Medium,,20% off,,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,144600,144351,141490,7815,36.354544454545,39.99,30.899999,33.99,[],ProBalance Care Joint Care Adult Dog Dry Food 2.5kg,Dry Food,ProBalance,2.5kg,Hip and Joint Support,Adult,,No,Superior,No,,False,,,,Great low price,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,144600,144009,141491,17823,117.26363536364,128.99,93.809089909091,103.19,[],ProBalance Care Joint Care Adult Dog Dry Food 12kg,Dry Food,ProBalance,12kg,Hip and Joint Support,Adult,,No,Superior,No,,False,,,20% off,Great low price,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,113391,63624,30318,6687,173.62727172727,190.99,115.03636263636,126.54,[],Royal Canin Maxi Puppy Dog Food 15kg,Dry Food,ROYAL CANIN,15kg,Growth Support,Puppy,Chicken,No,Superior,No,,False,"Giant, Large",,,Great low price,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,113391,63111,29321,84219,64.536362636364,70.99,51.809089909091,56.99,[],Royal Canin Maxi Puppy Dog Food 4kg,Dry Food,ROYAL CANIN,4kg,Growth Support,Puppy,Chicken,No,Superior,No,,False,Large,,Buy 2 for $106.49,Great low price,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,113391,123852,140248,8373,20.899999,22.99,17.763635363636,19.54,[],Royal Canin Maxi Breed Puppy Food 1kg,Dry Food,ROYAL CANIN,1kg,Healthy Development,Puppy,Chicken,No,Superior,No,,False,Large,,Buy 2 for $34.49,Great low price,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,145119,143145,140840,7815,45.445453545455,49.99,45.445453545455,49.99,[],SavourLife Grain Free Sensitive OFish Adult Dog Food 2.5kg,Dry Food,SavourLife,2.5kg,"Grain Free, Sensitive Skin, Sensitive Stomach",Adult,Fish,No,Superior,No,Sale,False,All,New,,,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,145119,143148,140841,6651,113.62727172727,124.99,113.62727172727,124.99,[],SavourLife Grain Free Sensitive OFish Adult Dog Food 10kg,Dry Food,SavourLife,10kg,"Grain Free, Sensitive Skin, Sensitive Stomach",Adult,Fish,No,Superior,No,Sale,False,All,New,,,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,144657,142974,141735,6651,114.53636263636,125.99,99.990908090909,109.99,[],Black Hawk Lamb & Rice Large Puppy Food 10kg,Dry Food,Black Hawk,10kg,Healthy Development,Puppy,Lamb,No,Premium,No,Sale,True,Large,,,,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,144657,142953,141736,6780,169.99090809091,186.99,135.99090809091,149.59,[],Black Hawk Lamb & Rice Large Puppy Food 20kg,Dry Food,Black Hawk,20kg,,Puppy,Lamb,No,Premium,No,Sale,True,Large,,20% off,,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,112734,62448,29328,6234,49.081817181818,53.99,36.118180818182,39.73,[],Royal Canin Labrador Dog Food 3kg,Dry Food,ROYAL CANIN,3kg,,"Adult, Senior",Chicken,No,Superior,No,,False,Labrador Retriever,,,Great low price,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,112734,62985,30327,17823,148.17272627273,162.99,92.718180818182,101.99,[],Royal Canin Labrador Dog Food 12kg,Dry Food,ROYAL CANIN,12kg,,"Adult, Senior",Chicken,No,Superior,No,,False,Labrador Retriever,,,Great low price,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,172479,128169,140460,8514,45.445453545455,49.99,40.899999,44.99,[],Acana Light & Fit Dry Dog Food 2kg,Dry Food,Acana,2kg,Weight Management,Adult,Chicken,No,Superior,No,,False,,,,Great low price,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,172479,128175,140469,292890,159.08181718182,174.99,149.99090809091,164.99,[],Acana Light & Fit Dry Dog Food 11.3kg,Dry Food,Acana,11.3kg,Weight Management,Adult,Chicken,No,Superior,No,,False,,,,Great low price,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,145083,143316,140842,7815,43.627271727273,47.99,43.627271727273,47.99,[],SavourLife Grain Free Chicken 7+ Senior Dog Food 2.5kg,Dry Food,SavourLife,2.5kg,"Ageing, Grain Free",Senior,Chicken,No,Superior,No,Sale,False,All,New,,,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,145083,143322,140843,6651,109.08181718182,119.99,109.08181718182,119.99,[],SavourLife Grain Free Chicken 7+ Senior Dog Food 10kg,Dry Food,SavourLife,10kg,"Ageing, Grain Free",Senior,Chicken,No,Superior,No,Sale,False,All,New,,,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,144618,144387,141488,7815,38.172726272727,41.99,32.445453545455,35.69,[],ProBalance Care Sensitive Skin Adult Dog Dry Food 2.5kg,Dry Food,ProBalance,2.5kg,Sensitive Skin,Adult,,No,Superior,No,,False,,,,Great low price,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,144618,144021,141489,17823,118.17272627273,129.99,94.536362636364,103.99,[],ProBalance Care Sensitive Skin Adult Dog Dry Food 12kg,Dry Food,ProBalance,12kg,Sensitive Skin,Adult,,No,Superior,No,,False,,,20% off,Great low price,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,172476,128235,140423,8514,54.536362636364,59.99,51.809089909091,56.99,[],Orijen Six Fish Dry Dog Food 2kg,Dry Food,Orijen,2kg,Grain Free,Adult,Fish,No,Superior,No,,False,,,,Great low price,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,172476,128214,140431,2825802,190.899999,209.99,181.80908990909,199.99,[],Orijen Six Fish Dry Dog Food 11.4kg,Dry Food,Orijen,11.4kg,Grain Free,Adult,Fish,No,Superior,No,,False,,,,Great low price,
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,115260,109329,127959,6780,156.35454445455,171.99,119.99090809091,131.99,[],Black Hawk Lamb And Rice Adult Dog Food - 20kg,Dry Food,Black Hawk,20kg,,Adult,Lamb,No,Premium,No,Sale,True,,,,,High Activity
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,115260,109323,127955,6234,46.354544454545,50.99,28.172726272727,30.99,[],Black Hawk Lamb And Rice Adult Dog Food - 3kg,Dry Food,Black Hawk,3kg,,Adult,Lamb,No,Premium,No,Sale,True,,,3kg bags for $30.99,,High Activity
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,115260,109317,501601,6651,110.899999,121.99,93.627271727273,102.99,[],Black Hawk Lamb & Rice Adult Dog Food 10kg,Dry Food,Black Hawk,10kg,,Adult,Lamb,No,Premium,No,Sale,True,,,,,High Activity
11/21/2022, 14:34:56,Advance Oodles Puppy Food 2.5kg,40.77,$47.99,100%,1,143138,,37.06,Puppy,,,,,,,,,,,,,,,,,,,,,,,,,,
每次我运行代码时,label_options列总是具有相同的值这是我猜测的最后一个
你确定他们不是碰巧都有相同的一组选项吗?它不应该重复,因为你在循环中清除了labels = []
的列表-尽管你可以在的for lab...
循环之外附加label_options
:
for lab in label_option_list:
labels.append(lab["label"])
d["label_options"] = labels
而不是为label_option_list
中的每个标签更新它。
无论如何,我认为你的主要问题是你想要为每个变体单独行,我可以在那里帮助一点。
谁能帮我找出获得预期输出的最佳方法?
我不知道这是否是最好的方式,但这是我如何从脚本中的JSON中获得最大数量的信息:
对于这个例子,我从你的问题中的html片段(到一个变量pasted_jstr
)中复制了script
标签内的JSON,以及你的一些输出,用
d
和dataJC
dataj = json.loads(pasted_jstr)
dataJC = dataj['[data-role=swatch-option-111105]']['Magento_Swatches/js/swatch-renderer']['jsonConfig']
# from io import StringIO
csvStr = '''
Scraped_Date,Scraped_Time,product_name,price,old_price,ratings,number_of_reviews,productSKU,productSize,priceWithoutTax,lifeStage,label_options
11/21/2022, 00:31:47,Black Hawk Fish And Potato Adult Dog Food,189.99,N/A,N/A,N/A,black-hawk-fish-&-potato-adult-dog-food,,172.72,Adult,"['10kg', '20kg', '3kg']"
'''.split('n')
d = dict(zip(csvStr[1].split(',')[:-1], csvStr[2].split(',')))
和
productId = dataJC['productId']
jcInfs = [{
'productId': productId, 'optionKey': k, 'sku': '?',
'index': v['1299'] if '1299' in v else None
} for k, v in dataJC['index'].items()]
orInfs = [
('optionPrices', 'amount', 'reverseNest'),
('dynamic', 'value', 'nest1'),
('labels', '', 'reverseNest'),
('hasEndDate', '', 'noNesting')
]
relevInfs = []
for kk, vk, nt in orInfs:
if kk not in dataJC: continue
if nt == 'noNesting':
relevInfs += [(kk, vk, dataJC[kk])]
continue
if nt == 'nest1':
relevInfs += [(kk, vk, vd) for kk, vd in dataJC[kk].items()]
continue
if nt != 'reverseNest':
## can put a default action here
continue
## nt == 'reverseNest'
orInf = {}
for pk, po in dataJC[kk].items():
for kpo, vpo in po.items():
if kpo not in orInf: orInf[kpo] = {}
orInf[kpo][pk] = vpo
relevInfs += [(kk, vk, vi) for kk, vi in orInf.items()]
for i, j in enumerate(jcInfs):
for kk, vk, vd in relevInfs:
if j['optionKey'] not in vd: continue
relevInf = vd[j['optionKey']]
if type(relevInf) != dict: j[kk] = relevInf
elif vk in relevInf and relevInf[vk]: j[kk] = relevInf[vk]
# combine with main variation
jcInfs[i] = {k:v for k, v in (list(d.items()) + [
(jk, jv) for jk, jv in j.items() if jk not in d
])}
你可以在循环之外的某个地方设置orInfs
,并添加任何键[你想从json中获取信息],你也可以在for kk, vk, nt in orInfs
循环中添加新格式/nt
("嵌套类型")的动作。(你也可以把循环分离到它自己的函数中…)
我可以用print(pd.DataFrame(jcInfs).to_markdown(index=False))
| Scraped_Date | Scraped_Time | product_name | price | old_price | ratings | number_of_reviews | productSKU | productSize | priceWithoutTax | lifeStage | productId | optionKey | sku | index | baseOldPrice | oldPrice | basePrice | finalPrice | tierPrices | name | product_category | brand_filter | size | health_benefits | life_stage | flavour | australia_made | nutrition_grade | weight_control | sales_flag_label | hasEndDate |
|:---------------|:---------------|:------------------------------------------|--------:|:------------|:----------|:--------------------|:----------------------------------------|:--------------|------------------:|:------------|------------:|------------:|-------:|--------:|---------------:|-----------:|------------:|-------------:|:-------------|:-------------------------------------------------|:-------------------|:---------------|:-------|:------------------|:-------------|:----------|:-----------------|:------------------|:-----------------|:-------------------|:-------------|
| 11/21/2022 | 00:31:47 | Black Hawk Fish And Potato Adult Dog Food | 189.99 | N/A | N/A | N/A | black-hawk-fish-&-potato-adult-dog-food | | 172.72 | Adult | 111105 | 108987 | 127956 | 6234 | 49.9909 | 54.99 | 42.7182 | 46.99 | [] | Black Hawk Fish And Potato Adult Dog Food - 3kg | Dry Food | Black Hawk | 3kg | Total Wellbeing | Adult | Fish | No | Premium | No | Great low price | False |
| 11/21/2022 | 00:31:47 | Black Hawk Fish And Potato Adult Dog Food | 189.99 | N/A | N/A | N/A | black-hawk-fish-&-potato-adult-dog-food | | 172.72 | Adult | 111105 | 108981 | 127960 | 6780 | 172.718 | 189.99 | 128.173 | 140.99 | [] | Black Hawk Fish And Potato Adult Dog Food - 20kg | Dry Food | Black Hawk | 20kg | Total Wellbeing | Adult | Fish | No | Premium | No | Great low price | False |
| 11/21/2022 | 00:31:47 | Black Hawk Fish And Potato Adult Dog Food | 189.99 | N/A | N/A | N/A | black-hawk-fish-&-potato-adult-dog-food | | 172.72 | Adult | 111105 | 116724 | 501600 | 6651 | 120.9 | 132.99 | 102.718 | 112.99 | [] | Black Hawk Fish & Potato Dog Food 10kg | Dry Food | Black Hawk | 10kg | Total Wellbeing | Adult | Fish | No | Premium | No | Great low price | False |