我正在尝试抓取 Tokopedia 的商品页面。当我直接使用原始(写死的)代码时,它可以正常运行并返回 JSON;然而,当我改为通过变量传入参数时,请求就会读取超时(read timeout)。网站:https://www.tokopedia.com/samudrasembako/regal-marie-roll-230-gram?extParam=ivf%3Dfalse&src=topads 下面是代码:
!pip install fake_useragent
!pip install httpx
import requests
from fake_useragent import UserAgent
import httpx
# Fetch Tokopedia product-detail (PDP) layout data through the site's GraphQL
# endpoint, one request per product URL listed in `tokopedia`.
tokopedia = ['https://www.tokopedia.com/samudrasembako/regal-marie-roll-230-gram?extParam=ivf%3Dfalse&src=topads']

GRAPHQL_ENDPOINT = "https://gql.tokopedia.com/graphql/PDPGetLayoutQuery"

# httpx defaults to a 5 second timeout, which is what surfaces as a
# "read timeout" on a slow response. Use a generous but finite limit so a
# stalled request still fails instead of hanging forever.
REQUEST_TIMEOUT_SECONDS = 30.0

# GraphQL document for the PDPGetLayoutQuery operation. GraphQL ignores
# insignificant whitespace, so the layout below is purely for readability.
PDP_LAYOUT_QUERY = """
fragment ProductVariant on pdpDataProductVariant {
  errorCode parentID defaultChild sizeChart totalStockFmt
  variants {
    productVariantID variantID name identifier
    option {
      picture { urlOriginal: url urlThumbnail: url100 __typename }
      productVariantOptionID variantUnitValueID value hex stock __typename
    }
    __typename
  }
  children {
    productID price priceFmt optionID optionName productName productURL
    picture { urlOriginal: url urlThumbnail: url100 __typename }
    stock {
      stock isBuyable stockWordingHTML minimumOrder maximumOrder __typename
    }
    isCOD isWishlist
    campaignInfo {
      campaignID campaignType campaignTypeName campaignIdentifier background
      discountPercentage originalPrice discountPrice stock stockSoldPercentage
      startDate endDate endDateUnix appLinks isAppsOnly isActive hideGimmick
      isCheckImei minOrder __typename
    }
    thematicCampaign { additionalInfo background campaignName icon __typename }
    __typename
  }
  __typename
}

fragment ProductMedia on pdpDataProductMedia {
  media {
    type
    urlOriginal: URLOriginal
    urlThumbnail: URLThumbnail
    urlMaxRes: URLMaxRes
    videoUrl: videoURLAndroid
    prefix suffix description variantOptionID __typename
  }
  videos { source url __typename }
  __typename
}

fragment ProductHighlight on pdpDataProductContent {
  name
  price { value currency __typename }
  campaign {
    campaignID campaignType campaignTypeName campaignIdentifier background
    percentageAmount originalPrice discountedPrice originalStock stock
    stockSoldPercentage threshold startDate endDate endDateUnix appLinks
    isAppsOnly isActive hideGimmick __typename
  }
  thematicCampaign { additionalInfo background campaignName icon __typename }
  stock { useStock value stockWording __typename }
  variant { isVariant parentID __typename }
  wholesale {
    minQty
    price { value currency __typename }
    __typename
  }
  isCashback { percentage __typename }
  isTradeIn isOS isPowerMerchant isWishlist isCOD
  isFreeOngkir { isActive __typename }
  preorder { duration timeUnit isActive preorderInDays __typename }
  __typename
}

fragment ProductCustomInfo on pdpDataCustomInfo {
  icon title isApplink applink separator description __typename
}

fragment ProductInfo on pdpDataProductInfo {
  row
  content { title subtitle applink __typename }
  __typename
}

fragment ProductDetail on pdpDataProductDetail {
  content { title subtitle applink showAtFront isAnnotation __typename }
  __typename
}

fragment ProductDataInfo on pdpDataInfo {
  icon title isApplink applink
  content { icon text __typename }
  __typename
}

fragment ProductSocial on pdpDataSocialProof {
  row
  content { icon title subtitle applink type rating __typename }
  __typename
}

query PDPGetLayoutQuery($shopDomain: String, $productKey: String, $layoutID: String, $apiVersion: Float, $userLocation: pdpUserLocation, $extParam: String) {
  pdpGetLayout(shopDomain: $shopDomain, productKey: $productKey, layoutID: $layoutID, apiVersion: $apiVersion, userLocation: $userLocation, extParam: $extParam) {
    requestID name pdpSession
    basicInfo {
      alias createdAt isQA
      id: productID
      shopID shopName minOrder maxOrder weight weightUnit condition status url
      needPrescription catalogID isLeasing isBlacklisted
      menu { id name url __typename }
      category {
        id name title breadcrumbURL isAdult isKyc minAge
        detail { id name breadcrumbURL isAdult __typename }
        __typename
      }
      txStats {
        transactionSuccess transactionReject countSold paymentVerified
        itemSoldFmt __typename
      }
      stats { countView countReview countTalk rating __typename }
      __typename
    }
    components {
      name type position
      data {
        ...ProductMedia
        ...ProductHighlight
        ...ProductInfo
        ...ProductDetail
        ...ProductSocial
        ...ProductDataInfo
        ...ProductCustomInfo
        ...ProductVariant
        __typename
      }
      __typename
    }
    __typename
  }
}
"""


def split_product_url(url):
    """Return ``(shop_domain, product_key)`` taken from a product page URL.

    The shop domain is the first path segment and the product key the second;
    the query string and fragment are ignored.

    Raises:
        ValueError: if the URL path has fewer than two segments.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlsplit

    segments = [part for part in urlsplit(url).path.split("/") if part]
    if len(segments) < 2:
        raise ValueError(f"not a product URL (expected /<shop>/<product>): {url!r}")
    return segments[0], segments[1]


def build_payload(shopdomain, product_key):
    """Build the JSON body of the PDPGetLayoutQuery request for one product."""
    return {
        "operationName": "PDPGetLayoutQuery",
        "variables": {
            "shopDomain": shopdomain,
            "productKey": product_key,
            "layoutID": "",
            "apiVersion": 1,
            "userLocation": {
                "cityID": "176",
                "addressID": "0",
                "districtID": "2274",
                "postalCode": "",
                "latlon": "",
            },
            "extParam": "",
        },
        "query": PDP_LAYOUT_QUERY,
    }


def build_headers(url):
    """Build the request headers, using the product page *url* as referer."""
    # The user-agent stays pinned to the Chrome 108 string so it agrees with
    # the sec-ch-ua client hints sent alongside it.
    return {
        'origin': 'https://www.tokopedia.com',
        'referer': url,
        'sec-ch-ua': '"Not?A_Brand";v="8", "Chromium";v="108", "Google Chrome";v="108"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': "Windows",
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-site',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
        'x-device': 'desktop',
        'x-source': 'tokopedia-lite',
        'x-tkpd-akamai': 'pdpGetLayout',
        'x-tkpd-lite-service': 'zeus',
        'x-version': '53ac990',
    }


if __name__ == "__main__":
    # Kept at module level (not inside a function) so that `resp` and
    # `responses` remain available to later notebook cells.
    responses = {}
    # One client for all URLs; the `with` block closes its connections.
    with httpx.Client(timeout=REQUEST_TIMEOUT_SECONDS) as client:
        for url in tokopedia:
            shopdomain, product_key = split_product_url(url)
            payload = build_payload(shopdomain, product_key)
            headers = build_headers(url)
            resp = client.post(GRAPHQL_ENDPOINT, json=payload, headers=headers)
            # `resp` only holds the latest response; keep every one by URL.
            responses[url] = resp
有人能帮帮我吗?补充说明:上面的代码可以运行并返回 JSON。
请参阅 httpx 文档中关于超时配置的一节:https://www.python-httpx.org/advanced/#timeout-configuration
client = httpx.Client(timeout=None) # Disable all timeouts by default for requests made through this client.