使用 Python requests 库在网站(eBay)上使用搜索功能



我正在尝试用 requests 库编写一个 Python 程序,在 eBay 上搜索用户输入的物品。除了把 URL 硬编码之外,有没有办法用 requests 库执行 eBay 搜索(或在任意网站上执行搜索)?

我相信您想要的是在搜索元素中输入文本。根据Realpython:

requests 库是在 Python 中发起 HTTP 请求的事实标准。

我建议使用 Selenium 来操控网页,例如在元素中输入文本,然后点击网站上的按钮。

但是,如果您仍然想使用 requests,请尝试找到网站处理搜索功能的 API 端点,并使用 POST 方法从中获取数据。

# Send an HTTP POST request to the search endpoint; `url` must already hold that endpoint's address.
resp = requests.post(url)

我创建了一个 eBay 开发者帐户以访问 API,然后编写了一个小脚本,在 eBay 上搜索物品的历史成交价。将脚本保存为 search.py,然后这样调用:

./search.py "您正在寻找的eBay项目"

您可以按自己的需要修改 itemFilter,目前设置为 2019 年 10 月 10 日以来已售出的物品(SoldItemsOnly)。完整列表在这里:https://developer.ebay.com/devzone/finding/callref/types/itemfiltertype.html

底部的注释显示了从eBay返回的完整字段,您可以选择自己喜欢的字段并将其添加到打印语句中。

此外,此脚本返回的不只是第一页的结果,每请求一页都会消耗您每天 5,000 次开发者查询配额中的一次。无论我怎么尝试,都无法让它在沙箱环境下工作。我认为 eBay 的沙箱本身是坏的。

#!/usr/local/bin/python3
from ebaysdk.finding import Connection
import sys
# Set to True to dump the raw response dictionary returned by the API call.
DEBUG = False
#search_keywords = "2019 Hot Wheels Dumbo"
# The search phrase is the first command-line argument. Exit with a usage
# message instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    sys.exit(f'Usage: {sys.argv[0]} "search keywords"')
search_keywords = sys.argv[1]
print("Search Keywords: " + search_keywords)

def build_request(keywords, pageNumber):
    """Build the request dictionary for an eBay Finding API search.

    eBay returns at most 100 items per call, so callers page through the
    results by calling this again (or updating 'pageNumber') for each page.

    Args:
        keywords: Search phrase to query for.
        pageNumber: Number of the result page to pull (the first page is 1).

    Returns:
        A dict with the 'keywords', 'itemFilter', 'paginationInput' and
        'sortOrder' entries, ready to pass to ``api.execute``.
    """
    # Item Filter List:
    # https://developer.ebay.com/devzone/finding/callref/types/ItemFilterType.html
    #
    # Every filter must be its own dict. Repeating 'name'/'value' inside one
    # dict literal silently keeps only the last pair, which previously dropped
    # the condition and sold-items filters from the request.
    item_filters = [
        {'name': 'Condition', 'value': 'New'},
        # Sent as the text 'true' so the serialized value is a valid boolean
        # no matter how the SDK renders Python bools.
        {'name': 'SoldItemsOnly', 'value': 'true'},
        {'name': 'EndTimeFrom', 'value': '2019-10-10T00:00:00.000Z'},
    ]
    request = {
        'keywords': keywords,
        'itemFilter': item_filters,
        'paginationInput': {
            'entriesPerPage': 100,  # eBay limits API calls to 100 items per page
            'pageNumber': pageNumber,
        },
        'sortOrder': 'PricePlusShippingLowest',
    }
    return request
def _print_items(response):
    """Print one summary line per item of a findCompletedItems response."""
    # NOTE(review): searchResult presumably carries no `item` attribute when
    # nothing matched; fall back to an empty list instead of raising - confirm
    # against the SDK.
    for item in getattr(response.reply.searchResult, 'item', []):
        # See the sample response at the bottom of the file for all possible fields
        print(f"Date: {item.listingInfo.endTime} Title: {item.title}, Price: {item.sellingStatus.currentPrice.value} Shipping: {item.shippingInfo.shippingServiceCost.value}")


# Connect using yaml file to EBAY-US production site.
# The whole run lives under the __main__ guard so that the API calls below are
# not executed (against an undefined `api`) when this file is imported.
if __name__ == '__main__':
    api = Connection(config_file='ebay.yaml', debug=False, siteid="EBAY-US")
    #api = Connection(config_file='ebay.yaml', debug=False, domain="api.sandbox.ebay.com", siteid="EBAY-US")

    # Run the request for the first page
    query = build_request(search_keywords, 1)
    response = api.execute('findCompletedItems', query)
    if DEBUG:
        print(response.dict())   # Use this to see the dictionary structure

    # Display how many entries and results are returned
    print("API Call: findCompletedItems")
    print("----------------------------")
    print(f"totalEntries: {response.reply.paginationOutput.totalEntries}, totalPages: {response.reply.paginationOutput.totalPages}")
    maxpage = int(response.reply.paginationOutput.totalPages)

    _print_items(response)

    # Now run the request for each remaining page, changing only the page
    # number each time. The upper bound is maxpage + 1 because range() excludes
    # its end value and the last page must be fetched too.
    for page in range(2, maxpage + 1):
        print("**** PAGE: " + str(page) + " of " + str(maxpage) + " ****")
        query['paginationInput']['pageNumber'] = page
        response = api.execute('findCompletedItems', query)
        _print_items(response)

#{'ack': 'Success', 'version': '1.13.0', 'timestamp': '2019-10-16T01:28:25.891Z', 
#
#searchResult': {'item': [{'itemId': '123719989207', 'title': '2019 HOT WHEELS 2 SET CORVETTE STINGRAY SUPER CHROMES 5/5 TREASURE HUNT PAIR', 'globalId': 'EBAY-US', 'primaryCategory': {'categoryId': '180506', 'categoryName': 'Contemporary Manufacture'}, 'galleryURL': 'https://thumbs4.ebaystatic.com/m/mFuyRQgYjSutGli33dqsqcA/140.jpg', 'viewItemURL': 'https://www.ebay.com/itm/2019-HOT-WHEELS-2-SET-CORVETTE-STINGRAY-SUPER-CHROMES-5-5-TREASURE-HUNT-PAIR-/123719989207', 'paymentMethod': 'PayPal', 'autoPay': 'false', 'postalCode': '54650', 'location': 'Onalaska,WI,USA', 'country': 'US', 'shippingInfo': {'shippingServiceCost': {'_currencyId': 'USD', 'value': '6.0'}, 'shippingType': 'Flat', 'shipToLocations': 'Worldwide', 'expeditedShipping': 'false', 'oneDayShippingAvailable': 'false', 'handlingTime': '2'}, 'sellingStatus': {'currentPrice': {'_currencyId': 'USD', 'value': '9.0'}, 'convertedCurrentPrice': {'_currencyId': 'USD', 'value': '9.0'}, 'sellingState': 'Ended'}, 'listingInfo': {'bestOfferEnabled': 'false', 'buyItNowAvailable': 'false', 'startTime': '2019-04-02T22:14:03.000Z', 'endTime': '2019-10-02T18:44:49.000Z', 'listingType': 'StoreInventory', 'gift': 'false', 'watchCount': '2'}, 'returnsAccepted': 'false', 'condition': {'conditionId': '1000', 'conditionDisplayName': 'New'}, 'isMultiVariationListing': 'false', 'topRatedListing': 'false'}, 
#
#
#{'itemId': '153679182310', 'title': "Hot Wheels 2019 Super Treasure Hunt '68 Mercury Cougar Loose 1/64 STH Green", 'globalId': 'EBAY-US', 'primaryCategory': {'categoryId': '73252', 'categoryName': 'Collections & Lots'}, 'galleryURL': 'https://thumbs3.ebaystatic.com/m/mEN9EsbCJY0wb6WzXjO8hNg/140.jpg', 'viewItemURL': 'https://www.ebay.com/itm/Hot-Wheels-2019-Super-Treasure-Hunt-68-Mercury-Cougar-Loose-1-64-STH-Green-/153679182310', 'paymentMethod': 'PayPal', 'autoPay': 'false', 'location': 'Malaysia', 'country': 'MY', 'shippingInfo': {'shippingServiceCost': {'_currencyId': 'USD', 'value': '9.0'}, 'shippingType': 'Flat', 'shipToLocations': 'Worldwide', 'expeditedShipping': 'false', 'oneDayShippingAvailable': 'false', 'handlingTime': '15'}, 'sellingStatus': {'currentPrice': {'_currencyId': 'USD', 'value': '9.9'}, 'convertedCurrentPrice': {'_currencyId': 'USD', 'value': '9.9'}, 'bidCount': '1', 'sellingState': 'Ended'}, 'listingInfo': {'bestOfferEnabled': 'false', 'buyItNowAvailable': 'false', 'startTime': '2019-10-10T04:13:32.000Z', 'endTime': '2019-10-15T04:13:32.000Z', 'listingType': 'Auction', 'gift': 'false', 'watchCount': '1'}, 'returnsAccepted': 'false', 'condition': {'conditionId': '3000', 'conditionDisplayName': 'Used'}, 'isMultiVariationListing': 'false', 'topRatedListing': 'false'}], 
#
#'_count': '100'}, 'paginationOutput': {'pageNumber': '3', 'entriesPerPage': '100', 'totalPages': '40', 'totalEntries': '3966'}}

您可以使用 BeautifulSoup 网页抓取库来抓取 eBay。

为了不必写出请求的完整 URL,您可以设置 params,在其中指定必要的请求参数,并通过输入获取要搜索的查询词:

# Ask the user for the search phrase, then assemble the query-string
# parameters that will be sent along with the request.
query = input('Your query is: ')
params = {}
params['_nkw'] = query      # search query
params['_pgn'] = 1          # page number
# params['LH_Sold'] = '1'   # shows sold items

如果使用 requests 库,请求可能会被拦截,因为 requests 库的默认 user-agent 是 python-requests,网站据此可以判断这是机器人或脚本发出的请求。请检查您的 user-agent 是什么。

除了提供浏览器的 user-agent 之外,还可以轮换 user-agent,例如在 PC、手机和平板之间切换,以及在不同浏览器(Chrome、Firefox、Safari、Edge 等)之间切换。

在线IDE中检查代码。

from bs4 import BeautifulSoup
import requests, json, lxml
# https://requests.readthedocs.io/en/latest/user/quickstart/#custom-headers
# Browser-like User-Agent so the request is not sent with the library default.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36",
}
query = input('Your query is: ')
params = {
    '_nkw': query,           # search query
    '_pgn': 1,               # page number
    # 'LH_Sold': '1',        # shows sold items
}
data = []
# Walk the result pages until no "next page" control is present.
while True:
    page = requests.get('https://www.ebay.com/sch/i.html', params=params, headers=headers, timeout=30)
    # Fail loudly on an HTTP error instead of parsing an error page as if it
    # were an (empty) result page.
    page.raise_for_status()
    soup = BeautifulSoup(page.text, 'lxml')

    print(f"Extracting page: {params['_pgn']}")
    print("-" * 10)

    for product in soup.select(".s-item__info"):
        title = product.select_one(".s-item__title span")
        price = product.select_one(".s-item__price")
        link = product.select_one(".s-item__link")
        # select_one returns None when nothing matches; skip cards that lack
        # any of the three fields rather than crashing on `.text` / `["href"]`.
        if title is None or price is None or link is None:
            continue

        data.append({
            "title": title.text,
            "price": price.text,
            "link": link["href"],
        })

    if soup.select_one(".pagination__next"):
        params['_pgn'] += 1
    else:
        break

# Print once, after the loop, so the results of the last page are included.
print(json.dumps(data, indent=2, ensure_ascii=False))

示例输出:

Your query is: shirt    # query entry example
Extracting page: 1
----------
[
  {
    "title": "Men's Polo Shirt 100% Cotton Knockout Jeans NVY WHT 220 Stripe MEDIUM Free Ship",
    "price": "$11.99",
    "link": "https://www.ebay.com/itm/133992813518?hash=item1f329813ce:g:tWMAAOSwXBxhTP7Q&amdata=enc%3AAQAHAAAAwJ9%2BDbqKGCoZye6JelYY1tJHQWotUalKHQJ%2FixwyplnvOC60SofXkLVsNgRfoX09uOZLerjkBtwcW%2FQQa1wmJ6%2BYVEEagzH1GAK6Bx4rX%2BRNnj9g6SlvB2WagWETpbmrLdiFHGTIRvAL2EvfXDRqPFnEGWZ2nk%2BM0zEkiGzp%2F4ADUbPslGui3zTDJsIgVpXjAHzL2EUH3s7tiOxtd3qVTXxaE095evq5YrBgkJFJu4KB5o%2F%2BCiCURfy7xR%2FbTU7mnQ%3D%3D%7Ctkp%3ABlBMUJavlrOEYQ"
  },
  {
    "title": "5 Pack Oroblu Micromodal Perfect Line Round Neck Short Sleeve T-Shirt",
    "price": "$192.00",
    "link": "https://www.ebay.com/itm/275287531865?hash=item40186a6159:g:OtUAAOSweKFiZr2S&amdata=enc%3AAQAHAAAAsMRLg1VeYAIKHTiXXdD8xv56DpaeH6jc3EhFP26RJ66bqmlzXHQrMMxuo78x6S2i8DfxvuzjbXrpmYYdyRLhzgQCoaauMNvRwVNuhx11qorNlPoHrig%2BdIGG2RB4xHmXdB2fjOciLCsdYkL23jaH23ehXakQu%2BrBzER%2F2v94Sdg%2BkchjwWmRidsv0kPfLRcpiy%2BOeDBHEas4i9EQY%2F0VAzLGj2U%2FwLdcqjqSjgngj%2BRr%7Ctkp%3ABlBMUJavlrOEYQ"
  },
  # ...
]

作为替代方案,您可以使用 SerpApi 的 eBay Organic Results API。这是一个提供免费套餐的付费 API,由其后端负责处理封锁和解析。

按输入的查询词遍历所有页面的示例代码:

from serpapi import EbaySearch
import os, json
query = input('Your query is: ')
params = {
    "api_key": os.getenv("API_KEY"),  # serpapi api key
    "engine": "ebay",                 # search engine
    "ebay_domain": "ebay.com",        # ebay domain
    "_nkw": query,                    # search query
    "_pgn": 1,                        # page number
    # "LH_Sold": "1",                 # shows sold items
}
data = []
# Walk the result pages until the response no longer advertises a next page.
while True:
    # Build the search from the current params on every iteration, so the
    # incremented page number is what gets requested without relying on the
    # search object sharing this dict.
    results = EbaySearch(params).get_dict()     # JSON -> Python dict
    if "error" in results:
        print(results["error"])
        break

    for organic_result in results.get("organic_results", []):
        data.append({
            "price": organic_result.get("price"),
            "link": organic_result.get("link"),
        })

    # Progress indicator: the number of the page just processed.
    print(params['_pgn'])

    if "next" in results.get("pagination", {}):
        params['_pgn'] += 1
    else:
        break

# Print once, after the loop, so the last page (and anything collected before
# an error) is included in the output.
print(json.dumps(data, indent=2))

输出:

[
   {
    "price": {
      "raw": "$25.99",
      "extracted": 25.99
    },
    "link": "https://www.ebay.com/itm/285018595898?hash=item425c6ea23a:g:mT0AAOSwBjljAFsl&amdata=enc%3AAQAHAAAAkI1P1C%2BE2boIutliCMWXCADm%2BXyUp2a6Q1qOjpifaAIo6%2FWD0yHCd8Mejyfc2jc%2BQ5zzVcITrcWM0XxIfiSUILMZFsMewB154skl5re5%2FS8W9kRrabjRdy%2BoC6aQoS%2FWGq%2F6A%2BZWQ1GQkcd5Tstamu%2FgzZKoL6VYfO4YpC4oO4Im23h0wiIfI0%2BxPG8uuFRMPw%3D%3D%7Ctkp%3ABk9SR_i1vbKEYQ"
  },
  {
    "price": {
      "raw": "$14.16",
      "extracted": 14.16
    },
    "link": "https://www.ebay.com/itm/234347615312?hash=item369034d450:g:hvYAAOSwNspg0TAH&amdata=enc%3AAQAHAAAA0B1m3DPC4q0R4AQp6MO8rXnKt6qFIX2p%2BaypmySYXkIvi6XE3FHzpbtN%2B%2Bvd9P3TZPYu3fuQVl5kH0ZYDO5eqtnjh1EcZ%2Fb9rZMlMx6r6RcH%2B5wOY7X65bvRcmQ7OUmoaNGAMOZpOc4hg8vHj2afxCa%2FR7F3jDr1KjnHk%2BKnln3opoiqAVMFIoXv338f70KZw8CDd%2Fg9xU0jQlzgxDpDwSL6Y6OMz0oKxh4T%2BRUMKHj03VE5E9%2B8VKzPUMWAQ%2BZWuZyGMpWxwzn%2BomggywV5RhI%3D%7Ctkp%3ABk9SR_i1vbKEYQ"
  },
  # ...
]

最新更新