我在 Scrapy 中使用 POST 方法发送请求,但出现了如下错误:TypeError: __init__() got an unexpected keyword argument 'data'
有什么解决办法吗?我想从页面的表格中抓取数据,页面链接是:https://www.benrishi-navi.com/english/english1_2.php
import scrapy
from scrapy.http import Request
from scrapy.selector import Selector
class TestSpider(scrapy.Spider):
    """Spider that POSTs a pre-encoded search form and prints the result tables."""

    name = 'test'
    url = "https://www.benrishi-navi.com/english/english1_2.php"
    # Form body, already in application/x-www-form-urlencoded format.
    payload='tuusan_year=&tuusan_month=&tuusan_chk=&methodAndOr1=&methodAndOr2=&methodAndOr3=&text_sen=&text_skill=&text_business=&tokkyo_data=&fuki_day_chk=&shuju=&kensyuu_bunya=&text_kensyuu=&methodAndOr_kensyuu=&keitai_kikan=&keitai_hisu=&display_flag=1&search=2&text=&method=&methodAndOr=&area=&pref=&name=&kana=&id=&year=&month=&day=&day_chk=&exp01=&exp02=&exp03=&trip=&venture_support=&venture_flag=&university_support=&university_flag=&university1=&university2=&university=&college=&high_pref=&junior_pref=&elementary_pref=&tyosaku=&hp=&jukoureki=&experience1=&experience2=&experience3=&experience4=&sort=&fuki_year=&fuki_month=&fuki_day=&fuki_day_chk=&id_chk=&shugyou=&fuki=&address1=&address2=&trip_pref=&expref=&office=&max_count=1437&search_count=10&start_count=1&search_default=10'
    # NOTE(review): the Cookie value looks like a session id captured from a
    # browser and may no longer be valid -- confirm before relying on it.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8,pt;q=0.7',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'CAKEPHP=u6u40lefkqnm45j49a5i0h6bs3',
        'Origin': 'https://www.benrishi-navi.com',
        'Referer': 'https://www.benrishi-navi.com/english/english1_2.php',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
        'sec-ch-ua': '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"'
    }

    def start_requests(self):
        """Yield the single POST request carrying ``payload`` to ``url``."""
        # The request classes accept no ``data`` keyword; passing one raises
        # "TypeError: __init__() got an unexpected keyword argument 'data'".
        # ``payload`` is already a URL-encoded string, so it is sent verbatim
        # as the request body (``formdata`` would require a dict instead).
        yield scrapy.Request(
            url=self.url,
            method='POST',
            body=self.payload,
            headers=self.headers,
            callback=self.parse,
        )

    def parse(self, response):
        """Select every <table> element in the response and print the selectors."""
        # A bare 'table' is evaluated relative to the document root and only
        # matches direct children of it, so '//table' is used to search the
        # whole document.
        tables = response.xpath('//table')
        print(tables)
-
您必须使用FormRequest.from_response代替FormRequest
使用 formdata作为参数而不是data
formdata/payload 必须以键值对的形式传入,也就是一个字典
避免添加过多不必要的请求头(headers)
完整可运行的示例代码如下:
import scrapy
from scrapy import FormRequest
from scrapy.crawler import CrawlerProcess
class TestSpider(scrapy.Spider):
    """Spider that loads the search page and submits its form with fixed values."""

    name = 'tes'
    start_urls = ['https://www.benrishi-navi.com/english/english1_2.php']

    # Form field names in the order they are submitted. Every field is sent
    # as an empty string unless it is listed in ``_FIELD_OVERRIDES``.
    _FIELD_NAMES = (
        'tuusan_year', 'tuusan_month', 'tuusan_chk',
        'methodAndOr1', 'methodAndOr2', 'methodAndOr3',
        'text_sen', 'text_skill', 'text_business',
        'tokkyo_data', 'fuki_day_chk', 'shuju',
        'kensyuu_bunya', 'text_kensyuu', 'methodAndOr_kensyuu',
        'keitai_kikan', 'keitai_hisu',
        'display_flag', 'search',
        'text', 'method', 'methodAndOr',
        'area', 'pref', 'name', 'kana', 'id',
        'year', 'month', 'day', 'day_chk',
        'exp01', 'exp02', 'exp03',
        'trip', 'venture_support', 'venture_flag',
        'university_support', 'university_flag',
        'university1', 'university2', 'university',
        'college', 'high_pref', 'junior_pref', 'elementary_pref',
        'tyosaku', 'hp', 'jukoureki',
        'experience1', 'experience2', 'experience3', 'experience4',
        'sort', 'fuki_year', 'fuki_month', 'fuki_day',
        'id_chk', 'shugyou', 'fuki',
        'address1', 'address2', 'trip_pref', 'expref', 'office',
        'max_count', 'search_count', 'start_count', 'search_default',
    )

    # The only fields that carry a non-empty value.
    _FIELD_OVERRIDES = {
        'display_flag': '1',
        'search': '2',
        'max_count': '1437',
        'search_count': '10',
        'start_count': '1',
        'search_default': '10',
    }

    def parse(self, response):
        """Yield a form-submission request built from ``response``.

        The resulting response is handed to ``parse_item``.
        """
        # Start with every field blank, then fill in the non-empty ones.
        # Updating an existing key keeps its position, so field order is
        # the order of ``_FIELD_NAMES``.
        form_values = dict.fromkeys(self._FIELD_NAMES, '')
        form_values.update(self._FIELD_OVERRIDES)

        request_headers = {'Content-Type': 'application/x-www-form-urlencoded'}

        yield FormRequest.from_response(
            response,
            formdata=form_values,
            headers=request_headers,
            callback=self.parse_item,
        )

    def parse_item(self, response):
        """Placeholder callback for the form result; intentionally does nothing."""
if __name__ == "__main__":
    # CrawlerProcess takes optional settings, not a spider, in its
    # constructor; the spider class is passed to crawl() instead.
    process = CrawlerProcess()
    process.crawl(TestSpider)
    # Starts the crawl; by default this returns only after it has finished.
    process.start()