如何将 Scrapy spider 中通过 yield 产出的结果存储到全局变量中



我知道如何将结果从yield中取出并保存到csv或json文件中,但如何将其保存到全局变量或可以作为函数参数传递的局部变量中?我是个新手。

这是我的代码:

import scrapy
from scrapy.crawler import CrawlerProcess
global_var = {}  # Meant to collect the items the spider yields; nothing in this snippet writes to it yet.
class TestSpider(scrapy.Spider):
    """Scrape one item per table row of the national-debt ranking page.

    Every row matched by ``//tbody/tr`` is yielded as a plain dict with the
    keys ``country_name``, ``country_debt``, ``country_population`` and
    ``country_url``.
    """

    name = 'test'
    allowed_domains = ['worldpopulationreview.com']
    start_urls = ['https://worldpopulationreview.com/countries/countries-by-national-debt/']

    def parse(self, response):
        """Yield one dict per ``<tr>`` found inside a ``<tbody>``.

        Cells are read with ``.get()`` and no default, so a cell whose XPath
        matches nothing ends up as ``None`` in the yielded dict.
        """
        for row in response.xpath('//tbody/tr'):
            link = row.xpath('.//td[1]/a/@href').get()
            yield {
                'country_name': row.xpath('.//td[1]/a/text()').get(),
                'country_debt': row.xpath('.//td[2]/text()').get(),
                'country_population': row.xpath('.//td[3]/text()').get(),
                # Resolve the (possibly relative) href against the page URL.
                'country_url': response.urljoin(link),
            }

# Configure two feed exports so the yielded items are written to disk both
# as JSON and as CSV.
process = CrawlerProcess(settings={"FEEDS": {
    "result.json": {"format": "json"},
    "result.csv": {"format": "csv"},
}})
# Schedule the spider, then start the crawl.
process.crawl(TestSpider)
process.start()
from scrapy import Spider

class TestSpider(Spider):
    """Spider that passes every scraped row to a caller-supplied callback.

    Neither ``start_urls`` nor ``output_callback`` is defined on the class.
    Both are expected to arrive as keyword arguments when the crawl is
    scheduled (``Crawler.spawn`` forwards them to ``process.crawl``).
    """

    name = 'test'
    allowed_domains = ['worldpopulationreview.com']

    def parse(self, response):
        """Build one dict per table row and yield what the callback returns.

        When the spider was started without an ``output_callback`` the raw
        item is yielded instead, so the spider also works on its own.
        """
        # getattr with a default avoids an AttributeError when the kwarg
        # was not supplied.
        emit = getattr(self, 'output_callback', None)
        for row in response.xpath('//tbody/tr'):
            link = row.xpath('.//td[1]/a/@href').get()
            item = {
                'country_name': row.xpath('.//td[1]/a/text()').get(),
                'country_debt': row.xpath('.//td[2]/text()').get(),
                'country_population': row.xpath('.//td[3]/text()').get(),
                'country_url': response.urljoin(link),
            }
            # Yield the callback's return value instead of the bare item.
            yield emit(item) if emit is not None else item
from scrapy.crawler import CrawlerProcess

class Crawler:
    """Run ``TestSpider`` in-process and keep the scraped items in memory.

    After ``run()`` returns, the collected items are available as the plain
    list ``scraped_items``, which can be passed around like any other value.
    """

    def __init__(self):
        self.process = CrawlerProcess()
        # Filled by process_item, one entry per item the spider hands over.
        self.scraped_items = []

    def process_item(self, item):
        """Tag *item*, remember it, and return it.

        Plays a role similar to ``process_item`` in an item pipeline. The
        dict is updated in place and the same object is returned.
        """
        item.update({
            'scraped': 'yes'
        })
        self.scraped_items.append(item)
        return item

    def spawn(self, **kwargs):
        """Schedule ``TestSpider`` with ``process_item`` as its callback.

        Any extra keyword arguments (e.g. ``start_urls``) are forwarded
        unchanged to ``process.crawl``.
        """
        self.process.crawl(crawler_or_spidercls=TestSpider,
                           output_callback=self.process_item,
                           **kwargs)

    def run(self):
        """Start the scheduled crawl."""
        self.process.start()
if __name__ == '__main__':
    crawler = Crawler()
    # start_urls is forwarded through spawn() to the spider.
    crawler.spawn(
        start_urls=['https://worldpopulationreview.com/countries/countries-by-national-debt/'])
    crawler.run()
    # The items collected by process_item are now ordinary Python data.
    print(crawler.scraped_items)

输出

[
{
"country_name": None,
"country_debt": None,
"country_population": None,
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": "United States",
"country_population": "29,463,730",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": "Japan",
"country_population": "13,053,658",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": None,
"country_population": "10,115,837",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": None,
"country_population": "3,329,379",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": None,
"country_population": "3,169,955",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": None,
"country_population": "3,039,338",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": None,
"country_population": "2,968,690",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": None,
"country_population": "2,379,040",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": None,
"country_population": "2,243,918",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": None,
"country_population": "1,690,788",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": None,
"country_population": "1,495,729",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": None,
"country_population": "954,634",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": None,
"country_population": "929,584",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": None,
"country_population": "746,964",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": None,
"country_population": "674,167",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": "Singapore",
"country_population": "650,630",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": None,
"country_population": "649,405",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": None,
"country_population": "530,350",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": None,
"country_population": "488,638",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
},
{
"country_name": None,
"country_debt": "Greece",
"country_population": "431,474",
"country_url": "https://worldpopulationreview.com/country-rankings/countries-by-national-debt",
"scraped": "yes",
}
]

process_item 既可以用来处理 item,也可以用来存储 item,非常实用。

最新更新