为什么 scrapy 不调用函数?



为什么不工作elseparse_cat? 我需要返回一步,如果search_prod变量为空。一般来说,这是可能的吗?我错在哪里?

def parse_sub_cat(self, response):
for i in response.xpath('//a[@data-track-action="CategoryOverviewNavigation"]'):
goods_url = response.urljoin(i.xpath("./@href").get())
product_parent = i.xpath("./text()").get()
yield response.follow(
url=goods_url,
callback=self.parse_cat,
cb_kwargs=dict(product_parent=product_parent)
)
def parse_cat(self, response, **cb_kwargs):
search_prod = response.css('article.m-grid-tile').css('div.m-grid-tile--image')
if search_prod is not None:
for i in search_prod:
ulr_product = response.urljoin(i.css('a::attr("href")').get())
cb_kwargs['model'] = i.css('a::attr("aria-label")').get()
yield response.follow(
url=ulr_product,
callback=self.parse_product,
cb_kwargs=cb_kwargs
)
else:
yield scrapy.Request(
url=response.url,
callback=self.parse_sub_cat,
)

你必须把你的scrapy.Request变成yield。试着

def parse_page(self, response, **cb_kwargs):
show_all = response.css('a.button.btn-small::attr("href")').get()
if show_all is not None:
yield scrapy.Request(url=show_all, callback=self.parse_page)
prod_list = response.css('ol.products.list.items.product-items')
for url in prod_list.css('a.product.photo.product-item-photo::attr("href")').getall():
yield response.follow(
url=response.urljoin(url),
callback=self.parse_product,
cb_kwargs=cb_kwargs
)
next_page = response.xpath('//a[@title="Next"]/@href').get()
if next_page is not None:
yield scrapy.Request(url=next_page, callback=self.parse_page)

最新更新