TypeError:无法混合str和非str参数


from scrapy import Spider
from scrapy.http import Request

class CourseSpider(Spider):
name = 'course'
allowed_domains = ['coursera.org']
start_urls = ['https://coursera.org/about/partners']
def parse(self, response):
listings = response.xpath('//div[@class="rc-PartnerBox vertical-box"]')
for listing in listings:
title = listing.xpath('.//div[@class="partner-box-wrapper card-one-clicker flex-1"]/p').extract_first()
relative_url = listing.xpath('.//a/@href').extract_first()
absolute_url = response.urljoin(relative_url)
yield Request(response.urljoin(relative_url), callback = self.parse_listing,meta={'title':title,'absolute_url':absolute_url})
def parse_listing(self,response):
titles = response.meta.get('title')
absolute_url = response.meta.get('absolute_url')
titles_course =  response.xpath('//div[@class="name headline-1-text"]/text()').extract()
url_link = response.xpath('//div[@class="rc-Course"]/a/@href').extract()
abs_url = response.urljoin(url_link)
yield {'title':title,
'titles':title,
'absolute_url':absolute_url,
'titles_course':titles_course,
'abs_url':abs_url}

但是,在通过cmd运行脚本时。我犯了错误。这些错误表明我不能混淆str和非str参数,并且我对如何处理这个问题感到困惑。如有任何帮助,我们将不胜感激。

Traceback (most recent call last):
File "c:usersnaman joganianaconda3libsite-packagesscrapyutilsdefer.py", line 117, in iter_errback
yield next(it)
File "c:usersnaman joganianaconda3libsite-packagesscrapyutilspython.py", line 345, in __next__
return next(self.data)
File "c:usersnaman joganianaconda3libsite-packagesscrapyutilspython.py", line 345, in __next__
return next(self.data)
File "c:usersnaman joganianaconda3libsite-packagesscrapycorespidermw.py", line 64, in _evaluate_iterable
for r in iterable:
File "c:usersnaman joganianaconda3libsite-packagesscrapyspidermiddlewaresoffsite.py", line 29, in process_spider_output
for x in result:
File "c:usersnaman joganianaconda3libsite-packagesscrapycorespidermw.py", line 64, in _evaluate_iterable
for r in iterable:
File "c:usersnaman joganianaconda3libsite-packagesscrapyspidermiddlewaresreferer.py", line 338, in <genexpr>
return (_set_referer(r) for r in result or ())
File "c:usersnaman joganianaconda3libsite-packagesscrapycorespidermw.py", line 64, in _evaluate_iterable
for r in iterable:
File "c:usersnaman joganianaconda3libsite-packagesscrapyspidermiddlewaresurllength.py", line 37, in <genexpr>
return (r for r in result or () if _filter(r))
File "c:usersnaman joganianaconda3libsite-packagesscrapycorespidermw.py", line 64, in _evaluate_iterable
for r in iterable:
File "c:usersnaman joganianaconda3libsite-packagesscrapyspidermiddlewaresdepth.py", line 58, in <genexpr>
return (r for r in result or () if _filter(r))
File "c:usersnaman joganianaconda3libsite-packagesscrapycorespidermw.py", line 64, in _evaluate_iterable
for r in iterable:
File "C:UsersNaman JoganiDesktopUdemyudemyspiderscourse.py", line 28, in parse_listing
yield {'title':title,
NameError: name 'title' is not defined
2020-08-05 00:08:48 [scrapy.core.scraper] ERROR: Spider error processing <GET https://www.coursera.org/checkpoint> (referer: https://www.coursera.org/about/partners)
Traceback (most recent call last):
File "c:usersnaman joganianaconda3libsite-packagesscrapyutilsdefer.py", line 117, in iter_errback
yield next(it)
File "c:usersnaman joganianaconda3libsite-packagesscrapyutilspython.py", line 345, in __next__
return next(self.data)
File "c:usersnaman joganianaconda3libsite-packagesscrapyutilspython.py", line 345, in __next__
return next(self.data)
File "c:usersnaman joganianaconda3libsite-packagesscrapycorespidermw.py", line 64, in _evaluate_iterable
for r in iterable:
File "c:usersnaman joganianaconda3libsite-packagesscrapyspidermiddlewaresoffsite.py", line 29, in process_spider_output
for x in result:
File "c:usersnaman joganianaconda3libsite-packagesscrapycorespidermw.py", line 64, in _evaluate_iterable
for r in iterable:
File "c:usersnaman joganianaconda3libsite-packagesscrapyspidermiddlewaresreferer.py", line 338, in <genexpr>
return (_set_referer(r) for r in result or ())
File "c:usersnaman joganianaconda3libsite-packagesscrapycorespidermw.py", line 64, in _evaluate_iterable
for r in iterable:
File "c:usersnaman joganianaconda3libsite-packagesscrapyspidermiddlewaresurllength.py", line 37, in <genexpr>
return (r for r in result or () if _filter(r))
File "c:usersnaman joganianaconda3libsite-packagesscrapycorespidermw.py", line 64, in _evaluate_iterable
for r in iterable:
File "c:usersnaman joganianaconda3libsite-packagesscrapyspidermiddlewaresdepth.py", line 58, in <genexpr>
return (r for r in result or () if _filter(r))
File "c:usersnaman joganianaconda3libsite-packagesscrapycorespidermw.py", line 64, in _evaluate_iterable
for r in iterable:
File "C:UsersNaman JoganiDesktopUdemyudemyspiderscourse.py", line 26, in parse_listing
abs_url = response.urljoin(url_link)
File "c:usersnaman joganianaconda3libsite-packagesscrapyhttpresponsetext.py", line 80, in urljoin
return urljoin(get_base_url(self), url)
File "c:usersnaman joganianaconda3liburllibparse.py", line 504, in urljoin
base, url, _coerce_result = _coerce_args(base, url)
File "c:usersnaman joganianaconda3liburllibparse.py", line 120, in _coerce_args
raise TypeError("Cannot mix str and non-str arguments")
TypeError: Cannot mix str and non-str arguments
2020-08-05 00:08:48 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.coursera.org/casewesternreserve> (referer: https://www.coursera.org/about/partners)
2020-08-05 00:08:48 [scrapy.core.scraper] ERROR: Spider error processing <GET https://www.coursera.org/casewesternreserve> (referer: https://www.coursera.org/about/partners)
Traceback (most recent call last):
File "c:usersnaman joganianaconda3libsite-packagesscrapyutilsdefer.py", line 117, in iter_errback
yield next(it)
File "c:usersnaman joganianaconda3libsite-packagesscrapyutilspython.py", line 345, in __next__
return next(self.data)
File "c:usersnaman joganianaconda3libsite-packagesscrapyutilspython.py", line 345, in __next__
return next(self.data)
File "c:usersnaman joganianaconda3libsite-packagesscrapycorespidermw.py", line 64, in _evaluate_iterable
for r in iterable:
File "c:usersnaman joganianaconda3libsite-packagesscrapyspidermiddlewaresoffsite.py", line 29, in process_spider_output
for x in result:
File "c:usersnaman joganianaconda3libsite-packagesscrapycorespidermw.py", line 64, in _evaluate_iterable
for r in iterable:
File "c:usersnaman joganianaconda3libsite-packagesscrapyspidermiddlewaresreferer.py", line 338, in <genexpr>
return (_set_referer(r) for r in result or ())
File "c:usersnaman joganianaconda3libsite-packagesscrapycorespidermw.py", line 64, in _evaluate_iterable
for r in iterable:
File "c:usersnaman joganianaconda3libsite-packagesscrapyspidermiddlewaresurllength.py", line 37, in <genexpr>
return (r for r in result or () if _filter(r))
File "c:usersnaman joganianaconda3libsite-packagesscrapycorespidermw.py", line 64, in _evaluate_iterable
for r in iterable:
File "c:usersnaman joganianaconda3libsite-packagesscrapyspidermiddlewaresdepth.py", line 58, in <genexpr>
return (r for r in result or () if _filter(r))
File "c:usersnaman joganianaconda3libsite-packagesscrapycorespidermw.py", line 64, in _evaluate_iterable
for r in iterable:
File "C:UsersNaman JoganiDesktopUdemyudemyspiderscourse.py", line 26, in parse_listing
abs_url = response.urljoin(url_link)
File "c:usersnaman joganianaconda3libsite-packagesscrapyhttpresponsetext.py", line 80, in urljoin
return urljoin(get_base_url(self), url)
File "c:usersnaman joganianaconda3liburllibparse.py", line 504, in urljoin
base, url, _coerce_result = _coerce_args(base, url)
File "c:usersnaman joganianaconda3liburllibparse.py", line 120, in _coerce_args
raise TypeError("Cannot mix str and non-str arguments")
TypeError: Cannot mix str and non-str arguments
2020-08-05 00:08:48 [scrapy.core.engine] INFO: Closing spider (finished)

我尝试添加extract((函数,因为它在前面的列表容器上的stackoverflow问题中提到过,以消除该错误,但后来我的xpath没有得到所需的输出。

您正在查找.extract_first()或其新名称.get(),因为.extract()生成一个列表,不能在.urljoin中使用

最新更新