我是一名新手,正在尝试用 Python 异步编程抓取一组 URL,并在页面内容中搜索指定的单词。我的代码如下:
async def fetch(session, url):
    """Request ``url`` through ``session`` and return the response body.

    Args:
        session: Object whose ``get(url)`` returns an async context manager
            that yields a response exposing an awaitable ``text()``.
        url: Address to request.

    Returns:
        Whatever awaiting ``response.text()`` produces.
    """
    # The response is only valid inside the context manager, so the body
    # is read before leaving the ``async with`` block.
    async with session.get(url) as response:
        return await response.text()
def parse(wd, html, url):
    """Collect the sentences inside ``<p>`` elements that contain ``wd``.

    Args:
        wd: Text to look for; matched with the ``in`` operator against each
            sentence.
        html: Markup handed to ``bsoup`` with the ``'html.parser'`` backend.
        url: Value stored alongside every matching sentence.

    Returns:
        A list of ``[sentence, url]`` pairs, in document order. Empty when
        nothing matches.
    """
    add_soup = bsoup(html, 'html.parser')
    res = []
    for para in add_soup.find_all("p"):
        # Sentences are approximated by splitting the paragraph text on ".".
        res.extend(
            [sent_txt, url]
            for sent_txt in para.text.split(".")
            if wd in sent_txt
        )
    return res
async def scrape_urls(wd, urls):
    """Run ``fetch_and_parse`` for every entry of ``urls`` concurrently.

    Args:
        wd: Search text forwarded to ``fetch_and_parse``.
        urls: Iterable of addresses to process.

    Returns:
        The list produced by ``asyncio.gather``: one ``fetch_and_parse``
        result per entry of ``urls``.
    """
    # A single ClientSession is shared by every request and is closed when
    # the ``async with`` block exits.
    async with aiohttp.ClientSession() as session:
        pending = [fetch_and_parse(wd, session, url) for url in urls]
        return await asyncio.gather(*pending)
async def fetch_and_parse(wd, session, url):
    """Download ``url`` and return the sentences on it that contain ``wd``.

    Args:
        wd: Search text forwarded to ``parse``.
        session: Session object forwarded to ``fetch``.
        url: Address to download; also forwarded to ``parse``.

    Returns:
        The value returned by ``parse(wd, html, url)``.
    """
    # ``fetch`` is declared as fetch(session, url); it does not take ``wd``.
    html = await fetch(session, url)
    loop = asyncio.get_event_loop()
    # ``parse`` is a plain synchronous function, so it is handed to the
    # loop's default executor. It needs all three of its arguments.
    return await loop.run_in_executor(None, parse, wd, html, url)
上面的代码是我参考这个链接写的,但我不清楚接下来该如何取得结果列表。
我尝试用 co = scrape_urls("agriculture", urls)
来得到结果。正如预期的那样,我得到的是一个协程对象。该如何从这个协程对象中取得实际结果?
我不完全确定您具体遇到了什么问题。scrape_urls 内部已经用 gather 汇总了各个任务;
调用它得到协程对象后,需要把它交给事件循环来执行,才能获取结果:
# Drive the coroutine to completion on an event loop and show its result.
loop = asyncio.get_event_loop()
group = scrape_urls("agriculture", urls)
try:
    results = loop.run_until_complete(group)
finally:
    # Close the loop even when the scrape raises.
    loop.close()
print(results)