将数据与异步HTTP请求结合使用



我已经从此处编辑了此代码:

import asyncio
import time
from aiohttp import ClientPayloadError
from aiohttp import ClientSession
# 1-based sequence number that fetch() will print for the next response.
COUNTER = 1


async def fetch(url, session):
    """GET *url* through *session* and return the response body as text.

    Prints one numbered line per response showing its DATE header, final
    URL and DELAY header.

    Args:
        url: Address to request.
        session: Object whose ``get(url)`` returns an async context manager
            yielding the response (an aiohttp ``ClientSession`` in this file).

    Returns:
        The decoded body, or ``None`` when reading the body raised
        ``ClientPayloadError``.
    """
    global COUNTER
    async with session.get(url) as response:
        delay = response.headers.get("DELAY")
        date = response.headers.get("DATE")
        # Log with the current number first, then advance, so the first
        # response is reported as 1 rather than 2.
        print("{}. {}:{} with delay {}".format(COUNTER, date, response.url, delay))
        COUNTER += 1
        try:
            return await response.text()
        except ClientPayloadError:
            # The format string needs a placeholder, otherwise the failing
            # URL is silently dropped from the message.
            print("ERROR: {}".format(url))
            return None

async def bound_fetch(sem, url, session):
    """Run fetch() for *url* while holding the semaphore *sem*.

    Args:
        sem: Semaphore acquired for the duration of the request.
        url: Address passed through to fetch().
        session: Client session passed through to fetch().

    Returns:
        Whatever fetch() returns for this URL.
    """
    async with sem:
        # Hand the result back instead of discarding it, so a caller that
        # gathers these tasks actually receives the fetched data.
        return await fetch(url, session)

async def run():
    """Request every URL built from load_ids() concurrently.

    Returns:
        The list produced by ``asyncio.gather`` over the bound_fetch tasks,
        ordered like the ids returned by load_ids().
    """
    urls = [build_url(seq_id) for seq_id in load_ids()]
    # One semaphore shared by all tasks; bound_fetch holds it per request.
    sem = asyncio.Semaphore(1000)
    # A single client session is shared by every request.
    # NOTE(review): conn_timeout/read_timeout are deprecated in newer aiohttp
    # releases in favour of timeout=ClientTimeout(...) - confirm against the
    # installed version before changing.
    async with ClientSession(conn_timeout=10000, read_timeout=10000) as session:
        # Pass the semaphore and the session to every GET request.
        tasks = [
            asyncio.ensure_future(bound_fetch(sem, url, session))
            for url in urls
        ]
        # Return the gathered results so the caller can process or store them.
        return await asyncio.gather(*tasks)
def build_url(id):
    """Return the Refseq annotation-sequence URL for the given id."""
    endpoint = 'http://api.metagenomics.anl.gov/annotation/sequence/{}?source=Refseq'
    return endpoint.format(id)
def load_ids():
    """Return the hard-coded sample ids as a list of strings."""
    # Placeholder: the "real" code will load a file here and extract the ids.
    raw_ids = """
    mgm4558908.3
    mgm4484962.3
    mgm4734169.3
    mgm4558911.3
    mgm4484983.3
    mgm4558918.3
    """
    # split() with no arguments ignores the surrounding whitespace and newlines.
    return raw_ids.split()

# Time the whole run with a monotonic wall clock. time.clock() was removed in
# Python 3.8 and reported CPU time on Unix, which ignores time spent waiting
# on the network.
start = time.perf_counter()
loop = asyncio.get_event_loop()
future = asyncio.ensure_future(run())
loop.run_until_complete(future)
# Elapsed time is end minus start; the reverse order yields a negative value.
run_time = (time.perf_counter() - start) / 60
print("this took: {} minutes".format(run_time))

我知道可以使用 print(await response.text()) 打印响应数据,但我不熟悉异步代码,因此不清楚应该如何以及在何处打开文件并写入数据。我猜其中涉及某种线程机制,多个任务同时写入同一文件可能会引起问题(我熟悉的是多进程)。

async 既不是多进程(multiprocessing),也不是多线程(threading)。在您的情况下,可以尝试类似这样的写法:

# `async for` is only valid inside a coroutine, so the loop is wrapped in one.
# Assumes run() is written as an async generator that yields each response
# body as a string - TODO confirm against the actual run() implementation.
async def write_responses(file):
    # Bind the handle with `as f`; without it `f` is undefined in the loop.
    with open(file, "w") as f:
        async for s in run():
            f.write(s)

另外,您可以尝试使用 aiofiles 或 curio 进行异步文件 I/O。

最新更新