ContentTypeError: 0, message='Attempt to decode JSON with unexpected mimetype: application/octet-stream', url=URL('https://api-reports-prod-usamazon.s3.amazonaws.com/atvpdr-a2vzay-report-data-7aaf8bfa-5cb5-4c76-b430-01d68cd7024b.json.gz?X
当我尝试通过 Python 的 asyncio 方式获取位于 S3 Bucket 中的 gzip 文件时,出现了上面的错误。
同步代码(可以正常运行):
# Synchronous version: download the whole report, then gunzip the body and parse it as JSON.
report = requests.get(location, headers=headers)
data = json.loads(gzip.decompress(report.content))
异步代码(无法运行):
# Fetch `url` through the given aiohttp session and decode the response body as JSON.
async def get_data(session, url):
async with session.get(url,headers=headers) as resp:
data = await resp.json()  # NOTE: this call raises the ContentTypeError quoted at the top: the response mimetype is application/octet-stream, not JSON
return data
# Open one ClientSession, schedule get_data() for the URL as a task and await the gathered results.
async def main(req_url):
async with aiohttp.ClientSession() as session:
tasks = []
url = req_url
tasks.append(asyncio.ensure_future(get_data(session, url)))
data = await asyncio.gather(*tasks)  # NOTE: `data` is never returned, so the caller of main() receives None
# Run main() to completion and print the elapsed wall-clock time.
start_time1 = time.time()
nest_asyncio.apply()  # presumably needed because an event loop is already running (the traceback shows IPython) — TODO confirm
keyword_list = asyncio.run(main(location))
print("--- %s seconds ---" % (time.time() - start_time1))
Thanks in Advance.
我还尝试了下面的写法:
# Second attempt: gunzip and parse the body by hand instead of calling resp.json().
async def get_data(session, url):
async with session.get(url,headers=headers) as resp:
data = json.loads(gzip.decompress(resp.content))  # NOTE: resp.content is a StreamReader, not bytes, which is what the reported TypeError complains about
return data
结果抛出了如下错误:
Traceback (most recent call last):
File "<ipython-input-397-2f3527a7a82e>", line 20, in <module>
keyword_list = asyncio.run(main(location))
File "C:\Users\anaconda3\lib\site-packages\nest_asyncio.py", line 32, in run
return loop.run_until_complete(future)
File "C:\Users\anaconda3\lib\site-packages\nest_asyncio.py", line 70, in run_until_complete
return f.result()
File "C:\Users\anaconda3\lib\asyncio\futures.py", line 178, in result
raise self._exception
File "C:\Users\anaconda3\lib\asyncio\tasks.py", line 280, in __step
result = coro.send(None)
File "<ipython-input-397-2f3527a7a82e>", line 15, in main
data = await asyncio.gather(*tasks)
File "C:\Users\anaconda3\lib\asyncio\tasks.py", line 349, in __wakeup
future.result()
File "C:\Users\anaconda3\lib\asyncio\tasks.py", line 280, in __step
result = coro.send(None)
File "<ipython-input-397-2f3527a7a82e>", line 3, in get_data
data = json.loads(gzip.decompress(resp.content))
File "C:\Users\anaconda3\lib\gzip.py", line 547, in decompress
with GzipFile(fileobj=io.BytesIO(data)) as f:
TypeError: a bytes-like object is required, not 'StreamReader'
"
如果使用同步代码
# Synchronous reference: the body is read as bytes, gunzipped, then parsed as JSON.
report = requests.get(location, headers=headers)
data = json.loads(gzip.decompress(report.content))
那么你应该在异步代码中做类似的操作
深入排查后我发现,这里需要使用 await resp.read(),而不是 resp.content:
async with session.get(url,headers=headers) as resp:
data = json.loads(gzip.decompress(await resp.read()))
return data
另外,你忘了在 main() 中写 return data。
我手头没有可访问的 gzip 压缩 JSON 文件,所以改用 https://httpbin.org/get 返回的 JSON 进行了测试:
import asyncio
import aiohttp
import time
# --- functions ---
async def get_data(session, url):
    """Request *url* with the module-level ``headers`` and return the raw body bytes.

    Args:
        session: An open ``aiohttp.ClientSession`` used to issue the GET.
        url: Address of the resource to download.

    Returns:
        The complete response body as ``bytes``.
    """
    async with session.get(url, headers=headers) as response:
        # Alternatives, depending on what the server sends back:
        #   plain JSON response  -> return await response.json()
        #   gzip-compressed JSON -> return json.loads(gzip.decompress(await response.read()))
        payload = await response.read()
        return payload
async def main(url):
    """Download *url* inside a fresh ``aiohttp.ClientSession``.

    Args:
        url: Address passed through to ``get_data``.

    Returns:
        The list produced by ``asyncio.gather``; it holds a single entry,
        the result of ``get_data`` for *url*.
    """
    async with aiohttp.ClientSession() as client:
        fetch = asyncio.ensure_future(get_data(client, url))
        results = await asyncio.gather(fetch)
    return results
# --- main ---
# Run the download once and report how long it took.
headers = {}  # read as a module-level global by get_data()
location = 'https://httpbin.org/get'  # JSON test endpoint used as a stand-in for the gzip report
start_time = time.time()
keyword_list = asyncio.run(main(location))  # list returned by asyncio.gather inside main()
print(keyword_list)
end_time = time.time()
diff_time = end_time - start_time  # elapsed wall-clock seconds, including the print above
print("---", diff_time, "seconds ---")