通过 URL 从 Google Drive 下载 PDF 文件(文件已公开,无访问限制)


import os 
import requests
def download_file(download_url: str, filename: str):
    """
    Download a resume PDF from a URL and save it to local storage.

    The response body is streamed to disk in chunks, so the whole file is
    never held in memory. The target file is only created after the server
    has answered with a success status.

    @param download_url: URL of the resume to be downloaded
    @type download_url: str
    @param filename: Name and location of the file to be stored, without
        extension (".pdf" is appended)
    @type filename: str
    @return: None
    @rtype: None
    @raise requests.HTTPError: if the server answers with a 4xx/5xx status
    @raise requests.Timeout: if the server does not respond in time
    """
    # requests applies no timeout by default; without one a stalled server
    # would block this call forever.
    with requests.get(download_url, stream=True, timeout=30) as file_request:
        # Fail loudly instead of saving an error page under a .pdf name.
        file_request.raise_for_status()
        with open(f'{filename}.pdf', 'wb') as file:
            for chunk in file_request.iter_content(chunk_size=64 * 1024):
                file.write(chunk)
# First download: a plain, directly served PDF.
cand_id = "101"
time_current = "801"
file_location = f"{cand_id}_{time_current}"
download_file("https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf", file_location)

# Second download: a file shared through Google Drive.
cand_id = "201"
time_current = "901"
# Rebuild the target name from the new ids; otherwise this download would
# silently overwrite the first file instead of creating 201_901.pdf.
file_location = f"{cand_id}_{time_current}"
# NOTE(review): this is a Drive "/file/d/<id>/view" link, which presumably
# serves the HTML viewer page rather than the PDF bytes - confirm and switch
# to a direct-download URL if so.
download_file("https://drive.google.com/file/d/0B1HXnM1lBuoqMzVhZjcwNTAtZWI5OS00ZDg3LWEyMzktNzZmYWY2Y2NhNWQx/view?hl=en&resourcekey=0-5DqnTtXPFvySMiWstuAYdA", file_location)
----------
  • 第一个文件工作完全正常(即101_801.pdf)
  • 但是第二个文件(即 201_901.pdf)无法在任何 PDF 阅读器中打开(错误:我们无法打开此文件)。
  • 我的理解是:我没有正确地从这个对所有人公开的 Google Drive 链接读取并写入文件。应该如何正确地读取和写入该文件?
  • 我可以使用 Google Drive API,但有没有不使用该 API 的更好的解决方案?

我试了试这段代码,同样无法打开下载得到的 PDF 文件。我建议试试 gdown 包。它很容易使用,可以从 Google Drive 下载文件,甚至是大文件。我曾在课程作业中用它下载过 .sql 数据库文件(约 20 GB)。

如果你想在此代码上构建更多,那么你可能应该查看Drive API。它是一个文档完备的快速API。

我能够通过python中的wget找到解决方案。回答这个问题,以便将来可以帮助别人。

import os
import wget
def download_candidate_resume(email: str, resume_url: str):
    """
    Download a candidate's resume from Google Drive and store it on the
    local system.

    The file is saved in the current working directory as
    "<email>_<unix timestamp>.pdf". A Drive link of the form
    https://drive.google.com/file/d/<id>/... is rewritten to the
    "uc?export=download&id=<id>" form before downloading; any other URL is
    handed to wget unchanged.

    @param email: candidate email
    @type email: str
    @param resume_url: url of resume on google drive
    @type resume_url: str
    @return: None
    @rtype: None
    """
    # Imported locally: the surrounding module only imports os and wget.
    import re
    from datetime import datetime

    file_extension = "pdf"
    current_time = datetime.now()
    file_name = f'{email}_{int(current_time.timestamp())}.{file_extension}'
    temp_file_path = os.path.join(os.getcwd(), file_name)

    # Capture the file id (everything up to the next "/", "?" or "#") and
    # drop the rest of the link. The dots are escaped so they match
    # literally, and the replacement uses the \1 backreference so the
    # captured id - not a literal "1" - ends up in the download URL.
    downloadable_resume_url = re.sub(
        r"https://drive\.google\.com/file/d/([^/?#]+).*",
        r"https://drive.google.com/uc?export=download&id=\1",
        resume_url,
    )
    wget.download(downloadable_resume_url, out=temp_file_path)

最新更新