我正在编写一个Python脚本从Azure文件共享下载文件。文件共享的结构如下:
/analytics/Part1/file1.txt
/analytics/Part1/file2.txt
/analytics/mainfile.txt
/analytics/Part1/Part1_1/file11.txt
我尝试在脚本中使用以下行,但它只在根目录级别查找文件和目录。
# Build a client for the target share; the account URL, SAS key and share
# name are each looked up in `args` with .get().
fileshareclient = ShareClient(
    account_url=args.get('AccountURL'),
    credential=args.get('SASKey'),
    share_name=args.get('FileShare'),
)

# Materialise the listing of the 'analytics' directory into a list.
# As the output below shows, this covers only that one directory level.
fileLst = list(fileshareclient.list_directories_and_files('analytics'))
输出为:
/analytics/mainfile.txt --> File
/analytics/Part1 --> Dir
但是,我在这里寻找类似Python中的os.walk()
函数来实现这个递归目录遍历。你知道Azure文件服务Python API中是否有这样的功能吗?
Python 版 Azure Storage File Share 客户端库(azure-storage-file-share)内置的 list_directories_and_files()
方法只列出指定目录下的直接子目录和文件,不会递归遍历。如果您想实现类似 os.walk()
的功能,需要自己编写递归方法。
在这里,我写了一个函数,它可以递归地列出所有的文件/目录,它工作得很好(如果它不满足你的需要,请随意修改它):
from azure.storage.fileshare import ShareServiceClient
def list_recursive(directory_client, directory_name):
    """Print the name of every entry beneath a subdirectory, depth-first.

    Parameters
    ----------
    directory_client:
        Client for the parent directory; it must provide
        ``get_subdirectory_client(name)``.
    directory_name:
        Name of the subdirectory (relative to ``directory_client``) to walk.

    Notes
    -----
    Only the bare entry name is printed, not the full path. A directory's
    name is printed before its contents.
    """
    child_client = directory_client.get_subdirectory_client(directory_name)
    for entry in child_client.list_directories_and_files():
        entry_name = entry.get('name')
        print(entry_name)
        if not entry.get('is_directory'):
            continue
        # Descend using the client of the directory that owns this entry.
        list_recursive(child_client, entry_name)
if __name__ == '__main__':
    # Placeholder values: substitute a real connection string, share name
    # and starting directory before running.
    conn_str = "xxxx"
    file_service = ShareServiceClient.from_connection_string(conn_str)
    share_client = file_service.get_share_client("your_share_name")
    d_client = share_client.get_directory_client("your_directory_name")

    # Print the starting directory's own entries, handing each
    # subdirectory to list_recursive for the deeper levels.
    for entry in d_client.list_directories_and_files():
        entry_name = entry.get('name')
        print(entry_name)
        if not entry.get('is_directory'):
            continue
        list_recursive(d_client, entry_name)
老实说,我觉得处理两种不同的方法来做"同样的事情"有点令人困惑。我更喜欢通过from_connection_string
方法实例化目录客户端,如下面的方法所示。
pip install azure-storage-file-share==12.6.0
用于列出 FileShare 中文件的 Python 脚本:
import os
import posixpath
from typing import Iterator, Union

from azure.storage.fileshare import ShareServiceClient, ShareDirectoryClient, ShareFileClient
def list_files(
    dir_path: str,
    share_name: str,
    connection_string: str,
    include_properties: bool = False,
    recursive: bool = True
) -> Iterator[Union[str, dict]]:
    """
    List files from FileShare on Azure Storage Account.

    Parameters
    ----------
    dir_path: str
        Directory path to list files from. Pass an empty string to start
        at the share's root directory.
    share_name: str
        FileShare name.
    connection_string: str
        Connection string.
    include_properties: bool
        Specifies that file properties to be returned in the response.
    recursive: bool
        Specifies whether to list files recursively. When false,
        subdirectories are skipped entirely.

    Yields
    ------
    Union[str, dict]
        If 'include_properties' is false, the file path: 'dir_path' joined
        to the file name with '/'.
        Otherwise, the value returned by
        ``ShareFileClient.get_file_properties()`` for that file.

    Notes
    -----
    This is a generator; nothing is listed until it is iterated.
    This method only lists files. Directories themselves are never
    yielded, so empty directories do not appear in the output.

    References
    ----------
    .. [1] Method 'list_files': https://stackoverflow.com/a/71088269/16109419
    .. [2] Recursive files listing: https://stackoverflow.com/a/66543222/16109419
    """
    dir_client = ShareDirectoryClient.from_connection_string(
        conn_str=connection_string,
        share_name=share_name,
        directory_path=dir_path
    )

    # Listing files from current directory path:
    for file in dir_client.list_directories_and_files():
        name, is_directory = file['name'], file['is_directory']
        # Share paths always use '/', regardless of the host OS, so join with
        # posixpath rather than os.path (which would emit '\\' on Windows).
        path = posixpath.join(dir_path, name)

        if is_directory:
            if recursive:
                # Listing files recursively:
                yield from list_files(
                    dir_path=path,
                    share_name=share_name,
                    connection_string=connection_string,
                    include_properties=include_properties,
                    recursive=recursive
                )
        elif include_properties:
            file_client = ShareFileClient.from_connection_string(
                conn_str=connection_string,
                share_name=share_name,
                file_path=path
            )
            yield file_client.get_file_properties()
        else:
            yield path
文件共享列表示例:
def main() -> None:
    """Print a numbered line for every file found by ``list_files``.

    The connection string and share name are placeholders to be replaced
    before running. Numbering starts at 1.
    """
    conn = "<your-conn-str>"
    share = "<your-share-name>"
    start_dir = ""  # Leave it empty to list files from root directory.

    listing = list_files(
        dir_path=start_dir,
        share_name=share,
        connection_string=conn,
        include_properties=False,
        recursive=True,
    )

    for position, item in enumerate(listing, start=1):
        print(position, item)


if __name__ == '__main__':
    main()