

from azure.identity import DefaultAzureCredential
from azure.keyvault.secrets import SecretClient
from azure.storage.blob import ContainerClient
import pandas as pd
from io import StringIO
# Combine every CSV blob found in a source Azure Blob Storage container into a
# single CSV blob that is written to a target container.

# Authenticate against Azure Key Vault so that the storage connection string
# never has to be hard-coded in this script.
credential = DefaultAzureCredential()
vault_url = 'AzureKeyVaultURL'
secret_client = SecretClient(vault_url=vault_url, credential=credential)
azure_datalake_connection_str = secret_client.get_secret('Datalake_connection_string')

# Client for the source container that holds the individual CSV files.
blob_block_source = ContainerClient.from_connection_string(
    conn_str=azure_datalake_connection_str.value,
    container_name="sourceContainerName",
)

# Client for the target container that receives the combined CSV file.
blob_block_target = ContainerClient.from_connection_string(
    conn_str=azure_datalake_connection_str.value,
    container_name="targetContainerName",
)

# List the blob names in the source container and keep only those that end
# with the .csv file extension.
blobNames = [blob.name for blob in blob_block_source.list_blobs()]
only_csv_blob_names = [name for name in blobNames if name.endswith(".csv")]

# pd.concat() raises an opaque "No objects to concatenate" ValueError on an
# empty list, so fail early with a message that names the actual problem.
if not only_csv_blob_names:
    raise ValueError("No .csv blobs were found in the source container")

# Build one dataframe per CSV blob found in the source container.
listOfCsvDataframes = []
for csv_blobname in only_csv_blob_names:
    # Passing `encoding` to download_blob makes readall() return decoded text
    # rather than bytes, which is what StringIO expects.
    csv_text = blob_block_source.download_blob(csv_blobname, encoding='utf-8').readall()
    df = pd.read_csv(StringIO(csv_text), header=0, low_memory=False)
    # Collect the dataframe; without this the list stays empty and the
    # concatenation below has nothing to combine.
    listOfCsvDataframes.append(df)

# Concatenate the per-file dataframes row-wise into a single dataframe with a
# fresh 0..n-1 index.
df_concat = pd.concat(listOfCsvDataframes, axis=0, ignore_index=True)

# Serialize the combined dataframe to CSV text (header row, no index column).
outputCSV = df_concat.to_csv(index=False, sep=',', header=True)

# Upload the combined CSV to the target container, replacing any existing blob
# with the same name.
blob_block_target.upload_blob('combinedCSV.csv', outputCSV, blob_type="BlockBlob", overwrite=True)

您可以使用Azure数据工厂来合并文件,而不是使用Azure Function。





  • 没有找到相关文章
