如何从python中的excel文件中读取数据,输出中的文件路径为列


import pandas as pd
import numpy as np
import glob
import os
all_data = pd.DataFrame() 
rows = 0
for file in glob.glob("Ranking*.xlsx"):
xls = pd.ExcelFile(file)
sheets = xls.sheet_names
for sheet_name in sheets:
df = pd.read_excel(file, sheet_name='Output',header= 3)
all_data = all_data.append(df, ignore_index = True)
output_tab_data = all_data[['Supplier','Tariff','Region']]
output_tab_data_no_NA = output_tab_data[output_tab_data.Supplier.notnull()]
output_tab_data_no_NA ['file_source_name'] = os.path.abspath(file)
print(output_tab_data_no_NA)    

上面的代码现在生成这些列-"供应商"、"塔里夫"、"地区">

但我希望它包括一个名为file_source_name的EXTRA COLUMN,这是每个加载的excel文件的excel文件名路径

import pandas as pd
import numpy as np
import glob
import os
all_data = pd.DataFrame() #creating an empty data frame
rows = 0
for f in glob.glob("../<path where python is>/*.xlsx"): #import every file that ends in .xls
xls = pd.ExcelFile(file)
sheets = xls.sheet_names # To get names of all the sheets
for sheet_name in sheets:
df = pd.read_excel(file, sheet_name='Output',header= 3) #start copying data from line 4 in each file
df['file_source_name'] = f  #append individual file name/file path
all_data = all_data.append(df, ignore_index = True) #put all the copied data together
output_tab_data = all_data[['Supplier','Tariff','Region','file_source_name']]
output_tab_data_no_NA = output_tab_data[output_tab_data.Supplier.notnull()]
print(output_tab_data_no_NA)    

最新更新