我有12个数据帧(july_df、aug_df……june_df)。
它们都共享相同的列名,我想按第一列 "MINISTRY" 将它们合并,并用各个 df 的名称作为后缀重命名其余所有列。
例如,输出可能看起来像:
Ministry item_jul item_aug.......item_jun item2_jul item2_aug.....item2_jun
xyz 1 10 12 11 22 11
abc
你可以试试这个:
import pandas as pd
def get_df_name(df):
    """Return the name of the first module-level variable bound to *df*.

    Every entry of this module's ``globals()`` is compared to *df* by
    identity (``is``); the first match in the dict's iteration order is
    returned. If several globals refer to the same object, only the first
    one encountered is reported.

    Args:
        df: The object whose global variable name is wanted.

    Returns:
        The name of the first global that refers to the same object.

    Raises:
        IndexError: If no global variable refers to *df* (for example when
            a copy of the original object is passed in).
    """
    for name, value in globals().items():
        if value is df:
            return name
    # Same exception type as indexing an empty match list, but with a
    # message that says what actually went wrong.
    raise IndexError("no global variable is bound to the given object")
# Toy dataframes
june = pd.DataFrame(
    {
        "ministry": ["abc", "def", "ghi", "jkl"],
        "item": [1, 2, 3, 4],
        "item2": [5, 6, 7, 8],
        "item3": [9, 10, 11, 12],
    }
)
july = pd.DataFrame(
    {
        "ministry": ["abc", "def", "ghi", "jkl"],
        "item": [13, 14, 15, 16],
        "item2": [17, 18, 19, 20],
        "item3": [21, 22, 23, 24],
    }
)
august = pd.DataFrame(
    {
        "ministry": ["abc", "def", "ghi", "jkl"],
        "item": [25, 26, 27, 28],
        "item2": [29, 30, 31, 32],
        "item3": [33, 34, 35, 36],
    }
)
dfs = [june, july, august]


def _with_name_suffix(df, suffix, key="ministry"):
    """Return a copy of *df* whose non-key columns end in ``_<suffix>``.

    The input frame is left untouched, so the rename can be re-run without
    stacking suffixes (``item_june_june``).
    """
    return df.rename(
        columns=lambda col: col if col == key else f"{col}_{suffix}"
    )


# Merge dataframes on "ministry" after renaming columns.
# Each name is looked up on the ORIGINAL object: the renamed copy is a new
# object that is not bound to any global, so get_df_name() could not find it.
renamed = [_with_name_suffix(df, get_df_name(df)) for df in dfs]

merged_dfs = renamed[0]
for other in renamed[1:]:
    # Default (inner) join: only ministries present in every frame are kept.
    merged_dfs = merged_dfs.merge(other, on="ministry")
print(merged_dfs)
# Outputs
ministry item_june item2_june ... item_august item2_august item3_august
0 abc 1 5 ... 25 29 33
1 def 2 6 ... 26 30 34
2 ghi 3 7 ... 27 31 35
3 jkl 4 8 ... 28 32 36