根据第三个 DataFrame(df3)中定义的条件比较 df1 与 df2 的列数据,并从 df2 的最后一列取出匹配行的值



我的 df1 如下:

srno col1 col2 col3 col4
1    a1   a2   a3
2    b1   c2   c3
3    d1   b2
4    e1   e2   e3
# Remove df1's existing 'col4'; it is rebuilt further down from df2's values.
# Leaving it in place would make each merge below produce suffixed
# 'col4_x' / 'col4_y' columns instead of a plain 'col4'.
del df1['col4']
# Walk the conditions in df3 in row order. Each row supplies a priority label
# and a comma-separated list of key columns; df2's 'col4' is left-merged onto
# df1 using those key columns, so unmatched df1 rows get NaN.
for _, row in df3.iterrows():
    priority = row['priority']
    col_list = row['col_combination'].split(',')
    df1 = pd.merge(df1, df2[col_list + ['col4']], on=col_list, how='left')
    # Rename the freshly merged 'col4' to this row's priority label so the
    # next iteration can merge another 'col4' without a name collision.
    df1 = df1.rename(columns={'col4': priority})

print(df1)
# Sample output for the example data:
#    srno col1 col2 col3    1    2
# 0     1   a1   a2   a3   g1  NaN
# 1     2   b1   c2   c3  NaN   g3
# 2     3   d1   b2  NaN  NaN   g2
# 3     4   e1   e2   e3  NaN  NaN
# Column labels created by the merge step, in df3 row order.
priority_list = df3['priority'].tolist()
# Start from the first priority column, then fill only its missing values from
# each following column. combine_first keeps the caller's non-null values, so
# columns that come earlier in df3 take precedence.
obj = df1[priority_list[0]]
for priority in priority_list[1:]:
    obj = obj.combine_first(df1[priority])
# Store the coalesced result back on df1 as 'col4'.
df1['col4'] = obj
print(df1)
# Sample output for the example data:
#    srno col1 col2 col3    1    2 col4
# 0     1   a1   a2   a3   g1  NaN   g1
# 1     2   b1   c2   c3  NaN   g3   g3
# 2     3   d1   b2  NaN  NaN   g2   g2
# 3     4   e1   e2   e3  NaN  NaN  NaN

# The per-priority helper columns are no longer needed once 'col4' is built.
df1.drop(priority_list, axis=1, inplace=True)

相关内容

最新更新