我正在寻找一种更简单的方法来比较多个 DataFrame 的列名,并得到与下面代码类似的输出,或者一种使此函数更简洁的方法。每次我尝试使用 *args 或 **kwargs 来简化函数时,都会得到错误"IndexError: list index out of range"(列表索引超出范围),或者"函数接受 0 个位置参数,但给出了 1 个位置参数"。如有任何建议,不胜感激。
"
# Load every input table into its own DataFrame. The files are read in the
# order listed, using paths relative to the current working directory.
(
    application_df,
    bureau_df,
    bureau_balance_df,
    previous_application_df,
    POS_CASH_balance_df,
    installments_payments_df,
    credit_card_balance_df,
    sample__submission_df,
) = map(
    pd.read_csv,
    (
        "application_train.csv",
        "bureau.csv",
        "bureau_balance.csv",
        "previous_application.csv",
        "POS_CASH_balance.csv",
        "installments_payments.csv",
        "credit_card_balance.csv",
        "sample_submission.csv",
    ),
)
def column_compare(df1, df2=None, df3=None, df4=None, df5=None, df6=None,
                   df7=None, df8=None, *more_dfs):
    """Lay out the column names of several DataFrames side by side.

    Each DataFrame contributes one column to the result. That column is
    headed by the name of the first module-level variable bound to the
    DataFrame and lists the DataFrame's column labels from top to bottom.
    Shorter lists are padded with empty strings so every column has the same
    length.

    Args:
        df1: First DataFrame to inspect.
        df2, df3, df4, df5, df6, df7, df8: Optional further DataFrames.
            Arguments left as ``None`` are skipped.
        *more_dfs: Any number of additional DataFrames beyond the eighth.

    Returns:
        A DataFrame with one column per input and one row per column
        position, holding column labels or ``''`` as padding.

    Note:
        A DataFrame that is not bound to any module-level variable (for
        example a temporary or a local variable) is labelled ``df<position>``
        instead of raising ``IndexError``. Inputs that resolve to the same
        label share a single output column.
    """
    module_vars = globals()
    columns_by_label = {}
    candidates = (df1, df2, df3, df4, df5, df6, df7, df8, *more_dfs)
    for position, frame in enumerate(candidates, start=1):
        if frame is None:
            continue
        # Identity (not equality) lookup: comparing DataFrames with ``==``
        # is element-wise and cannot be used as a truth value.
        label = next(
            (name for name, value in module_vars.items() if value is frame),
            f"df{position}",
        )
        # Iterating a DataFrame yields its column labels.
        columns_by_label[label] = list(frame)

    height = max(map(len, columns_by_label.values()), default=0)
    padded = {
        label: names + [""] * (height - len(names))
        for label, names in columns_by_label.items()
    }
    return pd.DataFrame(padded)
# Show every row when the comparison table is displayed. The full option key
# is used because pandas resolves a partial pattern such as "max_rows" by
# regular-expression search and raises OptionError when it matches more than
# one registered option.
pd.set_option("display.max_rows", None)
column_names = column_compare(
    application_df,
    bureau_df,
    bureau_balance_df,
    previous_application_df,
    POS_CASH_balance_df,
    installments_payments_df,
    credit_card_balance_df,
    sample__submission_df,
)
# Bare expression: in a notebook cell this displays the resulting table.
column_names
"
不确定这是否是您感兴趣的,但是列名可以通过 `df.columns` 访问,所以:
import pandas as pd
# Three empty example frames; only their column labels matter here.
df1 = pd.DataFrame(columns=list('abc'))
df2 = pd.DataFrame(columns=list('abefg'))
df3 = pd.DataFrame(columns=list('bfkm'))

# One single-row frame per input: the row is labelled with the frame's name
# and holds a 1 under each of that frame's column labels.
cols1 = pd.DataFrame(1, index=['df1'], columns=df1.columns)
cols2 = pd.DataFrame(1, index=['df2'], columns=df2.columns)
cols3 = pd.DataFrame(1, index=['df3'], columns=df3.columns)

# Stacking the rows aligns them on the union of all column labels; labels a
# frame does not have come out as NaN, which is turned into 0 below.
df_cols_compare = pd.concat([cols1, cols2, cols3])
df_cols_compare = df_cols_compare.fillna(0).astype('int')
# Resulting table (1 = the frame has that column, 0 = it does not):
#      a  b  c  e  f  g  k  m
# df1  1  1  1  0  0  0  0  0
# df2  1  1  0  1  1  1  0  0
# df3  0  1  0  0  1  0  1  1