用平均值替换特定列



我想把缺失的 user_score 替换为同一游戏平台(platform)和类型(genre)下 user_score 的平均值。这是我的代码:

# Select the rows of new_df whose user_score is not the string "tbd" AND is null.
# NOTE(review): the isnull() condition keeps only rows with a missing score; if the
# goal is to collect the known scores, notnull() was presumably intended - confirm.
dft = new_df.query('user_score != "tbd" & user_score.isnull()')
# For every (platform, genre) group, take one user_score picked at random by sample(1).
# NOTE(review): sample(1) returns a random element of the group, not the group mean.
df_typical_user_ratio_by_platform = dft.groupby(['platform', 'genre'])['user_score'].apply(lambda x: x.sample(1).iloc[0])
def correct_user_score(row):
    """Return the user score for one row, substituting a looked-up value when it is missing.

    ``row`` must support ``row['platform']``, ``row['genre']`` and
    ``row['user_score']``.

    A score counts as missing when it equals the string ``'tbd'``, is null
    according to ``pd.isnull``, or equals the string ``'nan'``. In that case the
    replacement is read from the module-level ``df_typical_user_ratio_by_platform``
    and returned as a *string*, because it is built with ``", ".join``.
    Otherwise the original ``row['user_score']`` is returned unchanged.
    """
    platform = row['platform']
    genre = row['genre']
    if (row['user_score'] == 'tbd' or pd.isnull(row['user_score']) or row['user_score']=='nan'):
        # NOTE(review): a list passed to .loc selects the labels `platform` and
        # `genre` individually; a (platform, genre) tuple would address a single
        # group of a two-level index - confirm which one is intended.
        u = df_typical_user_ratio_by_platform.loc[[platform, genre]].head(1).astype('float')
        # Joining the stringified values yields a str, so a float NaN comes back
        # as the text 'nan' rather than as a missing value.
        uScore = ", ".join(map(str, u))
    else:
        uScore = row['user_score']

    return uScore
# Build a single row to try the function on.
# NOTE(review): row_values is not defined in this snippet - presumably set earlier.
row = pd.Series(data=row_values, index=['user_score', 'platform', 'genre'])
# Call the function once on that single row.
correct_user_score(row)
# Recompute user_score for the whole frame; axis=1 passes each row to the function.
new_df['user_score'] = new_df.apply(correct_user_score, axis=1)
# Draw 40 random rows to inspect the result.
new_df.sample(40)
# df['user_score'] = df['user_score'].astype('int')

这是结果。user_score 列目前的类型是 object。我不知道该怎么替换 NaN。我试过写 if u = 'nan',但不起作用。有什么建议吗?

https://i.stack.imgur.com/g7AU4.jpg

  • 使用 to_numeric() 将无效值强制转换为 NaN
  • 使用 fillna() 填入您计算出的值
# Demo data: `s` rows of random user ids, platforms and integer scores.
s = 20
user_ids = np.random.randint(1, 5, s)
platforms = np.random.choice(["windows", "macos", "ios", "android"], s)
scores = np.random.randint(1, 10, s)
df = pd.DataFrame({"userid": user_ids, "platform": platforms, "userscore": scores})

# Spoil some scores in the rows with index below 10: np.select swaps in "tbd"
# where the score is 7 and np.nan where it is 6; every other score is kept.
in_first_rows = df.index < 10
spoiled = np.select(
    [(df.userscore == 7) & in_first_rows, (df.userscore == 6) & in_first_rows],
    ["tbd", np.nan],
    df.userscore,
)
df = df.assign(userscore=spoiled)

# Keep a copy of the spoiled column so it can be compared with the repaired one.
df["bad"] = df.userscore

# Step 1: anything that cannot be parsed as a number is coerced to NaN.
numeric_scores = pd.to_numeric(df.userscore, errors="coerce")
df = df.assign(userscore=numeric_scores)

# Step 2: fill each NaN with the mean score of its (userid, platform) group.
group_means = df.groupby(["userid", "platform"])["userscore"].transform("mean")
df.userscore = df.userscore.fillna(group_means)

输出

最新更新