import pandas as pd
test_csv = """
time,val1,what_new_val1_should_be
2004-07-21 09:00:00,apple,1
2004-07-21 10:00:00,N,
2004-07-21 11:00:00,pear,2
2004-07-21 12:00:00,apple,1
2004-07-21 13:00:00,bread,3
2004-07-21 13:00:00,pear,2
2004-07-21 13:00:00,,
2004-07-21 13:00:00,,
"""
from io import StringIO
test_csv = StringIO(test_csv)
df = pd.read_csv(test_csv)
def coded_val(df):
"""
Create a new column "new_val1" that has an integer responding to the wor din val1
:param df: dataframe. A pandas dataframe with column val1 where the values are food items or N for none or lank for none
:return: daraframe. A pandas dataframe with a new column "new_val1"
"""
replacement_dict = {
'apple': 1,
'pear': 2,
'bread': 3
}
df['new_val1'] = df['val1'].replace(to_replace=replacement_dict, inplace=False)
return df
df = coded_val(df=df)
print(df)
如何让我的函数生成我想要的值?
time val1 what_new_val1_should_be new_val1
0 2004-07-21 09:00:00 apple 1.0 1
1 2004-07-21 10:00:00 N NaN N
2 2004-07-21 11:00:00 pear 2.0 2
3 2004-07-21 12:00:00 apple 1.0 1
4 2004-07-21 13:00:00 bread 3.0 3
5 2004-07-21 13:00:00 pear 2.0 2
6 2004-07-21 13:00:00 NaN NaN NaN
7 2004-07-21 13:00:00 NaN NaN NaN
更改为map
df['val1'].map(replacement_dict)
Out[44]:
0 1.0
1 NaN
2 2.0
3 1.0
4 3.0
5 2.0
6 NaN
7 NaN
Name: val1, dtype: float64