>假设我有一个包含以下内容的 df:cf_key是列表,CF_ref是我希望cf_key值去的地方。因此,例如,我想检查 df[Operating_Activity].lower(( 是否在 cf_key.lower(( 中,如果是,请将cf_key附加到CF_ref。
所以Net income.lower((在cf_key.lower((中,因此CF_ref[1]应该=="净收入"。
我无法让它工作,并且已经用尽了到目前为止我在 SO 上找到的所有方法。
CF_ref Operating_Activity Dec. 31, 2010 Dec. 31, 2009 Dec. 31, 2008 cf_key
0 NaN Operating activities NaN NaN NaN Net Income
1 NaN Net income 78716.0 52810.0 110316.0 Depreciation and Amortization
2 NaN Adjustments to reconcile net income to net cas... NaN NaN NaN Deferred Income Taxes
3 NaN Income from discontinued operations, net of in... NaN NaN -5784.0 Stock Based Compensation
4 NaN Depreciation and amortization 27728.0 24905.0 24638.0 Other Non-Cash Items
5 NaN Gain on disposal of discontinued operations, n... NaN NaN -44127.0 Change In Working Capital
6 NaN Provision for doubtful accounts 40.0 516.0 72.0 Cash Provided By Operating Activites
7 NaN Deferred income taxes 2662.0 573.0 -5714.0 Tangible Capital Expenditure
8 NaN Excess tax benefits from stock based compensation -759.0 -225.0 -375.0 Intangible Capital Expenditure
使用df.apply(function, axis=1)
或df['Operating_Activity'].apply(function)
您可以为每一行运行函数,此函数可以将该行中的项目与完整df["cf_key"]
进行比较,并返回可以分配给列df['Operating_Activity']
或NaN
列df['CF_ref']
def convert(row):
if any(row['Operating_Activity'].lower() == df['cf_key'].str.lower()):
return row['Operating_Activity']
else:
return row['CF_ref']
df['CF_ref'] = df.apply(convert, axis=1)
或
def convert(item):
if any(item.lower() == df['cf_key'].str.lower()):
return item
else:
return np.NaN #'NaN'
df['CF_ref'] = df['Operating_Activity'].apply(convert)
<小时 />最少的工作代码
text = ''' CF_ref Operating_Activity Dec. 31, 2010 Dec. 31, 2009 Dec. 31, 2008 cf_key
0 NaN Operating activities NaN NaN NaN Net Income
1 NaN Net income 78716.0 52810.0 110316.0 Depreciation and Amortization
2 NaN Adjustments to reconcile net income to net cas... NaN NaN NaN Deferred Income Taxes
3 NaN Income from discontinued operations, net of in... NaN NaN -5784.0 Stock Based Compensation
4 NaN Depreciation and amortization 27728.0 24905.0 24638.0 Other Non-Cash Items
5 NaN Gain on disposal of discontinued operations, n... NaN NaN -44127.0 Change In Working Capital
6 NaN Provision for doubtful accounts 40.0 516.0 72.0 Cash Provided By Operating Activites
7 NaN Deferred income taxes 2662.0 573.0 -5714.0 Tangible Capital Expenditure
8 NaN Excess tax benefits from stock based compensation -759.0 -225.0 -375.0 Intangible Capital Expenditure'''
import pandas as pd
import numpy as np
import io
# --- read data ---
df = pd.read_csv(io.StringIO(text), sep='s{2,}')
#df['lower_cf_key'] = df['cf_key'].str.lower()
#print(df['lower_cf_key'])
# --- function #1 ---
def convert(row):
#if any(row['Operating_Activity'].lower() == df['lower_cf_key']):
if any(row['Operating_Activity'].lower() == df['cf_key'].str.lower()):
return row['Operating_Activity']
else:
return row['CF_ref']
df['CF_ref'] = df.apply(convert, axis=1)
print(df['CF_ref'])
# --- function #2 ---
def convert(item):
if any(item.lower() == df['cf_key'].str.lower()):
return item
else:
return np.NaN #'NaN'
df['CF_ref'] = df['Operating_Activity'].apply(convert)
print(df['CF_ref'])
df["CF_ref"]
的结果
0 NaN
1 Net income
2 NaN
3 NaN
4 Depreciation and amortization
5 NaN
6 NaN
7 Deferred income taxes
8 NaN
Name: CF_ref, dtype: object
<小时 />编辑:
我不知道你为什么把它放在第cf_key
列.我会为此创建单独的字典:
data = {
'net income': 'Net income',
'deferred income taxes': 'Deferred income taxes',
'revenue': 'Revenue',
'net sales': 'Revenue',
'depreciation': 'Depreciation and Amortization',
'amortization': 'Depreciation and Amortization',
'amortisation': 'Depreciation and Amortization',
'depreciation and amortisation': 'Depreciation and Amortization',
'depreciation and amortization': 'Depreciation and Amortization',
}
def convert(item):
return data.get(item.lower(), np.NaN)
或
data = {
'Net income': [
'net income',
],
'Deferred income taxes': [
'deferred income taxes',
],
'Revenue': [
'revenue',
'net sales',
],
'Depreciation and amortization': [
'depreciation',
'amortization',
'amortisation',
'depreciation and amortisation',
'depreciation and amortization',
],
}
def convert(item):
for key, values in data.items():
if item.lower() in values:
return key
return np.NaN
最小工作示例
text = ''' CF_ref Operating_Activity Dec. 31, 2010 Dec. 31, 2009 Dec. 31, 2008 cf_key
0 NaN Operating activities NaN NaN NaN Net Income
1 NaN Net income 78716.0 52810.0 110316.0 Depreciation and Amortization
2 NaN Adjustments to reconcile net income to net cas... NaN NaN NaN Deferred Income Taxes
3 NaN Income from discontinued operations, net of in... NaN NaN -5784.0 Stock Based Compensation
4 NaN Depreciation and amortization 27728.0 24905.0 24638.0 Other Non-Cash Items
5 NaN Gain on disposal of discontinued operations, n... NaN NaN -44127.0 Change In Working Capital
6 NaN Provision for doubtful accounts 40.0 516.0 72.0 Cash Provided By Operating Activites
7 NaN Deferred income taxes 2662.0 573.0 -5714.0 Tangible Capital Expenditure
8 NaN Excess tax benefits from stock based compensation -759.0 -225.0 -375.0 Intangible Capital Expenditure
9 NaN Revenue -759.0 -225.0 -375.0 Intangible Capital Expenditure
10 NaN Net Sales -759.0 -225.0 -375.0 Intangible Capital Expenditure'''
import pandas as pd
import numpy as np
import io
# --- read data ---
df = pd.read_csv(io.StringIO(text), sep='s{2,}')
#df['lower_cf_key'] = df['cf_key'].str.lower()
#print(df['lower_cf_key'])
# --- function #1 ---
data = {
'net income': 'Net income',
'deferred income taxes': 'Deferred income taxes',
'revenue': 'Revenue',
'net sales': 'Revenue',
'depreciation': 'Depreciation and Amortization',
'amortization': 'Depreciation and Amortization',
'amortisation': 'Depreciation and Amortization',
'depreciation and amortisation': 'Depreciation and Amortization',
'depreciation and amortization': 'Depreciation and Amortization',
}
def convert(item):
return data.get(item.lower(), np.NaN)
df['CF_ref'] = df['Operating_Activity'].apply(convert)
print(df['CF_ref'])
# --- function #2---
data = {
'Net income': [
'net income',
],
'Deferred income taxes': [
'deferred income taxes',
],
'Revenue': [
'revenue',
'net sales',
],
'Depreciation and amortization': [
'depreciation',
'amortization',
'amortisation',
'depreciation and amortisation',
'depreciation and amortization',
],
}
def convert(item):
for key, values in data.items():
if item.lower() in values:
return key
return np.NaN
df['CF_ref'] = df['Operating_Activity'].apply(convert)
print(df['CF_ref'])