使用.loc过滤pandas中的多个列,不区分大小写



我想搜索忽略大小写差异的值。因此,例如,如果我输入'fred',我仍然可以过滤所有包含fred的值,即使F是大写的。

这是我目前拥有的:

def find(**kwargs):
result = data.loc[data.rename(columns={"FirstName": "first",
"LastName": "last", 
"City": "city",
})[list(kwargs.keys())]
.eq(list(kwargs.values())).all(axis=1)]
return result

但是,我意识到我不能在任何时候使用。lower()来强制我传入的字符串和我为

过滤的值都是小写的下面是我的数据样本:

FirstName    LastName   City
Fred           Bob       Austin
Billy          Bob       NYC

当我运行我的函数时,我期望这样:

find('fred')
Output: Fred    Bob  Austin

这里有两种方法,我相信你已经问过了,这是:

  • 根据参数firstlastcity的任意组合对df列FirstNameLastNameCity进行不区分大小写的过滤。

# 1

import pandas as pd
def find(**kwargs):
df = ( data.rename(columns={"FirstName": "first",
"LastName": "last", 
"City": "city",
})[list(kwargs.keys())]
.apply(lambda x: x.str.lower(), axis=1) )
mask = df.eq(list(val.lower() for val in kwargs.values())).all(axis=1)
return data[mask]
data = pd.DataFrame({'FirstName':['Fred','Billy'],'LastName':['Bob','Bob'],'City':['Austin','NYC']})

方法# 2

import pandas as pd
from operator import and_
from functools import reduce
def find(**kwargs):
df = data.rename(columns={"FirstName": "first",
"LastName": "last", 
"City": "city",
})[list(kwargs.keys())]
valsLower = pd.Series([val.lower() for val in kwargs.values()], index=kwargs.keys())
mask = reduce(and_, (df[col].str.lower() == valsLower[col] for col in df.columns))
return data[mask]
data = pd.DataFrame({'FirstName':['Fred','Billy'],'LastName':['Bob','Bob'],'City':['Austin','NYC']})

测试代码:

print( '',"data",data,sep='n' )
print( '',"first='fred'",find(first='fred'),sep='n' )
print( '',"first='fReD'",find(first='fred'),sep='n' )
print( '',"last='bob'",find(last='bob'),sep='n' )
print( '',"city='austin'",find(city='austin'),sep='n' )
print( '',"first='fred', city='austin'",find(first='fred', city='austin'),sep='n' )
print( '',"city='austin', first='fred'",find(first='fred', city='austin'),sep='n' )
print( '',"last='bob', city='austin'",find(last='bob', city='austin'),sep='n' )
print( '',"first='billy', city='austin'",find(first='billy', city='austin'),sep='n' )

样本输出:

data
FirstName LastName    City
0      Fred      Bob  Austin
1     Billy      Bob     NYC
first='fred'
FirstName LastName    City
0      Fred      Bob  Austin
first='fReD'
FirstName LastName    City
0      Fred      Bob  Austin
last='bob'
FirstName LastName    City
0      Fred      Bob  Austin
1     Billy      Bob     NYC
city='austin'
FirstName LastName    City
0      Fred      Bob  Austin
first='fred', city='austin'
FirstName LastName    City
0      Fred      Bob  Austin
city='austin', first='fred'
FirstName LastName    City
0      Fred      Bob  Austin
last='bob', city='austin'
FirstName LastName    City
0      Fred      Bob  Austin
first='billy', city='austin'
Empty DataFrame
Columns: [FirstName, LastName, City]
Index: []
import pandas as pd
data = pd.DataFrame({"FirstName": ['Fred', 'Billy'], 'LastName':['Bob','Bob'], 'City': ['A', 'D']} )

def find(**kwargs):
result = data.loc[data.rename(columns={"FirstName": "first",
"LastName": "last",
"City": "city",
})[list(kwargs.keys())].apply(lambda x: x.str.lower()).eq(list(kwargs.values())).all(axis=1)]
return result
print(find(first='fred'))

返回
FirstName LastName City
0      Fred      Bob    A

使用match函数

import re
from functools import reduce
def find(df, **kwargs):
# Using AND condition. Modify & to | for OR condition.
cond = reduce(lambda prev, x: prev & df[x[0]].str.match(f'{x[1]}', flags=re.IGNORECASE), 
kwargs.items(),
True)
return df[cond]
find(df, FirstName='fre', LastName='bob')
#   FirstName LastName   City
# 0      Fred      Bob Austin

最新更新