KeyError in pandas assign of masked df



我继承了一个代码库,它非常依赖于DataFrame.assign和参数的字典解包,这是我以前没有见过的。

我今天做了一些测试,我想我一定是遇到了一个极端情况,在过去的几个小时里,我一直在寻找一个解释和/或一个解决方案。

我不能共享数据,但我已经设法创建了以下MRE。

import sys
import pandas as pd
print("python", sys.version)
print("pandas", pd.__version__)
# template df for the output format
def template_df():
return pd.DataFrame(
columns=["id", "subid", "empty", "dat1", "dat2", "dat3", "dat4"]
)
# input data looks like this
df = pd.DataFrame({
"id1": [None, None, "0039"],
"id2": ["10", "12", "a1"],
"dat": [601, 482, 890],
})
# filter on id2 like 'a%'
m1 = df["id2"].str.startswith("a")
# start building output with input data and constant data
output = template_df().assign(**{
"id": df.loc[m1, "id2"],
"subid": df.loc[m1, "id1"],
"dat1": df.loc[m1, "dat"],
"dat2": "constant2",
})
# filter for id1 = '0039'
m2 = output["subid"].str.match("0039")
# add data for id1 = '0039' only
output[m2] = output[m2].assign(**{"dat3": "dependent3", "dat4": "dependent4"})

当我执行上面的代码,我得到:

% python soq.py
python 3.9.12 (main, Mar 26 2022, 15:51:15)
[Clang 13.1.6 (clang-1316.0.21.2)]
pandas 1.4.1
Traceback (most recent call last):
File "/path/to/src/dir/soq.py", line 35, in <module>
output[m2] = output[m2].assign(**{"dat3": "dependent3", "dat4": "dependent4"})
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/frame.py", line 3643, in __setitem__
self._setitem_array(key, value)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/frame.py", line 3678, in _setitem_array
self.iloc[indexer] = value
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexing.py", line 716, in __setitem__
iloc._setitem_with_indexer(indexer, value, self.name)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexing.py", line 1688, in _setitem_with_indexer
self._setitem_with_indexer_split_path(indexer, value, name)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexing.py", line 1724, in _setitem_with_indexer_split_path
self._setitem_with_indexer_frame_value(indexer, value, name)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexing.py", line 1813, in _setitem_with_indexer_frame_value
self._setitem_single_column(loc, val, pi)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexing.py", line 1875, in _setitem_single_column
ser = value[np.argsort(pi)]
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/series.py", line 984, in __getitem__
return self._get_with(key)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/series.py", line 1019, in _get_with
return self.loc[key]
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexing.py", line 967, in __getitem__
return self._getitem_axis(maybe_callable, axis=axis)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexing.py", line 1191, in _getitem_axis
return self._getitem_iterable(key, axis=axis)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexing.py", line 1132, in _getitem_iterable
keyarr, indexer = self._get_listlike_indexer(key, axis)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexing.py", line 1327, in _get_listlike_indexer
keyarr, indexer = ax._get_indexer_strict(key, axis_name)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 5782, in _get_indexer_strict
self._raise_if_missing(keyarr, indexer, axis_name)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 5842, in _raise_if_missing
raise KeyError(f"None of [{key}] are in the [{axis_name}]")
KeyError: "None of [Int64Index([0], dtype='int64')] are in the [index]"

您没有正确使用assign()。你应该使用.loc:

output.loc[m2, ['dat3', 'dat4']] = ["dependent3", "dependent4"]

查看更多信息

最新更新