根据特定列的子集筛选 pandas DataFrame 的行



如果第 7 列之后的各列不为空,我想保留 pandas DataFrame 中对应的行。但我的代码引发了 TypeError: unhashable type: 'Index':

import pandas as pd
import numpy as np
merged_df = merged_df.dropna(merged_df.columns[7:])

回溯信息(Traceback):

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/tmp/ipykernel_17/1347313789.py in <module>
1 # Drop NA
----> 2 merged_df = merged_df.dropna(merged_df.columns[7:])
3 merged_df
/opt/conda/lib/python3.7/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
309                     stacklevel=stacklevel,
310                 )
--> 311             return func(*args, **kwargs)
312 
313         return wrapper
/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py in dropna(self, axis, how, thresh, subset, inplace)
5942             raise TypeError("supplying multiple axes to axis is no longer supported.")
5943 
-> 5944         axis = self._get_axis_number(axis)
5945         agg_axis = 1 - axis
5946 
/opt/conda/lib/python3.7/site-packages/pandas/core/generic.py in _get_axis_number(cls, axis)
544     def _get_axis_number(cls, axis: Axis) -> int:
545         try:
--> 546             return cls._AXIS_TO_AXIS_NUMBER[axis]
547         except KeyError:
548             raise ValueError(f"No axis named {axis} for object type {cls.__name__}")
TypeError: unhashable type: 'Index'

我认为您需要保留这些列中全部为非缺失值的行——先用 DataFrame.notna 判断,再配合 DataFrame.all:

merged_df[merged_df.iloc[:,7:].notna().all(axis=1)]

如果只需保留这些列中至少有一个非缺失值(非 NaN)的行,则改用 DataFrame.any:

merged_df[merged_df.iloc[:,7:].notna().any(axis=1)]

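编辑:(原代码出错的原因是 dropna 的第一个位置参数是 axis,而不是 subset,因此传入的 Index 被当作 axis 处理而报错;列名必须通过 subset= 关键字传入,见下方最后一个示例。)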

merged_df.insert(0,'a4',4) 
print (merged_df)
a4  a3  a2  a1  a0  a    b    c    d
0   4   3   2   1   0  1  2.0  3.0  4.0
1   4   3   2   1   0  7  NaN  NaN  NaN
2   4   3   2   1   0  7  7.0  9.0  NaN

df1 = merged_df[merged_df.iloc[:,7:].notna().all(axis=1)]
print (df1)
a4  a3  a2  a1  a0  a    b    c    d
0   4   3   2   1   0  1  2.0  3.0  4.0
df2 = merged_df[merged_df.iloc[:,7:].notna().any(axis=1)]
print (df2)
a4  a3  a2  a1  a0  a    b    c    d
0   4   3   2   1   0  1  2.0  3.0  4.0
2   4   3   2   1   0  7  7.0  9.0  NaN
df1 = merged_df.dropna(subset=merged_df.columns[7:].tolist())
print (df1)
a4  a3  a2  a1  a0  a    b    c    d
0   4   3   2   1   0  1  2.0  3.0  4.0

最新更新