代码:-
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
class Vizualizer:
def __init__(self,data,cols):
self.data=data
self.cols=cols
def box_plot(self):
for col in self.cols:
sns.boxplot(x=col,y='Ex-Showroom_Price',data=self.data[self.data['Ex-Showroom_Price']])
df=pd.read_csv('cars_engage_2022.csv')
cols=['Make','Model','City_Mileage','Highway_Mileage','ARAI_Certified_Mileage','Type','Fuel_Type','Body_Type']
d=Vizualizer(df,cols)
df["Ex-Showroom_Price"] = df["Ex-Showroom_Price"].str.replace(r'.* ([d,]+)+$', r'1',regex=True).str.replace(',', '',regex=True).astype('int32')
d.box_plot()
错误
Traceback (most recent call last):
File "c:UsersshwetaOneDriveDesktopDEMOdemosrccarsengage2022preprocessingroughVIZ.py", line 36, in <module>
d.box_plot()
File "c:UsersshwetaOneDriveDesktopDEMOdemosrccarsengage2022preprocessingroughVIZ.py", line 25, in box_plot
sns.boxplot(x=col,y='Ex-Showroom_Price',data=self.data[self.data['Ex-Showroom_Price']])
File "C:UsersshwetaAppDataLocalProgramsPythonPython39libsite-packagespandascoreframe.py", line 3511, in __getitem__
indexer = self.columns._get_indexer_strict(key, "columns")[1]
File "C:UsersshwetaAppDataLocalProgramsPythonPython39libsite-packagespandascoreindexesbase.py", line 5782, in _get_indexer_strict
self._raise_if_missing(keyarr, indexer, axis_name)
File "C:UsersshwetaAppDataLocalProgramsPythonPython39libsite-packagespandascoreindexesbase.py", line 5842, in _raise_if_missing
raise KeyError(f"None of [{key}] are in the [{axis_name}]")
KeyError: "None of [Int64Index([ 292667, 236447, 296661, 334768, 272223, 314815, 279650,n 351832, 333419, 362000,n ...n
1065900, 1182000, 1312000, 1111000, 1191000, 1302000, 1421000,n 1431000, 1201000, 6862560],n dtype='int64', length=1276)] are
in the [columns]"
我试图在cols中的列和它们的展厅前价格之间绘制一个方框图。Ex Showroom Price的值是分类的,因此,我首先将它们转换为整数,然后我试图绘制方框图,但它抛出了一个错误,Key error:";[Int64Index…]dtype='int64]都不在列"中;。
我认为你需要在周围交换
df["Ex-Showroom_Price"] = df["Ex-Showroom_Price"].str.replace(r'.* ([d,]+)+$', r'1',regex=True).str.replace(',', '',regex=True).astype('int32')
d=Vizualizer(df,cols)
或者确保您正在通过以下方式编辑类中的数据帧:
d=Vizualizer(df,cols)
d.df["Ex-Showroom_Price"] = df["Ex-Showroom_Price"].str.replace(r'.* ([d,]+)+$', r'1',regex=True).str.replace(',', '',regex=True).astype('int32')