输入图片描述
,尽管我使用了虚拟环境。我得到了以下错误:
GitHub回购:
我有最新版本的sklearn
@dataclass
class DataTransformationConfig:
preprocessor_obj_file_path=os.path.join('artifacts','preprocessor.pkl')
class DataTransformation:
def __init__(self):
self.data_tranformation_config=DataTransformationConfig()
def get_transfromation_object(self):
try:
numerical_column = ["writing_score", "reading_score"]
categorical_columns = [
"gender",
"race_ethnicity",
"parental_level_of_education",
"lunch",
"test_preparation_course",
]
num_pipeline = Pipeline(
steps=[
('imputer',SimpleImputer(strategy='median')),
('scaler',StandardScaler())
]
)
cat_pipeline = Pipeline(
steps=[
('imputer',SimpleImputer(strategy='most_frequent')),
('one_hot_encoder',OneHotEncoder()),
('scaler',StandardScaler(with_mean=False))
]
)
logging.info(f'Categorical columns:{categorical_columns}')
logging.info(f'Numeric columns:{numerical_column}')
preprocessor=ColumnTransformer(
[
('num_pipeline',num_pipeline,numerical_column),
('cat_pipeline',cat_pipeline,categorical_columns)
]
)
logging.info(f"WHole pipeline{preprocessor}")
return preprocessor
except Exception as e:
raise CustomException(e)
def initiate_data_tranformation(self,train_path,test_path):
try:
train_df=pd.read_csv(train_path)
test_df=pd.read_csv(test_path)
logging.info('Read train and test data completed')
logging.info('obtaining preprocessing object')
preprocessor_obj=self.get_transfromation_object()
target_column_name="math_score"
numerical_columns = ["writing_score", "reading_score"]
input_feature_train_df=train_df.drop(columns=[target_column_name],axis=1)
target_feature_train_df=train_df[target_column_name]
input_feature_test_df=test_df.drop(columns=[target_column_name],axis=1)
target_feature_test_df=test_df[target_column_name]
logging.info(
f'Applying preprocessing object on train and test dataframe.'
)
input_feature_train_arr = preprocessor_obj.fit_transform(input_feature_train_df)
input_feature_test_arr = preprocessor_obj.transform(input_feature_test_df)
logging.info(f"train shape{input_feature_train_arr.shape} and test shape {input_feature_test_arr.shape}")
train_arr=np.c_[
input_feature_train_arr,np.array(target_feature_train_df)
]
test_arr=np.c_[
input_feature_test_arr,np.array(target_feature_test_df)
]
logging.info(f'saved preprocessing object')
save_objects(
file_path=self.data_tranformation_config.preprocessor_obj_file_path,
obj=preprocessor_obj
)
return (
train_arr,
test_arr,
self.data_tranformation_config.preprocessor_obj_file_path,
)
except Exception as e:
raise CustomException(e,sys)
data_tranformation文件我用python 3.8制作了虚拟环境,因为我在这里构建管道,但是当运行我的Flask应用程序时,我在调用的地方得到了这个错误"预处理器。水谷". 我得到这个错误,而作出预测
Column Transformer对象在Sklearn v1.2.0中获得了_name_to_fitted_passthrough
属性。您看到这个错误是因为:
- 你的pickle是用Sklearn 1.2.0之后的某个版本创建的。
- 您的应用程序环境正在运行某个版本的sklearn 1.2.0之前。
如果您在应用程序加载pickle时检查控制台日志,您可能会看到一条警告消息,告诉您使用了哪个版本的Sklearn来创建pickle,以及当前正在运行哪个版本。
您可以通过将应用程序和训练脚本固定到相同版本的Sklearn来修复它。