我自定义了一个转换器(transformer),如下所示:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, QuantileTransformer
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.base import TransformerMixin
class CorrWeight(TransformerMixin):
    """Calculate the weights of metrics based on their correlations, and transform to scores based on the resulting weights.

    NOTE(review): as written, ``fit`` reads the bare names ``offset`` and
    ``max_weight``. They are neither parameters of ``fit`` nor local
    variables, so the first statement of ``fit`` fails with ``NameError``.
    """

    def fit(self, X, y, **fit_params):
        # BUG: `offset` and `max_weight` are not defined in this scope; any
        # keyword arguments given to fit are collected in `fit_params` instead.
        self.offset = offset
        self.max_weight = max_weight
        # Pairwise correlation matrix between the columns (metrics) of X
        corr_mat = pd.DataFrame(X).corr()
        # Sum over all correlations to get an overall correlation score for each metric
        metric_summary = corr_mat.sum().to_frame().rename(columns={0: 'overall correlation'})
        # Assign a weight to each metric that is inversely related to the overall correlation
        # so that the higher the overall correlation, the lower the weight
        if self.max_weight > 1:
            # Scale the overall correlations into [1 / max_weight, 1]; the
            # reciprocal then lies in [1, max_weight]
            metric_summary['weight'] = 1 / MinMaxScaler((1 / self.max_weight, 1)).fit_transform(
                metric_summary[['overall correlation']])
        elif self.max_weight == 1:
            # All metrics weighted equally
            metric_summary['weight'] = 1
        else:
            # Invalid max_weight: report it and fall back to equal weights
            print("Max_weight has to be no smaller than 1!")
            metric_summary['weight'] = 1
        # Keep the per-metric summary (overall correlation and weight) for transform
        self.metric_summary = metric_summary
        return self

    def transform(self, X, **fit_params):
        # Multiply the rescaled metrics together (with the weight being the exponent for each metric)
        # Computed in log space: exp(sum_j weight_j * log(x_j + offset))
        score = np.exp(pd.DataFrame(np.log(X + self.offset) * np.array(self.metric_summary['weight'])).sum(axis=1))
        return pd.DataFrame(score)

    def fit_transform(self, X, y, **fit_params):
        # Fit on X (forwarding any fit parameters), then score the same X
        self.fit(X, y, **fit_params)
        return self.transform(X)
并在管道中使用它:
# Chain the two preprocessing steps, the custom weighting step and a final rescale.
pipeline = Pipeline(steps=[
    ('transformations', QuantileTransformer()),
    ('rescale_metrics', MinMaxScaler()),
    ('weighting', CorrWeight()),
    ('rescale_score', MinMaxScaler()),
])
但是,当我尝试将参数传递给自定义转换器时:
# Keyword names are prefixed with the name of the pipeline step ('weighting')
# that the values are meant for.
params = dict(weighting__offset=2, weighting__max_weight=5)
pipeline.fit(metrics, [], **params)
我收到错误消息,说
NameError: name 'offset' is not defined
传入参数的正确方法是什么?
您必须在构造函数(`__init__`)中设置转换器的参数。即使您之后想调整这些超参数,把它们放在构造函数中也是最好的做法。
试试这个!
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import MinMaxScaler, QuantileTransformer
class CorrWeight(BaseEstimator, TransformerMixin):
    """Weight metrics by their mutual correlation and combine them into one score.

    Each metric (column of ``X``) gets a weight that is inversely related to
    the sum of its correlations with all metrics, so the higher the overall
    correlation, the lower the weight. The score of a sample is the weighted
    product ``prod_j (x_j + offset) ** weight_j``, computed in log space.

    Inheriting from ``BaseEstimator`` provides ``get_params``/``set_params``,
    so the hyperparameters can be changed through a pipeline (for example
    ``pipeline.set_params(weighting__offset=...)``) and the estimator can be
    cloned during hyperparameter search.

    Parameters
    ----------
    offset : float
        Constant added to every metric before the logarithm is taken;
        ``X + offset`` must be positive for the result to be finite.
    max_weight : float
        Largest weight a metric can receive. Expected to be >= 1: with
        ``max_weight > 1`` the weights lie in ``[1, max_weight]``; with any
        other value all weights are 1 (a message is printed if it is not
        exactly 1).

    Attributes
    ----------
    metric_summary : pandas.DataFrame
        Set by ``fit``. One row per metric with the columns
        ``'overall correlation'`` and ``'weight'``.
    """

    def __init__(self, offset, max_weight):
        # Only store the arguments unchanged: get_params/set_params and
        # cloning rely on attributes that mirror the constructor signature.
        self.offset = offset
        self.max_weight = max_weight

    def fit(self, X, y=None):
        """Derive the per-metric weights from the correlations in ``X``.

        ``y`` is accepted for pipeline compatibility and ignored.
        Returns ``self``.
        """
        corr_mat = pd.DataFrame(X).corr()
        # Sum over all correlations to get an overall correlation score for each metric
        metric_summary = corr_mat.sum().to_frame().rename(columns={0: 'overall correlation'})
        # Assign a weight to each metric that is inversely related to the overall correlation
        # so that the higher the overall correlation, the lower the weight
        if self.max_weight > 1:
            # Scale into [1 / max_weight, 1]; the reciprocal lies in [1, max_weight].
            scaled = MinMaxScaler((1 / self.max_weight, 1)).fit_transform(
                metric_summary[['overall correlation']])
            # fit_transform returns an (n_metrics, 1) array; flatten it so a
            # plain 1-D column is assigned.
            metric_summary['weight'] = 1 / scaled.ravel()
        elif self.max_weight == 1:
            metric_summary['weight'] = 1
        else:
            # Invalid value: report it and fall back to equal weights.
            print("Max_weight has to be no smaller than 1!")
            metric_summary['weight'] = 1
        self.metric_summary = metric_summary
        return self

    def transform(self, X, **fit_params):
        """Return a one-column DataFrame with the weighted score of each row of ``X``.

        Requires ``fit`` to have been called. ``fit_params`` is accepted for
        backward compatibility and ignored.
        """
        weights = np.array(self.metric_summary['weight'])
        # Multiply the rescaled metrics together (with the weight being the
        # exponent for each metric): exp(sum_j weight_j * log(x_j + offset)).
        weighted_logs = pd.DataFrame(np.log(X + self.offset) * weights)
        score = np.exp(weighted_logs.sum(axis=1))
        return pd.DataFrame(score)

    def fit_transform(self, X, y=None, **fit_params):
        """Fit on ``X`` and return its scores.

        ``fit`` takes no extra keyword arguments, so passing any
        ``fit_params`` raises ``TypeError`` instead of being silently dropped.
        """
        return self.fit(X, y, **fit_params).transform(X)
pipeline = Pipeline(steps=[
    ('transformations', QuantileTransformer()),
    ('rescale_metrics', MinMaxScaler()),
    # Feed the parameter values when you define the transformer.
    ('weighting', CorrWeight(offset=2, max_weight=5)),
    ('rescale_score', MinMaxScaler()),
])
# The step-specific fit parameters are no longer needed here.
pipeline.fit(np.random.rand(10, 10), [])
如果你确实希望通过 fit_params 传入这些参数,那么可以按以下方式定义 fit 函数:
def fit(self, X, y, offset=None, max_weight=None):
    """Record the hyperparameters received as keyword arguments of ``fit``.

    ``offset`` and ``max_weight`` default to ``None`` and are stored on the
    instance as given. The trailing ``...`` stands for the remainder of the
    fitting logic, which is not shown in this snippet.
    """
    self.offset, self.max_weight = offset, max_weight
    ...