自定义多输入基元报错:"TypeError: issubclass() arg 1 must be a class"



我正在使用 Featuretools 库,尝试基于客户交易数据生成自定义特征。我单独测试过这个函数,它能返回正确的结果,所以我不清楚为什么会出现这个错误。

我参考了以下文档中的做法:https://featuretools.alteryx.com/en/stable/getting_started/primitives.html

谢谢!

from featuretools.primitives import make_agg_primitive
from featuretools.variable_types import DatetimeTimeIndex, Numeric, Categorical
def test_fun(categorical, datetimeindex):

x = pd.DataFrame({'store_name': categorical, 'session_start_time': datetimeindex})
x_mode = list(x['store_name'].mode())[0]
x = x[x['store_name'] == x_mode]
y = x.session_start_time.diff().fillna(pd.Timedelta(seconds=0))/np.timedelta64(1, 's')    

return y.median()

# Wrap test_fun as a custom aggregation primitive that takes a Categorical and
# a DatetimeTimeIndex input.
# NOTE: return_type is wrapped in a list here. According to the answer further
# down, this is what triggers "issubclass() arg 1 must be a class"; it is kept
# as-is because it is the reproduction of the problem.
Test_Fun = make_agg_primitive(function = test_fun,
input_types = [Categorical, DatetimeTimeIndex],
return_type = [Numeric])

# Run deep feature synthesis for the 'customers' entity with the custom
# aggregation primitive. This is the call the traceback below points at.
fm, fd = ft.dfs(
entityset = es,
target_entity = 'customers',

agg_primitives = [Test_Fun],
cutoff_time = lt,  # NOTE(review): `lt` is defined outside this snippet; presumably the cutoff-time table, confirm
cutoff_time_in_index = True,
include_cutoff_time = False,
verbose = True,
)

运行后出现以下错误:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-492-358f980bb6b0> in <module>
20                                 return_type = [Numeric])
21 
---> 22 fm, fd = ft.dfs(
23     entityset = es,
24     target_entity = 'customers',
~\Anaconda3\lib\site-packages\featuretools\utils\entry_point.py in function_wrapper(*args, **kwargs)
38                     ep.on_error(error=e,
39                                 runtime=runtime)
---> 40                 raise e
41 
42             # send return value
~\Anaconda3\lib\site-packages\featuretools\utils\entry_point.py in function_wrapper(*args, **kwargs)
30                 # call function
31                 start = time.time()
---> 32                 return_value = func(*args, **kwargs)
33                 runtime = time.time() - start
34             except Exception as e:
~\Anaconda3\lib\site-packages\featuretools\synthesis\dfs.py in dfs(entities, relationships, entityset, target_entity, cutoff_time, instance_ids, agg_primitives, trans_primitives, groupby_trans_primitives, allowed_paths, max_depth, ignore_entities, ignore_variables, primitive_options, seed_features, drop_contains, drop_exact, where_primitives, max_features, cutoff_time_in_index, save_progress, features_only, training_window, approximate, chunk_size, n_jobs, dask_kwargs, verbose, return_variable_types, progress_callback, include_cutoff_time)
259                                       seed_features=seed_features)
260 
--> 261     features = dfs_object.build_features(
262         verbose=verbose, return_variable_types=return_variable_types)
263 
~\Anaconda3\lib\site-packages\featuretools\synthesis\deep_feature_synthesis.py in build_features(self, return_variable_types, verbose)
287             assert isinstance(return_variable_types, list), msg
288 
--> 289         self._run_dfs(self.es[self.target_entity_id], RelationshipPath([]),
290                       all_features, max_depth=self.max_depth)
291 
~\Anaconda3\lib\site-packages\featuretools\synthesis\deep_feature_synthesis.py in _run_dfs(self, entity, relationship_path, all_features, max_depth)
412         """
413 
--> 414         self._build_transform_features(all_features, entity, max_depth=max_depth)
415 
416         """
~\Anaconda3\lib\site-packages\featuretools\synthesis\deep_feature_synthesis.py in _build_transform_features(self, all_features, entity, max_depth, require_direct_input)
576                 input_types = input_types[0]
577 
--> 578             matching_inputs = self._get_matching_inputs(all_features,
579                                                         entity,
580                                                         new_max_depth,
~\Anaconda3\lib\site-packages\featuretools\synthesis\deep_feature_synthesis.py in _get_matching_inputs(self, all_features, entity, max_depth, input_types, primitive, primitive_options, require_direct_input, feature_filter)
793                              primitive, primitive_options, require_direct_input=False,
794                              feature_filter=None):
--> 795         features = self._features_by_type(all_features=all_features,
796                                           entity=entity,
797                                           max_depth=max_depth,
~\Anaconda3\lib\site-packages\featuretools\synthesis\deep_feature_synthesis.py in _features_by_type(self, all_features, entity, max_depth, variable_type)
768             if (variable_type == variable_types.PandasTypes._all or
769                     f.variable_type == variable_type or
--> 770                     any(issubclass(f.variable_type, vt) for vt in variable_type)):
771                 if max_depth is None or f.get_depth(stop_at=self.seed_features) <= max_depth:
772                     selected_features.append(f)
~\Anaconda3\lib\site-packages\featuretools\synthesis\deep_feature_synthesis.py in <genexpr>(.0)
768             if (variable_type == variable_types.PandasTypes._all or
769                     f.variable_type == variable_type or
--> 770                     any(issubclass(f.variable_type, vt) for vt in variable_type)):
771                 if max_depth is None or f.get_depth(stop_at=self.seed_features) <= max_depth:
772                     selected_features.append(f)
TypeError: issubclass() arg 1 must be a class

我想我明白了。如果有更好的方法,请告诉我!

我不确定为什么文档中的方法不起作用(它使用函数而不是类,并且没有提到类)。

我借鉴了下面这个问题的解决方案,解决了我的问题:

如何获得某个项目所在组的均值,但排除该项目本身?


from featuretools.primitives import AggregationPrimitive
class Test_Fun(AggregationPrimitive):
    """Aggregation primitive: median seconds between sessions at the modal store.

    Takes a Categorical column (store name) and a DatetimeTimeIndex column
    (session start time) and produces a single Numeric value per group.
    """

    name = "test_fun"
    input_types = [Categorical, DatetimeTimeIndex]
    # A single variable type, not a list of types.
    return_type = Numeric
    stack_on_self = False

    def get_function(self):
        """Return the callable that computes this primitive for one group."""

        def median_gap_at_modal_store(categorical, datetimeindex):
            """Return the median gap (seconds) between sessions at the modal store.

            The first session at that store has no predecessor, so its gap is
            counted as 0 seconds. ``nan`` is returned when there is no mode
            (empty or all-missing store names).
            """
            x = pd.DataFrame({'store_name': categorical, 'session_start_time': datetimeindex})

            modes = x['store_name'].mode()
            if modes.empty:
                # Nothing to anchor on; indexing the first mode would raise IndexError.
                return np.nan

            # Keep only the sessions that took place at the most frequent store.
            x = x[x['store_name'] == modes.iloc[0]]

            # diff() leaves NaT on the first row; treat it as a zero-length gap.
            gaps = x['session_start_time'].diff().fillna(pd.Timedelta(seconds=0))

            # Dividing by a one-second timedelta converts the gaps to float seconds.
            return (gaps / np.timedelta64(1, 's')).median()

        return median_gap_at_modal_store

# Run deep feature synthesis for the 'customers' entity, this time passing the
# class-based Test_Fun primitive.
fm, fd = ft.dfs(
    entityset=es,
    target_entity='customers',
    agg_primitives=[Test_Fun],
    cutoff_time=lt,
    cutoff_time_in_index=True,
    include_cutoff_time=False,
    verbose=True,
)

在这段代码中:

# Quoted from the question: return_type is passed as a one-element list here.
Test_Fun = make_agg_primitive(function = test_fun,
input_types = [Categorical, DatetimeTimeIndex],
return_type = [Numeric])

return_type 应设置为 Numeric(变量类型本身),而不是 [Numeric](包含该类型的列表)。

下面这段代码对我有效:

# Corrected call: return_type is the bare Numeric class, not a list.
Test_Fun = make_agg_primitive(
    function=test_fun,
    input_types=[Categorical, DatetimeTimeIndex],
    return_type=Numeric,
)

相关内容

最新更新