我正在使用 Featuretools 库,尝试基于客户交易数据生成一个自定义特征。我单独测试过这个函数,它能返回结果,所以不明白为什么会出现下面的错误。
我参考的是以下文档:https://featuretools.alteryx.com/en/stable/getting_started/primitives.html
谢谢!
from featuretools.primitives import make_agg_primitive
from featuretools.variable_types import DatetimeTimeIndex, Numeric, Categorical
def test_fun(categorical, datetimeindex):
x = pd.DataFrame({'store_name': categorical, 'session_start_time': datetimeindex})
x_mode = list(x['store_name'].mode())[0]
x = x[x['store_name'] == x_mode]
y = x.session_start_time.diff().fillna(pd.Timedelta(seconds=0))/np.timedelta64(1, 's')
return y.median()
# Wrap test_fun as an aggregation primitive taking a Categorical and a
# DatetimeTimeIndex input.
# NOTE(review): return_type is passed as a list ([Numeric]) here; the
# traceback below fails in issubclass() with "arg 1 must be a class", which
# suggests it should be the bare class Numeric instead.
Test_Fun = make_agg_primitive(function = test_fun,
input_types = [Categorical, DatetimeTimeIndex],
return_type = [Numeric])
# Run deep feature synthesis for the 'customers' entity with the custom
# primitive as the only aggregation primitive. This is the call that raises
# the TypeError in the traceback below.
# `ft`, `es` and `lt` are defined elsewhere (not shown in this snippet).
fm, fd = ft.dfs(
entityset = es,
target_entity = 'customers',
agg_primitives = [Test_Fun],
cutoff_time = lt,
cutoff_time_in_index = True,
include_cutoff_time = False,
verbose = True,
)
这会导致以下错误:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-492-358f980bb6b0> in <module>
20 return_type = [Numeric])
21
---> 22 fm, fd = ft.dfs(
23 entityset = es,
24 target_entity = 'customers',
~\Anaconda3\lib\site-packages\featuretools\utils\entry_point.py in function_wrapper(*args, **kwargs)
38 ep.on_error(error=e,
39 runtime=runtime)
---> 40 raise e
41
42 # send return value
~\Anaconda3\lib\site-packages\featuretools\utils\entry_point.py in function_wrapper(*args, **kwargs)
30 # call function
31 start = time.time()
---> 32 return_value = func(*args, **kwargs)
33 runtime = time.time() - start
34 except Exception as e:
~\Anaconda3\lib\site-packages\featuretools\synthesis\dfs.py in dfs(entities, relationships, entityset, target_entity, cutoff_time, instance_ids, agg_primitives, trans_primitives, groupby_trans_primitives, allowed_paths, max_depth, ignore_entities, ignore_variables, primitive_options, seed_features, drop_contains, drop_exact, where_primitives, max_features, cutoff_time_in_index, save_progress, features_only, training_window, approximate, chunk_size, n_jobs, dask_kwargs, verbose, return_variable_types, progress_callback, include_cutoff_time)
259 seed_features=seed_features)
260
--> 261 features = dfs_object.build_features(
262 verbose=verbose, return_variable_types=return_variable_types)
263
~\Anaconda3\lib\site-packages\featuretools\synthesis\deep_feature_synthesis.py in build_features(self, return_variable_types, verbose)
287 assert isinstance(return_variable_types, list), msg
288
--> 289 self._run_dfs(self.es[self.target_entity_id], RelationshipPath([]),
290 all_features, max_depth=self.max_depth)
291
~\Anaconda3\lib\site-packages\featuretools\synthesis\deep_feature_synthesis.py in _run_dfs(self, entity, relationship_path, all_features, max_depth)
412 """
413
--> 414 self._build_transform_features(all_features, entity, max_depth=max_depth)
415
416 """
~\Anaconda3\lib\site-packages\featuretools\synthesis\deep_feature_synthesis.py in _build_transform_features(self, all_features, entity, max_depth, require_direct_input)
576 input_types = input_types[0]
577
--> 578 matching_inputs = self._get_matching_inputs(all_features,
579 entity,
580 new_max_depth,
~\Anaconda3\lib\site-packages\featuretools\synthesis\deep_feature_synthesis.py in _get_matching_inputs(self, all_features, entity, max_depth, input_types, primitive, primitive_options, require_direct_input, feature_filter)
793 primitive, primitive_options, require_direct_input=False,
794 feature_filter=None):
--> 795 features = self._features_by_type(all_features=all_features,
796 entity=entity,
797 max_depth=max_depth,
~\Anaconda3\lib\site-packages\featuretools\synthesis\deep_feature_synthesis.py in _features_by_type(self, all_features, entity, max_depth, variable_type)
768 if (variable_type == variable_types.PandasTypes._all or
769 f.variable_type == variable_type or
--> 770 any(issubclass(f.variable_type, vt) for vt in variable_type)):
771 if max_depth is None or f.get_depth(stop_at=self.seed_features) <= max_depth:
772 selected_features.append(f)
~\Anaconda3\lib\site-packages\featuretools\synthesis\deep_feature_synthesis.py in <genexpr>(.0)
768 if (variable_type == variable_types.PandasTypes._all or
769 f.variable_type == variable_type or
--> 770 any(issubclass(f.variable_type, vt) for vt in variable_type)):
771 if max_depth is None or f.get_depth(stop_at=self.seed_features) <= max_depth:
772 selected_features.append(f)
TypeError: issubclass() arg 1 must be a class
我想我明白了。如果有更好的方法,请告诉我!
我不确定为什么文档中的方法不起作用(它使用函数而不是类,并且没有提到类)。
我借鉴了下面这个问题的解答来解决它:
如何计算某一项所在组的均值,但排除该项本身?
from featuretools.primitives import AggregationPrimitive
class Test_Fun(AggregationPrimitive):
    """Aggregation primitive: median seconds between sessions at the modal store.

    Takes a Categorical column (store label) and a DatetimeTimeIndex column
    (session start time) and yields a single Numeric value per group.
    """

    name = "test_fun"
    input_types = [Categorical, DatetimeTimeIndex]
    # A single variable type class, not a list of classes.
    return_type = Numeric
    stack_on_self = False

    def get_function(self):
        """Return the callable that computes the aggregate for one group."""

        def median_gap_at_modal_store(categorical, datetimeindex):
            """Median gap in seconds between consecutive sessions at the modal store.

            Returns ``nan`` when the group has no non-null store label.
            """
            sessions = pd.DataFrame({'store_name': categorical, 'session_start_time': datetimeindex})
            modes = sessions['store_name'].mode()
            if modes.empty:
                # mode() ignores nulls, so an empty or all-null group has no
                # modal store; indexing the first mode would raise IndexError.
                return np.nan
            # On ties mode() returns several values; keep the first.
            at_modal_store = sessions[sessions['store_name'] == modes.iloc[0]]
            # The first diff is NaT; count it as a zero-second gap.
            gaps = at_modal_store['session_start_time'].diff().fillna(pd.Timedelta(seconds=0))
            return (gaps / np.timedelta64(1, 's')).median()

        return median_gap_at_modal_store
# Run deep feature synthesis for the 'customers' entity, using the
# class-based Test_Fun as the only aggregation primitive.
fm, fd = ft.dfs(
    entityset=es,
    target_entity='customers',
    agg_primitives=[Test_Fun],
    cutoff_time=lt,
    cutoff_time_in_index=True,
    include_cutoff_time=False,
    verbose=True,
)
在这段代码中:
# Quoted from the question: return_type is wrapped in a list here.
Test_Fun = make_agg_primitive(function = test_fun,
input_types = [Categorical, DatetimeTimeIndex],
return_type = [Numeric])
`return_type` 应设置为 `Numeric`,而不是 `[Numeric]`(列表不是类,因此会触发上面的 `issubclass() arg 1 must be a class` 错误)。
下面这段代码在我这里可以正常运行:
# Same primitive definition, with return_type given as the bare Numeric class.
Test_Fun = make_agg_primitive(
    function=test_fun,
    input_types=[Categorical, DatetimeTimeIndex],
    return_type=Numeric,
)