在我的项目中,我必须使用 SVM 分类器和管道在交叉验证中进行特征选择。特征选择方法是基于关联的特征选择(我通过python-weka-wrapper使用Weka库(。所以,我用fit((和Transform((方法编写了这个类:
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from convertArff import arffOutput
import weka.core.jvm as jvm
from weka.attribute_selection import ASSearch, ASEvaluation, AttributeSelection
from weka.core.converters import Loader
class CorrelationFeatureSelection(BaseEstimator, TransformerMixin):
def __init__(self, names, array):
self.names = names
self.array = array
def _reset(self):
"""Reset internal data-dependent state of the scaler, if necessary.
__init__ parameters are not touched.
"""
# Checking one attribute is enough, becase they are all set together
# in partial_fit
if hasattr(self, 'attibutes_selected_'):
del self.attributes_selected_
def fit(self, X, y=None):
self._reset()
print type(self.array)
arffOutput("result", self.array, self.names)
jvm.start()
print "sono dentro"
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file("result.arff")
data.class_is_last()
print(data)
search = ASSearch(classname="weka.attributeSelection.BestFirst", options=["-D", "1", "-N", "5"])
evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval", options=["-P", "1", "-E", "1"])
attsel = AttributeSelection()
attsel.search(search)
attsel.evaluator(evaluator)
attsel.select_attributes(data)
print("# attributes: " + str(attsel.number_attributes_selected))
print("attributes: " + str(attsel.selected_attributes))
print("result string:n" + attsel.results_string)
attributes = attsel.selected_attributes
jvm.stop()
print attributes
print type(attributes)
self.attributes_selected_ = attributes[0:len(attributes) - 1]
print self.attributes_selected_
return self
def transform(self, X):
col_list = []
for c in self.attributes_selected_:
col_list.append(X[:, c:c + 1])
return np.concatenate(col_list, axis=1)
当我运行我的代码时,我收到以下错误:
RuntimeError: Failed to start Java VM
这个问题有什么解决方案吗?
启动和停止 JVM 应该在启动应用程序的 main
方法中进行(即在 if __name__ == "__main__":
块中(,因为不幸的是,您不能多次启动/停止 JVM。
以下代码将在第二个jvm.start()
失败:
import weka.core.jvm as jvm
print("Starting 1")
jvm.start()
print("Stopping 1")
jvm.stop()
print("Starting 2")
jvm.start()
print("Stopping 2")
jvm.stop()