我爱MLR!在下面的代码中,我比较了四个分类器的性能。当我使用 PIMA 印度糖尿病数据运行以下代码时,我遇到了一些奇怪的错误:
library(mlbench)
library(caret)  # NOTE(review): caret exports its own train(); see the masking note below
library(randomForest)
data(PimaIndiansDiabetes)
data2<-data  # NOTE(review): no object named `data` exists here -- this binds the utils::data() *function* to data2
## Define the task
Class.task = makeClassifTask(id = "USUBJID", data = data2, target = "Class", positive ="B")  # NOTE(review): data2 is a function, not a data.frame, so this call cannot work as written
Class.task = makeClassifTask( data = PimaIndiansDiabetes, target = "diabetes", positive ="pos")  # immediately overwrites the task defined above
fv = generateFilterValuesData(Class.task, method = "mrmr")  # mRMR filter score per feature
plotFilterValues(fv)
filtered.task = filterFeatures(Class.task, fval = fv, threshold = -.2)  # keep features scoring above the threshold
#filtered.task = Class.task
n = getTaskSize(filtered.task)
train.set = sample(n, size = round(2/3 * n))  # NOTE(review): no set.seed() -- the split is not reproducible
test.set = setdiff(seq_len(n), train.set)
lrn1 = makeLearner("classif.lda", predict.type = "prob")
# NOTE(review): mlr is never attached by this script (presumably it was attached
# earlier in the session, before caret). With caret attached last, `train` below
# resolves to caret::train, which cannot handle an mlr Learner object -- this is
# the source of the "unique() applies only to vectors" errors in the transcript.
mod1 = train(lrn1, filtered.task, subset = train.set)
pred1 = predict(mod1, task = filtered.task, subset = test.set)
lrn2 = makeLearner("classif.ksvm", predict.type = "prob")
mod2 = train(lrn2, filtered.task, subset = train.set)
pred2 = predict(mod2, task = filtered.task, subset = test.set)
lrn3 = makeLearner("classif.randomForest", predict.type = "prob")
mod3 = train(lrn3, Class.task, subset = train.set)  # NOTE(review): trains on the UNfiltered task, unlike lrn1/lrn2
pred3 = predict(mod3, task = Class.task, subset = test.set)
lrn5 = makeLearner("classif.xgboost", predict.type = "prob")
mod5 = train(lrn5, Class.task, subset = train.set)  # NOTE(review): also on the unfiltered task
pred5 = predict(mod5, task = Class.task, subset = test.set)
### Tune wrapper for ksvm
rdesc.inner = makeResampleDesc("Holdout")  # inner resampling used for hyperparameter tuning
ms = list(auc, mmce)  # first measure (auc) is the one optimized/ranked on
ps = makeParamSet(
makeDiscreteParam("C", 2^(-1:1))
)
ctrl = makeTuneControlGrid()
lrn2 = makeTuneWrapper(lrn2, rdesc.inner,ms, ps, ctrl, show.info = FALSE)  # wrap ksvm in a grid-search tuner
lrns = list(lrn1, lrn2,lrn3,lrn5)
rdesc.outer = makeResampleDesc("CV", iters = 5)  # outer 5-fold CV for the benchmark
bmr = benchmark(lrns, tasks = filtered.task, resampling = rdesc.outer, measures = ms, show.info = FALSE)
bmr
我得到的错误是:
Error in unique.default(x, nmax = nmax) :
unique() applies only to vectors
> pred1 = predict(mod1, task = filtered.task, subset = test.set)
Error in predict(mod1, task = filtered.task, subset = test.set) :
object 'mod1' not found
> lrn2 = makeLearner("classif.ksvm", predict.type = "prob")
> mod2 = train(lrn2, filtered.task, subset = train.set)
Error in unique.default(x, nmax = nmax) :
unique() applies only to vectors
> pred2 = predict(mod2, task = filtered.task, subset = test.set)
Error in predict(mod2, task = filtered.task, subset = test.set) :
object 'mod2' not found
> lrn3 = makeLearner("classif.randomForest", predict.type = "prob")
> mod3 = train(lrn3, Class.task, subset = train.set)
Error in unique.default(x, nmax = nmax) :
unique() applies only to vectors
> pred3 = predict(mod3, task = Class.task, subset = test.set)
Error in predict(mod3, task = Class.task, subset = test.set) :
object 'mod3' not found
>
> lrn5 = makeLearner("classif.xgboost", predict.type = "prob")
> mod5 = train(lrn5, Class.task, subset = train.set)
Error in unique.default(x, nmax = nmax) :
unique() applies only to vectors
> pred5 = predict(mod5, task = Class.task, subset = test.set)
Error in predict(mod5, task = Class.task, subset = test.set) :
不过最后我确实得到了性能结果……请问我到底哪里做错了?谢谢!!!
问题在于你在 caret 之前加载了 mlr——这两个包都导出了 train() 函数,哪一个被遮蔽(mask)取决于加载顺序:后加载的包会遮蔽先加载的同名函数。你需要最后加载 mlr 包(其实这里根本用不到 caret,直接不加载即可)。
编辑:完整的工作代码
# Benchmark four classifiers on the Pima Indians Diabetes data with mlr.
# mlr is loaded LAST (and caret not at all) so that mlr::train() is not
# masked by caret::train() -- the cause of the original errors.
library(mlbench)
library(mlr)

data(PimaIndiansDiabetes)

## Define the binary classification task ("pos" = diabetic).
Class.task <- makeClassifTask(
  data = PimaIndiansDiabetes,
  target = "diabetes",
  positive = "pos"
)

## Feature filtering: score features with mRMR, keep those above threshold.
fv <- generateFilterValuesData(Class.task, method = "mrmr")
plotFilterValues(fv)
filtered.task <- filterFeatures(Class.task, fval = fv, threshold = -.2)
#filtered.task = Class.task

## Reproducible 2/3 train / 1/3 test split.
set.seed(123)  # fix the RNG so the split (and results) can be reproduced
n <- getTaskSize(filtered.task)
train.set <- sample(n, size = round(2/3 * n))
test.set <- setdiff(seq_len(n), train.set)

## Individual learners -- ALL trained on the same filtered task so the
## comparison is fair (the original mixed filtered.task and Class.task).
lrn1 <- makeLearner("classif.lda", predict.type = "prob")
mod1 <- train(lrn1, filtered.task, subset = train.set)
pred1 <- predict(mod1, task = filtered.task, subset = test.set)

lrn2 <- makeLearner("classif.ksvm", predict.type = "prob")
mod2 <- train(lrn2, filtered.task, subset = train.set)
pred2 <- predict(mod2, task = filtered.task, subset = test.set)

lrn3 <- makeLearner("classif.randomForest", predict.type = "prob")
mod3 <- train(lrn3, filtered.task, subset = train.set)
pred3 <- predict(mod3, task = filtered.task, subset = test.set)

lrn5 <- makeLearner("classif.xgboost", predict.type = "prob")
mod5 <- train(lrn5, filtered.task, subset = train.set)
pred5 <- predict(mod5, task = filtered.task, subset = test.set)

### Tune wrapper for ksvm: nested resampling with a holdout inner loop.
rdesc.inner <- makeResampleDesc("Holdout")
ms <- list(auc, mmce)  # first measure (auc) drives tuning and ranking
ps <- makeParamSet(
  makeDiscreteParam("C", 2^(-1:1))
)
ctrl <- makeTuneControlGrid()
lrn2 <- makeTuneWrapper(lrn2, rdesc.inner, ms, ps, ctrl, show.info = FALSE)

## Benchmark all four learners with 5-fold outer CV on the filtered task.
lrns <- list(lrn1, lrn2, lrn3, lrn5)
rdesc.outer <- makeResampleDesc("CV", iters = 5)
bmr <- benchmark(lrns, tasks = filtered.task, resampling = rdesc.outer,
                 measures = ms, show.info = FALSE)
bmr