随机森林:如何在使用caret包训练后计算ROC曲线和ROC曲线下面积(AUC)



我使用caret包的train函数进行10折交叉验证。我还在trControl中设置了classProbs = TRUE,从而得到各预测类别的类概率,如下:

# Resampling setup: 10-fold cross-validation that keeps the hold-out
# predictions and the per-class probabilities, and prints progress.
myTrainingControl <- trainControl(
  method = "cv",
  number = 10,
  savePredictions = TRUE,
  classProbs = TRUE,
  verboseIter = TRUE
)

# Random forest with 50 trees. Predictors are elements 3 to 154 of `input`
# and are centred and scaled before fitting; the outcome is `input$Target`
# converted to a factor.
# NOTE(review): newer caret releases reportedly reject class levels such as
# "0"/"1" when classProbs = TRUE because they are not valid R names -- confirm
# against the installed caret version.
randomForestFit <- train(
  x = input[3:154],
  y = as.factor(input$Target),
  method = "rf",
  trControl = myTrainingControl,
  preProcess = c("center", "scale"),
  ntree = 50
)

我得到的输出预测如下:

  pred obs    0    1 rowIndex mtry Resample
1    0   1 0.52 0.48       28   12   Fold01
2    0   0 0.58 0.42       43   12   Fold01
3    0   1 0.58 0.42       51   12   Fold01
4    0   0 0.68 0.32       55   12   Fold01
5    0   0 0.62 0.38       59   12   Fold01
6    0   1 0.92 0.08       71   12   Fold01

现在我想用这些数据计算ROC曲线以及ROC曲线下面积(AUC)。我该如何做到这一点?

AUC示例:

# Fit the forest; the per-class votes stored on the fit are used below as
# the classification score.
rf_output <- randomForest(
  x = predictor_data,
  y = target,
  importance = TRUE,
  ntree = 10001,
  proximity = TRUE,
  sampsize = sampsizes
)

library(ROCR)

# Score each observation with the second column of the vote matrix.
predictions <- as.vector(rf_output$votes[, 2])
pred <- prediction(predictions, target)

# Area under the ROC curve, extracted from the S4 performance object.
perf_AUC <- performance(pred, "auc")
AUC <- perf_AUC@y.values[[1]]

# ROC curve: true-positive rate against false-positive rate, with the AUC
# value written in the middle of the plot.
perf_ROC <- performance(pred, "tpr", "fpr")
plot(perf_ROC, main = "ROC plot")
text(0.5, 0.5, paste("AUC = ", format(AUC, digits = 5, scientific = FALSE)))

或者使用pROC和caret:

library(caret)
library(pROC)
data(iris)

# Reduce iris to a two-class problem (versicolor vs. virginica) and rebuild
# the factor so the now-empty "setosa" level is dropped.
iris <- iris[iris$Species %in% c("virginica", "versicolor"), ]
iris$Species <- factor(iris$Species)

# Random 50/50 split into training and test rows.
samples <- sample(NROW(iris), NROW(iris) * .5)
data.train <- iris[samples, ]
data.test <- iris[-samples, ]

# No `method` is given, so caret's default model is fitted.
forest.model <- train(Species ~ ., data = data.train)

# Class probabilities for the held-out rows.
result.predicted.prob <- predict(forest.model, data.test, type = "prob")

# Build the ROC object from the true classes and the predicted probability
# of "versicolor"; the curve itself is drawn by plot() below.
result.roc <- roc(data.test$Species, result.predicted.prob$versicolor)
plot(
  result.roc,
  print.thres = "best",
  print.thres.best.method = "closest.topleft"
)

# Threshold closest to the top-left corner, and the accuracy at that point.
result.coords <- coords(
  result.roc,
  "best",
  best.method = "closest.topleft",
  ret = c("threshold", "accuracy")
)
print(result.coords)

2019年更新:这正是编写MLeval包的目的(https://cran.r-project.org/web/packages/MLeval/index.html)。它直接处理caret的train输出对象,可以绘制ROC曲线、PR曲线和校准曲线,并计算ROC-AUC、灵敏度、特异性等指标。只需一行代码就能完成这些工作,这对我的分析很有帮助,或许你也会感兴趣。

library(caret)
library(MLeval)

# `Sonar` is not created anywhere in this snippet, so load it explicitly
# (the MLeval vignette does the same before calling train()).
# NOTE(review): the data set originates from the mlbench package -- if it is
# not found, load it with data(Sonar, package = "mlbench").
data(Sonar)

# 10-fold cross-validation that keeps the hold-out predictions and the class
# probabilities on the fitted object, and prints progress.
myTrainingControl <- trainControl(
  method = "cv",
  number = 10,
  savePredictions = TRUE,
  classProbs = TRUE,
  verboseIter = TRUE
)

# Random forest with 50 trees on the first 60 columns of Sonar; predictors
# are centred and scaled before fitting.
randomForestFit <- train(
  x = Sonar[, 1:60],
  y = as.factor(Sonar$Class),
  method = "rf",
  trControl = myTrainingControl,
  preProcess = c("center", "scale"),
  ntree = 50
)

# Evaluate the caret fit with MLeval.
x <- evalm(randomForestFit)
# ROC curve as a ggplot2 object.
x$roc
# AUC and the other summary metrics.
x$stdres

最新更新