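我需要一些帮助,想在 R 中创建 SVM 模型并绘制 ROC 曲线。我遇到了几个错误:`Error in levels(data[, "pred"]) : argument "data" is missing, with no default`(缺少参数 "data",且没有默认值),以及 `Error: Every row has at least one missing value were found`(每一行都至少有一个缺失值)。我该怎么修复?提前感谢!以下是数据集的 Google Drive 链接:数据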
这是我尝试过的代码:
# Question script as posted: tunes a radial-kernel SVM with caret and then
# computes a ROC curve / AUC on a held-out set with pROC. The code is kept
# verbatim because it reproduces the errors shown in the console output
# further down.
library(caret)
library(pROC)
# Resampling setup: 10-fold cross-validation repeated 10 times, with class
# probabilities enabled.
# NOTE(review): twoClassSummary() is *called* here with no arguments; the
# console output reports 'argument "data" is missing, with no default' for
# this statement, so fitControl is not created by this run.
fitControl <- trainControl(method = "repeatedcv",
number = 10,
repeats= 10,
classProbs = TRUE,
summaryFunction = twoClassSummary())
data<-read.csv("full_train_binary.csv")
# Rows with X == 0 are used for training, rows with X == 1 for testing.
# NOTE(review): the meaning of column X is not visible here -- confirm it is
# a train/test flag and not an index column. subset() is given no filter
# condition, so presumably it returns its input unchanged -- verify.
data_training<-subset(data[which(data$X==0),])
data_testing<-subset(data[which(data$X==1),])
training<-data_training
testing<-data_testing
# Columns listed here are dropped from both the training and the test set.
cols_remove <- c("patient_sk","New_admitted_dt_tm", "New_discharge_dt_tm")
training<-training[,!(colnames(training)%in%cols_remove)]
testing<-testing[,!(colnames(testing)%in%cols_remove)]
set.seed(825)
# Wall-clock timing of the model fit.
start.time <- Sys.time()
# Fit death against all remaining columns.
# NOTE(review): caret::train() documents the grid-size argument as
# `tuneLength`; the lower-case `tunelength` used below is presumably not
# recognised as that argument -- verify.
svm_one <- train(death~., data = training,
method = 'svmRadial',
trControl = fitControl,
verbose = FALSE,
tunelength=5,
metric="ROC")
svm_one
end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken
# Class probabilities on the test set; the first probability column is used
# as the predictor for the ROC curve.
svm_one_pred <- predict(svm_one, newdata=testing,type = 'prob')
roc_svm_one <- roc(testing$death, as.vector(svm_one_pred[,1]))
pROC::auc(roc_svm_one)
结果:
> library(caret)
> library(pROC)
> fitControl <- trainControl(method = "repeatedcv",
+ number = 10,
+ repeats= 10,
+ classProbs = TRUE,
+ summaryFunction = twoClassSummary())
Error in levels(data[, "pred"]) :
argument "data" is missing, with no default
>
> data<-read.csv("full_train_binary.csv")
>
> data_training<-subset(data[which(data$X==0),])
> data_testing<-subset(data[which(data$X==1),])
>
> training<-data_training
> testing<-data_testing
>
> cols_remove <- c("patient_sk","New_admitted_dt_tm", "New_discharge_dt_tm")
>
> training<-training[,!(colnames(training)%in%cols_remove)]
> testing<-testing[,!(colnames(testing)%in%cols_remove)]
>
> set.seed(825)
>
> start.time <- Sys.time()
>
> svm_one <- train(death~., data = training,
+ method = 'svmRadial',
+ trControl = fitControl,
+ verbose = FALSE,
+ tunelength=5,
+ metric="ROC")
Error: Every row has at least one missing value were found
> svm_one
Support Vector Machines with Radial Basis Function Kernel
4911 samples
1954 predictors
2 classes: 'False', 'True'
No pre-processing
Resampling: Cross-Validated (5 fold)
Summary of sample sizes: 3928, 3928, 3929, 3930, 3929
Resampling results across tuning parameters:
sigma C Accuracy Kappa
1.976927e-05 192.56972 0.7448586 -0.0004065338
2.778991e-05 242.26352 0.7446545 0.0007460142
3.273858e-05 14.39494 0.7450623 0.0000000000
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were sigma = 3.273858e-05 and C
= 14.39494.
>
> end.time <- Sys.time()
> time.taken <- end.time - start.time
> time.taken
Time difference of 0.395869 secs
>
>
> svm_one_pred <- predict(svm_one, newdata=testing,type = 'prob')
Error in eval(predvars, data, env) : object 'patient_sk' not found
> roc_svm_one <- roc(testing$death, as.vector(svm_one_pred[,1]))
Error in as.vector(svm_one_pred[, 1]) : object 'svm_one_pred' not found
> pROC::auc(roc_svm_one)
Error in pROC::auc(roc_svm_one) : object 'roc_svm_one' not found
好的,我只能拿到你文件中的 25 行;而且在读入 full_train_binary.csv 之后,不知为何有 200 多列全为零。
从上面的输出来看,你似乎有 4000 多行数据,所以我只在下面对你的代码提出一些修改建议(修改之处以 # 注释标出),希望你能顺利运行而不再报错:
library(caret)
library(pROC)

# Resampling setup: 10-fold cross-validation repeated 10 times.
# classProbs = TRUE is required so that twoClassSummary can compute ROC.
# Pass the function twoClassSummary itself, without (): trainControl() stores
# the function and caret calls it later with the resampled predictions.
fitControl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 10,
  classProbs = TRUE,
  summaryFunction = twoClassSummary
)

data <- read.csv("full_train_binary.csv")

# Drop the identifier / date columns once, before splitting, so the training
# and test sets are guaranteed to have the same columns.
cols_remove <- c("patient_sk", "New_admitted_dt_tm", "New_discharge_dt_tm")
data <- data[, setdiff(colnames(data), cols_remove), drop = FALSE]

# Fix the seed so that both the random split and the resampling are
# reproducible.
set.seed(825)

# Random 70% / 30% split into training and test rows.
idx <- sample(nrow(data), round(0.7 * nrow(data)))
data_training <- data[idx, ]
data_testing <- data[-idx, ]

# The model code below refers to `training` / `testing`, so bind those names
# to the split that was just created.
training <- data_training
testing <- data_testing

# Tune a radial-kernel SVM, selecting the model by area under the ROC curve.
# Note the spelling: the argument is `tuneLength` (capital L).
svm_one <- train(
  death ~ .,
  data = training,
  method = "svmRadial",
  trControl = fitControl,
  verbose = FALSE,
  tuneLength = 5,
  metric = "ROC"
)

# Predicted class probabilities on the held-out rows; the first probability
# column is used as the predictor for the ROC curve.
svm_one_pred <- predict(svm_one, newdata = testing, type = "prob")
roc_svm_one <- roc(testing$death, as.vector(svm_one_pred[, 1]))