在R中使用SVM创建模型

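我需要一些帮助，想在R中创建SVM模型并绘制ROC曲线。我遇到了几个错误：`Error in levels(data[, "pred"]) : argument "data" is missing, with no default`，以及 `Error: Every row has at least one missing value were found`。我该如何修复？提前感谢！以下是数据集的 Google Drive 链接：数据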

这是我尝试过的代码：

# Load caret (model training and resampling) and pROC (ROC curves and AUC).
library(caret)
library(pROC)
# Resampling setup: 10-fold cross-validation repeated 10 times, with class
# probabilities enabled and a two-class summary function.
# NOTE(review): twoClassSummary() is *called* here instead of being passed as
# a function; the console transcript further down shows this statement failing
# with 'argument "data" is missing, with no default'.
fitControl <- trainControl(method = "repeatedcv",
number = 10, 
repeats= 10,
classProbs =  TRUE,
summaryFunction = twoClassSummary())            
# Read the full data set from a CSV file in the working directory.
data<-read.csv("full_train_binary.csv")
# Split on column X: rows with X == 0 go to training, rows with X == 1 to
# testing.
data_training<-subset(data[which(data$X==0),])
data_testing<-subset(data[which(data$X==1),])
training<-data_training
testing<-data_testing
# Drop the listed columns from both sets by keeping only the columns whose
# names are not in cols_remove.
cols_remove <- c("patient_sk","New_admitted_dt_tm", "New_discharge_dt_tm")
training<-training[,!(colnames(training)%in%cols_remove)]
testing<-testing[,!(colnames(testing)%in%cols_remove)]
# Fix the random seed before training.
set.seed(825)
# Record the start time so the training duration can be reported below.
start.time <- Sys.time()
# Fit a radial-kernel SVM predicting `death` from all remaining columns, with
# ROC as the selection metric.
# NOTE(review): caret's train() spells this argument `tuneLength`; the
# lower-case `tunelength` below does not match it -- confirm the intent.
svm_one <- train(death~., data = training, 
method = 'svmRadial',                   
trControl = fitControl, 
verbose = FALSE,
tunelength=5,
metric="ROC")
# Print the fitted model summary.
svm_one
# Report the elapsed time since start.time.
end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken

# Predict class probabilities for the test set.
svm_one_pred <- predict(svm_one, newdata=testing,type = 'prob')
# Build the ROC curve from the observed `death` values and the first column of
# the predicted probabilities, then report the area under it.
roc_svm_one <- roc(testing$death, as.vector(svm_one_pred[,1]))
pROC::auc(roc_svm_one)

结果：

> library(caret)
> library(pROC)
> fitControl <- trainControl(method = "repeatedcv",
+                            number = 10, 
+                            repeats= 10,
+                            classProbs =  TRUE,
+                            summaryFunction = twoClassSummary())            
Error in levels(data[, "pred"]) : 
argument "data" is missing, with no default
> 
> data<-read.csv("full_train_binary.csv")
> 
> data_training<-subset(data[which(data$X==0),])
> data_testing<-subset(data[which(data$X==1),])
> 
> training<-data_training
> testing<-data_testing
> 
> cols_remove <- c("patient_sk","New_admitted_dt_tm", "New_discharge_dt_tm")
> 
> training<-training[,!(colnames(training)%in%cols_remove)]
> testing<-testing[,!(colnames(testing)%in%cols_remove)]
> 
> set.seed(825)
> 
> start.time <- Sys.time()
> 
> svm_one <- train(death~., data = training, 
+                  method = 'svmRadial',                      
+                  trControl = fitControl, 
+                  verbose = FALSE,
+                  tunelength=5,
+                  metric="ROC")
Error: Every row has at least one missing value were found
> svm_one
Support Vector Machines with Radial Basis Function Kernel 
4911 samples
1954 predictors
2 classes: 'False', 'True' 
No pre-processing
Resampling: Cross-Validated (5 fold) 
Summary of sample sizes: 3928, 3928, 3929, 3930, 3929 
Resampling results across tuning parameters:
sigma         C          Accuracy   Kappa        
1.976927e-05  192.56972  0.7448586  -0.0004065338
2.778991e-05  242.26352  0.7446545   0.0007460142
3.273858e-05   14.39494  0.7450623   0.0000000000
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were sigma = 3.273858e-05 and C
= 14.39494.
> 
> end.time <- Sys.time()
> time.taken <- end.time - start.time
> time.taken
Time difference of 0.395869 secs
> 
> 
> svm_one_pred <- predict(svm_one, newdata=testing,type = 'prob')
Error in eval(predvars, data, env) : object 'patient_sk' not found
> roc_svm_one <- roc(testing$death, as.vector(svm_one_pred[,1]))
Error in as.vector(svm_one_pred[, 1]) : object 'svm_one_pred' not found
> pROC::auc(roc_svm_one)
Error in pROC::auc(roc_svm_one) : object 'roc_svm_one' not found

好的，我只能获取到你文件中的25行；而且在读入 full_train_binary.csv 之后，不知为何有200多列全是零。

从上面的输出来看，你的数据似乎有4000多行，所以我只是在下面对你的代码提出了一些修改建议（说明写在以 # 开头的注释里），希望你运行时不会再出错：

library(caret)
library(pROC)

# Resampling setup: 10-fold cross-validation repeated 10 times, scored with
# the two-class summary (ROC, sensitivity, specificity).
# Pass twoClassSummary itself, without (): trainControl() stores the function
# and caret calls it later with the resampled predictions.
fitControl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 10,
  classProbs = TRUE,
  summaryFunction = twoClassSummary
)

data <- read.csv("full_train_binary.csv")

# Drop the identifier/date columns once, before splitting, so the training and
# test sets are guaranteed to have the same columns.
cols_remove <- c("patient_sk", "New_admitted_dt_tm", "New_discharge_dt_tm")
data <- data[, setdiff(colnames(data), cols_remove)]

# classProbs = TRUE requires a factor outcome.
# NOTE(review): the factor levels must also be valid R names (e.g. not "0"/"1");
# check levels(data$death) against your file.
data$death <- as.factor(data$death)

# Seed the RNG so the random split and the CV folds are reproducible.
set.seed(825)

# Random 70/30 split into training and test rows.
idx <- sample(nrow(data), round(0.7 * nrow(data)))
training <- data[idx, ]
testing <- data[-idx, ]

# Radial-kernel SVM tuned on ROC. The argument is spelled `tuneLength`; the
# lower-case `tunelength` is not recognised by train(), so the requested grid
# size would not be applied.
svm_one <- train(
  death ~ .,
  data = training,
  method = "svmRadial",
  trControl = fitControl,
  verbose = FALSE,
  tuneLength = 5,
  metric = "ROC"
)

# Class probabilities on the held-out rows; column 1 is the probability of the
# first factor level of `death`.
svm_one_pred <- predict(svm_one, newdata = testing, type = "prob")
roc_svm_one <- roc(testing$death, as.vector(svm_one_pred[, 1]))
pROC::auc(roc_svm_one)

相关内容

最新更新

热门标签：