我正在准备对多重插补数据集上的平均预测误差进行自举估计。我的函数似乎无法在范围内找到因变量。有没有办法规避呢?
多重插补运行平稳,但具体问题似乎是线路
mod.nb.train <- with(data = data.mi.train, exp = glm.nb(f))
找不到变量CG.tot
:
eval(expr, envir, enclos) 中的错误:找不到对象 'CG.tot'
但是,如果我将公式声明为字符串:
glm.nb(formula=CG.tot~Fibrinogen)
它有效...
最小运行示例:
library(mice)
library(MASS)
#compute the mean prediction error on a dataframe with missing data
predicterr <- function(f, data, indices){
if(!(class(f)=="formula")){stop("'f' must be of the 'formula' type")}
if(!(class(data)=="data.frame")){stop("'data' must be of the 'data.frame' type")}
#recompute random sampling & multiple imputation
data.test <- data[sample(nrow(data), 15),]
data.train <- data[setdiff(rownames(data), rownames(data.test)),]
data.mi.train <- mice(data.train)
data.mi.test <- mice(data.test)
#recompute model
mod.nb.train <- with(data = data.mi.train, exp = glm.nb(f))
coeffs <- summary(pool(mod.nb.train))[,"est"]
#compute prediction error on each dataset row
errvec <- apply(complete(data.mi.test, include = F, action = "long")[,c(names(coeffs)[-1], as.character(f)[2])],
1, function(x){
return(exp(sum(x[1:length(x)-1]*coeffs[-1], coeffs[1]))-x[length(x)])
})
return(mean(errvec))
}
predicterr(CG.tot~Fibrinogen, d.mi)
数据集(有点长,但这是为了插补...
d <- structure(list(Hb = c(7.5, 12.9, 12.9, 10.2, 10.5, 11.2, 12.7,
9.3, 11.7, 13.4, 151, 10.9, 5.9, 12.8, 10.2, 15.3, 13.8, 9.6,
7.6, 12.2, 11.1, 13.6, 8.9, 7.2, 7.8, 8.7, 10.3, 14, 8.8, 7.5
), Hct = c(23, 39.8, 39.4, 31.6, 32.5, 34.4, 39, 28, 35.9, 41.2,
43.8, 33.7, 18.6, 37.7, 31.7, 44, 87.3, 29.4, 23.6, 37.7, 34.3,
39.8, 27.4, 22.6, 24.2, 29.1, 31.8, 43.1, 27.3, 23.3), EXTEM.CT = c(51L,
60L, 45L, 115L, 55L, 48L, 49L, 106L, 56L, 68L, 61L, 53L, 69L,
44L, 58L, 126L, 47L, 68L, 49L, 68L, 51L, 84L, 63L, 66L, 51L,
108L, 63L, 51L, 53L, 63L), EXTEM.CFT = c(133L, 162L, 175L, 216L,
101L, 60L, 140L, 248L, 137L, 203L, 113L, 199L, 316L, 90L, 224L,
235L, 133L, 46L, 308L, 300L, 119L, 420L, 44L, 207L, 91L, 69L,
96L, 130L, 153L, 99L), EXTEM.MCF = c(59L, 55L, 50L, 46L, 64L,
72L, 52L, 46L, 50L, 50L, 60L, 40L, 40L, 56L, 46L, 47L, 52L, 67L,
40L, 35L, 83L, 30L, 82L, 47L, 61L, 76L, 63L, 51L, 58L, 58L),
INTEM.CT = c(NA, 158L, 154L, 240L, 141L, 141L, 143L, 122L,
104L, 193L, 183L, 186L, 182L, 172L, 192L, 149L, 133L, 162L,
238L, 158L, 144L, 144L, 162L, 213L, 139L, 157L, 104L, 376L,
140L, 192L), INTEM.CFT = c(NA, 91L, 119L, 165L, 97L, 51L,
118L, 190L, 84L, 90L, 82L, 114L, 226L, 90L, 89L, 209L, NA,
64L, 203L, 222L, 64L, 104L, 43L, 170L, 66L, 50L, 61L, 332L,
70L, 66L), INTEM.MCF = c(NA, 57L, 48L, 48L, 74L, 70L, 49L,
50L, 50L, 55L, 58L, 49L, 40L, 57L, 48L, 46L, 64L, 68L, 44L,
39L, 64L, 54L, 80L, 51L, 64L, 78L, 68L, 54L, 62L, 61L), FIBTEM.CT = c(50L,
62L, 101L, 123L, 58L, 49L, 49L, 74L, 77L, 117L, 61L, 54L,
79L, 41L, 69L, 189L, 49L, 67L, 55L, 56L, 57L, 59L, 56L, 62L,
57L, 65L, 51L, 58L, 68L, 67L), FIBTEM.CFT = c(NA, NA, NA,
NA, NA, 94L, NA, NA, NA, NA, NA, 615L, NA, 56L, NA, NA, NA,
79L, NA, NA, 625L, NA, 75L, NA, 892L, NA, NA, NA, NA, 1206L
), FIBTEM.MCF = c(9L, 9L, NA, 5L, 10L, 21L, 11L, 4L, 6L,
3L, 16L, 7L, 6L, 31L, NA, 4L, NA, 35L, 11L, 10L, 42L, NA,
28L, 13L, 22L, 28L, 8L, 7L, 9L, 21L), INR = c(1.14, 1, 1,
1.33, 1.01, 1.07, 1.06, 1.43, 1.22, 1.12, 1.18, 1.54, NA,
1.3, 1.13, 1.05, 1.09, 1.11, 1.49, 1.22, 1.33, 1.04, NA,
1.87, 1.67, 1, 1, 1.07, 1.12, 1.88), PTT = c(30, 28.4, 22.1,
37.8, 25.6, 28.9, 27.2, 32.7, 27.2, 28.9, 27.3, 69.9, 132,
31.9, 26.5, NA, 28.9, 44.3, 50.8, 36.6, NA, 23.5, 30, 70.6,
41.2, 30.1, 25.7, 26.7, 26, 41.9), Platelets = c(150, 193,
343, 138, 284, 216, 141, 291, 142, 230, 254, 126, NA, 249,
153, 308, 253, 66, 30, 41, 293, 208, 545, 141, 136, 256,
249, 305, 327, 112), Fibrinogen = c(1.3, NA, NA, 0.9, 2.1,
3.4, 2.3, 1.1, 1.5, 1.1, 1.8, 0.8, NA, 2.3, 2.4, NA, 2.2,
7.4, 1.8, 1.7, NA, 2.6, 7.1, 0.6, 1.2, NA, 1.1, 2.5, 1.7,
2), CG.tot = c(3L, 2L, 3L, 11L, 12L, 0L, 1L, 10L, 4L, 4L,
5L, 0L, 12L, 11L, 3L, 9L, 5L, 0L, 4L, 0L, 0L, 3L, 0L, 21L,
2L, 1L, 1L, 1L, 2L, 3L)), .Names = c("Hb", "Hct", "EXTEM.CT",
"EXTEM.CFT", "EXTEM.MCF", "INTEM.CT", "INTEM.CFT", "INTEM.MCF",
"FIBTEM.CT", "FIBTEM.CFT", "FIBTEM.MCF", "INR", "PTT", "Platelets",
"Fibrinogen", "CG.tot"), row.names = c(50L, 38L, 54L, 82L, 86L,
4L, 24L, 78L, 59L, 58L, 72L, 16L, 85L, 81L, 45L, 77L, 70L, 6L,
63L, 7L, 11L, 53L, 13L, 93L, 36L, 30L, 18L, 19L, 40L, 43L), class = "data.frame")
您在glm.nb
中缺少一个参数:
mod.nb.train <- with(data = data.mi.train, exp = glm.nb(f, environment()))
它有效。