我想在 R 中逐行计算两组之间的绝对差异及其 95% 置信区间,并将结果添加到同一数据框中名为 "Absolute.difference_95CI" 的列中。非常感谢任何建议。
### my data ###
# Event counts per sex; the first row ("n") holds the group totals.
# Fields are whitespace-separated, so the default separator is used.
# fill = TRUE pads the missing Absolute.difference_95CI cell on the
# three-field rows with NA instead of raising an error.
data <- read.table(text = "
Variable Men Women Absolute.difference_95CI
n 979488 317716 NA
Family.history.of.smoking 222153 79810
Prior.MI 500340 166528
Peripheral.vascular.disease 128795 50008
Cerebrovascular.disease 173112 76815
", header = TRUE, fill = TRUE)
我的代码(改编自该链接):
# For every outcome row, compute the absolute difference in proportions
# (men minus women) with its 95% confidence interval and store it as text
# in Absolute.difference_95CI. The "n" row supplies the group sizes.
data2 <- data
is_total <- data2$Variable == "n"
totals <- c(data2$Men[is_total], data2$Women[is_total])

# Start from an all-NA character column so the "n" row stays NA.
data2$Absolute.difference_95CI <- NA_character_

for (i in which(!is_total)) { # loop over outcome rows only
  events <- c(data2$Men[i], data2$Women[i])
  a <- prop.test(x = events, n = totals, correct = FALSE)
  est_diff <- a[["estimate"]][[1]] - a[["estimate"]][[2]]
  # Assign to row i only; writing to the whole column would overwrite
  # every row with the result of the last iteration.
  data2$Absolute.difference_95CI[i] <- paste0(
    round(est_diff, digits = 3), " (",
    round(a[["conf.int"]][1], digits = 3), "-",
    round(a[["conf.int"]][2], digits = 3), ")"
  )
}
首先,将数据转换为稍作调整的长格式,并为男性和女性的 n 单独设置一列。
# Reshape to long format (one row per Variable and sex), attach each sex's
# group total from the "n" row as column N, then drop the "n" rows.
library(data.table)
setDT(data)
data <- melt(data, id.vars = "Variable")[
  , N := max(value[Variable == "n"]), by = variable
][
  Variable != "n"
]
然后,使用下面的辅助函数 f() 来执行 prop.test(),并以列表形式返回结果。
#' Compare two proportions with `prop.test()` and format the result.
#'
#' @param x Numeric vector of length 2 with the event counts; the first
#'   element is reported as "men", the second as "women".
#' @param n Numeric vector of length 2 with the matching group sizes.
#' @param d Number of decimal places used for rounding (default 3).
#' @return A named list: the rounded proportions (`men`, `women`), their
#'   difference (`diff`, first minus second) and the confidence interval of
#'   that difference as formatted text (`95% CI`).
f <- function(x, n, d = 3) {
  pt <- prop.test(x, n)
  list(
    "men" = round(pt$estimate[1], d),
    "women" = round(pt$estimate[2], d),
    "diff" = round(pt$estimate[1] - pt$estimate[2], d),
    "95% CI" = paste0(
      "(", round(pt$conf.int[1], d), " : ", round(pt$conf.int[2], d), ")"
    )
  )
}
然后,按 Variable 分组,将该函数应用于长格式数据:
# Call f() once per Variable group with that group's counts and totals.
data[, f(value, N), by = Variable]
输出:
Variable men women diff 95% CI
1: Family.history.of.smoking 0.227 0.251 -0.024 (-0.026 : -0.023)
2: Prior.MI 0.511 0.524 -0.013 (-0.015 : -0.011)
3: Peripheral.vascular.disease 0.131 0.157 -0.026 (-0.027 : -0.024)
4: Cerebrovascular.disease 0.177 0.242 -0.065 (-0.067 : -0.063)
输入:
# Input data: the first row ("n") holds the group sizes, the remaining rows
# hold the event counts per condition for men and women.
data <- data.frame(
  Variable = c(
    "n",
    "Family.history.of.smoking",
    "Prior.MI",
    "Peripheral.vascular.disease",
    "Cerebrovascular.disease"
  ),
  Men = c(979488L, 222153L, 500340L, 128795L, 173112L),
  Women = c(317716L, 79810L, 166528L, 50008L, 76815L),
  stringsAsFactors = FALSE
)