我有这个数据帧(可复现的示例):
# Reproducible sample of the data (appears to be dput() output): evaluating
# this expression builds a 4-row data.frame. The expression is not assigned
# to a name here; the code further down refers to the data frame as SB_xlsx.
structure(list(age = c(62.84998, 60.33899, 52.74698, 42.38498
), death = c(0, 1, 1, 1), sex = c("male", "female", "female",
"female"), hospdead = c(0, 1, 0, 0), slos = c(5, 4, 17, 3), d.time = c(2029,
4, 47, 133), dzgroup = c("Lung Cancer", "Cirrhosis", "Cirrhosis",
"Lung Cancer"), dzclass = c("Cancer", "COPD/CHF/Cirrhosis", "COPD/CHF/Cirrhosis",
"Cancer"), num.co = c(0, 2, 2, 2), edu = c(11, 12, 12, 11), income = c("$11-$25k",
"$11-$25k", "under $11k", "under $11k"), scoma = c(0, 44, 0,
0), charges = c(9715, 34496, 41094, 3075), totcst = c(NA_real_,
NA_real_, NA_real_, NA_real_), totmcst = c(NA_real_, NA_real_,
NA_real_, NA_real_), avtisst = c(7, 29, 13, 7), race = c("other",
"white", "white", "white"), sps = c(33.8984375, 52.6953125, 20.5,
20.0976562), aps = c(20, 74, 45, 19), surv2m = c(0.262939453,
0.0009999275, 0.790893555, 0.698974609), surv6m = c(0.0369949341,
0, 0.664916992, 0.411987305), hday = c(1, 3, 4, 1), diabetes = c(0,
0, 0, 0), dementia = c(0, 0, 0, 0), ca = c("metastatic", "no",
"no", "metastatic"), prg2m = c(0.5, 0, 0.75, 0.899999619), prg6m = c(0.25,
0, 0.5, 0.5), dnr = c("no dnr", NA, "no dnr", "no dnr"), dnrday = c(5,
NA, 17, 3), meanbp = c(97, 43, 70, 75), wblc = c(6, 17.0976562,
8.5, 9.09960938), hrt = c(69, 112, 88, 88), resp = c(22, 34,
28, 32), temp = c(36, 34.59375, 37.39844, 35), pafi = c(388,
98, 231.65625, NA), alb = c(1.7998047, NA, NA, NA), bili = c(0.19998169,
NA, 2.19970703, NA), crea = c(1.19995117, 5.5, 2, 0.79992676),
sod = c(141, 132, 134, 139), ph = c(7.459961, 7.25, 7.459961,
NA), glucose = c(NA_real_, NA_real_, NA_real_, NA_real_),
bun = c(NA_real_, NA_real_, NA_real_, NA_real_), urine = c(NA_real_,
NA_real_, NA_real_, NA_real_), adlp = c(7, NA, 1, 0), adls = c(7,
1, 0, 0), sfdm2 = c(NA, "<2 mo. follow-up", "<2 mo. follow-up",
"no(M2 and SIP pres)"), adlsc = c(7, 1, 0, 0)), row.names = c(NA,
4L), class = "data.frame")
我还绘制了一张SUPPORT第3天生理学评分(sps)的箱线图,该评分按原发疾病类别(dzclass)分组,数据帧名称为SB_xlsx。
# Keep only the rows that have an sps value.
SB_xlsx = SB_xlsx[which(!is.na(SB_xlsx$sps)), ]

# Box plot of sps for each primary disease class.
ggplot(SB_xlsx, aes(x = dzclass, y = sps)) +
  geom_boxplot() +
  labs(
    title = "Box Plot - sps by Primary Disease Class",
    x = "Disease Class",
    y = "sps"
  )
现在,我想检验SUPPORT第3天生理学评分(sps)的总体均值在各原发疾病类别之间是否存在差异。我认为运行成对的t检验(t.test)是一个不错的选择,但我的代码有问题。
# Hypothesised difference in means, passed to t.test() as `mu`.
mu.diff = 0
# Significance level; 1 - alpha is intended as the confidence level.
alpha = 0.05
# 2-row matrix: each column holds one unordered pair of distinct dzclass values.
combs = combn(unique(SB_xlsx$dzclass), 2)
# Run one two-sample t-test of sps for every pair of disease classes.
for (s in 1:ncol(combs)) {
i = combs[1, s]
j = combs[2, s]
# NOTE(review): in `SB_xlsx[dzclass == i | dzclass == j, ]` the name `dzclass`
# is looked up as a free variable, not as a column of SB_xlsx; this is the
# expression behind the "object 'dzclass' not found" error quoted after this
# snippet.
# NOTE(review): t.test() documents these options as `var.equal` and
# `conf.level`; the underscore spellings are presumably absorbed by `...`
# and ignored -- confirm.
tt = t.test(sps ~ dzclass, mu=mu.diff, var_equal=FALSE,
conf_level=1-alpha, alternative='two.sided',
data=SB_xlsx[dzclass == i | dzclass == j, ])
print(tt)
}
这是我目前的代码。理论上应该可行,但不知为何,我遇到了一个错误:"eval(m$data, parent.frame()) 中出错: 找不到对象 'dzclass'"。我对此感到困惑,因为dzclass显然在我的数据集中,所以我不明白为什么会找不到它。
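在 `SB_xlsx[dzclass == i | dzclass == j, ]` 中,dzclass 是作为普通变量在调用环境中查找的,而不是作为 SB_xlsx 的列,因此找不到该对象;但如果把条件写在 `subset` 参数中,它会在 `data` 内求值,dzclass 就能被找到。请注意,双侧检验(`alternative = 'two.sided'`)是默认设置,因此不必指定。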
# Welch two-sample t-test of sps restricted to the two classes i and j.
# `subset` is evaluated inside `data`, so the bare column name `dzclass`
# resolves to SB_xlsx$dzclass.
# t.test() spells its options with dots (`var.equal`, `conf.level`); the
# underscore spellings are absorbed by `...` and silently ignored, so the
# requested confidence level would never take effect.
tt = t.test(sps ~ dzclass, data = SB_xlsx, subset = dzclass == i | dzclass == j,
  mu = mu.diff, var.equal = FALSE, conf.level = 1 - alpha)
更进一步,我们可以这样缩短代码:
# Pairwise Welch t-tests of sps between every pair of disease classes.
# as.data.frame() turns each column of `combs` (one pair of classes) into a
# data-frame column, so the loop variable `s` is the length-2 pair itself.
for (s in as.data.frame(combs)) {
  # `subset` is evaluated inside `data`; %in% keeps rows of either class.
  # The option names must be `var.equal` and `conf.level`: the underscore
  # spellings are absorbed by `...` and ignored, leaving `alpha` without
  # effect.
  tt = t.test(sps ~ dzclass, data = SB_xlsx, subset = dzclass %in% s,
    mu = mu.diff, var.equal = FALSE, conf.level = 1 - alpha)
  print(tt)
}