如何在 R 中自动执行嵌套变量的 t 检验?



我想自动收集 t 检验的汇总统计量。在下面的示例中,变量 Age、Location 和 Treatment 是嵌套的。对于每个 Age 与 Location 的组合,我想按 Treatment 运行一次 t 检验,Treatment 有两个类别:Control 和 Treatment。换句话说,我想知道在每个 Age 的每个 Location 下,对照组(Control)与处理组(Treatment)均值之间的差异。

我想使用 matrixTests 包中的 col_t_welch 函数运行 t 检验,因为它的输出已经包含了我需要的几个汇总统计量(即 mean.diff、stderr 和 pvalue)。应该如何组织数据框(df1),才能用 for 循环完成这些嵌套的 t 检验?

可重现的示例:

library(matrixTests)
library(ggplot2)
# Balanced toy design: 3 ages x 2 locations x 2 treatments x 3 replicates,
# laid out Age-major, then Location, then Treatment.
set.seed(123)
df1 <- data.frame(
  Age = factor(rep(c(1, 2, 3), each = 12)),
  Location = factor(rep(c("Central", "North"), each = 6, times = 3)),
  Treatment = factor(rep(c("Control", "Treatment"), each = 3, times = 6)),
  # Single rnorm() draw right after set.seed() so the values are reproducible.
  Value = round(rnorm(36, mean = 200, sd = 25), 0)
)
# Welch t-test of Control vs. Treatment within every Age x Location cell.
# col_t_welch() takes the two samples as `x` and `y` (it has no `data` or
# `subset` arguments), so each cell is subset by hand before the call.
# Results are stored Age-major: (1, Central), (1, North), (2, Central), ...
ages <- levels(df1$Age)
locations <- levels(df1$Location)
n_tests <- length(ages) * length(locations)
p <- numeric(length = n_tests)
mean_diff <- numeric(length = n_tests)
SE_diff <- numeric(length = n_tests)
is_control <- df1$Treatment == "Control"
is_treated <- df1$Treatment == "Treatment"
i <- 1
for (j in ages) {
  for (k in locations) {
    in_cell <- df1$Age == j & df1$Location == k
    # x = Control, y = Treatment, so mean.diff is Control minus Treatment.
    ttest <- col_t_welch(
      x = df1$Value[in_cell & is_control],
      y = df1$Value[in_cell & is_treated]
    )
    p[i] <- ttest$pvalue
    mean_diff[i] <- ttest$mean.diff
    SE_diff[i] <- ttest$stderr
    i <- i + 1
  }
}

理想的最终数据框(d2)如下所示:

# Target table: one row per Age x Location cell. expand.grid() varies Age
# fastest, so rows run (1, Central), (2, Central), (3, Central), (1, North), ...
d2 <- expand.grid(
  Age = c(1, 2, 3),
  Location = c("Central", "North"),
  mean_diff = NA,
  SE_diff = NA,
  pvalue = NA
)
# Fill each row from a Welch t-test on the matching cell of df1. Cells are
# selected by their Age/Location values rather than by hard-coded row ranges,
# so the result does not depend on how df1 happens to be sorted.
for (r in seq_len(nrow(d2))) {
  in_cell <- as.character(df1$Age) == as.character(d2$Age[r]) &
    as.character(df1$Location) == as.character(d2$Location[r])
  # x = Control, y = Treatment, so mean.diff is Control minus Treatment.
  mod <- col_t_welch(
    x = df1$Value[in_cell & df1$Treatment == "Control"],
    y = df1$Value[in_cell & df1$Treatment == "Treatment"]
  )
  d2$mean_diff[r] <- mod$mean.diff
  d2$SE_diff[r] <- mod$stderr
  d2$pvalue[r] <- mod$pvalue
}
d2

我认为这可能会对您有所帮助

加载库

library(matrixTests)
library(tidyverse)

数据

# Balanced toy design: 3 ages x 2 locations x 2 treatments x 3 replicates.
# gl() generates the repeating factor patterns directly at full length.
set.seed(123)
df1 <- data.frame(
  Age = gl(3, 12),
  Location = gl(2, 6, length = 36, labels = c("Central", "North")),
  Treatment = gl(2, 3, length = 36, labels = c("Control", "Treatment")),
  # Single rnorm() draw right after set.seed() so the values are reproducible.
  Value = round(rnorm(36, mean = 200, sd = 25), 0)
)

方法

# Nest Value by Age x Location x Treatment, spread the two treatment arms into
# the list-columns `Control` and `Treatment`, run one Welch t-test per
# Age x Location row, then keep only the summary statistics of interest.
df1 %>%
  group_nest(Age, Location, Treatment) %>%
  pivot_wider(names_from = Treatment, values_from = data) %>%
  mutate(
    test = map2(Control, Treatment, function(ctrl, trt) col_t_welch(ctrl, trt))
  ) %>%
  unnest(cols = test) %>%
  select(Age, Location, pvalue, mean.diff, stderr)

结果

# A tibble: 6 x 5
Age   Location pvalue mean.diff stderr
<fct> <fct>     <dbl>     <dbl>  <dbl>
1 1     Central   0.675     -9.67  21.3 
2 1     North     0.282    -22     17.7 
3 2     Central   0.925     -3     28.4 
4 2     North     0.570      9.33  14.6 
5 3     Central   0.589    -14.7   25.0 
6 3     North     0.311    -11.3    8.59

最新更新