我试图生成1000个随机样本,并需要把每个样本的检验p值是否小于alpha(即是否拒绝原假设)记录到reject_collect对象中,同时把真实均值10是否落在每个样本的置信区间(CI)内记录到CI_include_collect对象中。但我的这两个对象目前都只有一个值。
# Simulation attempt from the question: repeat 1000 times -- draw a sample,
# run a one-sample t-test against mu = 10, and record the test decision and
# whether the confidence interval contains 10.
# NOTE(review): `between()` is not a base R function; presumably dplyr is
# attached elsewhere -- confirm.
reject_collect <- NULL
CI_include_collect <- NULL
for ( i in c(1:1000)) {
# Draw 50 values from a normal distribution with mean 10 and sd 2.
random_vector_index <- rnorm( 50, mean = 10, sd = 2)
alpha <- 0.05
# The result of this call is not assigned to anything.
mean(random_vector_index)
test_results_index <- t.test(random_vector_index, mu=10,
alternative = "two.sided",
conf.level = 0.95)
# The result of this expression is not assigned to anything either.
test_results_index$p.value
# Plain assignment without an index: every iteration replaces the previous
# value, so only the result of the last iteration remains after the loop.
reject_collect <- test_results_index$p.value < alpha
# Same overwrite pattern as above for the confidence-interval check.
CI_include_collect <- between(10,
test_results_index$conf.int[1],
test_results_index$conf.int[2])
}
@margusl 已经向您展示了如何在循环中正确地使用索引。下面是一种不使用循环的方案:
# Loop-free simulation: one tibble row per replicate, with list-columns that
# hold the simulated sample and the fitted t-test object for that replicate.
library(tidyverse)
tibble(replicate = 1:1000) |>
  mutate(
    # Seed with the replicate number so each sample is reproducible.
    random_vector_index = map(replicate, \(x) {
      set.seed(x)
      rnorm(50, mean = 10, sd = 2)
    }),
    # One-sample, two-sided t-test of mu = 10 for every sample.
    test_results_index = map(
      random_vector_index,
      \(x) t.test(x, mu = 10, alternative = "two.sided", conf.level = 0.95)
    ),
    p.value = map_dbl(test_results_index, \(x) x$p.value),
    # Vectorized comparison against the 0.05 significance level.
    reject_collect = p.value < 0.05,
    # TRUE when 10 lies inside the 95% confidence interval.
    CI_include_collect = map_lgl(
      test_results_index,
      \(x) between(10, x$conf.int[1], x$conf.int[2])
    )
  ) |>
  # Drop the list-columns; keep only the per-replicate summary.
  select(replicate, p.value, reject_collect, CI_include_collect)
#> # A tibble: 1,000 x 4
#> replicate p.value reject_collect CI_include_collect
#> <int> <dbl> <lgl> <lgl>
#> 1 1 0.397 FALSE TRUE
#> 2 2 0.667 FALSE TRUE
#> 3 3 0.614 FALSE TRUE
#> 4 4 0.0767 FALSE TRUE
#> 5 5 0.669 FALSE TRUE
#> 6 6 0.562 FALSE TRUE
#> 7 7 0.101 FALSE TRUE
#> 8 8 0.678 FALSE TRUE
#> 9 9 0.503 FALSE TRUE
#> 10 10 0.00758 TRUE FALSE
#> # ... with 990 more rows
你的循环在每次迭代中都会覆盖同一批对象(`reject_collect` 和 `CI_include_collect`),因此最后只剩下一个值。若要沿用循环的做法,应先创建具有所需长度的向量(或列表),再在每次迭代中把结果存入对应的索引位置:
# Loop-based simulation that stores one result per run instead of overwriting.
runs <- 1000
# Significance level; constant across runs, so it is set once up front.
alpha <- 0.05
# Preallocate one logical slot per run.
reject_collect <- logical(runs)
CI_include_collect <- logical(runs)
# seq_len() yields an empty sequence when runs is 0, unlike 1:runs.
for (i in seq_len(runs)) {
  random_vector_index <- rnorm(50, mean = 10, sd = 2)
  test_results_index <- t.test(
    random_vector_index,
    mu = 10,
    alternative = "two.sided",
    conf.level = 0.95
  )
  # TRUE when the test rejects mu = 10 at level alpha.
  reject_collect[i] <- test_results_index$p.value < alpha
  # TRUE when 10 lies inside the confidence interval; between() is
  # namespace-qualified so dplyr does not have to be attached.
  CI_include_collect[i] <- dplyr::between(
    10,
    test_results_index$conf.int[1],
    test_results_index$conf.int[2]
  )
}
或者,您可以考虑使用 apply 系列函数(例如 `lapply`)把所有结果收集到一个列表中,之后再将该列表转换为数据框,例如:
# Functional variant: each call simulates one sample and returns a named list
# of summaries; the lists are then row-bound into a single data frame.
alpha <- 0.05
result <- lapply(1:1000, function(i) {
  draws <- rnorm(50, mean = 10, sd = 2)
  fit <- t.test(draws, mu = 10, alternative = "two.sided", conf.level = 0.95)
  ci <- fit$conf.int
  list(
    "mean" = mean(draws),
    "p" = fit$p.value,
    "reject" = fit$p.value < alpha,
    "CI_include" = dplyr::between(10, ci[1], ci[2])
  )
})
# Stack the per-replicate lists into one data frame, one row per replicate.
do.call(rbind.data.frame, result)
#> mean p reject CI_include
#> 1 10.311684 0.2765610597 FALSE TRUE
#> 2 10.183697 0.4474821536 FALSE TRUE
#> 3 10.041988 0.8781323640 FALSE TRUE
#> 4 9.700637 0.3162029238 FALSE TRUE
#> 5 10.335990 0.2313467109 FALSE TRUE
#> ...
#> 39 10.203395 0.4525939583 FALSE TRUE
#> 40 9.299052 0.0437908734 TRUE FALSE
#> 41 9.994436 0.9817983485 FALSE TRUE
#> ...