r语言 - 使用for循环生成1000个随机样本



我想生成1000个随机样本,并把每个样本的检验p值是否小于alpha(即是否拒绝原假设)记录到reject_collect对象中,同时把真实均值10是否落在每个样本的置信区间(CI)内记录到CI_include_collect对象中。但循环结束后,这两个对象都只包含一个值。

# Asker's original attempt: draw 1000 samples of size 50 from N(10, 2), run a
# two-sided one-sample t-test of mu = 10 on each, and try to record the outcome.
# Both result objects start out as NULL.
reject_collect <- NULL
CI_include_collect <- NULL
for ( i in c(1:1000)) { 
random_vector_index <- rnorm( 50, mean = 10, sd = 2)
# Significance level; the same value is re-assigned on every iteration.
alpha <- 0.05
# Bare expression: inside a for loop its value is neither printed nor stored.
mean(random_vector_index)
test_results_index <- t.test(random_vector_index, mu=10, 
alternative = "two.sided", 
conf.level = 0.95)
# Bare expression again: the p-value is computed but discarded here.
test_results_index$p.value
# The whole object is replaced on each pass (no [i] index), so after the loop
# reject_collect holds only the result of the last iteration.
reject_collect <- test_results_index$p.value < alpha
# Same overwrite problem as above. NOTE(review): between() is not a base R
# function -- presumably dplyr::between with dplyr attached; confirm.
CI_include_collect <- between(10, 
test_results_index$conf.int[1], 
test_results_index$conf.int[2])
}

@margusl向您展示了如何在循环中正确索引。下面是不带循环的选项:

library(tidyverse)

# Loop-free simulation: one tibble row per replicate, with the sample and its
# t-test result kept in list-columns until the summary columns are extracted.
tibble(replicate = 1:1000) |>
  mutate(
    # Seeding each replicate with its own index makes every row reproducible.
    # Note that set.seed() also resets the global RNG state as a side effect.
    random_vector_index = map(replicate, \(seed) {
      set.seed(seed)
      rnorm(50, mean = 10, sd = 2)
    }),
    # Two-sided one-sample t-test of H0: mu = 10 for every sample.
    test_results_index = map(
      random_vector_index,
      \(smp) t.test(smp, mu = 10, alternative = "two.sided", conf.level = 0.95)
    ),
    p.value = map_dbl(test_results_index, \(res) res$p.value),
    # `<` is already vectorised, so no map_lgl() is needed for this column.
    reject_collect = p.value < 0.05,
    # TRUE when the true mean (10) lies inside the 95% confidence interval.
    CI_include_collect = map_lgl(
      test_results_index,
      \(res) between(10, res$conf.int[1], res$conf.int[2])
    )
  ) |>
  select(replicate, p.value, reject_collect, CI_include_collect)
#> # A tibble: 1,000 x 4
#>    replicate p.value reject_collect CI_include_collect
#>        <int>   <dbl> <lgl>          <lgl>             
#>  1         1 0.397   FALSE          TRUE              
#>  2         2 0.667   FALSE          TRUE              
#>  3         3 0.614   FALSE          TRUE              
#>  4         4 0.0767  FALSE          TRUE              
#>  5         5 0.669   FALSE          TRUE              
#>  6         6 0.562   FALSE          TRUE              
#>  7         7 0.101   FALSE          TRUE              
#>  8         8 0.678   FALSE          TRUE              
#>  9         9 0.503   FALSE          TRUE              
#> 10        10 0.00758 TRUE           FALSE             
#> # ... with 990 more rows

你的循环在每次迭代中都会覆盖同一对对象(reject_collect 和 CI_include_collect),因此循环结束后只剩下最后一次迭代的结果。如果要使用循环,应先创建所需长度的向量(或列表),然后在每次迭代中把结果存到对应的索引位置:

# Repeat the experiment `runs` times, storing each iteration's outcome at its
# own index instead of overwriting a single value.
runs <- 1000
alpha <- 0.05 # significance level; loop-invariant, so it is set only once

# Preallocate one logical slot per run.
reject_collect <- logical(runs)
CI_include_collect <- logical(runs)

# seq_len() yields an empty sequence when runs == 0, whereas 1:runs would
# iterate over c(1, 0).
for (i in seq_len(runs)) {
  random_vector_index <- rnorm(50, mean = 10, sd = 2)

  # Two-sided one-sample t-test of H0: mu = 10.
  test_results_index <- t.test(
    random_vector_index,
    mu = 10,
    alternative = "two.sided",
    conf.level = 0.95
  )

  # TRUE when the null hypothesis is rejected at level alpha.
  reject_collect[i] <- test_results_index$p.value < alpha

  # TRUE when the true mean (10) lies inside the confidence interval. Plain
  # comparisons are used so the snippet runs without attaching any package.
  ci <- test_results_index$conf.int
  CI_include_collect[i] <- ci[1] <= 10 && 10 <= ci[2]
}

或者可以考虑使用 lapply,把每次模拟的结果收集到一个列表中,之后再将该列表转换为数据框,例如:

# Functional variant: each call simulates one sample and returns a named list;
# the 1000 lists are then row-bound into a single data frame.
alpha <- 0.05 # significance level shared by every replicate

result <- lapply(seq_len(1000), function(i) {
  random_vector_index <- rnorm(50, mean = 10, sd = 2)

  # Two-sided one-sample t-test of H0: mu = 10.
  test_results_index <- t.test(
    random_vector_index,
    mu = 10,
    alternative = "two.sided",
    conf.level = 0.95
  )
  ci <- test_results_index$conf.int

  # The last expression is the return value; one list per replicate.
  list(
    mean = mean(random_vector_index),
    p = test_results_index$p.value,
    reject = test_results_index$p.value < alpha,
    # Base comparison avoids needing dplyr just for one scalar range check.
    CI_include = ci[1] <= 10 && 10 <= ci[2]
  )
})

# Stack the per-replicate lists into one data frame (one row per replicate).
do.call(rbind.data.frame, result)
#>           mean            p reject CI_include
#> 1    10.311684 0.2765610597  FALSE       TRUE
#> 2    10.183697 0.4474821536  FALSE       TRUE
#> 3    10.041988 0.8781323640  FALSE       TRUE
#> 4     9.700637 0.3162029238  FALSE       TRUE
#> 5    10.335990 0.2313467109  FALSE       TRUE
#> ...
#> 39   10.203395 0.4525939583  FALSE       TRUE
#> 40    9.299052 0.0437908734   TRUE      FALSE
#> 41    9.994436 0.9817983485  FALSE       TRUE
#> ...

最新更新