如何优化R中"expand.grid"或"combn"的使用



我有一个字符向量v <- c("piment","aubergine","carotte","oignon","chou","pommeDeTerre"),我想将其中的元素组合起来,以准备一个完整的实验设计。因此我想生成一个data.frame,其中每一行是一组n个元素的组合,并且包含尽可能多的行(即所有可能的组合)。

# Reproduction of the problem: try to enumerate every way of filling n slots
# with the six values in v.
v <- c("piment","aubergine","carotte","oignon","chou","pommeDeTerre")
n <- 12
## TEST 1: crashes R (per the original report)
# tmp has n columns, each holding the 6 values of v, so expand.grid() is asked
# for the full cartesian product: 6^12 = 2,176,782,336 rows.
tmp <- data.frame(matrix(rep(v,n), ncol = n))
expand.grid(tmp)
## TEST 2: fails with the error quoted below
# NOTE(review): nbslot is not defined in this snippet; presumably it equals n
# (12) in the original session -- confirm. With nbslot = 12, combn() would
# need choose(72, 12) (about 1.5e13) columns, which exceeds the integer range.
temp = t(combn(rep(v,nbslot), nbslot))
#Error in matrix(r, nrow = len.r, ncol = count) : 
#  invalid 'ncol' value (too large or NA)
#In addition: Warning message:
#In combn(rep(v, nbslot), nbslot) :
#  NAs introduced by coercion to integer range

这似乎对n <- 8有效,但对n <- 12无效。如何解决这个问题?

您可以使用gtools包中的combinations

作为r=5的示例,也适用于r=12:

library(gtools)
# All size-5 selections from the elements of v, with repetition allowed.
# Arguments are named explicitly, and TRUE is spelled out because T can be
# reassigned.
combinations(n = length(v), r = 5, v = v, repeats.allowed = TRUE)
[,1]           [,2]           [,3]           [,4]           [,5]          
[1,] "aubergine"    "aubergine"    "aubergine"    "aubergine"    "aubergine"   
[2,] "aubergine"    "aubergine"    "aubergine"    "aubergine"    "carotte"     
[3,] "aubergine"    "aubergine"    "aubergine"    "aubergine"    "chou"        
[4,] "aubergine"    "aubergine"    "aubergine"    "aubergine"    "oignon"      
[5,] "aubergine"    "aubergine"    "aubergine"    "aubergine"    "piment"      
[6,] "aubergine"    "aubergine"    "aubergine"    "aubergine"    "pommeDeTerre"
[7,] "aubergine"    "aubergine"    "aubergine"    "carotte"      "carotte"     
...

也许这对您有所帮助。它包含6个变量的所有组合(至少1个,最多全部6个),总共63种:

# Build a 0/1 indicator table of every non-empty subset of `vars`.
# variables
vars <- c("piment", "aubergine", "carotte", "oignon", "chou", "pommeDeTerre")
# index combinations grouped by subset size m = 1, ..., length(vars);
# element m of L is a matrix with m rows and one column per combination
L <- lapply(seq_along(vars), FUN = function(m) {
  combn(x = seq_along(vars), m = m)
})
# total number of combinations
(S <- sum(vapply(L, ncol, integer(1)))) # 63
# turn each combination (a column of indices) into a 0/1 indicator vector
# with one entry per variable
L2 <- lapply(L, FUN = function(x) {
  apply(x, 2, function(y) {
    z <- rep(0, length(vars))
    z[y] <- 1
    z
  })
})
# a data.frame of the combinations: one row per combination
df <- as.data.frame(t(do.call("cbind", L2)))
# label the columns with the variable names; without this they would be
# printed as V1, ..., V6
names(df) <- vars
df
#    piment aubergine carotte oignon chou pommeDeTerre
# 1       1         0       0      0    0            0
# 2       0         1       0      0    0            0
# 3       0         0       1      0    0            0
# 4       0         0       0      1    0            0
# 5       0         0       0      0    1            0
# 6       0         0       0      0    0            1
# 7       1         1       0      0    0            0
# 8       1         0       1      0    0            0
# 9       1         0       0      1    0            0
# 10      1         0       0      0    1            0
# 11      1         0       0      0    0            1
# 12      0         1       1      0    0            0
# 13      0         1       0      1    0            0
# 14      0         1       0      0    1            0
# 15      0         1       0      0    0            1
# 16      0         0       1      1    0            0
# 17      0         0       1      0    1            0
# 18      0         0       1      0    0            1
# 19      0         0       0      1    1            0
# 20      0         0       0      1    0            1
# 21      0         0       0      0    1            1
# 22      1         1       1      0    0            0
# 23      1         1       0      1    0            0
# 24      1         1       0      0    1            0
# 25      1         1       0      0    0            1
# 26      1         0       1      1    0            0
# 27      1         0       1      0    1            0
# 28      1         0       1      0    0            1
# 29      1         0       0      1    1            0
# 30      1         0       0      1    0            1
# 31      1         0       0      0    1            1
# 32      0         1       1      1    0            0
# 33      0         1       1      0    1            0
# 34      0         1       1      0    0            1
# 35      0         1       0      1    1            0
# 36      0         1       0      1    0            1
# 37      0         1       0      0    1            1
# 38      0         0       1      1    1            0
# 39      0         0       1      1    0            1
# 40      0         0       1      0    1            1
# 41      0         0       0      1    1            1
# 42      1         1       1      1    0            0
# 43      1         1       1      0    1            0
# 44      1         1       1      0    0            1
# 45      1         1       0      1    1            0
# 46      1         1       0      1    0            1
# 47      1         1       0      0    1            1
# 48      1         0       1      1    1            0
# 49      1         0       1      1    0            1
# 50      1         0       1      0    1            1
# 51      1         0       0      1    1            1
# 52      0         1       1      1    1            0
# 53      0         1       1      1    0            1
# 54      0         1       1      0    1            1
# 55      0         1       0      1    1            1
# 56      0         0       1      1    1            1
# 57      1         1       1      1    1            0
# 58      1         1       1      1    0            1
# 59      1         1       1      0    1            1
# 60      1         1       0      1    1            1
# 61      1         0       1      1    1            1
# 62      0         1       1      1    1            1
# 63      1         1       1      1    1            1

根据您对行数和n的描述,我做了一个假设:您想要的是有放回的抽样,因此每个元素都可以出现不止一次。我还假设,在n为12的情况下,您永远不会真正用到全部数十亿个组合。那么我的函数做了什么呢?

它会给你一个唯一的随机样本,所有样本都不同。

#' Draw distinct random samples (with replacement) from a set of options
#'
#' Repeatedly draws `samples` elements from `options` with replacement and
#' collects the draws in a list until it holds `rows` distinct entries.
#' Duplicate draws are discarded and redrawn.
#'
#' @param options Vector of values to draw from.
#' @param build List of already accepted draws to extend (default: empty).
#'   It is returned unchanged when it already has at least `rows` entries.
#' @param samples Number of elements in each draw.
#' @param rows Number of distinct draws wanted.
#' @return A list of vectors; when draws were needed it has `rows` distinct
#'   entries, each of length `samples`.
mysamples <- function(options, build = list(), samples, rows) {
  if (length(build) >= rows) {
    return(build)
  }
  build <- unique(build)
  # Fail fast instead of looping forever: at most length(options)^samples
  # distinct draws exist, so more new entries than that can never be found.
  n_possible <- length(options)^samples
  if (rows - length(build) > n_possible) {
    stop(
      sprintf(
        "cannot build %s distinct rows: only %s distinct samples of size %s exist",
        format(rows), format(n_possible), format(samples)
      ),
      call. = FALSE
    )
  }
  # Iterate rather than recurse so the call depth does not grow with `rows`.
  while (length(build) < rows) {
    # Index via sample.int() so a length-one numeric `options` is not
    # expanded to 1:options, as sample() would do.
    pick <- sample.int(length(options), samples, replace = TRUE)
    build[[length(build) + 1L]] <- options[pick]
    build <- unique(build)
  }
  build
}
# Example usage: draw distinct random rows from the six vegetables.
v <- c("piment","aubergine","carotte","oignon","chou","pommeDeTerre")
# note that enumerating every possible row first is infeasible: with
# replacement there are 6^50 (about 8.1e38) ordered samples of length 50.
# NOTE(review): the figure 11441304000 quoted originally equals
# 50*49*48*47*46*45, which is not that count.
mysamples(options = v, samples = 50, rows = 1000)
# smaller sample allowing to show all results below
mysamples(options = v, samples = 5, rows = 10)
[[1]]
[1] "carotte"      "aubergine"    "piment"       "pommeDeTerre" "pommeDeTerre"
[[2]]
[1] "carotte"   "oignon"    "aubergine" "chou"      "oignon"   
[[3]]
[1] "piment"       "carotte"      "chou"         "pommeDeTerre" "carotte"     
[[4]]
[1] "oignon"       "oignon"       "aubergine"    "carotte"      "pommeDeTerre"
[[5]]
[1] "oignon"    "chou"      "piment"    "aubergine" "piment"   
[[6]]
[1] "chou"      "aubergine" "chou"      "aubergine" "oignon"   
[[7]]
[1] "chou"      "aubergine" "carotte"   "carotte"   "carotte"  
[[8]]
[1] "aubergine" "aubergine" "carotte"   "carotte"   "oignon"   
[[9]]
[1] "carotte"   "carotte"   "carotte"   "carotte"   "aubergine"
[[10]]
[1] "piment"    "aubergine" "aubergine" "chou"      "oignon" 

# two rows of 50 draws each (printed below)
mysamples(options = v, samples = 50, rows = 2)
[[1]]
[1] "pommeDeTerre" "carotte"      "aubergine"    "aubergine"    "pommeDeTerre" "oignon"       "carotte"      "aubergine"    "pommeDeTerre" "chou"         "chou"         "carotte"      "pommeDeTerre"
[14] "piment"       "carotte"      "oignon"       "piment"       "chou"         "chou"         "pommeDeTerre" "piment"       "oignon"       "carotte"      "aubergine"    "pommeDeTerre" "piment"      
[27] "aubergine"    "pommeDeTerre" "chou"         "pommeDeTerre" "pommeDeTerre" "carotte"      "oignon"       "piment"       "oignon"       "piment"       "chou"         "pommeDeTerre" "carotte"     
[40] "carotte"      "oignon"       "chou"         "oignon"       "pommeDeTerre" "chou"         "oignon"       "oignon"       "oignon"       "carotte"      "chou"        
[[2]]
[1] "aubergine"    "piment"       "oignon"       "piment"       "oignon"       "oignon"       "piment"       "chou"         "chou"         "carotte"      "chou"         "pommeDeTerre" "piment"      
[14] "chou"         "chou"         "piment"       "aubergine"    "pommeDeTerre" "chou"         "aubergine"    "chou"         "piment"       "carotte"      "pommeDeTerre" "chou"         "pommeDeTerre"
[27] "oignon"       "pommeDeTerre" "piment"       "oignon"       "piment"       "oignon"       "carotte"      "oignon"       "pommeDeTerre" "oignon"       "piment"       "piment"       "carotte"     
[40] "piment"       "aubergine"    "chou"         "oignon"       "oignon"       "pommeDeTerre" "oignon"       "oignon"       "aubergine"    "piment"       "aubergine"   

最新更新