r-如何一次运行多个外循环



有没有一种方法可以让我同时运行外循环而不干扰内循环?

我正在失去数据。如果我以这种方式使用循环,表的性能如何?

注意:我知道有一些data.table函数可以像我做的那样进行组合,但它们不适用于我计划做的事情。

library(data.table)
library(MASS)

# Load the example data set and convert it to a data.table by reference.
data(Insurance)
x <- setDT(Insurance)
# setnames() renames by reference; it is the data.table idiom for `names<-`.
setnames(x, tolower(names(x)))

# Coarser region key: districts 1 and 4 become group "B", all others "A".
x[, district_group := "A"]
x[district == 1 | district == 4, district_group := "B"]

# Every non-empty subset of `vars`, plus the empty set, is used as an extra
# grouping key next to the region column.
vars <- c("group", "age")
comb <- do.call(
  "c",
  lapply(seq_along(vars), function(i) combn(vars, i, FUN = list))
)
comb[[length(comb) + 1L]] <- character(0)

# Region columns to aggregate by (outer loop).
d <- c("district", "district_group")

# One combined table per region column; bound together once at the end
# instead of re-binding `tbl` on every outer iteration.
tmp <- vector("list", length(d))
for (i in seq_along(d)) {
  by_comb <- vector("list", length(comb))
  for (j in seq_along(comb)) {
    # The grouped `[` already returns a new table, so no copy(x) is needed:
    # x itself is never modified here.
    y <- x[
      ,
      .(nclaims = sum(claims), region = d[i]),
      by = c(d[i], comb[[j]])
    ]
    # Give the region key a common name so the pieces can be stacked.
    setnames(y, old = d[i], new = "region_id")
    by_comb[[j]] <- y
  }
  # fill = TRUE: pieces that were not grouped by group/age lack that column.
  tmp[[i]] <- rbindlist(by_comb, fill = TRUE)
}
tbl <- rbindlist(tmp, fill = TRUE)

# Replace the NAs introduced by fill = TRUE with "#", by reference.
for (j in seq_along(tbl)) {
  na_rows <- which(is.na(tbl[[j]]))
  if (length(na_rows) > 0L) {
    set(tbl, na_rows, j, "#")
  }
}

以下是使用foreach&doParallel

library(data.table)
library(MASS)
library(foreach)

# Load the example data set and convert it to a data.table by reference.
data(Insurance)
x <- setDT(Insurance)
# setnames() renames by reference; it is the data.table idiom for `names<-`.
setnames(x, tolower(names(x)))

# Coarser region key: districts 1 and 4 become group "B", all others "A".
x[, district_group := "A"]
x[district == 1 | district == 4, district_group := "B"]

# Every non-empty subset of `vars`, plus the empty set, is used as an extra
# grouping key next to the region column.
vars <- c("group", "age")
comb <- do.call(
  "c",
  lapply(seq_along(vars), function(i) combn(vars, i, FUN = list))
)
comb[[length(comb) + 1L]] <- character(0)

# Region columns to aggregate by (outer loop).
d <- c("district", "district_group")

# foreach() returns a list holding one data.table per element of `d`;
# rbindlist() stacks that list into a single data.table.
tbl <- rbindlist(
  foreach(i = seq_along(d)) %do% {
    by_comb <- vector("list", length(comb))
    for (j in seq_along(comb)) {
      # The grouped `[` already returns a new table, so no copy(x) is needed:
      # x itself is never modified here.
      y <- x[
        ,
        .(nclaims = sum(claims), region = d[i]),
        by = c(d[i], comb[[j]])
      ]
      # Give the region key a common name so the pieces can be stacked.
      setnames(y, old = d[i], new = "region_id")
      by_comb[[j]] <- y
    }
    # Value of this iteration. fill = TRUE because pieces that were not
    # grouped by group/age lack that column.
    rbindlist(by_comb, fill = TRUE)
  }
)

# Replace the NAs introduced by fill = TRUE with "#", by reference.
for (j in seq_along(tbl)) {
  na_rows <- which(is.na(tbl[[j]]))
  if (length(na_rows) > 0L) {
    set(tbl, na_rows, j, "#")
  }
}

这是输出

tbl
#>      region_id  group nclaims         region   age
#>   1:         1    <1l     249       district     #
#>   2:         1 1-1.5l     636       district     #
#>   3:         1 1.5-2l     378       district     #
#>   4:         1    >2l     118       district     #
#>   5:         2    <1l     150       district     #
#>  ---                                              
#> 146:         A    >2l      17 district_group 25-29
#> 147:         A    >2l      20 district_group 30-35
#> 148:         A    >2l      90 district_group   >35
#> 149:         B      #    1707 district_group     #
#> 150:         A      #    1444 district_group     #

创建于2021-05-28由reprex包(v2.0.0(

如果这证实了你想要什么,那么你可以使用doParallel包运行for循环并行,如下所示

library(data.table)
library(MASS)
library(doParallel)

# NOTE: `x`, `comb` and `d` are not created in this snippet; they must
# already exist in the session (they are built by the earlier snippets).

# Register the parallel backend. detectCores() may return NA, and there are
# only length(d) outer iterations, so more workers than that would sit idle.
n_workers <- max(1L, min(length(d), detectCores(), na.rm = TRUE))
registerDoParallel(n_workers)

# Run the outer loop in parallel with foreach and %dopar%.
# .packages loads data.table on the workers; socket-based backends start
# fresh R sessions in which it would otherwise not be available.
tbl <- rbindlist(
  foreach(i = seq_along(d), .packages = "data.table") %dopar% {
    by_comb <- vector("list", length(comb))
    for (j in seq_along(comb)) {
      # The grouped `[` already returns a new table, so no copy(x) is needed:
      # x itself is never modified here.
      y <- x[
        ,
        .(nclaims = sum(claims), region = d[i]),
        by = c(d[i], comb[[j]])
      ]
      # Give the region key a common name so the pieces can be stacked.
      setnames(y, old = d[i], new = "region_id")
      by_comb[[j]] <- y
    }
    # Value of this iteration. fill = TRUE because pieces that were not
    # grouped by group/age lack that column.
    rbindlist(by_comb, fill = TRUE)
  }
)

# Release the workers started implicitly by registerDoParallel().
stopImplicitCluster()

# Replace the NAs introduced by fill = TRUE with "#", by reference.
for (j in seq_along(tbl)) {
  na_rows <- which(is.na(tbl[[j]]))
  if (length(na_rows) > 0L) {
    set(tbl, na_rows, j, "#")
  }
}
tbl
#>      region_id  group nclaims         region   age
#>   1:         1    <1l     249       district     #
#>   2:         1 1-1.5l     636       district     #
#>   3:         1 1.5-2l     378       district     #
#>   4:         1    >2l     118       district     #
#>   5:         2    <1l     150       district     #
#>  ---                                              
#> 146:         A    >2l      17 district_group 25-29
#> 147:         A    >2l      20 district_group 30-35
#> 148:         A    >2l      90 district_group   >35
#> 149:         B      #    1707 district_group     #
#> 150:         A      #    1444 district_group     #

创建于2021-05-28由reprex包(v2.0.0(

快速比较将原始x数据乘以1000倍

您的代码

user  system elapsed 
26.046   8.298  19.034 

foreach&%do%

user  system elapsed 
28.471   8.128  20.577

用CCD_ 6&%dopar%

user  system elapsed 
0.058   0.081  15.413

最新更新