有没有一种方法可以让我并行运行外层循环，而不干扰内层循环？
我正在失去数据。如果我以这种方式使用循环,表的性能如何?
注意:我知道有一些data.table函数可以像我做的那样进行组合,但它们不适用于我计划做的事情。
library(data.table)
library(MASS)

# Load the example data set and convert it to a data.table by reference.
data(Insurance)
x <- setDT(Insurance)
# Lower-case the column names in place (setnames() avoids the copy that
# `names<-` may trigger on a data.table).
setnames(x, tolower(names(x)))
# Coarser region: districts 1 and 4 form group "B", all other districts "A".
x[, district_group := "A"][district %in% c("1", "4"), district_group := "B"]

# Every non-empty subset of `vars`, followed by the empty set, which yields
# the per-region totals.
vars <- c("group", "age")
comb <- unlist(
  lapply(seq_along(vars), function(k) combn(vars, k, simplify = FALSE)),
  recursive = FALSE
)
comb <- c(comb, list(character(0)))

# Region columns to aggregate by, processed in this order.
d <- c("district", "district_group")

# Sum the claims of `x` for one region column and one set of extra keys.
#   region_col: name of the region column to group by (length-1 character).
#   keys:       additional grouping columns (character vector, may be empty).
# Returns a new data.table with the region column renamed to `region_id`,
# the extra key columns, `nclaims`, and `region` holding the name of the
# region column. The grouped query allocates its own result, so `x` is never
# modified and no copy() of the input is needed.
summarise_claims <- function(region_col, keys) {
  out <- x[, .(nclaims = sum(claims), region = region_col),
           by = c(region_col, keys)]
  setnames(out, old = region_col, new = "region_id")
  out
}

# Build the summaries per region column and bind each level exactly once
# instead of growing `tbl` inside the loop. `fill = TRUE` pads the key
# columns a given combination does not use with NA.
tbl <- rbindlist(
  lapply(d, function(region_col) {
    rbindlist(
      lapply(comb, function(keys) summarise_claims(region_col, keys)),
      fill = TRUE
    )
  }),
  fill = TRUE
)

# Mark the padded (NA) cells with "#", column by column, by reference.
for (col in seq_along(tbl)) {
  set(tbl, i = which(is.na(tbl[[col]])), j = col, value = "#")
}
以下是使用 foreach 和 doParallel 的方法：
library(data.table)
library(MASS)
library(foreach)

# Load the example data set and convert it to a data.table by reference.
data(Insurance)
x <- setDT(Insurance)
# Lower-case the column names in place.
setnames(x, tolower(names(x)))
# Coarser region: districts 1 and 4 form group "B", all other districts "A".
x[, district_group := "A"][district == 1 | district == 4, district_group := "B"]

# Every non-empty subset of `vars`, followed by the empty set, which yields
# the per-region totals.
vars <- c("group", "age")
comb <- do.call(
  "c",
  lapply(seq_along(vars), function(k) combn(vars, k, simplify = FALSE))
)
comb[[length(comb) + 1L]] <- character(0)

# Region columns to aggregate by, processed in this order.
d <- c("district", "district_group")

# foreach returns a list with one data.table per region column; the outer
# rbindlist() stacks them into a single table.
tbl <- rbindlist(
  foreach(region_col = d) %do% {
    # One summary per grouping combination. The grouped query returns a new
    # table, so `x` is left untouched and no copy() is required.
    per_comb <- lapply(comb, function(keys) {
      out <- x[, .(nclaims = sum(claims), region = region_col),
               by = c(region_col, keys)]
      setnames(out, old = region_col, new = "region_id")
      out
    })
    # `fill = TRUE` pads the key columns a combination does not use with NA.
    rbindlist(per_comb, fill = TRUE)
  },
  fill = TRUE
)

# Mark the padded (NA) cells with "#", column by column, by reference.
for (col in seq_along(tbl)) {
  set(tbl, i = which(is.na(tbl[[col]])), j = col, value = "#")
}
这是输出
# Print the combined summary table.
tbl
#> region_id group nclaims region age
#> 1: 1 <1l 249 district #
#> 2: 1 1-1.5l 636 district #
#> 3: 1 1.5-2l 378 district #
#> 4: 1 >2l 118 district #
#> 5: 2 <1l 150 district #
#> ---
#> 146: A >2l 17 district_group 25-29
#> 147: A >2l 20 district_group 30-35
#> 148: A >2l 90 district_group >35
#> 149: B # 1707 district_group #
#> 150: A # 1444 district_group #
由 reprex 包（v2.0.0）创建于 2021-05-28
如果这正是你想要的结果，那么可以使用 doParallel 包并行运行 for 循环，如下所示：
library(data.table)
library(MASS)
library(doParallel)

# NOTE: this snippet reuses `x`, `comb` and `d` created by the previous
# snippet; run that one first.

# Register the parallel backend. There are only length(d) outer tasks, so
# more workers than that would sit idle. detectCores() may return NA, in
# which case a single worker is used.
n_cores <- detectCores()
if (is.na(n_cores)) {
  n_cores <- 1L
}
registerDoParallel(cores = min(length(d), n_cores))

# Run the outer loop in parallel with foreach and %dopar%.
# `.packages` attaches data.table on every worker; socket-based workers
# (e.g. on Windows) start as fresh R sessions without it.
tbl <- rbindlist(
  foreach(i = seq_along(d), .packages = "data.table") %dopar% {
    parts <- vector("list", length(comb))
    for (j in seq_along(comb)) {
      # The grouped query returns a new table, so `x` is left untouched and
      # no copy() is required.
      part <- x[, .(nclaims = sum(claims), region = d[i]),
                by = c(d[i], comb[[j]])]
      setnames(part, old = d[i], new = "region_id")
      parts[[j]] <- part
    }
    # `fill = TRUE` pads the key columns a combination does not use with NA.
    rbindlist(parts, fill = TRUE)
  },
  fill = TRUE
)

# Release the workers that registerDoParallel() may have started implicitly.
stopImplicitCluster()

# Mark the padded (NA) cells with "#", column by column, by reference.
for (col in seq_along(tbl)) {
  set(tbl, i = which(is.na(tbl[[col]])), j = col, value = "#")
}

# Print the combined summary table.
tbl
#> region_id group nclaims region age
#> 1: 1 <1l 249 district #
#> 2: 1 1-1.5l 636 district #
#> 3: 1 1.5-2l 378 district #
#> 4: 1 >2l 118 district #
#> 5: 2 <1l 150 district #
#> ---
#> 146: A >2l 17 district_group 25-29
#> 147: A >2l 20 district_group 30-35
#> 148: A >2l 90 district_group >35
#> 149: B # 1707 district_group #
#> 150: A # 1444 district_group #
由 reprex 包（v2.0.0）创建于 2021-05-28
快速比较（将原始 x 数据扩大 1000 倍）：
您的代码
user system elapsed
26.046 8.298 19.034
使用 foreach 和 %do%
user system elapsed
28.471 8.128 20.577
使用 foreach 和 %dopar%
user system elapsed
0.058 0.081 15.413