r语言 - 如何强制lapply绑定输出,就像apply对具有相同值的变量所做的那样?


apply(mtcars[,c('vs','am')],2,table)

生产

vs am
0 18 19
1 14 13

,

lapply(mtcars[,c('vs','am')],table)

生产

$vs
0  1 
18 14 
$am
0  1 
19 13 

我可以强制lapply像apply那样生成一个表吗?

最后,我想使用具有相同值的不同因变量来计算均值。我使用lapply,但不想在最后执行绑定:

func.break <- function(indy){
t(as.data.frame(mtcars 
%>% group_by(get(indy)) 
%>% summarise_at(depy, funs(mean))
)
)
}
indy <- c('vs','am') 
depy <- c('mpg','qsec')
res.list <- lapply(indy,func.break)
res.list
[[1]]
[,1]     [,2]
get(indy)  0.00000  1.00000
mpg       16.61667 24.55714
qsec      16.69389 19.33357
[[2]]
[,1]     [,2]
get(indy)  0.00000  1.00000
mpg       17.14737 24.39231
qsec      18.18316 17.36000
cbind(as.data.frame(res.list[1]),as.data.frame(res.list[1]))
X1       X2       X1       X2
get(indy)  0.00000  1.00000  0.00000  1.00000
mpg       16.61667 24.55714 16.61667 24.55714
qsec      16.69389 19.33357 16.69389 19.33357

我想有更优雅的方式吗?如何申请工作呢?

尝试list2DF

list2DF(lapply(mtcars[, c("vs", "am")], table))

,

vs am
1 18 19
2 14 13

更新

或者你可以试试as.data.frame

> as.data.frame(lapply(indy, func.break))
X1       X2     X1.1     X2.1
get(indy)  0.00000  1.00000  0.00000  1.00000
mpg       16.61667 24.55714 17.14737 24.39231
qsec      16.69389 19.33357 18.18316 17.36000

使用aggregate

FUN <- function(x) t(with(mtcars, aggregate(mtcars[, depy], list(y=get(x)), mean)))
do.call(cbind.data.frame, lapply(indy, FUN))
#             1        2        1        2
# y     0.00000  1.00000  0.00000  1.00000
# mpg  16.61667 24.55714 17.14737 24.39231
# qsec 16.69389 19.33357 18.18316 17.36000
计时

(See discussion in comments.)
set.seed(42)
mtcars <- mtcars[sample(nrow(mtcars), 5e5, replace=TRUE), ]
library(magrittr)
microbenchmark::microbenchmark(
list2DF=list2DF(lapply(mtcars[, c("vs", "am")], table))
apply=apply(mtcars[,c('vs','am')], 2, table),
rbind.lapply=t(do.call(rbind, lapply(mtcars[,c('vs','am')], table))),
sapply=sapply(mtcars[,c('vs','am')],table),
pipe=lapply(mtcars[,c('vs','am')],table) %>% do.call(rbind, .) %>% t(),
times=100L)
# Unit: seconds
#         expr      min       lq     mean   median       uq      max neval cld
#      list2DF 1.160465 1.170537 1.196288 1.202672 1.211612 1.278879   100  a 
#        apply 1.221822 1.264967 1.279215 1.270300 1.293056 1.391812   100   b
# rbind.lapply 1.163678 1.172187 1.198325 1.204445 1.214805 1.290071   100  a 
#       sapply 1.168295 1.174507 1.199146 1.207050 1.213422 1.315810   100  a 
#         pipe 1.167020 1.173511 1.203780 1.207331 1.215454 1.519427   100  a 

我建议func.break的另一种方法。

library(dplyr)
purrr::map_dfc(indy, ~mtcars %>% 
group_by(.data[[.x]]) %>% 
summarise(across(all_of(depy), mean, .names = '{.x}_{col}')))
#    vs vs_mpg vs_qsec    am am_mpg am_qsec
#  <dbl>  <dbl>   <dbl> <dbl>  <dbl>   <dbl>
#1     0   16.6    16.7     0   17.1    18.2
#2     1   24.6    19.3     1   24.4    17.4

相关内容

  • 没有找到相关文章

最新更新