R语言 - 在dplyr包中用across()替换summarise_each()



我有这个脚本,我想知道如何用across()函数替换summarise_each()?

# Bin common_IDs into ranges with cut(), then total `frequent` per range.
# The summarise_each()/funs() call below is the one that raises the
# deprecation warning this question asks about.
common_bw_elements <- df %>%
  group_by(
    range_of_commons = cut(
      common_IDs,
      breaks = c(
        -Inf, 0, 5, 10, 20, 30, 60, 100, 200, 300, 600, 1200, 1800, Inf
      )
    )
  ) %>%
  summarise_each(funs(sum), sum_of_instances = frequent)

我问这个,因为我得到以下消息:

Warning message: summarise_each() is deprecated as of dplyr 0.7.0. Please use across() instead.

我的代码与下面的帖子非常相似:使用dplyr将组总结为间隔

如有任何线索,我将不胜感激。

作为参考,您可以使用以下dput()

dput(df)
structure(list(common_IDs = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 17L, 18L, 25L, 26L, 27L, 37L, 51L, 55L, 56L, 63L, 68L, 69L, 70L, 71L, 74L, 76L, 81L, 84L, 86L, 87L, 89L, 90L, 91L, 92L, 101L, 
103L, 108L, 109L, 110L, 113L, 114L, 115L, 116L, 129L, 130L, 131L, 133L, 135L, 136L, 137L, 138L, 139L, 141L, 152L, 153L, 154L, 177L, 178L, 190L, 191L, 196L, 199L, 202L, 203L, 208L, 209L, 210L, 211L, 213L, 214L, 215L, 216L, 218L, 219L, 222L, 223L, 229L, 230L, 231L, 
232L, 239L, 251L, 252L, 254L, 257L, 264L, 265L, 271L, 272L, 273L, 275L, 276L, 277L, 280L, 293L, 294L, 297L, 298L, 299L, 300L, 301L, 304L, 317L, 320L, 337L, 346L, 347L, 364L, 371L, 373L, 386L, 387L, 389L, 412L, 417L, 419L, 420L, 432L, 440L, 441L, 442L, 443L, 451L, 
452L, 453L, 455L, 456L, 457L, 458L, 462L, 463L, 464L, 469L, 470L, 474L, 476L, 477L, 478L, 487L, 488L, 492L, 1484L, 1534L, 1546L, 1561L, 1629L, 1642L, 1670L, 1672L, 1681L, 1698L, 1723L, 1725L, 
1736L, 1738L, 1745L, 1753L, 1759L, 1764L, 1766L, 1767L, 1770L, 1772L, 1775L, 1776L, 1781L, 1784L, 1787L, 1791L, 1802L, 1807L, 1813L, 1815L, 1817L, 1821L, 1823L, 1825L, 1846L, 1850L, 1852L, 
1853L, 1854L, 1857L, 1858L, 1859L, 1868L, 1899L, 1904L, 1911L, 1913L, 1977L, 1997L, 1999L, 2023L, 2079L),
frequent = c(81L, 75L, 10L, 17L, 4L, 4L, 33L, 13L, 31L, 3L, 19L, 22L, 6L, 1L, 11L, 2L, 
1L, 1L, 3L, 14L, 1L, 2L, 1L, 14L, 1L, 9L, 6L, 9L, 2L, 5L, 13L, 4L, 4L, 1L, 4L, 1L, 3L, 1L, 6L, 2L, 1L, 3L, 2L, 5L, 2L, 1L, 17L, 5L, 4L, 4L, 1L, 4L, 7L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 6L, 
16L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 5L, 13L, 6L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 4L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 3L, 5L, 1L, 3L, 1L, 3L, 4L, 1L, 1L, 2L, 3L, 4L, 3L, 3L, 1L, 3L, 2L, 2L, 1L, 6L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), 
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -193L))

您可以使用summarise,因为您只按组对一个变量求和。

library(tidyverse)
# Only one column is being totalled per group, so a plain summarise() is
# enough here; across() is not needed.
common_bw_elements <- df %>%
  group_by(
    range_of_commons = cut(
      common_IDs,
      breaks = c(
        -Inf, 0, 5, 10, 20, 30, 60, 100, 200, 300, 600, 1200, 1800, Inf
      )
    )
  ) %>%
  summarise(sum_of_instances = sum(frequent))

range_of_commons  sum_of_instances
<fct>                        <int>
1 (-Inf,0]                        81
2 (0,5]                          110
3 (5,10]                          46
4 (10,20]                         34
5 (20,30]                         47
6 (30,60]                         15
7 (60,100]                        85
8 (100,200]                       87
9 (200,300]                       92
10 (300,600]                       75
11 (1.2e+03,1.8e+03]               29
12 (1.8e+03, Inf]                  28

如果您有多个列需要求和,则可以使用across();如果只需要对其中几列求和,可以提供列名向量(例如c(common_IDs, frequent))来代替everything():

# Sum every non-grouping column within each common_IDs range, then rename
# the `frequent` total to `sum_of_instances`.
df %>%
  group_by(
    range_of_commons = cut(
      common_IDs,
      breaks = c(
        -Inf, 0, 5, 10, 20, 30, 60, 100, 200, 300, 600, 1200, 1800, Inf
      )
    )
  ) %>%
  summarise(across(everything(), sum)) %>%
  rename(sum_of_instances = frequent)

输出

range_of_commons  common_IDs sum_of_instances
<fct>                  <int>            <int>
1 (-Inf,0]                   0               81
2 (0,5]                     15              110
3 (5,10]                    13               46
4 (10,20]                   35               34
5 (20,30]                   78               47
6 (30,60]                  199               15
7 (60,100]                1191               85
8 (100,200]               3928               87
9 (200,300]               9392               92
10 (300,600]              17290               75
11 (1.2e+03,1.8e+03]      47829               29
12 (1.8e+03, Inf]         48922               28

最新更新