r-group_by(跨(all_of(vars,YEARS))-用固定的YEAR变量按变量分组

  • 本文关键字:变量 YEAR of all vars r-group YEARS by r dplyr
  • 更新时间 :
  • 英文 :


我有一些数据要总结。我想对所有列进行总结,固定YEAR列。即,对于我能做的一个变量:

df %>% 
group_by(LG1, YEAR) %>% 
summarise(Freq = n())

然而,我想对每个变量都这样做。以下内容无法按我的要求工作,因为它没有按YEAR变量进行分组。我已尝试包含返回错误的group_by(across(all_of(c(vars, YEAR)))) %>%

vars <- c("LG1", "AA1", "FNB1", "RE1", "PE1", "LG2", "AA2", "FNB2", "RE2", "PE2", "LG3", "AA3", "FNB3", "RE3", "PE3")
df %>% 
select(c(all_of(vars), "YEAR")) %>% 
group_by(across()) %>% 
summarise(Freq = n())

预期产出将是一个数据框架,每个变量的频率按年份排列。

数据:

df <- structure(list(ï..N.QUESTIONAIRE = c(119L, 122L, 137L, 59L, 121L, 
19L, 50L, 40L, 124L, 108L, 26L, 193L, 94L, 27L, 49L, 82L, 149L, 
88L, 133L, 150L, 5L, 28L, 175L, 91L, 151L, 97L, 70L, 42L, 21L, 
155L), LG1 = c(4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 3L, 4L, 5L, 
4L, 4L, 4L, 5L, 4L, 4L, 5L, 4L, 5L, 4L, 4L, 3L, 5L, 5L, 5L, 3L, 
3L, 3L), AA1 = c(1L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 4L, 3L, 3L, 
1L, 1L, 3L, 2L, 1L, 1L, 3L, 1L, 3L, 1L, 2L, 3L, 2L, 2L, 2L, 2L, 
2L, 4L, 1L), FNB1 = c(4L, 4L, 5L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 
4L, 5L, 4L, 4L, 4L, 5L, 4L, 4L, 4L, 5L, 5L, 4L, 4L, 4L, 5L, 2L, 
5L, 4L, 3L, 4L), RE1 = c(2L, 3L, 1L, 2L, 1L, 3L, 3L, 2L, 1L, 
3L, 3L, 4L, 1L, 2L, 3L, 2L, 2L, 2L, 3L, 4L, 2L, 2L, 3L, 2L, 5L, 
3L, 1L, 2L, 2L, 3L), PE1 = c(5L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 
4L, 5L, 4L, 5L, 4L, 4L, 5L, 5L, 5L, 4L, 5L, 5L, 5L, 4L, 5L, 4L, 
5L, 4L, 4L, 4L, 4L, 4L), LG2 = c(4L, 3L, 5L, 5L, 2L, 4L, 3L, 
3L, 4L, 3L, 2L, 5L, 3L, 3L, 2L, 5L, 5L, 5L, 4L, 4L, 1L, 5L, 2L, 
4L, 1L, 5L, 5L, 4L, 4L, 5L), AA2 = c(4L, 5L, 5L, 4L, 3L, 4L, 
5L, 3L, 5L, 4L, 5L, 5L, 5L, 2L, 5L, 5L, 5L, 4L, 5L, 5L, 5L, 4L, 
3L, 5L, 5L, 5L, 5L, 5L, 4L, 4L), FNB2 = c(1L, 2L, 1L, 2L, 3L, 
1L, 3L, 3L, 1L, 1L, 3L, 2L, 1L, 3L, 2L, 3L, 2L, 2L, 1L, 2L, 1L, 
2L, 2L, 3L, 5L, 1L, 3L, 3L, 2L, 1L), RE2 = c(4L, 3L, 3L, 3L, 
3L, 4L, 3L, 3L, 4L, 3L, 2L, 5L, 4L, 3L, 4L, 4L, 5L, 3L, 2L, 2L, 
4L, 2L, 4L, 1L, 5L, 5L, 4L, 1L, 3L, 4L), PE2 = c(2L, 4L, 1L, 
3L, 3L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 4L, 
1L, 1L, 2L, 2L, 4L, 1L, 1L, 2L, 4L, 1L, 1L), LG3 = c(4L, 3L, 
3L, 4L, 2L, 4L, 4L, 2L, 5L, 3L, 3L, 4L, 4L, 2L, 4L, 3L, 3L, 4L, 
4L, 3L, 5L, 4L, 4L, 2L, 5L, 5L, 3L, 4L, 5L, 4L), AA3 = c(1L, 
3L, 2L, 2L, 3L, 3L, 2L, 1L, 1L, 2L, 2L, 3L, 3L, 2L, 1L, 1L, 1L, 
3L, 2L, 3L, 1L, 1L, 4L, 2L, 4L, 4L, 1L, 1L, 3L, 2L), FNB3 = c(5L, 
5L, 5L, 5L, 5L, 2L, 4L, 4L, 5L, 4L, 5L, 5L, 4L, 4L, 5L, 5L, 5L, 
4L, 5L, 5L, 5L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), RE3 = c(2L, 
2L, 2L, 2L, 3L, 4L, 4L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 3L, 2L, 1L, 
4L, 4L, 1L, 3L, 1L, 1L, 3L, 5L, 1L, 2L, 4L, 3L, 2L), PE3 = c(5L, 
3L, 4L, 4L, 4L, 4L, 3L, 4L, 5L, 5L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 
4L, 4L, 4L, 5L, 4L, 4L, 3L, 5L, 5L, 4L, 3L, 4L, 3L), YEAR = c(2L, 
2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 
1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L), NATIONALITY = c(2L, 
2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 3L, 1L, 2L, 1L, 1L, 
3L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 3L, 1L, 2L, 3L, 1L), GENDER = c("F", 
"F", "M", "M", "F", "M", "M", "F", "F", "M", "F", "M", "M", "F", 
"M", "F", "F", "F", "M", "F", "M", "F", "M", "M", "F", "M", "M", 
"F", "M", "F"), AGE = c(1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 3L, 
1L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 1L, 2L, 1L, 2L, 2L, 3L, 2L, 3L, 
2L, 2L, 1L, 2L)), class = "data.frame", row.names = c(NA, -30L
))

我们可以在group_by中使用across来包括所有vars列以及YEAR

library(dplyr)
df %>%  group_by(across(c(all_of(vars), "YEAR"))) %>% summarise(Freq = n())

我们也可以在这里使用count-

df %>% count(across(c(all_of(vars), "YEAR")))

如果要为vars中的每个值使用count,则可以使用map

purrr::map(vars, ~df %>% count(YEAR, .data[[.x]]))

我们也可以在count中将YEAR指定为未引用

library(dplyr)
df %>%
count(across(c(all_of(vars), YEAR)))

或者另一个选项是转换为symbols,然后评估(!!!

df %>%
group_by(!!! rlang::syms(vars), YEAR) %>%
summarise(Freq = n(), .groups = 'drop')

count中的类似选项

df %>%
count(!!! rlang::syms(vars), YEAR)

最新更新