我有一些数据要总结。我想对所有列进行总结,固定YEAR列。即,对于我能做的一个变量:
df %>%
group_by(LG1, YEAR) %>%
summarise(Freq = n())
然而,我想对每个变量都这样做。以下内容无法按我的要求工作,因为它没有按YEAR
变量进行分组。我已尝试包含返回错误的group_by(across(all_of(c(vars, YEAR)))) %>%
。
vars <- c("LG1", "AA1", "FNB1", "RE1", "PE1", "LG2", "AA2", "FNB2", "RE2", "PE2", "LG3", "AA3", "FNB3", "RE3", "PE3")
df %>%
select(c(all_of(vars), "YEAR")) %>%
group_by(across()) %>%
summarise(Freq = n())
预期产出将是一个数据框架,每个变量的频率按年份排列。
数据:
df <- structure(list(ï..N.QUESTIONAIRE = c(119L, 122L, 137L, 59L, 121L,
19L, 50L, 40L, 124L, 108L, 26L, 193L, 94L, 27L, 49L, 82L, 149L,
88L, 133L, 150L, 5L, 28L, 175L, 91L, 151L, 97L, 70L, 42L, 21L,
155L), LG1 = c(4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 3L, 4L, 5L,
4L, 4L, 4L, 5L, 4L, 4L, 5L, 4L, 5L, 4L, 4L, 3L, 5L, 5L, 5L, 3L,
3L, 3L), AA1 = c(1L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 4L, 3L, 3L,
1L, 1L, 3L, 2L, 1L, 1L, 3L, 1L, 3L, 1L, 2L, 3L, 2L, 2L, 2L, 2L,
2L, 4L, 1L), FNB1 = c(4L, 4L, 5L, 4L, 4L, 4L, 4L, 4L, 3L, 4L,
4L, 5L, 4L, 4L, 4L, 5L, 4L, 4L, 4L, 5L, 5L, 4L, 4L, 4L, 5L, 2L,
5L, 4L, 3L, 4L), RE1 = c(2L, 3L, 1L, 2L, 1L, 3L, 3L, 2L, 1L,
3L, 3L, 4L, 1L, 2L, 3L, 2L, 2L, 2L, 3L, 4L, 2L, 2L, 3L, 2L, 5L,
3L, 1L, 2L, 2L, 3L), PE1 = c(5L, 5L, 5L, 5L, 5L, 4L, 4L, 4L,
4L, 5L, 4L, 5L, 4L, 4L, 5L, 5L, 5L, 4L, 5L, 5L, 5L, 4L, 5L, 4L,
5L, 4L, 4L, 4L, 4L, 4L), LG2 = c(4L, 3L, 5L, 5L, 2L, 4L, 3L,
3L, 4L, 3L, 2L, 5L, 3L, 3L, 2L, 5L, 5L, 5L, 4L, 4L, 1L, 5L, 2L,
4L, 1L, 5L, 5L, 4L, 4L, 5L), AA2 = c(4L, 5L, 5L, 4L, 3L, 4L,
5L, 3L, 5L, 4L, 5L, 5L, 5L, 2L, 5L, 5L, 5L, 4L, 5L, 5L, 5L, 4L,
3L, 5L, 5L, 5L, 5L, 5L, 4L, 4L), FNB2 = c(1L, 2L, 1L, 2L, 3L,
1L, 3L, 3L, 1L, 1L, 3L, 2L, 1L, 3L, 2L, 3L, 2L, 2L, 1L, 2L, 1L,
2L, 2L, 3L, 5L, 1L, 3L, 3L, 2L, 1L), RE2 = c(4L, 3L, 3L, 3L,
3L, 4L, 3L, 3L, 4L, 3L, 2L, 5L, 4L, 3L, 4L, 4L, 5L, 3L, 2L, 2L,
4L, 2L, 4L, 1L, 5L, 5L, 4L, 1L, 3L, 4L), PE2 = c(2L, 4L, 1L,
3L, 3L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 4L,
1L, 1L, 2L, 2L, 4L, 1L, 1L, 2L, 4L, 1L, 1L), LG3 = c(4L, 3L,
3L, 4L, 2L, 4L, 4L, 2L, 5L, 3L, 3L, 4L, 4L, 2L, 4L, 3L, 3L, 4L,
4L, 3L, 5L, 4L, 4L, 2L, 5L, 5L, 3L, 4L, 5L, 4L), AA3 = c(1L,
3L, 2L, 2L, 3L, 3L, 2L, 1L, 1L, 2L, 2L, 3L, 3L, 2L, 1L, 1L, 1L,
3L, 2L, 3L, 1L, 1L, 4L, 2L, 4L, 4L, 1L, 1L, 3L, 2L), FNB3 = c(5L,
5L, 5L, 5L, 5L, 2L, 4L, 4L, 5L, 4L, 5L, 5L, 4L, 4L, 5L, 5L, 5L,
4L, 5L, 5L, 5L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), RE3 = c(2L,
2L, 2L, 2L, 3L, 4L, 4L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 3L, 2L, 1L,
4L, 4L, 1L, 3L, 1L, 1L, 3L, 5L, 1L, 2L, 4L, 3L, 2L), PE3 = c(5L,
3L, 4L, 4L, 4L, 4L, 3L, 4L, 5L, 5L, 4L, 4L, 4L, 4L, 3L, 4L, 4L,
4L, 4L, 4L, 5L, 4L, 4L, 3L, 5L, 5L, 4L, 3L, 4L, 3L), YEAR = c(2L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L), NATIONALITY = c(2L,
2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 3L, 1L, 2L, 1L, 1L,
3L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 3L, 1L, 2L, 3L, 1L), GENDER = c("F",
"F", "M", "M", "F", "M", "M", "F", "F", "M", "F", "M", "M", "F",
"M", "F", "F", "F", "M", "F", "M", "F", "M", "M", "F", "M", "M",
"F", "M", "F"), AGE = c(1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 3L,
1L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 1L, 2L, 1L, 2L, 2L, 3L, 2L, 3L,
2L, 2L, 1L, 2L)), class = "data.frame", row.names = c(NA, -30L
))
我们可以在group_by
中使用across
来包括所有vars
列以及YEAR
。
library(dplyr)
df %>% group_by(across(c(all_of(vars), "YEAR"))) %>% summarise(Freq = n())
我们也可以在这里使用count
-
df %>% count(across(c(all_of(vars), "YEAR")))
如果要为vars
中的每个值使用count
,则可以使用map
。
purrr::map(vars, ~df %>% count(YEAR, .data[[.x]]))
我们也可以在count
中将YEAR
指定为未引用
library(dplyr)
df %>%
count(across(c(all_of(vars), YEAR)))
或者另一个选项是转换为sym
bols,然后评估(!!!
df %>%
group_by(!!! rlang::syms(vars), YEAR) %>%
summarise(Freq = n(), .groups = 'drop')
count
中的类似选项
df %>%
count(!!! rlang::syms(vars), YEAR)