我有重复测量的数据,目前是长格式的。
我想做的只是得到一些汇总统计数据,比如每个时间点的计数和百分比。
示例数据:
# Example data in long format: 20 rows, two rows (time1 and time2) per id.
# Columns: id (numeric), time (factor: time1/time2), age (numeric),
# sex (factor: men/women), hypert_drug (factor: no/yes).
# The object carries the tibble classes ("tbl_df", "tbl", "data.frame").
questiondata <- structure(list(id = c(2, 2, 6, 6, 9, 9, 22, 22, 23, 23, 25, 25,
30, 30, 31, 31, 33, 33, 34, 34), time = structure(c(1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L), .Label = c("time1", "time2"), class = "factor"), age = c(65,
69.17, 76.75, 81.05, 58.64, 62.71, 59.37, 63.56, 58, 61.69, 55.78,
59.95, 59.3, 63.36, 60.45, 64.39, 56.3, 60.08, 59.53, 63.84),
sex = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L), .Label = c("men",
"women"), class = "factor"), hypert_drug = structure(c(1L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
1L, 1L, 1L, 1L), .Label = c("no", "yes"), class = "factor")), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
其对应于以下tibble:
# A tibble: 20 x 5
id time age sex hypert_drug
<dbl> <fct> <dbl> <fct> <fct>
1 2 time1 65 men no
2 2 time2 69.2 men yes
3 6 time1 76.8 women yes
4 6 time2 81.0 women yes
5 9 time1 58.6 men no
6 9 time2 62.7 men no
7 22 time1 59.4 men no
8 22 time2 63.6 men no
9 23 time1 58 women no
10 23 time2 61.7 women no
11 25 time1 55.8 men no
12 25 time2 60.0 men no
13 30 time1 59.3 women no
14 30 time2 63.4 women yes
15 31 time1 60.4 men yes
16 31 time2 64.4 men yes
17 33 time1 56.3 men no
18 33 time2 60.1 men no
19 34 time1 59.5 women no
20 34 time2 63.8 women no
我尝试了以下方法来简单统计男性和女性:
# Attempt 1: n_distinct(sex) returns how many *different* sex values occur
# within each time group (men and women -> 2), not the number of rows per sex.
questiondata %>%
group_by(time) %>%
summarise(n_sex=n_distinct(sex))
但这给出了:
# A tibble: 2 x 2
time n_sex
* <fct> <int>
1 time1 2
2 time2 2
然后我尝试了
# Attempt 2: count() is applied here to the factor column `sex` inside
# mutate(); there is no count() method for a factor, so this call errors.
questiondata %>%
group_by(time) %>%
mutate(n_sex=count(sex))
这给出了一个错误:
Error: Problem with `mutate()` input `n_sex`.
x no applicable method for 'count' applied to an object of class "factor"
i Input `n_sex` is `count(sex)`.
i The error occurred in group 1: time = "time1".
Run `rlang::last_error()` to see where the error occurred.
有人能帮忙吗?谢谢
# Count the rows for every time x sex combination.
# .groups = "keep" leaves the result grouped by both time and sex.
questiondata %>%
  group_by(time, sex) %>%
  summarise(count = n(), .groups = "keep")
# A tibble: 4 x 3
# Groups: time, sex [4]
time sex count
<fct> <fct> <int>
1 time1 men 6
2 time1 women 4
3 time2 men 6
4 time2 women 4
这里以 time 和 sex 作为分组变量;计数列(上面的代码中名为 count,下面的代码中名为 n)表示 time 与 sex 每种组合的观测数。
library(dplyr)
# Tally observations per time/sex pair.
# No `.groups` argument is given, so summarise() drops the last grouping
# level (sex): the result stays grouped by time and dplyr prints a message.
questiondata %>%
  group_by(time, sex) %>%
  summarise(n = n())
`summarise()` has grouped output by 'time'. You can override using the `.groups` argument.
# A tibble: 4 x 3
# Groups: time [2]
time sex n
<fct> <fct> <int>
1 time1 men 6
2 time1 women 4
3 time2 men 6
4 time2 women 4