r语言 - 推广一个数据帧子集函数



我有一个玩具数据框架,它有4列(study,outcome,group,time)。比如说,一个用户想知道在哪个唯一的study值中,任何其他选定的列值是恒定的还是变化的。

例如,如果用户想知道studyoutcomegroup列的值是不变的还是变化的,那么我们知道有4种可能的情况:

  1. group不变,outcome变化
  2. outcome不变,group变化。
  3. outcome&group各不相同
  4. outcome&group都没有变化

下面的foo函数,正是基于上面的例子。

问题:我想知道如何概括foo,这样用户可以在函数中输入他所选列的名称(例如,outcomegroup),并且foo自动检查所选列中哪些唯一的study值是恒定的或变化的?

p。在下面的示例中,我的广义函数将产生如下所示的相同输出:

h = "
study outcome group time
a     1       1     0
a     2       1     1
b     1       1     0
b     1       2     0
c     2       1     0
c     3       2     1
d     1       1     0
d     1       1     0
e     1       1     0"
h = read.table(text=h,h=T)
foo <- function(dat, cond) {

switch(cond, 

`1` = dat %>% 
group_by(study) %>%
filter(n_distinct(group) == 1, n_distinct(outcome) > 1) %>%
ungroup,
`2` = dat %>% 
group_by(study) %>%
filter(n_distinct(group) > 1, n_distinct(outcome) == 1) %>%
ungroup,

`3` =  dat %>% 
group_by(study) %>%
filter(n_distinct(group) > 1, n_distinct(outcome) > 1) %>%
ungroup,
`4` = dat %>% 
group_by(study) %>%
filter(n_distinct(group) == 1, n_distinct(outcome) == 1) %>%
ungroup )  } 
#------------------- EXAMPLE OF USE:
> foo(h, 1)
# A tibble: 2 x 3
study outcome group
<chr>   <int> <int>
1 a           1     1
2 a           2     1
> foo(h, 2)
# A tibble: 2 x 3
study outcome group
<chr>   <int> <int>
1 b           1     1
2 b           1     2
> foo(h, 3)
# A tibble: 2 x 3
study outcome group
<chr>   <int> <int>
1 c           2     1
2 c           3     2
> foo(h, 4)
# A tibble: 3 x 3
study outcome group
<chr>   <int> <int>
1 d           1     1
2 d           1     1
3 e           1     1

如果输入参数未加引号,则使用{{}}

foo <- function(dat, study_col, group_col, outcome_col) {

fn1 <- function(cond) {
switch(cond, 

`1` = dat %>% 
group_by({{study_col}}) %>%
filter(n_distinct({{group_col}}) == 1, n_distinct({{outcome_col}}) > 1) %>%
ungroup,
`2` = dat %>% 
group_by({{study_col}}) %>%
filter(n_distinct({{group_col}}) > 1, n_distinct({{outcome_col}}) == 1) %>%
ungroup,
`3` = dat %>% 
group_by({{study_col}}) %>%
filter(n_distinct({{group_col}}) > 1, n_distinct({{outcome_col}}) > 1) %>%
ungroup,

`4` = dat %>% 
group_by({{study_col}}) %>%
filter(n_distinct({{group_col}}) == 1, n_distinct({{outcome_col}}) == 1) %>%
ungroup
)  }
purrr::map(1:4, ~ fn1(.x))
}

测试

> foo(h, study, group, outcome)
[[1]]
# A tibble: 2 x 4
study outcome group  time
<chr>   <int> <int> <int>
1 a           1     1     0
2 a           2     1     1
[[2]]
# A tibble: 2 x 4
study outcome group  time
<chr>   <int> <int> <int>
1 b           1     1     0
2 b           1     2     0
[[3]]
# A tibble: 2 x 4
study outcome group  time
<chr>   <int> <int> <int>
1 c           2     1     0
2 c           3     2     1
[[4]]
# A tibble: 3 x 4
study outcome group  time
<chr>   <int> <int> <int>
1 d           1     1     0
2 d           1     1     0
3 e           1     1     0

或使用

foo2 <- function(dat, study_col, group_col, outcome_col) {
dat %>%
dplyr::select({{study_col}}, {{group_col}}, {{outcome_col}}) %>%
dplyr::group_by({{study_col}}) %>%
dplyr::mutate(grp = stringr::str_c(n_distinct({{group_col}}) == 1, 
n_distinct({{outcome_col}}) == 1 ))   %>%
dplyr::ungroup(.) %>%
dplyr::group_split(grp, .keep = FALSE)  

}

测试

> foo2(h, study, group, outcome)
<list_of<
tbl_df<
study  : character
group  : integer
outcome: integer
>
>[4]>
[[1]]
# A tibble: 2 x 3
study group outcome
<chr> <int>   <int>
1 c         1       2
2 c         2       3
[[2]]
# A tibble: 2 x 3
study group outcome
<chr> <int>   <int>
1 b         1       1
2 b         2       1
[[3]]
# A tibble: 2 x 3
study group outcome
<chr> <int>   <int>
1 a         1       1
2 a         1       2
[[4]]
# A tibble: 3 x 3
study group outcome
<chr> <int>   <int>
1 d         1       1
2 d         1       1
3 e         1       1

最新更新