r-按组获取最后一个条件的日期



考虑这个数据帧:

idPerson idAppt decision date      
1 A             1 a        2021-09-10
2 A             1 b        2021-09-11
3 A             1 c        2021-09-12
4 A             1 d        2021-09-13
5 A             2 a        2021-09-20
6 A             2 b        2021-09-21
7 A             3 a        2021-09-10
8 A             3 b        2021-09-11
9 B             1 a        2021-09-10
10 B             1 b        2021-09-11
11 B             1 c        2021-09-12
12 B             1 d        2021-09-13
13 B             2 a        2021-09-11
14 B             2 b        2021-09-12
15 B             3 a        2021-09-14
16 B             3 b        2021-09-15

对于每组idPersonidAppt,我想得到一个date2列,但有一定的条件:

  • 对于decision == "a"开始时间晚于具有相同idPerson的任何其他idAppt组的decision == "d"日期的任何组(idPersonxidAppt(,报告该组的decision == "d"的日期
  • 对于不符合此要求的任何其他组,date2应为该给定idPerson的最早日期

哪个产生这个数据帧:

idPerson idAppt decision date       date2     
1 A             1 a        2021-09-10 2021-09-10
2 A             1 b        2021-09-11 2021-09-10
3 A             1 c        2021-09-12 2021-09-10
4 A             1 d        2021-09-13 2021-09-10
5 A             2 a        2021-09-20 2021-09-13
6 A             2 b        2021-09-21 2021-09-13
7 A             3 a        2021-09-10 2021-09-10
8 A             3 b        2021-09-11 2021-09-10
9 B             1 a        2021-09-10 2021-09-10
10 B             1 b        2021-09-11 2021-09-10
11 B             1 c        2021-09-12 2021-09-10
12 B             1 d        2021-09-13 2021-09-10
13 B             2 a        2021-09-11 2021-09-10
14 B             2 b        2021-09-12 2021-09-10
15 B             3 a        2021-09-14 2021-09-13
16 B             3 b        2021-09-15 2021-09-13

数据:

df <- structure(list(idPerson = c("A", "A", "A", "A", "A", "A", "A", 
"A", "B", "B", "B", "B", "B", "B", "B", "B"), idAppt = c(1L, 
1L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L), 
decision = c("a", "b", "c", "d", "a", "b", "a", "b", "a", 
"b", "c", "d", "a", "b", "a", "b"), date = structure(c(18880, 
18881, 18882, 18883, 18890, 18891, 18880, 18881, 18880, 18881, 
18882, 18883, 18881, 18882, 18884, 18885), class = "Date")), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -16L))
EO <- structure(list(idPerson = c("A", "A", "A", "A", "A", "A", "A", 
"A", "B", "B", "B", "B", "B", "B", "B", "B"), idAppt = c(1L, 
1L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L), 
decision = c("a", "b", "c", "d", "a", "b", "a", "b", "a", 
"b", "c", "d", "a", "b", "a", "b"), date = structure(c(18880, 
18881, 18882, 18883, 18890, 18891, 18880, 18881, 18880, 18881, 
18882, 18883, 18881, 18882, 18884, 18885), class = "Date"), 
date2 = c("2021-09-10", "2021-09-10", "2021-09-10", "2021-09-10", 
"2021-09-13", "2021-09-13", "2021-09-10", "2021-09-10", "2021-09-10", 
"2021-09-10", "2021-09-10", "2021-09-10", "2021-09-10", "2021-09-10", 
"2021-09-13", "2021-09-13")), row.names = c(NA, -16L), class = c("tbl_df", 
"tbl", "data.frame"))
df %>%
group_by(idPerson) %>%
mutate(d_date = min(date[decision == "d"]), min_date_person = min(date)) %>%
group_by(idPerson, idAppt) %>%
mutate(date2 = if_else(date[decision == "a"] > d_date, d_date, min_date_person)) %>%
ungroup()
# # A tibble: 16 × 7
#    idPerson idAppt decision date       d_date     min_date_person date2     
#    <chr>     <int> <chr>    <date>     <date>     <date>          <date>    
#  1 A             1 a        2021-09-10 2021-09-13 2021-09-10      2021-09-10
#  2 A             1 b        2021-09-11 2021-09-13 2021-09-10      2021-09-10
#  3 A             1 c        2021-09-12 2021-09-13 2021-09-10      2021-09-10
#  4 A             1 d        2021-09-13 2021-09-13 2021-09-10      2021-09-10
#  5 A             2 a        2021-09-20 2021-09-13 2021-09-10      2021-09-13
#  6 A             2 b        2021-09-21 2021-09-13 2021-09-10      2021-09-13
#  7 A             3 a        2021-09-10 2021-09-13 2021-09-10      2021-09-10
#  8 A             3 b        2021-09-11 2021-09-13 2021-09-10      2021-09-10
#  9 B             1 a        2021-09-10 2021-09-13 2021-09-10      2021-09-10
# 10 B             1 b        2021-09-11 2021-09-13 2021-09-10      2021-09-10
# 11 B             1 c        2021-09-12 2021-09-13 2021-09-10      2021-09-10
# 12 B             1 d        2021-09-13 2021-09-13 2021-09-10      2021-09-10
# 13 B             2 a        2021-09-11 2021-09-13 2021-09-10      2021-09-10
# 14 B             2 b        2021-09-12 2021-09-13 2021-09-10      2021-09-10
# 15 B             3 a        2021-09-14 2021-09-13 2021-09-10      2021-09-13
# 16 B             3 b        2021-09-15 2021-09-13 2021-09-10      2021-09-13
#Select earlist date for decision d by idPerson
df_d <- df%>%
filter(decision=="d")%>%
group_by(idPerson)%>%
arrange(date)%>%
slice(1)%>%
rename(earliest_d=date)%>%
select(idPerson,earliest_d)%>%
ungroup
#Select earlist date for decision a
df_a <- df%>%
filter(decision=="a")%>%
rename(earliest_a=date)%>%
select(-decision)
# merge above datasets with df, create date2 by given conditions
df2 <- df%>%
base::merge(df_d,by=c("idPerson"))%>%
base::merge(df_a,by=c("idPerson","idAppt"))%>%
mutate(date2=case_when(earliest_d>earliest_a~earliest_a,
T~earliest_d))%>%
select(-earliest_d,-earliest_a)
df2

输出:

> df2
idPerson idAppt decision       date      date2
1         A      1        a 2021-09-10 2021-09-10
2         A      1        b 2021-09-11 2021-09-10
3         A      1        c 2021-09-12 2021-09-10
4         A      1        d 2021-09-13 2021-09-10
5         A      2        a 2021-09-20 2021-09-13
6         A      2        b 2021-09-21 2021-09-13
7         A      3        a 2021-09-10 2021-09-10
8         A      3        b 2021-09-11 2021-09-10
9         B      1        a 2021-09-10 2021-09-10
10        B      1        b 2021-09-11 2021-09-10
11        B      1        c 2021-09-12 2021-09-10
12        B      1        d 2021-09-13 2021-09-10
13        B      2        a 2021-09-11 2021-09-11
14        B      2        b 2021-09-12 2021-09-11
15        B      3        a 2021-09-14 2021-09-13
16        B      3        b 2021-09-15 2021-09-13

相关内容

  • 没有找到相关文章

最新更新