如何跨多行提取最高/最近日期?这个类似的线程没有提供解决方案
我对同一个病人做了多次核磁共振扫描。我想知道最近的日期是什么。每位患者可能有多达20次随访,但不一定每次随访都要进行MRI扫描。
一个问题是,所有MRI日期列的格式都是as.character
。是否可以将列名中包含mr_daterd
的所有列格式化为as.Date("all columns containing mr_daterd in the name_id", format = '%d.%m.%Y')
?
在每个日期列上写mutate(MRI_first = as.Date(mr_daterd_first, format = '%d.%m.%Y')
似乎太有创意了。
随后,我想找到最近的日期,这里定义为p$max_date
:
> head(p)
mr_daterd mr_daterd_fu1 mr_daterd_fu1_v2 mr_daterd_fu1_v2_v3 mr_daterd_fu1_v2_v3_v4 ...
1 07.03.2011 20.03.2012 05.06.2013 10.09.2014 18.01.2017 ...
2 15.04.1999 <NA> <NA> 22.02.2000 <NA> ...
3 xx/xx/2007 25.05.2012 <NA> <NA> <NA> ...
4 18.09.2006 <NA> <NA> <NA> <NA> ...
预期输出
> head(p)
mr_daterd mr_daterd_fu1 mr_daterd_fu1_v2 mr_daterd_fu1_v2_v3 ... max_date
1 07.03.2011 20.03.2012 05.06.2013 10.09.2014 ... 13.04.2018
2 15.04.1999 <NA> <NA> 22.02.2000 ... 30.05.2007
3 xx/xx/2007 25.05.2012 <NA> <NA> ... 25.05.2012
4 18.09.2006 <NA> <NA> <NA> ... 18.09.2006
优选dplyr
溶液。
数据
p <- structure(list(mr_daterd = c("07.03.2011", "15.04.1999", "xx/xx/2007",
"18.09.2006"), mr_daterd_fu1 = c("20.03.2012", NA, "25.05.2012",
NA), mr_daterd_fu1_v2 = c("05.06.2013", NA, NA, NA), mr_daterd_fu1_v2_v3 = c("10.09.2014",
"22.02.2000", NA, NA), mr_daterd_fu1_v2_v3_v4 = c("18.01.2017",
NA, NA, NA), mr_daterd_fu1_v2_v3_v4_v5 = c(NA_character_, NA_character_,
NA_character_, NA_character_), mr_daterd_fu1_v2_v3_v4_v5_v6 = c("18.01.2017",
NA, NA, NA), mr_daterd_fu1_v2_v3_v4_v5_v6_v7 = c(NA, "09.04.2001",
NA, NA), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8 = c("13.04.2018",
NA, NA, NA), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9 = c(NA_character_,
NA_character_, NA_character_, NA_character_), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10 = c(NA,
"07.03.2002", NA, NA), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11 = c(NA_character_,
NA_character_, NA_character_, NA_character_), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12 = c(NA,
"05.02.2003", NA, NA), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13 = c(NA_character_,
NA_character_, NA_character_, NA_character_), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14 = c(NA_character_,
NA_character_, NA_character_, NA_character_), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15 = c(NA,
"29.03.2004", NA, NA), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16 = c(NA_character_,
NA_character_, NA_character_, NA_character_), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16_v17 = c(NA,
"13.04.2005", NA, NA), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16_v17_v18 = c(NA,
NA, NA, NA), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16_v17_v18_v19 = c(NA,
"30.05.2007", NA, NA), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16_v17_v18_v19_v20 = c(NA,
NA, NA, NA), n_mri = c(7, 8, 2, 1)), row.names = c(NA, -4L), class = "data.frame")
试试这个。您可以使用across()
和starts_with()
转换为日期,然后在行级别应用函数c_across()
可以帮助获得最大日期。这里的代码:
library(dplyr)
#Code
p %>% mutate(across(starts_with('mr_daterd'),~as.Date(.,'%d.%m.%Y'))) %>%
rowwise() %>%
mutate(Recent=max(c_across(starts_with('mr_daterd')),na.rm = T)) -> p1
输出:
# A tibble: 4 x 23
# Rowwise:
mr_daterd mr_daterd_fu1 mr_daterd_fu1_v2 mr_daterd_fu1_v~ mr_daterd_fu1_v~
<date> <date> <date> <date> <date>
1 2011-03-07 2012-03-20 2013-06-05 2014-09-10 2017-01-18
2 1999-04-15 NA NA 2000-02-22 NA
3 NA 2012-05-25 NA NA NA
4 2006-09-18 NA NA NA NA
# ... with 18 more variables: mr_daterd_fu1_v2_v3_v4_v5 <date>,
# mr_daterd_fu1_v2_v3_v4_v5_v6 <date>, mr_daterd_fu1_v2_v3_v4_v5_v6_v7 <date>,
# mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8 <date>,
# mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9 <date>,
# mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10 <date>,
# mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11 <date>,
# mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12 <date>,
# mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13 <date>,
# mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14 <date>,
# mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15 <date>,
# mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16 <date>,
# mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16_v17 <date>,
# mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16_v17_v18 <date>,
# mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16_v17_v18_v19 <date>,
# mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16_v17_v18_v19_v20 <date>,
# n_mri <dbl>, Recent <date>
更新:如果不需要按行排列,请尝试将其整形为long以获得最大值,然后通过行定义的公共id合并:
#Code 2
new <- p %>%
mutate(id=row_number(),
across(starts_with('mr_daterd'),~as.Date(.,'%d.%m.%Y'))) %>%
left_join(
p %>%
mutate(id=row_number(),
across(starts_with('mr_daterd'),~as.Date(.,'%d.%m.%Y'))) %>%
pivot_longer(cols = starts_with('mr_daterd')) %>%
group_by(id) %>%
summarise(Recent=max(value,na.rm=T))
) %>% select(-id)