r:如何根据从"as.character"到"as.Date"的特定列名称一次格式化多个列,然后找到最近的日期?



如何跨多行提取最高/最近日期?这个类似的线程没有提供解决方案

我对同一个病人做了多次核磁共振扫描。我想知道最近的日期是什么。每位患者可能有多达20次随访,但不一定每次随访都要进行MRI扫描。

一个问题是,所有MRI日期列的格式都是as.character。是否可以将列名中包含mr_daterd的所有列格式化为as.Date("all columns containing mr_daterd in the name_id", format = '%d.%m.%Y')

在每个日期列上写mutate(MRI_first = as.Date(mr_daterd_first, format = '%d.%m.%Y')似乎太有创意了。

随后,我想找到最近的日期,这里定义为p$max_date:

> head(p)
mr_daterd mr_daterd_fu1 mr_daterd_fu1_v2 mr_daterd_fu1_v2_v3 mr_daterd_fu1_v2_v3_v4     ...
1 07.03.2011    20.03.2012       05.06.2013          10.09.2014             18.01.2017     ...
2 15.04.1999          <NA>             <NA>          22.02.2000                   <NA>     ...
3 xx/xx/2007    25.05.2012             <NA>                <NA>                   <NA>     ...
4 18.09.2006          <NA>             <NA>                <NA>                   <NA>     ...

预期输出

> head(p)
mr_daterd mr_daterd_fu1 mr_daterd_fu1_v2 mr_daterd_fu1_v2_v3    ...   max_date
1 07.03.2011    20.03.2012       05.06.2013          10.09.2014    ...   13.04.2018
2 15.04.1999          <NA>             <NA>          22.02.2000    ...   30.05.2007
3 xx/xx/2007    25.05.2012             <NA>                <NA>    ...   25.05.2012
4 18.09.2006          <NA>             <NA>                <NA>    ...   18.09.2006

优选dplyr溶液。

数据

p <- structure(list(mr_daterd = c("07.03.2011", "15.04.1999", "xx/xx/2007", 
"18.09.2006"), mr_daterd_fu1 = c("20.03.2012", NA, "25.05.2012", 
NA), mr_daterd_fu1_v2 = c("05.06.2013", NA, NA, NA), mr_daterd_fu1_v2_v3 = c("10.09.2014", 
"22.02.2000", NA, NA), mr_daterd_fu1_v2_v3_v4 = c("18.01.2017", 
NA, NA, NA), mr_daterd_fu1_v2_v3_v4_v5 = c(NA_character_, NA_character_, 
NA_character_, NA_character_), mr_daterd_fu1_v2_v3_v4_v5_v6 = c("18.01.2017", 
NA, NA, NA), mr_daterd_fu1_v2_v3_v4_v5_v6_v7 = c(NA, "09.04.2001", 
NA, NA), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8 = c("13.04.2018", 
NA, NA, NA), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9 = c(NA_character_, 
NA_character_, NA_character_, NA_character_), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10 = c(NA, 
"07.03.2002", NA, NA), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11 = c(NA_character_, 
NA_character_, NA_character_, NA_character_), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12 = c(NA, 
"05.02.2003", NA, NA), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13 = c(NA_character_, 
NA_character_, NA_character_, NA_character_), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14 = c(NA_character_, 
NA_character_, NA_character_, NA_character_), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15 = c(NA, 
"29.03.2004", NA, NA), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16 = c(NA_character_, 
NA_character_, NA_character_, NA_character_), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16_v17 = c(NA, 
"13.04.2005", NA, NA), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16_v17_v18 = c(NA, 
NA, NA, NA), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16_v17_v18_v19 = c(NA, 
"30.05.2007", NA, NA), mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16_v17_v18_v19_v20 = c(NA, 
NA, NA, NA), n_mri = c(7, 8, 2, 1)), row.names = c(NA, -4L), class = "data.frame")

试试这个。您可以使用across()starts_with()转换为日期,然后在行级别应用函数c_across()可以帮助获得最大日期。这里的代码:

library(dplyr)
#Code
p %>% mutate(across(starts_with('mr_daterd'),~as.Date(.,'%d.%m.%Y'))) %>%
rowwise() %>%
mutate(Recent=max(c_across(starts_with('mr_daterd')),na.rm = T)) -> p1

输出:

# A tibble: 4 x 23
# Rowwise: 
mr_daterd  mr_daterd_fu1 mr_daterd_fu1_v2 mr_daterd_fu1_v~ mr_daterd_fu1_v~
<date>     <date>        <date>           <date>           <date>          
1 2011-03-07 2012-03-20    2013-06-05       2014-09-10       2017-01-18      
2 1999-04-15 NA            NA               2000-02-22       NA              
3 NA         2012-05-25    NA               NA               NA              
4 2006-09-18 NA            NA               NA               NA              
# ... with 18 more variables: mr_daterd_fu1_v2_v3_v4_v5 <date>,
#   mr_daterd_fu1_v2_v3_v4_v5_v6 <date>, mr_daterd_fu1_v2_v3_v4_v5_v6_v7 <date>,
#   mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8 <date>,
#   mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9 <date>,
#   mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10 <date>,
#   mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11 <date>,
#   mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12 <date>,
#   mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13 <date>,
#   mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14 <date>,
#   mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15 <date>,
#   mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16 <date>,
#   mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16_v17 <date>,
#   mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16_v17_v18 <date>,
#   mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16_v17_v18_v19 <date>,
#   mr_daterd_fu1_v2_v3_v4_v5_v6_v7_v8_v9_v10_v11_v12_v13_v14_v15_v16_v17_v18_v19_v20 <date>,
#   n_mri <dbl>, Recent <date>

更新:如果不需要按行排列,请尝试将其整形为long以获得最大值,然后通过行定义的公共id合并:

#Code 2
new <- p %>%
mutate(id=row_number(),
across(starts_with('mr_daterd'),~as.Date(.,'%d.%m.%Y'))) %>%
left_join(
p %>%
mutate(id=row_number(),
across(starts_with('mr_daterd'),~as.Date(.,'%d.%m.%Y'))) %>%
pivot_longer(cols = starts_with('mr_daterd')) %>%
group_by(id) %>%
summarise(Recent=max(value,na.rm=T))
) %>% select(-id)

最新更新