r-在数据框中选择间隔不超过一年的日期



尊敬的Stackoverflow社区,

如果可以的话,我想选择间隔不超过1年的CT扫描(方法=1(和胃镜检查(方法=100(的日期。

请查收附件中的数据集。Number是参与者,c7是方法,其中1是CT,100是胃镜。

非常感谢你的帮助!

structure(list(Number = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 
28, 29, 30, 31, 32, 33, 34, 35, 36), c7_1 = c(1, 100, 1, 100, 
100, 100, 1, 1, 1, 100, 100, 100, 1, 100, 1, 1, 1, 100, 1, 100, 
1, 1, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 1, 100, 
100, 1), c7_2 = c(1, 1, 1, 1, 100, 100, 1, 100, 1, 100, 1, 100, 
1, 100, 1, 1, 1, 100, 1, 1, 1, 1, 100, 100, 100, 100, 1, 1, 100, 
100, 100, 100, 100, 100, 100, 100), c7_3 = c(1, 100, 1, 1, 100, 
100, 100, 100, 100, 100, 100, 100, 100, 1, 1, 100, 1, 100, 100, 
100, 1, 1, 100, 1, 100, 100, 1, 100, 100, 1, 100, 100, 100, 100, 
1, 100), c7_4 = c(100, NA, 1, 1, 100, NA, 1, 100, 100, 100, 100, 
100, 1, 1, 1, NA, 100, 100, 100, NA, 1, 1, 100, 1, 100, 1, 100, 
100, 100, 1, 100, 100, 100, 1, 100, 100), c7_5 = c(100, NA, 1, 
NA, 1, NA, 100, 1, 100, 100, 100, 1, 1, 1, 1, NA, 100, 100, 1, 
NA, 1, 100, 100, 100, 1, NA, 100, 1, 100, NA, 100, NA, 100, 1, 
NA, 1), c7_6 = c(1, NA, 1, NA, 100, NA, 100, 1, 100, 100, 1, 
100, 100, 1, 1, NA, 100, 100, 1, NA, 1, 1, 100, 100, NA, NA, 
100, NA, NA, NA, 100, NA, 100, NA, NA, NA), c7_7 = c(NA, NA, 
1, NA, 100, NA, 100, NA, 100, 100, 100, 100, 1, 1, 100, NA, 100, 
100, 1, NA, 100, 1, NA, 1, NA, NA, 100, NA, NA, NA, 100, NA, 
100, NA, NA, NA), c7_8 = c(NA, NA, 1, NA, 100, NA, NA, NA, NA, 
NA, 100, NA, 1, 100, 1, NA, NA, 100, NA, NA, NA, NA, NA, 100, 
NA, NA, 100, NA, NA, NA, 100, NA, 100, NA, NA, NA), STUDY_DATE_1 = c("2014-12-04", 
"2003-04-14", "2007-06-11", "2016-03-02", "2012-11-30", "2017-10-23", 
"2014-03-14", "2008-04-09", "2010-08-05", "1998-03-05", "2013-05-28", 
"2019-12-09", "2017-01-18", "2010-06-18", "2009-12-28", "2003-08-25", 
"2009-04-07", "2011-12-15", "2016-10-18", "2016-07-27", "2016-09-15", 
"2016-02-11", "2012-01-31", "2016-02-11", "2014-09-23", "2016-11-17", 
"2011-02-04", "2013-03-18", "2011-03-21", "2003-10-29", "2010-06-11", 
"2014-06-06", "2014-05-08", "2018-01-30", "2005-02-26", "2015-07-17"
), STUDY_DATE_2 = c("2019-05-17", "2003-04-14", "2016-12-12", 
"2019-12-03", "2014-10-17", "2019-02-19", "2006-10-27", "2013-05-02", 
"2012-08-15", "2007-07-06", "2015-08-24", "2019-05-11", "2019-05-13", 
"2016-11-22", "2015-01-02", "2004-06-04", "2003-10-16", "2010-11-10", 
"2015-04-28", "2016-05-11", "2019-08-20", "2012-01-06", "2012-04-27", 
"2017-08-01", "2019-03-02", "2017-04-13", "2009-08-13", "2011-04-15", 
"2013-04-18", "2015-05-04", "2015-03-16", "2006-08-22", "2011-10-06", 
"2019-10-10", "2004-07-31", "2018-07-06"), STUDY_DATE_3 = c("2013-07-18", 
"2003-04-14", "2005-04-12", "2016-02-18", "2016-05-18", "2020-02-11", 
"2014-03-26", "2017-11-29", "2010-02-12", "2000-02-08", "2015-08-11", 
"2016-08-04", "2016-02-03", "2008-10-10", "2012-01-09", "2004-06-22", 
"2017-11-21", "2015-06-23", "2006-05-01", "2016-07-12", "2019-09-21", 
"2010-04-09", "2015-10-05", "2016-11-22", "2017-10-30", "2016-07-08", 
"2009-04-14", "2011-05-24", "2015-04-24", "2016-10-21", "2013-01-28", 
"2005-10-06", "2008-11-13", "2019-01-22", "2004-07-06", "2019-06-15"
), STUDY_DATE_4 = c("2019-08-03", NA, "2004-05-05", "2019-04-30", 
"2013-10-18", NA, "2011-08-09", "2011-11-23", "2012-02-21", "2002-01-10", 
"2011-08-11", "2020-05-15", "2019-11-11", "2003-10-30", "2017-06-06", 
NA, "2019-07-26", "2009-11-04", "2017-12-16", NA, "2018-11-02", 
"2015-05-01", "2015-10-05", "2015-06-10", "2020-01-14", "2017-05-30", 
"2009-09-10", "2007-12-17", "2011-09-20", "2015-04-29", "2014-09-02", 
"2005-02-28", "2010-06-08", "2014-06-24", "2006-09-18", "2019-12-16"
), STUDY_DATE_5 = c("2013-08-30", NA, "2012-02-29", NA, "2011-04-05", 
NA, "2015-07-28", "2010-09-10", "2011-02-09", "2015-10-08", "2012-04-30", 
"2017-07-26", "2015-01-30", "2007-07-19", "2016-06-02", NA, "2016-06-14", 
"2007-09-28", "2005-03-29", NA, "2019-11-02", "2015-06-25", "2011-12-01", 
"2015-07-01", "2017-03-13", NA, "2002-05-15", "2007-09-25", "2014-04-25", 
NA, "2011-04-01", NA, "2009-06-11", "2016-02-23", NA, "2015-07-17"
), STUDY_DATE_6 = c("2018-06-22", NA, "2020-01-07", NA, "2011-11-21", 
NA, "2019-08-30", "2012-09-26", "2011-08-02", "1998-09-30", "2011-12-23", 
"2017-08-31", "2018-01-15", "2010-08-05", "2004-03-02", NA, "2018-07-26", 
"2016-06-03", "2003-02-21", NA, "2007-02-09", "2008-08-26", "2011-10-31", 
"2016-02-11", NA, NA, "2013-03-05", NA, NA, NA, "2014-03-07", 
NA, "2011-01-25", NA, NA, NA), STUDY_DATE_7 = c(NA, NA, "2006-12-04", 
NA, "2015-06-26", NA, "2019-04-02", NA, "2012-11-15", "1998-04-22", 
"2012-10-29", "2018-09-27", "2012-09-06", "2011-04-29", "2005-09-10", 
NA, "2017-06-26", "2017-06-07", "2004-11-01", NA, "2019-11-05", 
"2013-10-30", NA, "2020-03-18", NA, NA, "2009-04-20", NA, NA, 
NA, "2013-11-25", NA, "2012-10-15", NA, NA, NA), STUDY_DATE_8 = c(NA, 
NA, "2008-06-05", NA, "2013-03-01", NA, NA, NA, NA, NA, "2014-02-10", 
NA, "2016-08-09", "2017-07-11", "2011-01-26", NA, NA, "2008-10-21", 
NA, NA, NA, NA, NA, "2016-02-11", NA, NA, "2012-02-24", NA, NA, 
NA, "2016-05-10", NA, "2008-05-20", NA, NA, NA)), row.names = c(NA, 
-36L), class = c("tbl_df", "tbl", "data.frame"))

如果您对tidyverse解决方案感兴趣,可以使用pivot_longer将数据转换为长格式。然后,安排你的约会对象检查彼此接近的测试。创建一个标志,查看日期对,其中方法是1后跟100,或100后跟1,日期差在365天内。

library(tidyverse)
df %>%
pivot_longer(cols = -Number, names_to = c(".value", "group"), names_pattern = "(\w+)_(\d+)$") %>%
mutate(STUDY_DATE = as.Date(STUDY_DATE)) %>%
arrange(Number, STUDY_DATE) %>%
group_by(Number) %>%
mutate(flag = ((c7 == 1 & lead(c7) == 100) | (c7 == 100 & lead(c7) == 1)) & lead(STUDY_DATE) - STUDY_DATE <= 365) %>%
filter(flag == TRUE | lag(flag) == TRUE) %>%
select(Number, c7, STUDY_DATE)

如果这是你的想法,请告诉我。

输出

# A tibble: 69 x 3
# Groups:   Number [23]
Number    c7 STUDY_DATE
<dbl> <dbl> <date>    
1      1     1 2013-07-18
2      1   100 2013-08-30
3      1     1 2019-05-17
4      1   100 2019-08-03
5      2   100 2003-04-14
6      2     1 2003-04-14
7      2   100 2003-04-14
8      4     1 2016-02-18
9      4   100 2016-03-02
10      5     1 2011-04-05
# … with 59 more rows

最新更新