尊敬的Stackoverflow社区,
如果可以的话,我想选择间隔不超过1年的CT扫描(方法=1(和胃镜检查(方法=100(的日期。
请查收附件中的数据集。Number是参与者,c7是方法,其中1是CT,100是胃镜。
非常感谢你的帮助!
structure(list(Number = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35, 36), c7_1 = c(1, 100, 1, 100,
100, 100, 1, 1, 1, 100, 100, 100, 1, 100, 1, 1, 1, 100, 1, 100,
1, 1, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 1, 100,
100, 1), c7_2 = c(1, 1, 1, 1, 100, 100, 1, 100, 1, 100, 1, 100,
1, 100, 1, 1, 1, 100, 1, 1, 1, 1, 100, 100, 100, 100, 1, 1, 100,
100, 100, 100, 100, 100, 100, 100), c7_3 = c(1, 100, 1, 1, 100,
100, 100, 100, 100, 100, 100, 100, 100, 1, 1, 100, 1, 100, 100,
100, 1, 1, 100, 1, 100, 100, 1, 100, 100, 1, 100, 100, 100, 100,
1, 100), c7_4 = c(100, NA, 1, 1, 100, NA, 1, 100, 100, 100, 100,
100, 1, 1, 1, NA, 100, 100, 100, NA, 1, 1, 100, 1, 100, 1, 100,
100, 100, 1, 100, 100, 100, 1, 100, 100), c7_5 = c(100, NA, 1,
NA, 1, NA, 100, 1, 100, 100, 100, 1, 1, 1, 1, NA, 100, 100, 1,
NA, 1, 100, 100, 100, 1, NA, 100, 1, 100, NA, 100, NA, 100, 1,
NA, 1), c7_6 = c(1, NA, 1, NA, 100, NA, 100, 1, 100, 100, 1,
100, 100, 1, 1, NA, 100, 100, 1, NA, 1, 1, 100, 100, NA, NA,
100, NA, NA, NA, 100, NA, 100, NA, NA, NA), c7_7 = c(NA, NA,
1, NA, 100, NA, 100, NA, 100, 100, 100, 100, 1, 1, 100, NA, 100,
100, 1, NA, 100, 1, NA, 1, NA, NA, 100, NA, NA, NA, 100, NA,
100, NA, NA, NA), c7_8 = c(NA, NA, 1, NA, 100, NA, NA, NA, NA,
NA, 100, NA, 1, 100, 1, NA, NA, 100, NA, NA, NA, NA, NA, 100,
NA, NA, 100, NA, NA, NA, 100, NA, 100, NA, NA, NA), STUDY_DATE_1 = c("2014-12-04",
"2003-04-14", "2007-06-11", "2016-03-02", "2012-11-30", "2017-10-23",
"2014-03-14", "2008-04-09", "2010-08-05", "1998-03-05", "2013-05-28",
"2019-12-09", "2017-01-18", "2010-06-18", "2009-12-28", "2003-08-25",
"2009-04-07", "2011-12-15", "2016-10-18", "2016-07-27", "2016-09-15",
"2016-02-11", "2012-01-31", "2016-02-11", "2014-09-23", "2016-11-17",
"2011-02-04", "2013-03-18", "2011-03-21", "2003-10-29", "2010-06-11",
"2014-06-06", "2014-05-08", "2018-01-30", "2005-02-26", "2015-07-17"
), STUDY_DATE_2 = c("2019-05-17", "2003-04-14", "2016-12-12",
"2019-12-03", "2014-10-17", "2019-02-19", "2006-10-27", "2013-05-02",
"2012-08-15", "2007-07-06", "2015-08-24", "2019-05-11", "2019-05-13",
"2016-11-22", "2015-01-02", "2004-06-04", "2003-10-16", "2010-11-10",
"2015-04-28", "2016-05-11", "2019-08-20", "2012-01-06", "2012-04-27",
"2017-08-01", "2019-03-02", "2017-04-13", "2009-08-13", "2011-04-15",
"2013-04-18", "2015-05-04", "2015-03-16", "2006-08-22", "2011-10-06",
"2019-10-10", "2004-07-31", "2018-07-06"), STUDY_DATE_3 = c("2013-07-18",
"2003-04-14", "2005-04-12", "2016-02-18", "2016-05-18", "2020-02-11",
"2014-03-26", "2017-11-29", "2010-02-12", "2000-02-08", "2015-08-11",
"2016-08-04", "2016-02-03", "2008-10-10", "2012-01-09", "2004-06-22",
"2017-11-21", "2015-06-23", "2006-05-01", "2016-07-12", "2019-09-21",
"2010-04-09", "2015-10-05", "2016-11-22", "2017-10-30", "2016-07-08",
"2009-04-14", "2011-05-24", "2015-04-24", "2016-10-21", "2013-01-28",
"2005-10-06", "2008-11-13", "2019-01-22", "2004-07-06", "2019-06-15"
), STUDY_DATE_4 = c("2019-08-03", NA, "2004-05-05", "2019-04-30",
"2013-10-18", NA, "2011-08-09", "2011-11-23", "2012-02-21", "2002-01-10",
"2011-08-11", "2020-05-15", "2019-11-11", "2003-10-30", "2017-06-06",
NA, "2019-07-26", "2009-11-04", "2017-12-16", NA, "2018-11-02",
"2015-05-01", "2015-10-05", "2015-06-10", "2020-01-14", "2017-05-30",
"2009-09-10", "2007-12-17", "2011-09-20", "2015-04-29", "2014-09-02",
"2005-02-28", "2010-06-08", "2014-06-24", "2006-09-18", "2019-12-16"
), STUDY_DATE_5 = c("2013-08-30", NA, "2012-02-29", NA, "2011-04-05",
NA, "2015-07-28", "2010-09-10", "2011-02-09", "2015-10-08", "2012-04-30",
"2017-07-26", "2015-01-30", "2007-07-19", "2016-06-02", NA, "2016-06-14",
"2007-09-28", "2005-03-29", NA, "2019-11-02", "2015-06-25", "2011-12-01",
"2015-07-01", "2017-03-13", NA, "2002-05-15", "2007-09-25", "2014-04-25",
NA, "2011-04-01", NA, "2009-06-11", "2016-02-23", NA, "2015-07-17"
), STUDY_DATE_6 = c("2018-06-22", NA, "2020-01-07", NA, "2011-11-21",
NA, "2019-08-30", "2012-09-26", "2011-08-02", "1998-09-30", "2011-12-23",
"2017-08-31", "2018-01-15", "2010-08-05", "2004-03-02", NA, "2018-07-26",
"2016-06-03", "2003-02-21", NA, "2007-02-09", "2008-08-26", "2011-10-31",
"2016-02-11", NA, NA, "2013-03-05", NA, NA, NA, "2014-03-07",
NA, "2011-01-25", NA, NA, NA), STUDY_DATE_7 = c(NA, NA, "2006-12-04",
NA, "2015-06-26", NA, "2019-04-02", NA, "2012-11-15", "1998-04-22",
"2012-10-29", "2018-09-27", "2012-09-06", "2011-04-29", "2005-09-10",
NA, "2017-06-26", "2017-06-07", "2004-11-01", NA, "2019-11-05",
"2013-10-30", NA, "2020-03-18", NA, NA, "2009-04-20", NA, NA,
NA, "2013-11-25", NA, "2012-10-15", NA, NA, NA), STUDY_DATE_8 = c(NA,
NA, "2008-06-05", NA, "2013-03-01", NA, NA, NA, NA, NA, "2014-02-10",
NA, "2016-08-09", "2017-07-11", "2011-01-26", NA, NA, "2008-10-21",
NA, NA, NA, NA, NA, "2016-02-11", NA, NA, "2012-02-24", NA, NA,
NA, "2016-05-10", NA, "2008-05-20", NA, NA, NA)), row.names = c(NA,
-36L), class = c("tbl_df", "tbl", "data.frame"))
如果您对tidyverse
解决方案感兴趣,可以使用pivot_longer
将数据转换为长格式。然后,安排你的约会对象检查彼此接近的测试。创建一个标志,查看日期对,其中方法是1后跟100,或100后跟1,日期差在365天内。
library(tidyverse)
df %>%
pivot_longer(cols = -Number, names_to = c(".value", "group"), names_pattern = "(\w+)_(\d+)$") %>%
mutate(STUDY_DATE = as.Date(STUDY_DATE)) %>%
arrange(Number, STUDY_DATE) %>%
group_by(Number) %>%
mutate(flag = ((c7 == 1 & lead(c7) == 100) | (c7 == 100 & lead(c7) == 1)) & lead(STUDY_DATE) - STUDY_DATE <= 365) %>%
filter(flag == TRUE | lag(flag) == TRUE) %>%
select(Number, c7, STUDY_DATE)
如果这是你的想法,请告诉我。
输出
# A tibble: 69 x 3
# Groups: Number [23]
Number c7 STUDY_DATE
<dbl> <dbl> <date>
1 1 1 2013-07-18
2 1 100 2013-08-30
3 1 1 2019-05-17
4 1 100 2019-08-03
5 2 100 2003-04-14
6 2 1 2003-04-14
7 2 100 2003-04-14
8 4 1 2016-02-18
9 4 100 2016-03-02
10 5 1 2011-04-05
# … with 59 more rows