下面是一个示例数据集。我想只保留癌症日期(最后一列)发生在 10 年或更短时间之前
(即最多 10 年前)的行(个体)。
# Example data: one row per individual (`ind`), four sparse inclusion-date
# columns and one cancer-date column. Dates are stored as POSIXct seconds
# since the epoch (UTC) and converted to Date below.
d <- structure(
  list(
    ind = c(1, 2, 3, 4, 5),
    `inclusion date0` = structure(
      c(1414800000, NA, NA, NA, NA),
      class = c("POSIXct", "POSIXt"), tzone = "UTC"
    ),
    `inclusion date2` = structure(
      c(NA, 1453075200, 1453075200, NA, NA),
      class = c("POSIXct", "POSIXt"), tzone = "UTC"
    ),
    `inclusion dat4` = structure(
      c(NA, NA, NA, NA, 1544486400),
      class = c("POSIXct", "POSIXt"), tzone = "UTC"
    ),
    `inclusion date6` = structure(
      c(NA, NA, NA, 1594425600, NA),
      class = c("POSIXct", "POSIXt"), tzone = "UTC"
    ),
    `cancer date` = structure(
      c(1099267200, 965174400, 1294963200, 1458086400, 1230854400),
      class = c("POSIXct", "POSIXt"), tzone = "UTC"
    )
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -5L)
)

# Convert every date-time column (all columns except `ind`) to Date in one
# pass instead of repeating one assignment per column.
date_cols <- setdiff(names(d), "ind")
d[date_cols] <- lapply(d[date_cols], as.Date)
我很感激你的帮助。
library(tidyverse)
library(lubridate)
一些额外的数据清理
# Reshape to one row per (individual, inclusion date) and compute the gap, in
# years, from the cancer date to the inclusion date. The result is assigned
# back to `d` so that later steps can use the new `diff` column.
d <- d %>%
  janitor::clean_names() %>%
  as_tibble() %>%
  select(ind, cancer_date, everything()) %>%
  # Every column except `ind` is a date; select by name rather than position.
  mutate(across(-ind, as.Date)) %>%
  # NOTE(review): gather() is superseded by pivot_longer(); it is kept here
  # because switching would change the row order of the result.
  gather(
    -c(ind, cancer_date),
    key = "inclusion_id",
    value = "inclusion_date"
  ) %>%
  drop_na() %>%
  # time_length() on an interval counts calendar years, so a span of exactly
  # 10 years is exactly 10 regardless of how many leap days it contains.
  mutate(diff = time_length(interval(cancer_date, inclusion_date), "years"))

d
计算癌症日期与纳入日期的时间差,以年为单位
# A tibble: 5 x 5
ind cancer_date inclusion_id inclusion_date diff
<dbl> <date> <chr> <date> <dbl>
1 1 2004-11-01 inclusion_date0 2014-11-01 10.0
2 2 2000-08-02 inclusion_date2 2016-01-18 15.5
3 3 2011-01-14 inclusion_date2 2016-01-18 5.01
4 5 2009-01-02 inclusion_dat4 2018-12-11 9.94
5 4 2016-03-16 inclusion_date6 2020-07-11 4.32
# Filter: keep only rows whose cancer-to-inclusion gap is at most 10 years.
# Expects `d` to already carry the `diff` column (gap in years).
d %>%
  filter(diff <= 10)
# A tibble: 4 x 5
ind cancer_date inclusion_id inclusion_date diff
<dbl> <date> <chr> <date> <dbl>
1 1 2004-11-01 inclusion_date0 2014-11-01 10.0
2 3 2011-01-14 inclusion_date2 2016-01-18 5.01
3 5 2009-01-02 inclusion_dat4 2018-12-11 9.94
4 4 2016-03-16 inclusion_date6 2020-07-11 4.32