我想返回这样一列的索引号：该列的日期最接近（但早于）某个特定日期列中的日期。
library(dplyr)
library(lubridate)
# Sample data. TimeK is the reference date of each row; Time1..Time3 hold the
# candidate dates compared against it. Values are month/day/year strings.
TimeK <- c(
  "12/31/2017", "12/13/2017", "12/01/2017", "12/01/2017", "12/05/2017"
)
Time1 <- c(
  "01/01/2018", "12/03/2017", "11/17/2018", "11/01/2017", "12/05/2017"
)
Time2 <- c(
  "12/28/2017", "12/13/2017", "12/01/2017", "10/01/2017", "09/12/2017"
)
Time3 <- c(
  "01/08/2018", "12/01/2017", "11/23/2017", "06/01/2017", "12/31/2017"
)
# Parse every column into Date with lubridate::mdy(). The parser is handed to
# across() directly because funs() has been deprecated since dplyr 0.8.0.
d <- data.frame(TimeK, Time1, Time2, Time3) %>%
  mutate(across(everything(), mdy))
# Position of the candidate date closest to, but strictly earlier than, `ref`.
#
# g:   candidate dates for one row (Date, or numeric day counts).
# ref: the single reference date for that row.
# Returns the 1-based position within `g`, or NA_integer_ when no candidate
# is earlier than `ref`.
closest <- function(g, ref) {
  # Positive gap = number of days by which the candidate precedes `ref`.
  gap <- as.numeric(ref) - as.numeric(g)
  # Missing, equal, and later dates are not eligible.
  gap[is.na(gap) | gap <= 0] <- Inf
  if (all(is.infinite(gap))) {
    return(NA_integer_)
  }
  as.integer(which.min(gap))
}

# apply() on a data frame would coerce the Date columns to character, so the
# candidate columns are converted to a numeric matrix of day counts up front.
candidate_days <- data.matrix(d[, -1, drop = FALSE])
d$closest_date <- vapply(
  seq_len(nrow(d)),
  function(i) closest(candidate_days[i, ], d$TimeK[i]),
  integer(1)
)
在这个例子中，我希望对每一行，在 Time1:Time3 这几列中找出日期最接近（但早于）TimeK 的那一列的索引。新变量的目标输出应为 (2,1,3,1,2)。
这里有一个dplyr解决方案:
# Tag each row so the long-format records can be traced back to it.
d <- d %>% mutate(rowid = row_number())

# One record per (row, candidate column). DateDiff is the number of days by
# which the candidate precedes TimeK; a positive value means "earlier".
# The candidate columns are named explicitly so that any other column present
# in `d` is left alone, and tidyr is namespace-qualified because the file only
# attaches dplyr and lubridate.
d1 <- d %>%
  tidyr::pivot_longer(
    cols = Time1:Time3,
    names_to = "TimeID",
    values_to = "value"
  ) %>%
  mutate(DateDiff = as.integer(TimeK - value))

# Keep only the candidates that are strictly earlier than TimeK.
d2 <- d1 %>% filter(DateDiff > 0)

# TimeID of the nearest earlier candidate for every row. Rows without any
# earlier candidate do not appear in the result.
d2 %>%
  group_by(rowid) %>%
  slice_min(DateDiff, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  arrange(rowid) %>%
  select(rowid, TimeID)
# For every row, print the position (within columns 2:4) of the date that is
# closest to, but strictly earlier than, the date in column 1. NA is printed
# when none of the three dates is earlier.
for (i in seq_len(nrow(d))) {
  # Positive gap = days by which the candidate precedes the column-1 date.
  gap <- as.numeric(d[i, 1]) - as.numeric(unlist(d[i, 2:4]))
  # Missing, equal, and later dates are not eligible.
  gap[is.na(gap) | gap <= 0] <- Inf
  if (any(is.finite(gap))) {
    print(which.min(gap))
  } else {
    print(NA_integer_)
  }
}
也许这段代码可以帮助您。不过，我不太明白您提到的目标输出 (2,1,3,1,2)：
# Day difference between column 1 and column 3 in the second row of `d`.
difftime(time1 = d[2, 1], time2 = d[2, 3], units = "days")
# Time difference of 0 days