近似日期合并R-滚动联接/日期差

  • 本文关键字:日期 合并 滚动 r
  • 更新时间 :
  • 英文 :


我试图将两个数据帧合并在一起,但需要近似数据的连接(按日期和键(。例如,在"2000-03-01"上,df2$X1和df2$X2具有需要加入df1的信息,但是df1中没有"2000-03-02">如果不创建新行,我如何近似相关联的df2$X1&将df1上的X2数据写入每个键的"2000-02-01"行?

df1 <- data.frame(replicate(2,sample(0:5,10,rep=TRUE)))
df1$Key <- LETTERS[1:2]
df1$Date <- as.Date(c("2000-01-01", "2000-02-01", "2000-02-01", "2000-04-01", "2000-05-01", 
"2000-05-01", "2000-07-01", "2000-08-01", "2000-09-01", "2000-10-01"))
print(df1)
X1 X2 Key       Date
1   4  2   A 2000-01-01
2   3  2   B 2000-02-01
3   2  4   A 2000-02-01
4   3  5   B 2000-04-01
5   1  3   A 2000-05-01
6   2  0   B 2000-05-01
7   1  1   A 2000-07-01
8   5  3   B 2000-08-01
9   2  0   A 2000-09-01
10  5  4   B 2000-10-01
df2 <- data.frame(replicate(2,sample(0:5,10,rep=TRUE)))
df2$Key <- c('A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B')
df2$Date <- as.Date(c("2000-01-01", "2000-02-01", "2000-03-01", "2000-04-01", "2000-05-01", 
"2000-06-01", "2000-07-01", "2000-08-01", "2000-09-01", "2000-10-01"))
print(df2)
X1 X2 Key       Date
1   4  5   A 2000-01-01
2   3  2   B 2000-02-01
3   1  5   A 2000-03-01
4   1  0   B 2000-04-01
5   0  4   A 2000-05-01
6   5  5   B 2000-06-01
7   2  2   A 2000-07-01
8   1  0   B 2000-08-01
9   0  4   A 2000-09-01
10  3  2   B 2000-10-01
library(dplyr)
join <- left_join(df1, df2, by = c("Key", "Date")); join
X1.x X2.x Key       Date X1.y X2.y
1     4    2   A 2000-01-01    4    5
2     3    2   B 2000-02-01    3    2
3     2    4   A 2000-02-01   NA   NA
4     3    5   B 2000-04-01    1    0
5     1    3   A 2000-05-01    0    4
6     2    0   B 2000-05-01   NA   NA
7     1    1   A 2000-07-01    2    2
8     5    3   B 2000-08-01    1    0
9     2    0   A 2000-09-01    0    4
10    5    4   B 2000-10-01    3    2
> 
join <- left_join(df1, df2, by = c("Key", "Date")) %>%
filter(abs(difftime(day,day,units = "days"))) <= as.difftime(2, format = "%d", unit = "days")

library(data.table)
as.data.table(df1)[as.data.table(df2), on = "Date", roll = "nearest"]
X1 X2 Key       Date i.X1 i.X2 i.Key
1:  4  2   A 2000-01-01    4    5     A
2:  3  2   B 2000-02-01    3    2     B
3:  2  4   A 2000-02-01    3    2     B
4:  2  4   A 2000-03-01    1    5     A
5:  3  5   B 2000-04-01    1    0     B
6:  1  3   A 2000-05-01    0    4     A
7:  2  0   B 2000-05-01    0    4     A
8:  1  1   A 2000-06-01    5    5     B
9:  1  1   A 2000-07-01    2    2     A
10:  5  3   B 2000-08-01    1    0     B
11:  2  0   A 2000-09-01    0    4     A
12:  5  4   B 2000-10-01    3    2     B

这个怎么样?

df2$FakeDate <- df2$Date[ findInterval(df1$Date, df2$Date) ]
df2
#    X1 X2 Key       Date   FakeDate
# 1   4  3   A 2000-01-01 2000-01-01
# 2   4  0   B 2000-02-01 2000-02-01
# 3   2  5   A 2000-03-01 2000-02-01
# 4   3  1   B 2000-04-01 2000-04-01
# 5   2  5   A 2000-05-01 2000-05-01
# 6   5  3   B 2000-06-01 2000-05-01
# 7   5  0   A 2000-07-01 2000-07-01
# 8   3  0   B 2000-08-01 2000-08-01
# 9   4  5   A 2000-09-01 2000-09-01
# 10  4  0   B 2000-10-01 2000-10-01
df2$Date <- NULL # o/w merge will complain about 'Date' in both
merge(df1, df2, by.x=c("Key","Date"), by.y=c("Key","FakeDate"))
#    Key       Date X1.x X2.x X1.y X2.y
# 1    A 2000-01-01    0    5    4    3
# 2    A 2000-02-01    4    0    2    5
# 3    A 2000-05-01    4    1    2    5
# 4    A 2000-07-01    2    0    5    0
# 5    A 2000-09-01    0    3    4    5
# 6    B 2000-02-01    0    4    4    0
# 7    B 2000-04-01    1    5    3    1
# 8    B 2000-05-01    5    2    5    3
# 9    B 2000-08-01    3    5    3    0
# 10   B 2000-10-01    4    2    4    0

最新更新