我有两个带有时间戳的数据集要合并。对于df2
中X
列中的每个值,我希望从df1
中获得Jar
和Treatment
的值。从df1
中,我可以看到在特定时间测量了哪个Jar
以及Treatment
是什么。在df2
中,我可以看到X
在特定时间的值,并且当我看到X
的特定值时,我需要知道测量了什么Jar
(以及哪个Treatment
(。
我已经尝试过full_join
,但由于时间序列中存在间隙,所以这不起作用。例如,在df2
中,在时间戳:2020-12-16 14:31:05处存在X
的值,但该时间戳不存在于df1
中。然而,基于df1,我知道在这个时间戳Jar=Soil_dry
和Treatment=None
。
关于如何制作一个表,让我在df2
中的每个X
值都能在df1
中获得Jar
和Treatment
的值,有什么建议吗?
这是df1:
df1 <- structure(list(Jar = c("Soil_dry", "Soil_dry", "Soil_dry", "Soil_dry",
"Soil_dry", "Soil_dry", "Soil_dry", "Soil_dry", "Soil_dry", "Soil_dry",
"Soil_dry", "Soil_dry", "Soil_dry", "Soil_dry", "Soil_dry", "Soil_dry",
"Soil_dry", "Soil_dry", "Soil_dry", "soil_wet", "soil_wet", "soil_wet",
"soil_wet", "soil_wet", "soil_wet", "soil_wet", "soil_wet", "soil_wet",
"soil_wet", "soil_wet", "soil_wet", "soil_wet", "soil_wet", "soil_wet",
"soil_wet", "soil_wet", "soil_wet", "soil_wet", "soil_wet", "Soil_dry",
"Soil_dry", "Soil_dry", "Soil_dry", "Soil_dry", "Soil_dry", "Soil_dry",
"Soil_dry", "Soil_dry", "Soil_dry", "Soil_dry", "Soil_dry", "Soil_dry",
"Soil_dry", "soil_wet", "soil_wet", "soil_wet", "soil_wet", "soil_wet",
"soil_wet", "soil_wet", "soil_wet", "soil_wet", "soil_wet", "soil_wet",
"soil_wet", "soil_wet", "soil_wet", "soil_wet"), Treatment = c("None",
"None", "None", "None", "None", "None", "None", "None", "None",
"None", "None", "None", "None", "None", "None", "None", "None",
"None", "None", "None", "None", "None", "None", "None", "None",
"None", "None", "None", "None", "None", "None", "None", "None",
"None", "None", "None", "None", "None", "None", "ul5", "ul5",
"ul5", "ul5", "ul5", "ul5", "ul5", "ul5", "ul5", "ul5", "ul5",
"ul5", "ul5", "ul5", "ul5", "ul5", "ul5", "ul5", "ul5", "ul5",
"ul5", "ul5", "ul5", "ul5", "ul5", "ul5", "ul5", "ul5", "ul5"
), Timestamp = structure(c(1608128877, 1608128933, 1608128997,
1608129058, 1608129063, 1608129112, 1608129117, 1608129122, 1608129127,
1608129238, 1608129243, 1608129348, 1608129353, 1608129358, 1608129363,
1608129368, 1608129373, 1608129473, 1608129478, 1608129483, 1608129488,
1608129598, 1608129603, 1608129717, 1608129723, 1608129837, 1608129842,
1608129957, 1608129962, 1608130072, 1608130077, 1608130082, 1608130192,
1608130197, 1608130202, 1608130318, 1608130323, 1608130418, 1608130423,
1608130428, 1608130492, 1608130497, 1608130502, 1608130507, 1608130612,
1608130617, 1608130622, 1608130627, 1608130732, 1608130737, 1608130742,
1608130747, 1608130853, 1608130858, 1608130863, 1608130978, 1608130983,
1608131093, 1608131098, 1608131103, 1608131213, 1608131218, 1608131223,
1608131337, 1608131343, 1608131457, 1608131462, 1608131467), class = c("POSIXct",
"POSIXt"), tzone = "UTC")), row.names = c(NA, -68L), class = "data.frame")
df2:
df2 <-structure(list(X = c(5L, 3L, 34L, 4L, 65L, 5L, 7L, 5L, 8L, 9L,
8L, 5L, 78L, 9L, 5L, 78L, 9L, 5L, 78L, 9L, 5L, 7L, 4L, 34L, 8L,
5L, 4L, 9L, 78L, 59L, 5L, 5L, 6L, 3L, 3L, 7L, 5L, 47L, 2L, 67L,
34L, 76L, 5L, 76L, 5L, 6L, 5L, 7L, 2L, 13L, 1L, 54L, 32L, 4L,
3L, 45L, 1L, 1L), Timestamp = structure(c(1608129065, 1608129122,
1608129127, 1608129238, 1608129263, 1608129288, 1608129353, 1608129358,
1608129363, 1608129368, 1608129373, 1608129473, 1608129478, 1608129483,
1608129488, 1608129598, 1608129663, 1608129717, 1608129723, 1608129831,
1608129842, 1608129957, 1608129962, 1608130072, 1608130073, 1608130082,
1608130132, 1608130197, 1608130202, 1608130318, 1608130323, 1608130418,
1608130423, 1608130428, 1608130492, 1608130497, 1608130502, 1608130507,
1608130612, 1608130617, 1608130622, 1608130627, 1608130732, 1608130737,
1608130742, 1608130747, 1608130853, 1608130858, 1608130863, 1608130978,
1608130983, 1608131093, 1608131098, 1608131103, 1608131213, 1608131218,
1608131223, 1608131337), tzone = "UTC", class = c("POSIXct",
"POSIXt"))), row.names = c(NA, -58L), class = "data.frame")
为什么df3 <- df1 %>% full_join(df2, by = 'Timestamp')
不能产生您想要的结果?
此外,您声明
当我看到X的特定值时,我需要知道测量了什么Jar/Treatment。
在某些情况下,对于给定的X值,您有多个Timestamp值。换句话说,您无法避免为每个X值获得多个Jar/Ttreatment度量。
示例:
时间戳 | Jar | 治疗X.df1X.df2 | ||
---|---|---|---|---|
2020-12-16 15:03:03 | soil_whit | ul5<1>1 | ||
2020年12月16日15:07:03 | 土壤_湿 | ul5<1>1 | ||
2020年12月16日15:08:57 | soil_whit | ul5<1>1 | ||
2020-12-16 14:56:52 | Soil_dry | ul52 | 2 | |
2020年12月16日15:01:03 | 土壤_湿 | ul52 | 2 | |
2020-12-16 14:32:02 | Soil_dry | 无3 | ||
2020-12-16 14:53:48 | Soil_dry | ul5 | 3 | 3 |
2020-12-16 14:54:52 | Soil_dry | ul5 | 3 | 3 |
2020-12-16 15:06:53 | soil_whit | ul5 | 3 | |
2020-12-16 15:11:02 | soil_whit | ul5>3 | 3 |
您可以通过前面指出的merge
或dplyr
:来实现这一点
left_join(df2, df1, "Timestamp")
# X Timestamp Jar Treatment
# 1 34 2020-12-16 14:27:52 <NA> <NA>
# 2 5 2020-12-16 14:28:54 <NA> <NA>
# 3 5 2020-12-16 14:22:57 <NA> <NA>
# 4 24 2020-12-16 14:30:34 <NA> <NA>
# 5 45 2020-12-16 14:31:03 Soil_dry None
# 6 66 2020-12-16 14:31:52 Soil_dry None