我得到了一个大数据文件(x(,其中包含数据帧、频率、文件名、标头和缺失等属性。数据帧(data(有4列(日期、X、Y和Z(,包含超过2000万个数据点。我想根据日期间隔将数据帧拆分为一个新的日期帧。例如,2000万个数据点的范围从2016-09-28 09:30:00到2016-10-06 20:30:00,我只想保留日期在2016-09-30 08:00:00到2016-10-02 20:15:00之间的数据点X、Y和Z。我用以下代码尝试了一下:
这是为了创建一个只包含数据而不包含其他属性的新数据帧
df1 = x$data
as.POSIXct(df1$time[1],format="%Y-%m-%dT%H:%M")
然后我尝试创建一个日期间隔,其中新的df2应该只包含该日期区域内的值
df2 <- df1[df1$time > "2016-09-30 09:30:00" & df1$time <= "2016-10-03 20:15:00",]
但是,新的数据帧不是从09:30:01开始,而是在20:14:59结束。所以问题是,我如何才能更改代码,使我处于这个间隔之间。
structure(list(time = structure(c(1475220600.03333, 1475220600.06667,
1475220600.1, 1475220600.13333, 1475220600.16667, 1475220600.2,
1475220600.23333, 1475220600.26667, 1475220600.3, 1475220600.33333,
1475220600.36667, 1475220600.4, 1475220600.43333, 1475220600.46667,
1475220600.5, 1475220600.53333, 1475220600.56667, 1475220600.6,
1475220600.63333, 1475220600.66667, 1475220600.7, 1475220600.73333,
1475220600.76667, 1475220600.8, 1475220600.83333, 1475220600.86667,
1475220600.9, 1475220600.93333, 1475220600.96667, 1475220601,
1475220601.03333, 1475220601.06667, 1475220601.1, 1475220601.13333,
1475220601.16667, 1475220601.2, 1475220601.23333, 1475220601.26667,
1475220601.3, 1475220601.33333), class = c("POSIXct", "POSIXt"
), tzone = "GMT"), X = c(0.039, 0.043, 0.043, 0.043, 0.039, 0.043,
0.035, 0.039, 0.039, 0.043, 0.039, 0.039, 0.043, 0.043, 0.035,
0.043, 0.035, 0.043, 0.035, 0.039, 0.039, 0.043, 0.043, 0.039,
0.035, 0.035, 0.039, 0.039, 0.039, 0.031, 0.035, 0.035, 0.035,
0.039, 0.035, 0.035, 0.039, 0.035, 0.039, 0.043), Y = c(0.016,
0.012, 0.012, 0.02, 0.016, 0.02, 0.016, 0.012, 0.012, 0.02, 0.012,
0.016, 0.012, 0.016, 0.02, 0.02, 0.012, 0.02, 0.016, 0.012, 0.02,
0.012, 0.02, 0.023, 0.016, 0.016, 0.016, 0.02, 0.016, 0.012,
0.016, 0.012, 0.016, 0.012, 0.016, 0.016, 0.02, 0.016, 0.012,
0.012), Z = c(-0.977, -0.977, -0.969, -0.977, -0.969, -0.969,
-0.977, -0.969, -0.973, -0.965, -0.973, -0.977, -0.977, -0.973,
-0.969, -0.977, -0.973, -0.973, -0.973, -0.977, -0.973, -0.969,
-0.969, -0.969, -0.973, -0.969, -0.969, -0.973, -0.973, -0.977,
-0.973, -0.969, -0.973, -0.973, -0.973, -0.977, -0.973, -0.977,
-0.973, -0.973)), subject_name = "1", time_zone = "02:00:00", missingness = structure(list(
time = structure(c(1475747248, 1475747249, 1475747250, 1475747292,
1475747293, 1475747294), class = c("POSIXct", "POSIXt"), tzone = "GMT"),
n_missing = c(30L, 30L, 1230L, 30L, 30L, 32490L)), class = "data.frame", row.names = c("1475747248",
"1475747249", "1475747250", "1475747292", "1475747293", "1475747294"
)), old_version = FALSE, firmware = "1.5.0", last_sample_time = structure(1475748377, tzone = "GMT", class = c("POSIXct",
"POSIXt")), serial_prefix = "TAS", sample_rate = 30L, acceleration_min = "-8.0", acceleration_max = "8.0", header = structure(list(
Field = c("Serial Number", "Device Type", "Firmware", "Battery Voltage",
"Sample Rate", "Start Date", "Stop Date", "Last Sample Time",
"TimeZone", "Download Date", "Board Revision", "Unexpected Resets",
"Acceleration Scale", "Acceleration Min", "Acceleration Max",
"Mass", "Age", "Limb", "DateOfBirth", "Subject Name", "Serial Prefix"
), Value = c(`Serial Number` = "TAS1E44150325", `Device Type` = "Link",
Firmware = "1.5.0", `Battery Voltage` = "3,88", `Sample Rate` = "30",
`Start Date` = "2016-09-28 08:00:00", `Stop Date` = "2016-10-07 17:00:00",
`Last Sample Time` = "2016-10-06 10:06:17", TimeZone = "02:00:00",
`Download Date` = "2016-10-06 10:06:17", `Board Revision` = "5",
`Unexpected Resets` = "0", `Acceleration Scale` = "256",
`Acceleration Min` = "-8.0", `Acceleration Max` = "8.0",
Mass = "77,1107028999572", Age = "22", Limb = "Waist", DateOfBirth = "627890912111111100",
`Subject Name` = "1", `Serial Prefix` = "TAS")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -21L)), start_time = structure(1475049600, tzone = "GMT", class = c("POSIXct",
"POSIXt")), stop_time = structure(1475859600, tzone = "GMT", class = c("POSIXct",
"POSIXt")), total_records = 20930820L, bad_samples = FALSE, row.names = 5130002:14067001, class = c("activity_df",
"activity_df", "data.frame"), n_head = 40)
要扩展我的评论。。。
您的测试数据框架不包括您指定的日期范围内的任何数据;嗯,它仍然不起作用"信息量不大。因此,我将用一些人工数据进行演示。
library(tidyverse)
library(lubridate)
df1 <- tibble(
time=seq.POSIXt(
ymd_hms("2016-09-30 07:30:00"),
ymd_hms("2016-10-06 20:30:00"),
120
)
)
df1
# A tibble: 4,711 × 1
time
<dttm>
1 2016-09-30 07:30:00
2 2016-09-30 07:32:00
3 2016-09-30 07:34:00
4 2016-09-30 07:36:00
5 2016-09-30 07:38:00
6 2016-09-30 07:40:00
7 2016-09-30 07:42:00
8 2016-09-30 07:44:00
9 2016-09-30 07:46:00
10 2016-09-30 07:48:00
# … with 4,701 more rows
因此,在2016-09-30 07:30:00
和2016-10-06 20:30:00
之间,每两分钟有4711行的日期时间。
根据需要进行过滤。
df1 %>%
filter(
time > ymd_hms("2016-09-30 09:30:00") &
time < ymd_hms("2016-10-03 20:15:00")
)
# A tibble: 2,482 × 1
time
<dttm>
1 2016-09-30 09:32:00
2 2016-09-30 09:34:00
3 2016-09-30 09:36:00
4 2016-09-30 09:38:00
5 2016-09-30 09:40:00
6 2016-09-30 09:42:00
7 2016-09-30 09:44:00
8 2016-09-30 09:46:00
9 2016-09-30 09:48:00
10 2016-09-30 09:50:00
# … with 2,472 more rows