使用R在时间序列中的其他变量的基础上创建一个新变量



大家好,

我想通过确定传感器是否在设定的时间段(2小时)内产生一个值来量化遥感器的工作时间,这将表明传感器在该时间段内是否正常工作。我的数据帧有一个datetime变量,格式为Y-M-D H:M:S(例如:2020-04-06 09:50:00),还有一个站点变量(有6个不同的站点),我想评估它的操作时间。

感谢所有的帮助。

编辑*

这是我的数据前几行(head)的dput输出。我不确定是否应该以这种方式提供。

# dput() of the first rows of the detection data frame (read with readr, hence
# the `spec` attribute). Backslashes in the Windows paths of `srcfile` are
# escaped ("\\") so that the literal parses; a bare "\U" is a syntax error in R.
structure(list(datetime = structure(c(1564618522, 1564618874, 1564618933, 
1564618994, 1564619054, 1564622122), class = c("POSIXct", 
"POSIXt"), tzone = "UTC"), fracsec = c(0.75, 0.33, 0.57, 0.1, 
0.07, 0.95), duration = c(NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), tagtype = c(NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_), 
PITnum = c("999000000007426", "985121002397230", "985121002397230", 
"985121002397230", "985121002397230", "999000000007426"), 
consdetc = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_), arrint = c(NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_), site = c("DSDS", 
"DSDS", "DSDS", "DSDS", "DSDS", "DSDS"), manuf = c("Biomark", 
"Biomark", "Biomark", "Biomark", "Biomark", "Biomark"), srcfile = c("C:\\Users\\jrjohnson\\Documents\\MoraPIT\\julyAllArraysAWformat\\dsds\\Archive\\2020-04-01_DSDS_08092019.txt", 
"C:\\Users\\jrjohnson\\Documents\\MoraPIT\\julyAllArraysAWformat\\dsds\\Archive\\2020-04-01_DSDS_08092019.txt", 
"C:\\Users\\jrjohnson\\Documents\\MoraPIT\\julyAllArraysAWformat\\dsds\\Archive\\2020-04-01_DSDS_08092019.txt", 
"C:\\Users\\jrjohnson\\Documents\\MoraPIT\\julyAllArraysAWformat\\dsds\\Archive\\2020-04-01_DSDS_08092019.txt", 
"C:\\Users\\jrjohnson\\Documents\\MoraPIT\\julyAllArraysAWformat\\dsds\\Archive\\2020-04-01_DSDS_08092019.txt", 
"C:\\Users\\jrjohnson\\Documents\\MoraPIT\\julyAllArraysAWformat\\dsds\\Archive\\2020-04-01_DSDS_08092019.txt"
), srcline = 21:26, compdate = structure(c(18353, 18353, 
18353, 18353, 18353, 18353), class = "Date")), spec = structure(list(
cols = list(datetime = structure(list(format = ""), class = 
c("collector_datetime", 
"collector")), fracsec = structure(list(), class = c("collector_double", 
"collector")), duration = structure(list(), class = c("collector_double", 
"collector")), tagtype = structure(list(), class = 
c("collector_character", 
"collector")), PITnum = structure(list(), class = c("collector_character", 
"collector")), consdetc = structure(list(), class = c("collector_integer", 
"collector")), arrint = structure(list(), class = c("collector_integer", 
"collector")), site = structure(list(), class = c("collector_character", 
"collector")), manuf = structure(list(), class = c("collector_character", 
"collector")), srcfile = structure(list(), class = 
c("collector_character", 
"collector")), srcline = structure(list(), class = c("collector_integer", 
"collector")), compdate = structure(list(format = "%Y-%m-%d"), class = 
c("collector_date", 
"collector"))), default = structure(list(), class = c("collector_guess", 
"collector")), skip = 0), class = "col_spec"), row.names = 23803:23808, 
class = "data.frame")

以下是在data.table中使用:=运算符的方法:

样本数据

library(data.table)

# Read the clock once so the threshold and the series share a single origin.
# Two separate Sys.time() calls differ by a few microseconds, which means no
# row of `dat` would ever coincide exactly with the threshold.
start_time <- Sys.time()

# Cut-off used for the comparison: 180 seconds after the first observation.
time_threshold <- start_time + 180

# Ten observations spaced 60 seconds apart, with values drawn from N(10, 2).
dat <- data.table(
  time = seq(from = start_time, by = 60, length.out = 10),
  value = rnorm(n = 10, mean = 10, sd = 2)
)

代码

根据time和value列添加新变量:

> time_threshold
[1] "2020-04-06 13:08:42 EDT"
> dat
time     value
1: 2020-04-06 13:05:42  8.240336
2: 2020-04-06 13:06:42  9.744952
3: 2020-04-06 13:07:42  6.984802
4: 2020-04-06 13:08:42  8.015951
5: 2020-04-06 13:09:42 13.435096
6: 2020-04-06 13:10:42 10.835025
7: 2020-04-06 13:11:42  7.216484
8: 2020-04-06 13:12:42  9.559917
9: 2020-04-06 13:13:42  8.320369
10: 2020-04-06 13:14:42 13.201530
> dat[ time >= time_threshold & value >= 10, new_variable := 1]
> dat
time     value new_variable
1: 2020-04-06 13:05:42  8.240336           NA
2: 2020-04-06 13:06:42  9.744952           NA
3: 2020-04-06 13:07:42  6.984802           NA
4: 2020-04-06 13:08:42  8.015951           NA
5: 2020-04-06 13:09:42 13.435096            1
6: 2020-04-06 13:10:42 10.835025            1
7: 2020-04-06 13:11:42  7.216484           NA
8: 2020-04-06 13:12:42  9.559917           NA
9: 2020-04-06 13:13:42  8.320369           NA
10: 2020-04-06 13:14:42 13.201530            1

您也可以看看dplyr包中的mutate函数。

最新更新