R:在 data.table 中,当 shift() 使用 type = 'lag' 时,ifelse 不起作用



我有一个这样的数据帧:

timestamp           Status
05-01-2020  12:07:08    0
05-01-2020  12:36:05    1
05-01-2020  23:45:02    0
05-01-2020  13:44:33    1
06-01-2020  01:07:08    1
06-01-2020  10:23:05    1
06-01-2020  12:11:08    1
06-01-2020  22:06:12    1
07-01-2020  00:01:05    0
07-01-2020  02:17:09    1
07-01-2020  12:36:05    1
07-01-2020  12:07:08    1
07-01-2020  12:36:05    1
07-01-2020  12:36:05    0
08-01-2020  12:36:05    1
08-01-2020  12:36:05    0
08-01-2020  12:36:05    0
09-01-2020  12:36:05    1
09-01-2020  12:07:08    0
09-01-2020  12:36:05    1
11-01-2020  12:07:08    0
11-01-2020  12:36:05    1

ifelse 中的第一个条件不起作用。是因为我试图在shift函数中使用lag吗?这是我的代码。

# Asker's original attempt, kept verbatim: this is the expression the question
# reports as not working. It assigns `difference` (in minutes) with a nested
# ifelse():
#   1. Status is 0, the previous row's Status is 1 and the previous row has a
#      different date -> timestamp minus 00:00:00 of the same date.
#   2. Status is 1, the next row's Status is 0 and the next row has a different
#      date -> 23:59:59 of the same date minus timestamp.
#   3. otherwise -> next row's timestamp minus this timestamp.
# shift() is called without `fill`, so the first (lag) and last (lead) rows are
# compared against NA.
# NOTE(review): as.Date(df$timestamp) is called without a format and timestamp
# is used in POSIXct arithmetic; presumably the column was already parsed
# before this call -- confirm.
df[, difference := ifelse((df$Status == 0 & shift(df$Status,type='lag') == 1) & (as.Date(df$timestamp) !=  shift(as.Date(df$timestamp),type = 'lag')),
as.numeric(df$timestamp - as.POSIXct(paste0(as.Date(timestamp)," ","00:00:00"),tz="UTC"),units='mins'),ifelse((df$Status == 1 & shift(df$Status,type='lead') == 0) & as.Date(df$timestamp) !=  shift(as.Date(df$timestamp),type = 'lead'),as.numeric(as.POSIXct(paste0(as.Date(timestamp)," ","23:59:59"),tz="UTC") - df$timestamp,units='mins'),
as.numeric(shift(df$timestamp,type = 'lead') -  df$timestamp,units='mins')))]

我们可以先通过指定正确的 format 来创建 `date` 列,并把 timestamp 转换为 POSIXct

library(data.table)

# Convert `df` to a data.table by reference so that `:=` can add columns.
setDT(df)

# Calendar day of each observation, parsed from the character timestamp
# (only the leading date part of the string is consumed by this format).
# NOTE(review): the sample values (05-01, 06-01, 07-01, ...) may actually be
# day-month-year; if so, use "%d-%m-%Y" here and below -- confirm with the
# data owner.
df[, date := as.IDate(timestamp, format = "%m-%d-%Y")]

# Parse the full timestamp. The time zone is pinned to UTC because the later
# steps compare it against instants built in UTC (`tz = "UTC"` end-of-day
# stamps); parsing in the session's local zone would shift every computed
# difference by the local UTC offset.
df[, timestamp := as.POSIXct(timestamp, format = "%m-%d-%Y %H:%M:%S",
                             tz = "UTC")]

然后,为 ifelse/fifelse 创建条件列,并通过 fill 参数替换 shift 在首行/末行产生的 NA

# Logical helper columns for the conditions used when computing `difference`.
# `fill` replaces the NA that shift() would otherwise produce on the boundary
# row with that row's own neighbour-less value, so the comparisons never
# yield NA.

# i1: this row is 0 and the previous row was 1.
df[, i1 := Status == 0 & shift(Status, type = 'lag', fill = first(Status)) == 1]
# i2: the previous row falls on a different date.
df[, i2 := date != shift(date, type = 'lag', fill = first(date))]
# i3: this row is 1 and the next row is 0. The current-row test is
# `Status == 1` to match the rule stated in the question; testing
# `Status == 0` here would flag 0 -> 0 transitions instead.
df[, i3 := Status == 1 & shift(Status, type = 'lead', fill = last(Status)) == 0]
# i4: the next row falls on a different date.
df[, i4 := date != shift(date, type = 'lead', fill = last(date))]

最后,使用 fifelse/ifelse 计算 difference

# Compute `difference` in minutes for every row:
#   * i1 & i2  -> time elapsed between as.POSIXct(date) and the timestamp
#   * i3 & i4  -> time remaining from the timestamp until 23:59:59 of `date`
#   * otherwise -> gap to the next row's timestamp (the last row is compared
#                  with itself because of `fill`, giving 0)
df[, difference := {
  day_start <- as.POSIXct(date)
  day_end <- as.POSIXct(paste(date, "23:59:59"), tz = "UTC")
  next_stamp <- shift(timestamp, type = 'lead', fill = last(timestamp))

  mins_since_start <- as.numeric(difftime(timestamp, day_start, units = 'mins'))
  mins_until_end <- as.numeric(difftime(day_end, timestamp, units = 'mins'))
  mins_until_next <- as.numeric(difftime(next_stamp, timestamp, units = 'mins'))

  fifelse(
    i1 & i2,
    mins_since_start,
    fifelse(i3 & i4, mins_until_end, mins_until_next)
  )
}]

数据

# Sample data for the example: 22 observations with a character `timestamp`
# (two spaces between the date and time parts) and an integer 0/1 `Status`.
df <- data.frame(
  timestamp = c(
    "05-01-2020  12:07:08",
    "05-01-2020  12:36:05",
    "05-01-2020  23:45:02",
    "05-01-2020  13:44:33",
    "06-01-2020  01:07:08",
    "06-01-2020  10:23:05",
    "06-01-2020  12:11:08",
    "06-01-2020  22:06:12",
    "07-01-2020  00:01:05",
    "07-01-2020  02:17:09",
    "07-01-2020  12:36:05",
    "07-01-2020  12:07:08",
    "07-01-2020  12:36:05",
    "07-01-2020  12:36:05",
    "08-01-2020  12:36:05",
    "08-01-2020  12:36:05",
    "08-01-2020  12:36:05",
    "09-01-2020  12:36:05",
    "09-01-2020  12:07:08",
    "09-01-2020  12:36:05",
    "11-01-2020  12:07:08",
    "11-01-2020  12:36:05"
  ),
  Status = c(
    0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
    1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L
  ),
  # Keep `timestamp` as character on every R version.
  stringsAsFactors = FALSE
)

最新更新