我有这个数据框:
DATE | EVENT | EQUIPMENT | MASK
---|---|---|---
2021/12/08 | enterEquipment | L1 | a
2021/12/09 | Decouple | NA | a
2021/12/09 | stateChange | NA | a
2021/12/10 | leaveEquipment | L1 | a
2021/12/10 | enterEquipment | L1 | b
2021/12/11 | Decouple | NA | b
2021/12/11 | stateChange | NA | b
2021/12/11 | leaveEquipment | L1 | b
假设数据中的 EVENT 条目如上所示是有序且完整的,您可以先将其强制转换为 factor,再用 as.integer 转回整数,得到 EVENT_N;在本例中它就是 c(1, 2, 3, 4) 的“无限”重复。
接着可以利用 cumsum:对 EVENT_N 取模 %% 4 并减去 1(- 1)以得到正确的偏移,使每个周期的第一个事件对应 0;然后对与 0 比较所得的 TRUE 计算累积和,即得到设备编号。
# Canonical order of the four events that make up one equipment cycle.
lv <- c("enterEquipment", "Decouple", "stateChange", "leaveEquipment")
# Label every row with the running number of the cycle it belongs to.
dat$Equipement_Correct <- with(dat, {
  # Position of each event in `lv` (1..4); NA for events not listed in `lv`.
  EVENT_N <- as.integer(factor(EVENT, levels = lv))
  # TRUE exactly on rows whose event is the first level of `lv`.
  starts_cycle <- EVENT_N %% 4 - 1 == 0
  # Counting the cycle starts seen so far gives the label number.
  paste0("L", cumsum(starts_cycle))
})
dat
# DATE EVENT EQUIPMENT MASK Equipement_Correct
# 1 2021/12/08 enterEquipment L1 a L1
# 2 2021/12/09 Decouple <NA> a L1
# 3 2021/12/09 stateChange <NA> a L1
# 4 2021/12/10 leaveEquipment L1 a L1
# 5 2021/12/10 enterEquipment L2 b L2
# 6 2021/12/11 Decouple <NA> b L2
# 7 2021/12/11 stateChange <NA> b L2
# 8 2021/12/11 leaveEquipment L1 b L2
也许使用键(key)数据框 K 比使用 factor 更快。
# Key table: X1 holds the event name, X2 its position (1..4) in the cycle.
K <- data.frame(
  X1 = c("enterEquipment", "Decouple", "stateChange", "leaveEquipment"),
  X2 = 1:4
)
# Look up each event's position in K, flag the rows that start a cycle and
# count them cumulatively to build the label. The result is printed, `dat`
# itself is not modified.
within(dat, {
  Equipement_Correct <- paste0(
    "L",
    cumsum(K$X2[match(EVENT, K$X1)] %% 4 - 1 == 0)
  )
})
数据:
# Sample data: eight event rows (two enter/Decouple/stateChange/leave cycles)
# with character columns only; NA marks missing entries.
dat <- data.frame(
  DATE = c(
    "2021/12/08", "2021/12/09", "2021/12/09", "2021/12/10",
    "2021/12/10", "2021/12/11", "2021/12/11", "2021/12/11"
  ),
  EVENT = rep(
    c("enterEquipment", "Decouple", "stateChange", "leaveEquipment"),
    times = 2
  ),
  EQUIPMENT = c("L1", NA, NA, "L1", "L2", NA, NA, "L1"),
  MASK = rep(c("a", "b"), each = 4),
  Equipement_Correct = c("L1", "L1", NA, "L1", "L1", "L2", NA, "L2"),
  stringsAsFactors = FALSE
)
使用 'zoo' 包,其中提供了函数 na.locf(last observation carried forward,即“末次观测值结转”):
# Install zoo only when it is missing. The package name must be a quoted
# string: a bare `zoo` is evaluated as an object and fails.
if (!requireNamespace("zoo", quietly = TRUE)) {
  install.packages("zoo")
}
library(zoo)
# Carry the last non-NA EQUIPMENT value forward. na.rm = FALSE keeps leading
# NAs in place, so the result always has as many elements as the column and
# the assignment cannot fail on a length mismatch.
data$EQUIPMENT <- na.locf(data$EQUIPMENT, na.rm = FALSE)
例如:
> library(zoo)
> data <- data.frame(New= c("L1",NA,NA,"L2",NA,"L3",NA,NA), old = 1:8)
> data
New old
1 L1 1
2 <NA> 2
3 <NA> 3
4 L2 4
5 <NA> 5
6 L3 6
7 <NA> 7
8 <NA> 8
> data$New <- na.locf(data$New)
> data
New old
1 L1 1
2 L1 2
3 L1 3
4 L2 4
5 L2 5
6 L3 6
7 L3 7
8 L3 8
您也可以使用 tidyr::fill 来实现:
library(dplyr)
library(tidyr)
# Replace each NA in Equipement_Correct with the nearest non-NA value above it.
# NOTE(review): `df` is the input data frame; presumably the same rows as the
# sample data `dat` - confirm.
fill(df, Equipement_Correct, .direction = "down")
DATE EVENT EQUIPMENT MASK Equipement_Correct
1 2021/12/08 enterEquipment L1 a L1
2 2021/12/09 Decouple <NA> a L1
3 2021/12/09 stateChange <NA> a L1
4 2021/12/10 leaveEquipment L1 a L1
5 2021/12/10 enterEquipment L2 b L1
6 2021/12/11 Decouple <NA> b L2
7 2021/12/11 stateChange <NA> b L2
8 2021/12/11 leaveEquipment L1 b L2