在不使用循环的情况下替换dataFrame的Na值



我有这个数据帧:

设备aa
日期 事件口罩
2021/12/08 进入设备 L1
2021/12/09 解耦 NA
2021/12/09 状态更改 NA a
2021/12/10
2021/12/10 企业设备 L1 b
2021/12/11
2021/12/11 状态更改 NA b
2021/12/11

假设数据中的 EVENT 条目如示例所示是有序且完整的,您可以先把它强制转换为 factor,再用 as.integer 转回整数;这样 EVENT_N 就是“永远”重复的 c(1, 2, 3, 4)。接下来可以利用 cumsum:对 EVENT_N 取模 %% 4 后减去 1(减 1 是为了得到正确的滞后),再对结果等于 0 所得到的 TRUE 求累积和。

# Event names in the order used as factor levels (index 1..4).
lv <- c("enterEquipment", "Decouple", "stateChange", "leaveEquipment")
dat <- local({
  # EVENT_N is the level index of each EVENT; it only lives inside this
  # local() scope, so no temporary column is left behind in `dat`.
  EVENT_N <- as.integer(factor(dat$EVENT, levels = lv))
  # `EVENT_N %% 4 - 1 == 0` is TRUE on rows whose level index is 1 modulo 4;
  # the running count of those TRUEs becomes the numeric part of the label.
  dat$Equipement_Correct <- paste0("L", cumsum(EVENT_N %% 4 - 1 == 0))
  dat
})
dat
#         DATE          EVENT EQUIPMENT MASK Equipement_Correct
# 1 2021/12/08 enterEquipment        L1    a                 L1
# 2 2021/12/09       Decouple      <NA>    a                 L1
# 3 2021/12/09    stateChange      <NA>    a                 L1
# 4 2021/12/10 leaveEquipment        L1    a                 L1
# 5 2021/12/10 enterEquipment        L2    b                 L2
# 6 2021/12/11       Decouple      <NA>    b                 L2
# 7 2021/12/11    stateChange      <NA>    b                 L2
# 8 2021/12/11 leaveEquipment        L1    b                 L2

也许使用Key数据帧比使用factor更快。

# Key table: column X1 holds the event name, column X2 its integer index.
K <- data.frame(
  X1 = c("enterEquipment", "Decouple", "stateChange", "leaveEquipment"),
  X2 = 1:4,
  stringsAsFactors = FALSE
)
# Returns (and prints) a copy of `dat` with Equipement_Correct filled in;
# `dat` itself is not reassigned here.
local({
  # Look up each EVENT's index in the second column of the key table K.
  event_idx <- K[match(dat$EVENT, K[, 1]), 2]
  # Count the rows whose index is 1 modulo 4 and prefix the count with "L".
  dat$Equipement_Correct <- paste0("L", cumsum(event_idx %% 4 - 1 == 0))
  dat
})

数据:

# Example data: eight rows, five character columns.
dat <- data.frame(
  DATE = c(
    "2021/12/08", "2021/12/09", "2021/12/09", "2021/12/10",
    "2021/12/10", "2021/12/11", "2021/12/11", "2021/12/11"
  ),
  EVENT = rep(
    c("enterEquipment", "Decouple", "stateChange", "leaveEquipment"),
    times = 2
  ),
  EQUIPMENT = c("L1", NA, NA, "L1", "L2", NA, NA, "L1"),
  MASK = rep(c("a", "b"), each = 4),
  Equipement_Correct = c("L1", "L1", NA, "L1", "L1", "L2", NA, "L2"),
  stringsAsFactors = FALSE
)

使用 'zoo' 包,其中提供了函数 na.locf(last observation carried forward,即“末次观测值结转”):

# The package name must be a quoted string; a bare `zoo` is looked up as an
# object and fails. Install only when the package is not already available.
if (!requireNamespace("zoo", quietly = TRUE)) {
  install.packages("zoo")
}
library(zoo)
# Carry the last non-NA value forward. na.rm = FALSE keeps leading NAs so the
# result has the same length as the column it replaces.
# NOTE(review): assumes `data` is your data frame with an EQUIPMENT column.
data$EQUIPMENT <- na.locf(data$EQUIPMENT, na.rm = FALSE)
eg: > library(zoo)
> data <- data.frame(New= c("L1",NA,NA,"L2",NA,"L3",NA,NA), old = 1:8)
> data
New old
1   L1   1
2 <NA>   2
3 <NA>   3
4   L2   4
5 <NA>   5
6   L3   6
7 <NA>   7
8 <NA>   8
> data$New <- na.locf(data$New)
> data
New old
1  L1   1
2  L1   2
3  L1   3
4  L2   4
5  L2   5
6  L3   6
7  L3   7
8  L3   8

您也可以将tidyr::fill用于此目的:

library(dplyr)
library(tidyr)
# Fill NA entries of Equipement_Correct downwards.
# NOTE(review): assumes `df` is the data frame holding that column.
fill(df, Equipement_Correct, .direction = "down")
DATE          EVENT EQUIPMENT MASK Equipement_Correct
1 2021/12/08 enterEquipment        L1    a                 L1
2 2021/12/09       Decouple      <NA>    a                 L1
3 2021/12/09    stateChange      <NA>    a                 L1
4 2021/12/10 leaveEquipment        L1    a                 L1
5 2021/12/10 enterEquipment        L2    b                 L1
6 2021/12/11       Decouple      <NA>    b                 L2
7 2021/12/11    stateChange      <NA>    b                 L2
8 2021/12/11 leaveEquipment        L1    b                 L2

最新更新