我有以下数据:
df<-structure(list(ID = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2), day = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), x1 = c(15, 15, 15.2, 15.2,
15.3, 15.2, 15.3, 15, 15, 15.2, 15.3, 12, 12.1, 12.3, 12.2, 12,
12.4, 12.5, 12.4, 12.6, 12.7), x2 = c(1, 1, 0, 0, 0, 1, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -21L))
我想生成一个变量,指示x2中从1到0的变化,但前提是以下4行保持为0(按ID(。如x2从1到0的变化首次出现至少4天。要在此数据中生成变量:
df2<-structure(list(ID = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2), day = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), x1 = c(15, 15, 15.2, 15.2,
15.3, 15.2, 15.3, 15, 15, 15.2, 15.3, 12, 12.1, 12.3, 12.2, 12,
12.4, 12.5, 12.4, 12.6, 12.7), x2 = c(1, 1, 0, 0, 0, 1, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1), x3 = c(0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -21L))
其中,当x2停止至少4天时,无论是否再次出现,x3从第一次出现起获得值1
我想有一种方法可以在dplyr中使用滞后或超前函数,但我不确定如何对"至少4天"条件进行编程。有什么建议吗?
我们可以使用zoo::rollapply
进行滚动窗口计算。
fun <- function(z) +(length(z) == 6 && z[1] == 1 && z[2] == 0 && all(z[-(1:2)] == 0))
df %>%
group_by(ID) %>%
mutate(x3a = cummax(zoo::rollapply(lead(x2), 6, fun, fill = 0))) %>%
ungroup()
# # A tibble: 21 x 6
# ID day x1 x2 x3 x3a
# <dbl> <dbl> <dbl> <dbl> <dbl> <int>
# 1 1 1 15 1 0 0
# 2 1 2 15 1 0 0
# 3 1 3 15.2 0 0 0
# 4 1 4 15.2 0 0 0
# 5 1 5 15.3 0 0 0
# 6 1 6 15.2 1 0 0
# 7 1 7 15.3 0 1 1
# 8 1 8 15 0 1 1
# 9 1 9 15 0 1 1
# 10 1 10 15.2 0 1 1
# # ... with 11 more rows
tidyverse解决方案(也(可以如下所示:
library(dplyr)
library(tidyr)
df %>%
group_by(ID) %>%
mutate(grp = cumsum(x2)) %>%
group_by(ID, grp) %>%
mutate(fourOrMore = n() > 4,
x3 = + lag(fourOrMore),
x3 = replace_na(x3, 0)) %>%
ungroup() %>%
select(- c("grp", "fourOrMore"))
# # A tibble: 21 × 5
# ID day x1 x2 x3
# <dbl> <dbl> <dbl> <dbl> <int>
# 1 1 1 15 1 0
# 2 1 2 15 1 0
# 3 1 3 15.2 0 0
# 4 1 4 15.2 0 0
# 5 1 5 15.3 0 0
# 6 1 6 15.2 1 0
# 7 1 7 15.3 0 1
# 8 1 8 15 0 1
# 9 1 9 15 0 1
# 10 1 10 15.2 0 1
# # … with 11 more rows