R:如果某个元素在分组数据中连续出现至少3次,则截断该组之后的观测值



我想创建一个新列 Censor:如果某个 id 的 status 连续至少 3 次为 Lost,或者 status == "died",则 Censor = 1;否则 Censor = 0。事件发生的位置取三个连续 Lost 中的第一个,或者 died 所在的那一行,该 id 在事件之后的观测值将被截断。

# Example data: one row per visit, ordered within each subject `id`.
# Subjects 1-6 contribute 5, 5, 5, 3, 2 and 4 rows respectively.
dfram <- data.frame(
  id = rep(c(1, 2, 3, 4, 5, 6), times = c(5, 5, 5, 3, 2, 4)),
  status = c(
    "Alive", "Lost", "Lost", "Restart", "Lost",    # id 1
    "Alive", "Lost", "Lost", "Lost", "Dropped",    # id 2
    "Alive", "Alive", "Lost", "Lost", "Restart",   # id 3
    "Alive", "Lost", "Lost",                       # id 4
    "Alive", "died",                               # id 5
    "Alive", "Lost", "Lost", "Lost"                # id 6
  )
)
id  status
1   1   Alive
2   1    Lost
3   1    Lost
4   1 Restart
5   1    Lost 
6   2   Alive
7   2    Lost   # event, since there are 3 consecutive lost
8   2    Lost
9   2    Lost
10   2 Dropped
11  3   Alive
12  3   Alive
13  3    Lost
14  3    Lost
15  3 Restart
16  4   Alive
17  4    Lost
18  4    Lost
19  5   Alive
20  5    died  # event, death also considered as our event. 
21  6   Alive
22  6    Lost  # event
23  6    Lost
24  6    Lost

预期结果

id  status Censor
1   1   Alive      0
2   1    Lost      0
3   1    Lost      0
4   1 Restart      0
5   1    Lost      0
6   2   Alive      0
7   2    Lost      1
8   3   Alive      0
9   3   Alive      0
10  3    Lost      0
11  3    Lost      0
12  3 Restart      0
13  4   Alive      0
14  4    Lost      0
15  4    Lost      0
16  5   Alive      0
17  5    died      1
18  6   Alive      0
19  6    Lost      1
library(tidyverse)
# Same example data as in the question: visits listed in order per subject.
subject_ids <- c(1, 2, 3, 4, 5, 6)
visits_per_subject <- c(5, 5, 5, 3, 2, 4)
visit_status <- c(
  "Alive", "Lost", "Lost", "Restart", "Lost",    # id 1
  "Alive", "Lost", "Lost", "Lost", "Dropped",    # id 2
  "Alive", "Alive", "Lost", "Lost", "Restart",   # id 3
  "Alive", "Lost", "Lost",                       # id 4
  "Alive", "died",                               # id 5
  "Alive", "Lost", "Lost", "Lost"                # id 6
)
df <- data.frame(
  id = rep(subject_ids, times = visits_per_subject),
  status = visit_status
)
#' Running length of consecutive "Lost" entries
#'
#' For every position in `x`, counts how many consecutive "Lost" values end
#' at that position. Any other value resets the count to 0, so a result
#' `>= 3` means at least three "Lost" in a row have been seen.
#'
#' @param x A vector of status labels (character or factor).
#' @return A numeric vector the same length as `x`; `numeric(0)` for empty
#'   input. `NA` entries are treated as not "Lost" and reset the count.
threeLost <- function(x) {
  # NA is explicitly "not Lost" so it resets the run instead of raising an
  # error or propagating NA.
  is_lost <- !is.na(x) & x == "Lost"
  pos <- seq_along(x)

  # Position of the most recent non-"Lost" entry seen so far (0 if none yet).
  last_reset <- cummax(replace(pos, is_lost, 0L))

  # Distance from the last reset is the current run length; kept as double.
  as.numeric(pos - last_reset)
}
# Flag event rows per subject.
# The run counter is computed within each `id`, so a streak of "Lost" at the
# end of one subject does not carry over into the next subject's rows.
# Censor is 1 on the row where the run reaches its third (or later)
# consecutive "Lost", or where status is "died"; otherwise 0.
df1 <- df %>%
  group_by(id) %>%
  mutate(
    index = threeLost(status),
    Censor = case_when(
      index >= 3 | status == "died" ~ 1,
      TRUE ~ 0
    )
  ) %>%
  ungroup() %>%
  as.data.frame() # keep df1 a plain data.frame, as before grouping

df1

> df1   
id  status index Censor
1   1   Alive     0      0
2   1    Lost     1      0
3   1    Lost     2      0
4   1 Restart     0      0
5   1    Lost     1      0
6   2   Alive     0      0
7   2    Lost     1      0
8   2    Lost     2      0
9   2    Lost     3      1
10  2 Dropped     0      0
11  3   Alive     0      0
12  3   Alive     0      0
13  3    Lost     1      0
14  3    Lost     2      0
15  3 Restart     0      0
16  4   Alive     0      0
17  4    Lost     1      0
18  4    Lost     2      0
19  5   Alive     0      0
20  5    died     0      1
21  6   Alive     0      0
22  6    Lost     1      0
23  6    Lost     2      0
24  6    Lost     3      1

最新更新