我想按 id 分组过滤数据,并在每次第一次观察到 sex == 2 之后,删除紧随其后的连续 sex == 2 的行(即每段连续的 2 只保留第一行)。数据如下:
# Sample data: `id` is the grouping key (ids 1-4 with 4, 6, 6 and 3 rows),
# `sex` holds the codes 1 and 2. The `sex` values are laid out one id per line.
data <- data.frame(
  id = rep(c(1, 2, 3, 4), times = c(4, 6, 6, 3)),
  sex = c(
    1, 1, 2, 2,
    1, 1, 1, 2, 2, 2,
    1, 1, 2, 1, 1, 2,
    1, 2, 2
  )
)
data
id sex
1 1
1 1
1 2
1 2
2 1
2 1
2 1
2 2
2 2
2 2
3 1
3 1
3 2
3 1
3 1
3 2
4 1
4 2
4 2
期望输出
id sex
1 1
1 1
1 2
2 1
2 1
2 1
2 2
3 1
3 1
3 2
3 1
3 1
3 2
4 1
4 2
我试着
library(dplyr)
# NOTE(review): this is the asker's failing attempt, kept verbatim.
# It cannot be parsed: there is one more closing `)` than opening `(`.
# Beyond that, `data` as built above only has the columns `id` and `sex`,
# so `type` does not exist, and `silec` is not defined anywhere in view
# (presumably `slice()` was intended -- TODO confirm).
data1 <- data %>% filter(type == 1 ) & silec(2))
但是我得到了一个错误。有人能帮忙吗?
数据
# Reproducible input: four ids with 4, 6, 6 and 3 observations respectively;
# `sex` is listed in row order, one id per line.
data <- data.frame(
  id = rep.int(c(1, 2, 3, 4), times = c(4L, 6L, 6L, 3L)),
  sex = c(
    1, 1, 2, 2,             # id 1
    1, 1, 1, 2, 2, 2,       # id 2
    1, 1, 2, 1, 1, 2,       # id 3
    1, 2, 2                 # id 4
  )
)
# Keep every row with sex == 1, plus the first row of each consecutive run of
# sex == 2 inside an id.
#
# The earlier condition `cumsum(sex == 2) == 1` only kept the very first 2 of
# each id, so a later run of 2s (the last row of id 3) was dropped and the
# result had 14 rows instead of the 15 rows of the expected output.
data %>%
  # Group by id so that lag() never looks across an id boundary.
  group_by(id) %>%
  # A 2 is kept only when the previous row of the same id is not a 2.
  # `default = 1` makes a 2 on the first row of an id count as "first of a run".
  # dplyr::lag is spelled out so stats::lag can never be picked up by mistake.
  filter(sex == 1 | (sex == 2 & dplyr::lag(sex, default = 1) != 2)) %>%
  # Drop the grouping so later steps do not silently run per id.
  ungroup()
# Output:
#> # A tibble: 15 x 2
#>       id   sex
#>    <dbl> <dbl>
#>  1     1     1
#>  2     1     1
#>  3     1     2
#>  4     2     1
#>  5     2     1
#>  6     2     1
#>  7     2     2
#>  8     3     1
#>  9     3     1
#> 10     3     2
#> 11     3     1
#> 12     3     1
#> 13     3     2
#> 14     4     1
#> 15     4     2
# Per id, keep all rows where sex == 1 and only the first row of every
# consecutive run of sex == 2.
#
# `cumsum(sex == 2) == 1` was not enough here: it stops matching after the
# first 2 of an id, so the second run of 2s in id 3 was lost (14 rows instead
# of the 15 rows of the expected output).
data %>%
  # Grouping keeps the "previous row" comparison inside a single id.
  group_by(id) %>%
  # Keep a 2 only if the row before it (within the id) is not also a 2;
  # `default = 1` treats the first row of each id as having no preceding 2.
  filter(sex == 1 | (sex == 2 & dplyr::lag(sex, default = 1) != 2)) %>%
  # Return an ungrouped tibble.
  ungroup()
# A tibble: 15 x 2
#       id   sex
#    <dbl> <dbl>
#  1     1     1
#  2     1     1
#  3     1     2
#  4     2     1
#  5     2     1
#  6     2     1
#  7     2     2
#  8     3     1
#  9     3     1
# 10     3     2
# 11     3     1
# 12     3     1
# 13     3     2
# 14     4     1
# 15     4     2
您可以在每个 id 组内,把一段连续出现的 1 和紧随其后的一段连续的 2 划分为一个子组。然后在每个子组中选取行,直到遇到其中的第一个 2 为止(包含该行)。
library(dplyr)
library(data.table)
# Split the rows into sub-groups made of one run of 1s plus the run of 2s that
# follows it, then keep each sub-group up to and including its first 2.
data %>%
  # rleid() numbers the consecutive runs of equal `sex`; dividing by 2 and
  # rounding up pairs every run with the next one. This relies on the runs
  # alternating "1s, then 2s" starting with 1s, as they do in the sample data.
  # NOTE(review): per the dplyr docs the `grp` expression is evaluated on the
  # ungrouped data, so the run ids span the whole column; `id` in group_by()
  # is what keeps the sub-groups from crossing ids.
  group_by(id, grp = ceiling(rleid(sex) / 2)) %>%
  # match() returns the position of the first 2. `nomatch = n()` keeps the
  # whole sub-group when it contains no 2 (otherwise seq_len(NA) would error).
  slice(seq_len(match(2, sex, nomatch = n()))) %>%
  # The pipe after ungroup() was missing, which left `select(-grp)` as a
  # separate statement that fails and never removed the helper column.
  ungroup() %>%
  select(-grp)
# id sex
# <dbl> <dbl>
# 1 1 1
# 2 1 1
# 3 1 2
# 4 2 1
# 5 2 1
# 6 2 1
# 7 2 2
# 8 3 1
# 9 3 1
#10 3 2
#11 3 1
#12 3 1
#13 3 2
#14 4 1
#15 4 2