r-计算行数,直到达到标准,然后重新开始



我的数据如下:

id = c(1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4)
time=c(20,30,1100,40,31,32,33,1005,22,23,1001,24,12,13,14,1002)
test <- data.frame(id,time)

我现在正在尝试对行进行计数,直到time > 1000id分组。到目前为止,我得到了

library(dplyr) 
test %>%    
group_by(id, idx = cumsum(time >= 1000))
%>%    
mutate(trip_count = row_number()) %>%    
ungroup %>%   
select(-idx)

到目前为止,这是有效的,但当time > 1000时,我希望计数更进一步,并在下一列再次从1开始,而不是1。这有可能吗?

由于每组数据中有4行,我们可以使用以下方法:

> test %>% left_join(test %>% filter(time < 1000) %>% group_by(id) %>% mutate(trip_count = row_number())) %>% group_by(id) %>% 
+                                                       mutate(trip_count = replace_na(trip_count, 4))
Joining, by = c("id", "time")
# A tibble: 16 x 3
# Groups:   id [4]
id  time trip_count
<dbl> <dbl>      <dbl>
1     1    20          1
2     1    30          2
3     1    40          3
4     1  1100          4
5     2    31          1
6     2    32          2
7     2    33          3
8     2  1005          4
9     3    22          1
10     3    23          2
11     3    24          3
12     3  1001          4
13     4    12          1
14     4    13          2
15     4    14          3
16     4  1002          4
> 

如果您的数据每组没有4行,可以使用以下方法:

> id = c(1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,4)
> time=c(20,30,40,1100,31,32,33,1005,22,23,24,1001,12,13,14,15,1002)
> test <- data.frame(id,time)
> test %>% left_join(test %>% filter(time < 1000) %>% group_by(id) %>% mutate(trip_count = row_number())) %>% group_by(id) %>% 
+               mutate(across(trip_count, ~ replace(., is.na(.), n())))
Joining, by = c("id", "time")
# A tibble: 17 x 3
# Groups:   id [4]
id  time trip_count
<dbl> <dbl>      <int>
1     1    20          1
2     1    30          2
3     1    40          3
4     1  1100          4
5     2    31          1
6     2    32          2
7     2    33          3
8     2  1005          4
9     3    22          1
10     3    23          2
11     3    24          3
12     3  1001          4
13     4    12          1
14     4    13          2
15     4    14          3
16     4    15          4
17     4  1002          5
> 

我在第4组增加了一行。

基于OP:共享的新数据

> test %>% 
+   left_join(test %>% group_by(id) %>% filter(row_number() < which(time >= 1000)) %>% 
+   mutate(trip_count = row_number())) %>% 
+   left_join(test %>% group_by(id) %>% filter(row_number() > which(time >= 1000)) %>% mutate(trip_count1 = row_number())) %>% 
+                     mutate(trip_count = coalesce(trip_count, trip_count1)) %>% select(-trip_count1) %>% group_by(id) %>% 
+                       mutate(rowid = row_number()) %>% rowwise() %>%  mutate(trip_count = replace_na(trip_count, rowid)) %>% select(-rowid)
Joining, by = c("id", "time")
Joining, by = c("id", "time")
# A tibble: 16 x 3
# Rowwise:  id
id  time trip_count
<dbl> <dbl>      <int>
1     1    20          1
2     1    30          2
3     1  1100          3
4     1    40          1
5     2    31          1
6     2    32          2
7     2    33          3
8     2  1005          4
9     3    22          1
10     3    23          2
11     3  1001          3
12     3    24          1
13     4    12          1
14     4    13          2
15     4    14          3
16     4  1002          4
> 

您可以使用lag:

library(dplyr)
test %>%    
group_by(id, idx = cumsum(lag(time, default = 0) >= 1000)) %>%    
mutate(trip_count = row_number()) %>%    
ungroup %>%   
select(-idx)

输出:

# A tibble: 16 x 3
id  time trip_count
<dbl> <dbl>      <int>
1     1    20          1
2     1    30          2
3     1    40          3
4     1  1100          4
5     2    31          1
6     2    32          2
7     2    33          3
8     2  1005          4
9     3    22          1
10     3    23          2
11     3    24          3
12     3  1001          4
13     4    12          1
14     4    13          2
15     4    14          3
16     4  1002          4

最新更新