在 R 中基于两列(字符串和数值)创建 ID,并在条件变化后递增(较难)



如何创建一个新列,使 ID 在以下任一情况下递增:a. 某个 userid 的 sells 值为 1 时,下一行使用新的 ID;b. 下一行的 userid 发生变化。

例如:

# Example input: one row per event, with the user identifier and a 0/1 flag
# marking whether that event was a sale.
df_base <- data.frame(
  userid = c(
    "5465d", "5465d", "5465d", "5465d", "6765b",
    "6765b", "7995b", "7995b", "7988b", "7778b"
  ),
  sells = c(1, 0, 0, 1, 0, 0, 0, 1, 0, 0),
  # Keep the row names as character labels "1".."10".
  row.names = as.character(1:10),
  # Keep userid as a character vector on every R version.
  stringsAsFactors = FALSE
)

创建该列后的预期结果:

# Expected output: the input columns plus `ids`, the run label each row
# should receive.
df_base_result <- data.frame(
  userid = c(
    "5465d", "5465d", "5465d", "5465d", "6765b",
    "6765b", "7995b", "7995b", "7988b", "7778b"
  ),
  sells = c(1, 0, 0, 1, 0, 0, 0, 1, 0, 0),
  ids = c(
    "ID1", "ID2", "ID2", "ID2", "ID3",
    "ID3", "ID4", "ID4", "ID5", "ID6"
  ),
  # Keep the row names as character labels "1".."10".
  row.names = as.character(1:10),
  # Keep userid and ids as character vectors on every R version.
  stringsAsFactors = FALSE
)
# Label runs of rows with "ID<n>" using dplyr verbs and data.table::rleid().
# Within each userid, `sales_before` counts the sales seen on earlier rows,
# so its value steps up on the row that follows a sale. rleid() then starts
# a new run whenever either userid or that counter changes between rows.
df_base %>%
  group_by(userid) %>%
  mutate(sales_before = cumsum(dplyr::lag(sells, default = 0))) %>%
  ungroup() %>%
  mutate(
    ids = paste0("ID", data.table::rleid(userid, sales_before))
  ) %>%
  # Drop the helper column so only userid, sells and ids remain.
  select(-sales_before)

# A tibble: 10 x 3
userid sells ids  
<chr>  <dbl> <chr>
1 5465d      1 ID1  
2 5465d      0 ID2  
3 5465d      0 ID2  
4 5465d      1 ID2  
5 6765b      0 ID3  
6 6765b      0 ID3  
7 7995b      0 ID4  
8 7995b      1 ID4  
9 7988b      0 ID5  
10 7778b      0 ID6  

您可以尝试

# Assign a run label "ID<n>" to every row of df_base. The counter advances
# when the userid differs from the previous row, or when the previous row
# was a sale (sells == 1) and the current row is not (sells == 0).
#
# Each row is compared with its predecessor in one vectorized pass. This
# avoids growing `res` inside a loop, and it also works for data frames with
# fewer than two rows, where `2:nrow(df_base)` would count backwards.
n_rows <- nrow(df_base)

# Element k of these vectors compares row k + 1 with row k.
user_changed <- tail(df_base$userid, -1) != head(df_base$userid, -1)
sale_ended <- tail(df_base$sells, -1) == 0 & head(df_base$sells, -1) == 1

# The first row always opens run 1; the subset trims that leading TRUE
# again when the data frame has no rows at all.
starts_run <- c(TRUE, user_changed | sale_ended)[seq_len(n_rows)]
res <- cumsum(starts_run)

# sprintf() yields character(0) for zero rows, whereas paste0() would
# return a single "ID" and fail the column assignment.
df_base$ids <- sprintf("ID%d", res)
df_base
userid sells ids
1   5465d     1 ID1
2   5465d     0 ID2
3   5465d     0 ID2
4   5465d     1 ID2
5   6765b     0 ID3
6   6765b     0 ID3
7   7995b     0 ID4
8   7995b     1 ID4
9   7988b     0 ID5
10  7778b     0 ID6

最新更新