我正在尝试复制观测值的集群(ID),并生成一个新变量(new_ID)来唯一标识每个复制出来的集群。例如,考虑数据框 df1:
# Example input: ID "1" has three rows, ID "2" two rows and ID "3" one row.
# sex and count are constant within each ID.
rows_per_id <- c(3, 2, 1)
df1 <- data.frame(
  ID = rep(c("1", "2", "3"), times = rows_per_id),
  sex = rep(c("M", "F", "M"), times = rows_per_id),
  count = rep(c(4, 3, 2), times = rows_per_id)
)
df1
#> ID sex count
#> 1 1 M 4
#> 2 1 M 4
#> 3 1 M 4
#> 4 2 F 3
#> 5 2 F 3
#> 6 3 M 2
# Desired result: every ID cluster from df1 appears `count` times, and each
# copy of a cluster carries its own new_ID ("1" to "9" overall).
rows_per_old_id <- c(12, 6, 2)
rows_per_new_id <- c(3, 3, 3, 3, 2, 2, 2, 1, 1)
df2 <- data.frame(
  ID = rep(c("1", "2", "3"), times = rows_per_old_id),
  new_ID = rep(as.character(1:9), times = rows_per_new_id),
  sex = rep(c("M", "F", "M"), times = rows_per_old_id),
  count = rep(c(4, 3, 2), times = rows_per_old_id)
)
df2
#> ID new_ID sex count
#> 1 1 1 M 4
#> 2 1 1 M 4
#> 3 1 1 M 4
#> 4 1 2 M 4
#> 5 1 2 M 4
#> 6 1 2 M 4
#> 7 1 3 M 4
#> 8 1 3 M 4
#> 9 1 3 M 4
#> 10 1 4 M 4
#> 11 1 4 M 4
#> 12 1 4 M 4
#> 13 2 5 F 3
#> 14 2 5 F 3
#> 15 2 6 F 3
#> 16 2 6 F 3
#> 17 2 7 F 3
#> 18 2 7 F 3
#> 19 3 8 M 2
#> 20 3 9 M 2
提前感谢您的帮助。
如果我理解正确,可以这样实现:
library(dplyr)

# Replicate every ID cluster of df1 `count` times and give each replicate a
# unique running number in `new_ID`.
df1 %>%
  # Repeat each row `count` times; .remove = FALSE keeps the count column.
  tidyr::uncount(count, .remove = FALSE) %>%
  group_by(ID) %>%
  # Within one ID, cut the n() rows into first(count) consecutive chunks of
  # equal size and number the chunks 1..first(count).
  mutate(new_ID = rep(seq_len(first(count)), each = n() / first(count))) %>%
  ungroup() %>%
  # Convert the per-ID chunk numbers into a single running id. The run key
  # includes ID as well as new_ID: keyed on new_ID alone, two adjacent IDs
  # that both have count == 1 (chunk number 1 in each) would be merged into
  # one run and end up sharing the same id.
  mutate(new_ID = data.table::rleid(ID, new_ID))
# A tibble: 20 x 4
# ID sex count new_ID
# <chr> <chr> <dbl> <int>
# 1 1 M 4 1
# 2 1 M 4 1
# 3 1 M 4 1
# 4 1 M 4 2
# 5 1 M 4 2
# 6 1 M 4 2
# 7 1 M 4 3
# 8 1 M 4 3
# 9 1 M 4 3
#10 1 M 4 4
#11 1 M 4 4
#12 1 M 4 4
#13 2 F 3 5
#14 2 F 3 5
#15 2 F 3 6
#16 2 F 3 6
#17 2 F 3 7
#18 2 F 3 7
#19 3 M 2 8
#20 3 M 2 9