如何根据一系列递增的非连续数字向 R 中的数据框添加唯一 ID



我有一个这样的数据框:

ID  Ixs Ixe Lem1 Lem2
1   0   1   S-      S-  
2   1   2   P       P   
3   0   1   T       t   
4   1   2   1       1   
5   0   1   W       w   
6   1   2   Na      Nadd
7   3   4   Cze     Czec
8   5   6   Abch    Ab
9   7   8   Gr      grn
10  9   10  Grs     grs
11  0   1   Cz      Czc

Ixs 列中每个从 0 开始的数字序列都属于同一个观测值。序列是递增的,但不一定连续。我需要新增一列作为 ID 计数器:每当 Ixs 中出现 0,计数器就加 1,该序列的所有行都使用这个 ID:

ID  Ixs Ixe Lem1 Lem2  SeqID
1   0   1   S-      S-   1  
2   1   2   P       P    1  
3   0   1   T       t    2  
4   1   2   1       1    2
5   0   1   W       w    3  
6   1   2   Na      Nadd 3 
7   3   4   Cze     Czec 3
8   5   6   Abch    Ab   3
9   7   8   Gr      grn  3
10  9   10  Grs     grs  3
11  0   1   Cz      Czc  4

我见过的所有添加整数序列的解决方案都需要一些我没有的分组变量。提前感谢您的帮助

您可以使用 base R 中的 findInterval():

# For each row index, count how many sequence starts (rows with Ixs == 0)
# lie at or before it; that count is the sequence ID.
# seq_len() is used instead of seq(nrow(df)) so that a zero-row data frame
# produces an empty index rather than c(1, 0).
df <- within(df, SeqID <- findInterval(seq_len(nrow(df)), which(Ixs == 0)))

这样得到的结果如下:

> df
   ID Ixs Ixe Lem1 Lem2 SeqID
1   1   0   1   S-   S-     1
2   2   1   2    P    P     1
3   3   0   1    T    t     2
4   4   1   2    1    1     2
5   5   0   1    W    w     3
6   6   1   2   Na Nadd     3
7   7   3   4  Cze Czec     3
8   8   5   6 Abch   Ab     3
9   9   7   8   Gr  grn     3
10 10   9  10  Grs  grs     3
11 11   0   1   Cz  Czc     4

数据

# Example data: 11 rows with integer ID/Ixs/Ixe columns and character
# Lem1/Lem2 columns.
df <- data.frame(
  ID = 1:11,
  Ixs = c(0L, 1L, 0L, 1L, 0L, 1L, 3L, 5L, 7L, 9L, 0L),
  Ixe = c(1L, 2L, 1L, 2L, 1L, 2L, 4L, 6L, 8L, 10L, 1L),
  Lem1 = c(
    "S-", "P", "T", "1", "W", "Na", "Cze", "Abch", "Gr", "Grs", "Cz"
  ),
  Lem2 = c(
    "S-", "P", "t", "1", "w", "Nadd", "Czec", "Ab", "grn", "grs", "Czc"
  ),
  stringsAsFactors = FALSE
)

我们可以在逻辑向量上使用 base R 的 cumsum():

# Every 0 in Ixs marks the start of a new sequence, so the running count of
# those zeros serves as the sequence ID.
df1[["SeqID"]] <- cumsum(df1[["Ixs"]] == 0)
df1[["SeqID"]]
#[1] 1 1 2 2 3 3 3 3 3 3 4

数据

# Example data: 11 rows with integer ID/Ixs/Ixe columns and character
# Lem1/Lem2 columns.
df1 <- data.frame(
  ID = 1:11,
  Ixs = c(0L, 1L, 0L, 1L, 0L, 1L, 3L, 5L, 7L, 9L, 0L),
  Ixe = c(1L, 2L, 1L, 2L, 1L, 2L, 4L, 6L, 8L, 10L, 1L),
  Lem1 = c(
    "S-", "P", "T", "1", "W", "Na", "Cze", "Abch", "Gr", "Grs", "Cz"
  ),
  Lem2 = c(
    "S-", "P", "t", "1", "w", "Nadd", "Czec", "Ab", "grn", "grs", "Czc"
  ),
  stringsAsFactors = FALSE
)

这是使用 data.table 和 zoo 的一种方法:

library(data.table)
library(zoo)
# Step 1: number only the rows that start a sequence (Ixs == 0); all other
#         rows get NA in SeqID.
# Step 2: carry each number forward over the following rows with na.locf().
# seq_len(.N) stays empty when no row matches (1:.N would give c(1, 0)), and
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
setDT(df)[Ixs == 0, SeqID := seq_len(.N)][
  , SeqID := na.locf(SeqID, na.rm = FALSE)
]
#    ID Ixs Ixe Lem1 Lem2 SeqID
#1:  1   0   1   S-   S-     1
#2:  2   1   2    P    P     1
#3:  3   0   1    T    t     2
#4:  4   1   2    1    1     2
#5:  5   0   1    W    w     3
#6:  6   1   2   Na Nadd     3
#7:  7   3   4  Cze Czec     3
#8:  8   5   6 Abch   Ab     3
#9:  9   7   8   Gr  grn     3
#10: 10  9  10  Grs  grs     3
#11: 11  0   1   Cz  Czc     4

数据

# Example data: 11 rows with integer ID/Ixs/Ixe columns; Lem1 and Lem2 are
# factors whose levels are listed explicitly so their order is fixed.
df <- data.frame(
  ID = 1:11,
  Ixs = c(0L, 1L, 0L, 1L, 0L, 1L, 3L, 5L, 7L, 9L, 0L),
  Ixe = c(1L, 2L, 1L, 2L, 1L, 2L, 4L, 6L, 8L, 10L, 1L),
  Lem1 = factor(
    c("S-", "P", "T", "1", "W", "Na", "Cze", "Abch", "Gr", "Grs", "Cz"),
    levels = c(
      "1", "Abch", "Cz", "Cze", "Gr", "Grs", "Na", "P", "S-", "T", "W"
    )
  ),
  Lem2 = factor(
    c("S-", "P", "t", "1", "w", "Nadd", "Czec", "Ab", "grn", "grs", "Czc"),
    levels = c(
      "1", "Ab", "Czc", "Czec", "grn", "grs", "Nadd", "P", "S-", "t", "w"
    )
  )
)

最新更新