如何在 R 中基于多个变量创建 UID



如何基于 col1 和 col2 为 col3 创建一个唯一的 UID?

# Example input: six rows that all share the same col3 value ("16").
col1 <- rep(c("AP", "POL"), times = c(5, 1))
col2 <- rep(c("M", "F", "M"), times = c(3, 2, 1))
df <- data.frame(col1, col2, col3 = rep("16", times = 6))
df
# Desired result (roughly): within each (col1, col2) combination, repeated
# col3 values receive a running suffix ("16", "16_1", "16_2", ...).
col3 <- c("16", "16_1", "16_2", "16", "16_1", "16")
df1 <- data.frame(col1, col2, col3)
df1

base R

# Number the rows within each (col1, col2) combination.
# ave() returns a vector of the same type as its first argument, so passing
# the character column col3 would make the counter a character vector
# ("1", "2", ...). Feeding it an integer row index keeps newcol an integer.
df$newcol <- ave(seq_len(nrow(df)), df[c("col1", "col2")], FUN = seq_along)
df
#   col1 col2 col3 newcol
# 1   AP    M   16      1
# 2   AP    M   16      2
# 3   AP    M   16      3
# 4   AP    F   16      1
# 5   AP    F   16      2
# 6  POL    M   16      1

dplyr

library(dplyr)
# Within each (col1, col2) group, number the rows 1..n, then drop the
# grouping so later verbs operate on the whole table again.
df %>%
  group_by(col1, col2) %>%
  mutate(newcol = row_number()) %>%
  ungroup()
# # A tibble: 6 x 4
#   col1  col2  col3  newcol
#   <chr> <chr> <chr>  <int>
# 1 AP    M     16         1
# 2 AP    M     16         2
# 3 AP    M     16         3
# 4 AP    F     16         1
# 5 AP    F     16         2
# 6 POL   M     16         1

data.table

library(data.table)
# Number the rows within each (col1, col2) combination.
# Grouping by the key columns themselves (rather than by rleid(), which only
# groups consecutive runs) keeps the counter unique per combination even when
# rows of the same group are not adjacent. seq_len(.N) yields 1..N per group.
# The trailing [] forces printing after the by-reference := assignment.
as.data.table(df)[, newcol := seq_len(.N), by = .(col1, col2)][]
#      col1   col2   col3 newcol
#    <char> <char> <char>  <int>
# 1:     AP      M     16      1
# 2:     AP      M     16      2
# 3:     AP      M     16      3
# 4:     AP      F     16      1
# 5:     AP      F     16      2
# 6:    POL      M     16      1

相关内容

  • 没有找到相关文章

最新更新