如何基于 col1 和 col2 的分组,为 col3 生成唯一标识符(UID)
# Sample input: six rows keyed by (col1, col2); every row starts with the
# same base id in col3.
col1 <- rep(c("AP", "POL"), times = c(5L, 1L))
col2 <- rep(c("M", "F", "M"), times = c(3L, 2L, 1L))
col3 <- rep("16", times = 6L)
df <- data.frame(col1 = col1, col2 = col2, col3 = col3)
df
# Desired result (roughly): within each (col1, col2) group the first row keeps
# the bare id and later rows get a "_<k>" suffix.
col1 <- rep(c("AP", "POL"), times = c(5L, 1L))
col2 <- rep(c("M", "F", "M"), times = c(3L, 2L, 1L))
col3 <- c(
  "16", "16_1", "16_2",  # AP / M
  "16", "16_1",          # AP / F
  "16"                   # POL / M
)
df1 <- data.frame(col1 = col1, col2 = col2, col3 = col3)
df1
base R
# Number the rows within each (col1, col2) group, in order of appearance.
# ave() writes the per-group results back into a copy of its first argument,
# so the result keeps that argument's type: passing the character column col3
# would give "1", "2", ... as strings. Feeding it an integer row index keeps
# newcol an integer, consistent with the dplyr and data.table variants.
df$newcol <- ave(seq_len(nrow(df)), df$col1, df$col2, FUN = seq_along)
df
#   col1 col2 col3 newcol
# 1   AP    M   16      1
# 2   AP    M   16      2
# 3   AP    M   16      3
# 4   AP    F   16      1
# 5   AP    F   16      2
# 6  POL    M   16      1
dplyr
library(dplyr)

# Per-group running counter: inside each (col1, col2) group, number the rows
# 1..n in their current order, then drop the grouping so later verbs operate
# on the whole table again.
df %>%
  group_by(col1, col2) %>%
  mutate(newcol = seq_len(n())) %>%
  ungroup()
# # A tibble: 6 x 4
#   col1  col2  col3  newcol
#   <chr> <chr> <chr>  <int>
# 1 AP    M     16         1
# 2 AP    M     16         2
# 3 AP    M     16         3
# 4 AP    F     16         1
# 5 AP    F     16         2
# 6 POL   M     16         1
data.table
library(data.table)

# Per-group running counter added by reference on a data.table copy of df.
# Group on the key columns themselves rather than on rleid(col1, col2):
# rleid() starts a new group at every run of consecutive equal values, so a
# (col1, col2) pair that shows up again further down the table would restart
# at 1 and produce duplicate ids. Grouping by the columns numbers every
# occurrence of a pair continuously, matching the base R and dplyr variants.
# The trailing [] forces the result to print after `:=`.
as.data.table(df)[, newcol := seq_len(.N), by = .(col1, col2)][]
#      col1   col2   col3 newcol
#    <char> <char> <char>  <int>
# 1:     AP      M     16      1
# 2:     AP      M     16      2
# 3:     AP      M     16      3
# 4:     AP      F     16      1
# 5:     AP      F     16      2
# 6:    POL      M     16      1