我有一个数据框"df",它有三列。第三列包含一系列随机分配的数字(1到X),这些数字经常重复多次。虽然该列中的数字是随机分配的,但为了便于处理,它们已按从小到大排序。此外,Site和Date有多个条目,其中Site和Date的每个组合都有随机选择的数字(1到X)。df目前看起来是这样的(为节省篇幅已缩短):
Site | Date | Minute |
---|---|---|
BMA | 44648 | 4 |
BMA | 44648 | 4 |
BMA | 44648 | 4 |
BMA | 44648 | 4 |
BMA | 44648 | 4 |
BMA | 44648 | 13 |
BMA | 44648 | 13 |
BMA | 44648 | 13 |
BMA | 44648 | 27 |
BMA | 44648 | 27 |
BMA | 44648 | 27 |
BMA | 44648 | 27 |
BMA | 44648 | 27 |
BMA | 44773 | 2 |
BMA | 44773 | 2 |
BMA | 44773 | 2 |
BMA | 44773 | 2 |
BMA | 44773 | 2 |
BMA | 44773 | 12 |
BMA | 44773 | 12 |
BMA | 44773 | 12 |
BMC | 44648 | 3 |
BMC | 44648 | 3 |
BMC | 44648 | 3 |
BMC | 44648 | 3 |
BMC | 44648 | 3 |
BMC | 44648 | 3 |
BMC | 44648 | 44 |
BMC | 44648 | 44 |
BMC | 44648 | 44 |
BMC | 44648 | 44 |
BMC | 44648 | 60 |
BMC | 44648 | 60 |
BMC | 44648 | 60 |
BMC | 44648 | 60 |
Base R
# Within every Site/Date group, replace each Minute by its position among the
# group's sorted distinct Minute values (1 = smallest, 2 = next, ...).
with(dat, ave(Minute, Site, Date, FUN = function(v) {
  distinct_sorted <- sort(unique(v))
  match(v, distinct_sorted)
}))
# [1] 1 1 1 1 1 2 2 2 3 3 3 3 3 1 1 1 1 1 2 2 2 1 1 1 1 1 1 2 2 2 2 3 3 3 3
# Same computation, this time stored as a new column of `dat`.
dat[["NewMinute"]] <- with(dat, ave(Minute, Site, Date, FUN = function(v) {
  distinct_sorted <- sort(unique(v))
  match(v, distinct_sorted)
}))
identical(dat, dat2) # dat2 is the expected output from the question
# [1] TRUE
dplyr
library(dplyr)
# Group by Site/Date, rank each Minute among the group's sorted distinct
# values, then drop the grouping so later verbs see an ungrouped tibble.
group_by(dat, Site, Date) %>%
  mutate(
    NewMinute = {
      distinct_sorted <- sort(unique(Minute))
      match(Minute, distinct_sorted)
    }
  ) %>%
  ungroup()
# # A tibble: 35 x 4
# Site Date Minute NewMinute
# <chr> <int> <int> <int>
# 1 BMA 44648 4 1
# 2 BMA 44648 4 1
# 3 BMA 44648 4 1
# 4 BMA 44648 4 1
# 5 BMA 44648 4 1
# 6 BMA 44648 13 2
# 7 BMA 44648 13 2
# 8 BMA 44648 13 2
# 9 BMA 44648 27 3
# 10 BMA 44648 27 3
# # ... with 25 more rows
数据
# Sample input: 35 rows in three Site/Date groups, Minute sorted within group.
# Built from run lengths instead of a literal dput() dump.
dat <- data.frame(
  Site = rep(c("BMA", "BMC"), times = c(21L, 14L)),
  Date = rep(c(44648L, 44773L, 44648L), times = c(13L, 8L, 14L)),
  Minute = rep(
    c(4L, 13L, 27L, 2L, 12L, 3L, 44L, 60L),
    times = c(5L, 3L, 5L, 5L, 3L, 6L, 4L, 4L)
  ),
  stringsAsFactors = FALSE
)
# Expected output: the same rows plus the per-group rank of Minute.
dat2 <- dat
dat2$NewMinute <- rep(
  c(1L, 2L, 3L, 1L, 2L, 1L, 2L, 3L),
  times = c(5L, 3L, 5L, 5L, 3L, 6L, 4L, 4L)
)