问题的答案"R根据参考列的最小值或最大值创建包含另一列值的列"做我想要的,但它不工作时,参考列有一个以上的数字或日期。
library(dplyr)
df <- data.frame(ID = c(1, 1, 1, 2, 2, 2, 3, 3, 3),
Value = c(34, 54, 17, 2, 25, 89, 23, 93, 90),
Int1 = rep(1:3, 3),
Int2 = rep(11:13, 3),
Dbl1 = rep(c(1.1, 2.2, 3.3), 3),
Dbl2 = rep(c(11.1, 12.2, 13.3), 3),
Date = as.Date(c("2001-01-01", "2002-02-02", "2003-03-03", "2001-01-01", "2002-02-02", "2003-03-03", "2001-01-01", "2002-02-02", "2003-03-03")))
df
ID Value Int1 Int2 Dbl1 Dbl2 Date
1 1 34 1 11 1.1 11.1 2001-01-01
2 1 54 2 12 2.2 12.2 2002-02-02
3 1 17 3 13 3.3 13.3 2003-03-03
4 2 2 1 11 1.1 11.1 2001-01-01
5 2 25 2 12 2.2 12.2 2002-02-02
6 2 89 3 13 3.3 13.3 2003-03-03
7 3 23 1 11 1.1 11.1 2001-01-01
8 3 93 2 12 2.2 12.2 2002-02-02
9 3 90 3 13 3.3 13.3 2003-03-03
df %>%
group_by(ID) %>%
mutate(.Int1 = Value[min(Int1)],
.Int2 = Value[min(Int2)],
.Dbl1 = Value[min(Dbl1)],
.Dbl2 = Value[min(Dbl2)],
.Date = Value[min(Date)]) %>%
ungroup
# A tibble: 9 × 12
ID Value Int1 Int2 Dbl1 Dbl2 Date .Int1 .Int2 .Dbl1 .Dbl2 .Date
<dbl> <dbl> <int> <int> <dbl> <dbl> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 34 1 11 1.1 11.1 2001-01-01 34 NA 34 NA NA
2 1 54 2 12 2.2 12.2 2002-02-02 34 NA 34 NA NA
3 1 17 3 13 3.3 13.3 2003-03-03 34 NA 34 NA NA
4 2 2 1 11 1.1 11.1 2001-01-01 2 NA 2 NA NA
5 2 25 2 12 2.2 12.2 2002-02-02 2 NA 2 NA NA
6 2 89 3 13 3.3 13.3 2003-03-03 2 NA 2 NA NA
7 3 23 1 11 1.1 11.1 2001-01-01 23 NA 23 NA NA
8 3 93 2 12 2.2 12.2 2002-02-02 23 NA 23 NA NA
9 3 90 3 13 3.3 13.3 2003-03-03 23 NA 23 NA NA
我需要做什么才能使。int2,。dbl2和。date中的结果不是NA,并且与。int1和。dbl1相同?
这不是我们需要索引的min
最大值,它将是which.min
的位置索引,否则,ID 1的min(Int2)
将是11
,但ID 1只有3行。
df %>%
group_by(ID) %>%
mutate(.Int1 = Value[which.min(Int1)],
.Int2 = Value[which.min(Int2)],
.Dbl1 = Value[which.min(Dbl1)],
.Dbl2 = Value[which.min(Dbl2)],
.Date = Value[which.min(Date)]) %>%
ungroup
与产出
# A tibble: 9 × 12
ID Value Int1 Int2 Dbl1 Dbl2 Date .Int1 .Int2 .Dbl1 .Dbl2 .Date
<dbl> <dbl> <int> <int> <dbl> <dbl> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 34 1 11 1.1 11.1 2001-01-01 34 34 34 34 34
2 1 54 2 12 2.2 12.2 2002-02-02 34 34 34 34 34
3 1 17 3 13 3.3 13.3 2003-03-03 34 34 34 34 34
4 2 2 1 11 1.1 11.1 2001-01-01 2 2 2 2 2
5 2 25 2 12 2.2 12.2 2002-02-02 2 2 2 2 2
6 2 89 3 13 3.3 13.3 2003-03-03 2 2 2 2 2
7 3 23 1 11 1.1 11.1 2001-01-01 23 23 23 23 23
8 3 93 2 12 2.2 12.2 2002-02-02 23 23 23 23 23
9 3 90 3 13 3.3 13.3 2003-03-03 23 23 23 23 23
使用across
df %>%
group_by(ID) %>%
mutate(across(-Value, ~ Value[which.min(.x)],
.names = ".{.col}")) %>%
ungroup
与产出
# A tibble: 9 × 12
ID Value Int1 Int2 Dbl1 Dbl2 Date .Int1 .Int2 .Dbl1 .Dbl2 .Date
<dbl> <dbl> <int> <int> <dbl> <dbl> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 34 1 11 1.1 11.1 2001-01-01 34 34 34 34 34
2 1 54 2 12 2.2 12.2 2002-02-02 34 34 34 34 34
3 1 17 3 13 3.3 13.3 2003-03-03 34 34 34 34 34
4 2 2 1 11 1.1 11.1 2001-01-01 2 2 2 2 2
5 2 25 2 12 2.2 12.2 2002-02-02 2 2 2 2 2
6 2 89 3 13 3.3 13.3 2003-03-03 2 2 2 2 2
7 3 23 1 11 1.1 11.1 2001-01-01 23 23 23 23 23
8 3 93 2 12 2.2 12.2 2002-02-02 23 23 23 23 23
9 3 90 3 13 3.3 13.3 2003-03-03 23 23 23 23 23