我有以下数据帧(tibble(:
# A tibble: 5 × 11
ID V1 V2 V3 V4 V5 R1 R2 R3 R4 R5
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 A X1 X2 X3 X4 X5 1 2 3 4 5
2 B X6 X7 X8 X9 X10 5 4 3 2 1
3 C X11 X12 X13 X14 X15 2 1 4 3 5
4 D X16 X17 X18 X19 X20 1 2 3 4 5
5 E X21 X22 X23 X24 X25 5 4 3 2 1
现在,我想对此进行变换,以便V1-V5中的值的位置根据它们在R1-R2中的相应秩进行重新排列,如下所示:
# A tibble: 5 × 6
ID V1 V2 V3 V4 V5
<chr> <chr> <chr> <chr> <chr> <chr>
1 A X1 X2 X3 X4 X5
2 B X10 X9 X8 X7 X6
3 C X12 X11 X14 X13 X15
4 D X16 X17 X18 X19 X20
5 E X25 X24 X23 X22 X21
`
我们可以重塑为"长"格式,然后重新排序并重塑为"宽">
library(dplyr)
library(tidyr)
df1 %>%
pivot_longer(cols = -ID, names_to = c(".value", "grp"),
names_pattern = "(\D+)(\d+)") %>%
group_by(ID) %>%
mutate(V = replace(V, !is.na(R), V[order(R[!is.na(R)])])) %>%
ungroup %>%
select(-R) %>%
pivot_wider(names_from = grp, values_from = V, names_prefix = "V")
-输出
# A tibble: 5 × 6
ID V1 V2 V3 V4 V5
<chr> <chr> <chr> <chr> <chr> <chr>
1 A X1 X2 X3 X4 X5
2 B X10 X9 X8 X7 X6
3 C X12 X11 X14 X13 X15
4 D X16 X17 X18 X19 X20
5 E X25 X24 X23 X22 X21
如果要删除"R"中的"0"或NA
元素
df1 %>%
pivot_longer(cols = -ID, names_to = c(".value", "grp"),
names_pattern = "(\D+)(\d+)") %>%
filter(complete.cases(R), R != 0) %>%
group_by(ID) %>%
mutate(V = V[order(R)]) %>%
ungroup %>%
select(-R) %>%
pivot_wider(names_from = grp, values_from = V, names_prefix = "V")
或使用base R
cbind(df1[1], t(apply(df1[-1], 1, (x) x[1:5][as.numeric(x[6:10])])))
数据
df1 <- structure(list(ID = c("A", "B", "C", "D", "E"), V1 = c("X1",
"X6", "X11", "X16", "X21"), V2 = c("X2", "X7", "X12", "X17",
"X22"), V3 = c("X3", "X8", "X13", "X18", "X23"), V4 = c("X4",
"X9", "X14", "X19", "X24"), V5 = c("X5", "X10", "X15", "X20",
"X25"), R1 = c(1L, 5L, 2L, 1L, 5L), R2 = c(2L, 4L, 1L, 2L, 4L
), R3 = c(3L, 3L, 4L, 3L, 3L), R4 = c(4L, 2L, 3L, 4L, 2L), R5 = c(5L,
1L, 5L, 5L, 1L)), class = "data.frame", row.names = c("1", "2",
"3", "4", "5"))
这里是使用purrr
的另一种方法。
library(dplyr)
library(purrr)
ids <- df1$ID
df1_v <- select(df1, V1:V5)
df1_r <- select(df1, R1:R5)
map2_dfr(set_names(transpose(df1_v), ids), transpose(df1_r),
~ set_names(unlist(.x[unlist(.y)]), names(df1_v)),
.id = "ID")
# # A tibble: 5 x 6
# ID V1 V2 V3 V4 V5
# <chr> <chr> <chr> <chr> <chr> <chr>
# 1 A X1 X2 X3 X4 X5
# 2 B X10 X9 X8 X7 X6
# 3 C X12 X11 X14 X13 X15
# 4 D X16 X17 X18 X19 X20
# 5 E X25 X24 X23 X22 X21
这里是使用嵌套数据帧的另一种方法,这可能非常有用。
library(dplyr)
library(purrr)
library(tidyr)
df1 %>%
nest(V = starts_with("V"),
R = starts_with("R")) %>%
mutate(V = map2(V, R, ~ set_names((.x)[unlist(.y)], names(.x)))) %>%
select(-R) %>%
unnest(V)