按R中的秩重新排序数据帧



我有以下数据帧(tibble(:

# A tibble: 5 × 11
ID     V1     V2    V3     V4     V5    R1    R2    R3      R4    R5
<chr> <chr>   <chr>  <chr>  <chr>  <chr> <chr> <chr> <chr>  <chr>  <chr>
1      A    X1      X2    X3     X4     X5     1     2     3      4     5
2      B    X6      X7    X8     X9     X10    5     4     3      2     1
3      C    X11     X12   X13    X14    X15    2     1     4      3     5
4      D    X16     X17   X18    X19    X20    1     2     3      4     5
5      E    X21     X22   X23    X24    X25    5     4     3      2     1

现在,我想对此进行变换,以便V1-V5中的值的位置根据它们在R1-R2中的相应秩进行重新排列,如下所示:

# A tibble: 5 × 6
ID     V1     V2    V3     V4     V5   
<chr> <chr>   <chr>  <chr>  <chr>  <chr> 
1      A    X1      X2    X3     X4     X5   
2      B    X10     X9   X8      X7     X6 
3      C    X12     X11   X14    X13    X15   
4      D    X16     X17   X18    X19    X20  
5      E    X25     X24   X23    X22    X21 
`

我们可以重塑为"长"格式,然后重新排序并重塑为"宽">

library(dplyr)
library(tidyr)
df1 %>% 
pivot_longer(cols = -ID, names_to = c(".value", "grp"), 
names_pattern = "(\D+)(\d+)") %>% 
group_by(ID) %>% 
mutate(V = replace(V, !is.na(R), V[order(R[!is.na(R)])])) %>%
ungroup %>%
select(-R) %>%   
pivot_wider(names_from = grp, values_from = V, names_prefix = "V")

-输出

# A tibble: 5 × 6
ID    V1    V2    V3    V4    V5   
<chr> <chr> <chr> <chr> <chr> <chr>
1 A     X1    X2    X3    X4    X5   
2 B     X10   X9    X8    X7    X6   
3 C     X12   X11   X14   X13   X15  
4 D     X16   X17   X18   X19   X20  
5 E     X25   X24   X23   X22   X21  

如果要删除"R"中的"0"或NA元素

df1 %>% 
pivot_longer(cols = -ID, names_to = c(".value", "grp"), 
names_pattern = "(\D+)(\d+)") %>%
filter(complete.cases(R), R != 0) %>%
group_by(ID) %>%
mutate(V = V[order(R)]) %>%
ungroup %>%
select(-R) %>%
pivot_wider(names_from = grp, values_from = V, names_prefix = "V")

或使用base R

cbind(df1[1], t(apply(df1[-1], 1, (x) x[1:5][as.numeric(x[6:10])])))

数据

df1 <- structure(list(ID = c("A", "B", "C", "D", "E"), V1 = c("X1", 
"X6", "X11", "X16", "X21"), V2 = c("X2", "X7", "X12", "X17", 
"X22"), V3 = c("X3", "X8", "X13", "X18", "X23"), V4 = c("X4", 
"X9", "X14", "X19", "X24"), V5 = c("X5", "X10", "X15", "X20", 
"X25"), R1 = c(1L, 5L, 2L, 1L, 5L), R2 = c(2L, 4L, 1L, 2L, 4L
), R3 = c(3L, 3L, 4L, 3L, 3L), R4 = c(4L, 2L, 3L, 4L, 2L), R5 = c(5L, 
1L, 5L, 5L, 1L)), class = "data.frame", row.names = c("1", "2", 
"3", "4", "5"))

这里是使用purrr的另一种方法。

library(dplyr)
library(purrr)
ids <- df1$ID
df1_v <- select(df1, V1:V5)
df1_r <- select(df1, R1:R5)
map2_dfr(set_names(transpose(df1_v), ids), transpose(df1_r),
~ set_names(unlist(.x[unlist(.y)]), names(df1_v)),
.id = "ID")
# # A tibble: 5 x 6
#   ID    V1    V2    V3    V4    V5   
#   <chr> <chr> <chr> <chr> <chr> <chr>
# 1 A     X1    X2    X3    X4    X5   
# 2 B     X10   X9    X8    X7    X6   
# 3 C     X12   X11   X14   X13   X15  
# 4 D     X16   X17   X18   X19   X20  
# 5 E     X25   X24   X23   X22   X21 

这里是使用嵌套数据帧的另一种方法,这可能非常有用。

library(dplyr)
library(purrr)
library(tidyr)
df1 %>% 
nest(V = starts_with("V"),
R = starts_with("R")) %>% 
mutate(V = map2(V, R, ~ set_names((.x)[unlist(.y)], names(.x)))) %>% 
select(-R) %>% 
unnest(V)

最新更新