r语言 - 对2组列使用pivot_longer



这是我的代码

# Example data: five evenly spaced sequences y_1..y_5 (1000 points each, from
# 0 up to a multiple of the literal 3.14) and one sine/cosine curve per
# sequence, flower_1..flower_5, each shifted by a constant offset.
# tibble() evaluates its arguments in order, so the flower_* columns can
# refer to the y_* columns defined just above them.
first_df <- tibble(
  y_1 = seq(0, 1 * 3.14, length.out = 1000),
  y_2 = seq(0, 2 * 3.14, length.out = 1000),
  y_3 = seq(0, 3 * 3.14, length.out = 1000),
  y_4 = seq(0, 0.2 * 3.14, length.out = 1000),
  y_5 = seq(0, 1 * 3.14, length.out = 1000),
  flower_1 = sin(y_1) - 2.5,
  flower_2 = cos(y_2),
  flower_3 = sin(y_3) + 2.5,
  flower_4 = cos(y_4) + 5,
  flower_5 = sin(y_5) + 7
)

我想做一个pivot_longer,输出为4列:x, y, values_flowers和values_y

输出应该像这样:

flowers    y     value...2 value...4
<chr>    <chr>     <dbl>     <dbl>
1 flower_1 y_1       -2.5    0      
2 flower_1 y_1       -2.50   0.00314
3 flower_1 y_1       -2.49   0.00629
4 flower_1 y_1       -2.49   0.00943
... ...     ...        ...     ... 

另一个解决方案:

library(tidyverse)

# Reshape both column families with a single pivot_longer() call.
# names_pattern splits every column name into a prefix ("y_" / "flower_") and
# a numeric suffix. The ".value" sentinel turns each distinct prefix into its
# own value column (y_, flower_), while the suffix is stored in `flowers`.
# The digit class must be written "\\d" inside an R string literal: a single
# backslash ("\d") is an unrecognized escape and fails at parse time.
# "\\d+" is used so that multi-digit suffixes (e.g. y_10) are captured whole.
first_df %>%
  pivot_longer(
    everything(),
    names_to = c(".value", "flowers"),
    names_pattern = "([a-z]+_)(\\d+)"
  ) %>%
  # transmute() evaluates left to right: `y` is built from the bare suffix
  # before `flowers` is overwritten with its prefixed label.
  transmute(
    y = paste0("y_", flowers),
    flowers = paste0("flower_", flowers),
    value_flower = flower_,
    value_y = y_
  ) %>%
  arrange(y, flowers)
#> # A tibble: 5,000 × 4
#>    y     flowers  value_flower value_y
#>    <chr> <chr>           <dbl>   <dbl>
#>  1 y_1   flower_1        -2.5  0      
#>  2 y_1   flower_1        -2.50 0.00314
#>  3 y_1   flower_1        -2.49 0.00629
#>  4 y_1   flower_1        -2.49 0.00943
#>  5 y_1   flower_1        -2.49 0.0126 
#>  6 y_1   flower_1        -2.48 0.0157 
#>  7 y_1   flower_1        -2.48 0.0189 
#>  8 y_1   flower_1        -2.48 0.0220 
#>  9 y_1   flower_1        -2.47 0.0251 
#> 10 y_1   flower_1        -2.47 0.0283 
#> # … with 4,990 more rows

这里有一个hack的方法:

# "Hack" approach: pivot everything to long form, then rebuild the y/flower
# pairing inside each original row.
first_df %>%
  # temp remembers which original row each long-format record came from.
  mutate(temp = row_number()) %>%
  pivot_longer(cols = -temp) %>%
  group_by(temp) %>%
  mutate(
    # Keep the name only on flower rows; y rows become NA and are dropped below.
    flowers = ifelse(startsWith(name, "flower"), name, NA_character_),
    # Within a group the first five records are the y columns; repeating them
    # twice lines each flower record up with the y record of the same index.
    y = rep(name[1:5], times = 2),
    value2 = value,
    value4 = rep(value[1:5], times = 2)
  ) %>%
  drop_na() %>%
  ungroup() %>%
  select(flowers, y, value2, value4) %>%
  arrange(flowers, y)

这给了我们:

# A tibble: 5,000 x 4
flowers  y     value2  value4
<chr>    <chr>  <dbl>   <dbl>
1 flower_1 y_1    -2.5  0      
2 flower_1 y_1    -2.50 0.00314
3 flower_1 y_1    -2.49 0.00629
4 flower_1 y_1    -2.49 0.00943
5 flower_1 y_1    -2.49 0.0126 
6 flower_1 y_1    -2.48 0.0157 
7 flower_1 y_1    -2.48 0.0189 
8 flower_1 y_1    -2.48 0.0220 
9 flower_1 y_1    -2.47 0.0251 
10 flower_1 y_1    -2.47 0.0283 
# ... with 4,990 more rows

这是另一个解决方案:

  1. 从first_df中只选取y列
  2. 将其转换为长格式,并按y排序
  3. 然后将结果赋值给数据框a
  4. 再从first_df开始,只保留flower列
  5. 将其转换为长格式,并按flowers排序
  6. 用bind_cols把数据框a的列并入
library(dplyr)
library(tidyr)
# Long table of the y columns (the first five columns of first_df),
# sorted by column name.
a <- first_df %>%
  select(1:5) %>%
  pivot_longer(cols = everything(), names_to = "y", values_to = "value...4") %>%
  arrange(y)

# Long table of the remaining (flower) columns, sorted the same way, with the
# y table glued on column-wise. bind_cols() matches rows purely by position,
# so this relies on both tables having the same row order after arrange().
first_df %>%
  select(-(1:5)) %>%
  pivot_longer(everything(), names_to = "flowers", values_to = "value...2") %>%
  arrange(flowers) %>%
  bind_cols(a)
flowers  value...2 y     value...4
<chr>        <dbl> <chr>     <dbl>
1 flower_1     -2.5  y_1     0      
2 flower_1     -2.50 y_1     0.00314
3 flower_1     -2.49 y_1     0.00629
4 flower_1     -2.49 y_1     0.00943
5 flower_1     -2.49 y_1     0.0126 
6 flower_1     -2.48 y_1     0.0157 
7 flower_1     -2.48 y_1     0.0189 
8 flower_1     -2.48 y_1     0.0220 
9 flower_1     -2.47 y_1     0.0251 
10 flower_1     -2.47 y_1     0.0283 
# ... with 4,990 more rows

您可以将pivot_longer与join函数结合使用:

library(dplyr)
library(tidyr)
# Long table with one record per (original row, column). Each column name is
# split at the underscore into a category ("y" / "flower") and an index.
temp_df <- first_df %>%
  mutate(rn = row_number()) %>%
  pivot_longer(
    cols = -rn,
    names_to = c("cat", "rn2"),
    names_pattern = "(.*)_(.*)"
  )

# Join the y records to the non-y records that share the same original row
# and index; the .x suffix then refers to y, the .y suffix to flower.
temp_df %>%
  filter(cat == "y") %>%
  left_join(filter(temp_df, cat != "y"), by = c("rn", "rn2")) %>%
  mutate(
    y = paste0(cat.x, "_", rn2),
    flowers = paste0(cat.y, "_", rn2)
  ) %>%
  select(y, flowers, value_flower = value.y, value_y = value.x) %>%
  arrange(y, flowers)

这返回

# A tibble: 5,000 x 4
y     flowers  value_flower value_y
<chr> <chr>           <dbl>   <dbl>
1 y_1   flower_1        -2.5  0      
2 y_1   flower_1        -2.50 0.00314
3 y_1   flower_1        -2.49 0.00629
4 y_1   flower_1        -2.49 0.00943
5 y_1   flower_1        -2.49 0.0126 
6 y_1   flower_1        -2.48 0.0157 
7 y_1   flower_1        -2.48 0.0189 
8 y_1   flower_1        -2.48 0.0220 
9 y_1   flower_1        -2.47 0.0251 
10 y_1   flower_1        -2.47 0.0283 
# ... with 4,990 more rows

这不是一个快速的解决方案,但它可以在一个管道中完成:

# Single-pipeline variant: pivot to long form, then pair y and flower records
# separately within each original row.
first_df %>%
  mutate(rn = row_number()) %>%
  pivot_longer(cols = -rn) %>%
  # rn2 is whatever follows the last underscore of the column name.
  mutate(rn2 = gsub(".*_", "", name)) %>%
  group_by(rn) %>%
  # group_map() calls the function once per group with the group's rows and
  # its key, and returns a list of the per-group results.
  group_map(function(rows, key) {
    y_rows <- filter(rows, grepl("y_", name))
    other_rows <- filter(rows, !grepl("y_", name))
    left_join(y_rows, other_rows, by = "rn2")
  }) %>%
  bind_rows() %>%
  select(
    y = name.x,
    flowers = name.y,
    value_y = value.x,
    value_flower = value.y
  )

相关内容

  • 没有找到相关文章

最新更新