如何使用 pivot_wider 按组进行汇总

  • 本文关键字:如何使用 pivot_wider r
  • 更新时间 :
  • 英文 :

# Sample input from the question: nine rows, six columns, every value a
# "0"/"1" flag kept as a character string.
df <- data.frame(
  plant1    = c("1", "0", "1", "0", "0", "1", "0", "0", "1"),
  plant2    = c("0", "1", "0", "1", "1", "0", "1", "1", "0"),
  Public.1  = c("1", "0", "1", "0", "0", "0", "0", "0", "1"),
  Private.1 = c("0", "0", "0", "0", "0", "1", "0", "0", "0"),
  Public.2  = c("0", "0", "0", "1", "1", "0", "0", "1", "0"),
  Private.2 = c("0", "1", "0", "0", "0", "0", "1", "0", "0")
)

# Print the data frame for inspection.
df

如何使用 pivot_wider,根据 plant1 和 plant2 汇总 Public 和 Private 列?背后的逻辑:每个 plant 只能登记为 Public 或 Private 之一;plant1 对应 Public.1 和 Private.1,plant2 对应 Public.2 和 Private.2。

预期输出:

plant1 plant2 Public Private 
1      1      0      1       0
2      0      1      0       1      
3      1      0      1       0
4      0      1      1       0
5      0      1      1       0
6      1      0      0       1
7      0      1      0       1
8      0      1      1       0 
9      1      0      1       0

我们可以使用 pivot_longer,并配合 names_sep 参数:

library(tidyr)
library(dplyr)

# Reshape the Public.N / Private.N columns to long form. Each column name is
# split at the literal dot: the part before it names the output column (the
# ".value" sentinel) and the part after it goes into the helper column `grp`.
pivot_longer(
  df,
  cols = matches("Public|Private"),
  names_to = c(".value", "grp"),
  # names_sep is a regular expression, so the dot is escaped; inside an R
  # string literal the backslash must itself be doubled. A single "\." is an
  # unrecognized escape and the code would not parse.
  names_sep = "\\."
) %>%
  # The suffix column was only needed to perform the split, so drop it.
  select(-grp)

-输出

# A tibble: 18 x 4
plant1 plant2 Public Private
<chr>  <chr>  <chr>  <chr>  
1 1      0      1      0      
2 1      0      0      0      
3 0      1      0      0      
4 0      1      0      1      
5 1      0      1      0      
6 1      0      0      0      
7 0      1      0      0      
8 0      1      1      0      
9 0      1      0      0      
10 0      1      1      0      
11 1      0      0      1      
12 1      0      0      0      
13 0      1      0      0      
14 0      1      0      1      
15 0      1      0      0      
16 0      1      1      0      
17 1      0      1      0      
18 1      0      0      0   

您可以使用pivot_longer:

# Reshape every column that is not a plant column to long form. The capture
# group keeps the text before the dot as the output column name (".value").
tidyr::pivot_longer(
  df,
  cols = -starts_with("plant"),
  names_to = ".value",
  # The dot is escaped for the regex and the backslash doubled for the R
  # string literal; a single "\." is an unrecognized escape and fails to parse.
  names_pattern = "(.*)\\."
)
#  plant1 plant2 Public Private
#   <chr>  <chr>  <chr>  <chr>  
# 1 1      0      1      0      
# 2 1      0      0      0      
# 3 0      1      0      0      
# 4 0      1      0      1      
# 5 1      0      1      0      
# 6 1      0      0      0      
# 7 0      1      0      0      
# 8 0      1      1      0      
# 9 0      1      0      0   
#...
#...   

这给出了您的预期输出:

首先用 type.convert 将字符列转换为整数,然后使用 rowwise() 和 sum() 逐行求和。

library(dplyr)

df %>%
  # Convert the character "0"/"1" columns to integers so they can be summed.
  type.convert(as.is = TRUE) %>%
  # Work one row at a time so sum() combines the two flags of each row.
  rowwise() %>%
  mutate(
    Public = sum(c(Public.1, Public.2)),
    Private = sum(c(Private.1, Private.2))
  ) %>%
  # Drop the row-wise grouping so later verbs operate on whole columns again.
  ungroup() %>%
  select(plant1, plant2, Public, Private)

输出:

plant1 plant2 Public Private
<int>  <int>  <int>   <int>
1      1      0      1       0
2      0      1      0       1
3      1      0      1       0
4      0      1      1       0
5      0      1      1       0
6      1      0      0       1
7      0      1      0       1
8      0      1      1       0
9      1      0      1       0

最新更新