R语言 - 如何计算两个 tibble 之间的差值?



如果我有这两个数据:

# The helpers used below (map, map_dfr, mutate, across, as_tibble) come from
# the tidyverse, so it has to be attached for the snippet to run.
library(tidyverse)

# Raw records: one named list per row.
l <- list(
  list(var = "bb", b = "mod1", c = 3, d = 5),
  list(var = "hh", b = "mod2", c = 2, d = 4),
  list(var = "bb", b = "mod2", c = 2, d = 4),
  list(var = "bb", b = "mod", c = 2, d = 4)
)
# Turn every record into a one-row data frame.
l2 <- map(l, ~ data.frame(.x))
# Convert every column to character, then row-bind the pieces.
# across() replaces the superseded mutate_all().
l3 <- map_dfr(l2, ~ mutate(.x, across(everything(), as.character)))
l4 <- as_tibble(l3)
#   var   b     c     d    
#   <chr> <chr> <chr> <chr>
# 1 bb    mod1  3     5    
# 2 hh    mod2  2     4    
# 3 bb    mod2  2     4    
# 4 bb    mod   2     4  

# The helpers used below (map, map_dfr, mutate, across, as_tibble) come from
# the tidyverse, so it has to be attached for the snippet to run.
library(tidyverse)

# Raw records: one named list per row.
l <- list(
  list(var = "bb", b = "mod", c = 3, d = 5),
  list(var = "a", b = "mod2", c = 2, d = 4),
  list(var = "hh", b = "mod2", c = 2, d = 4)
)
# Turn every record into a one-row data frame.
l2 <- map(l, ~ data.frame(.x))
# Convert every column to character, then row-bind the pieces.
# across() replaces the superseded mutate_all().
l3 <- map_dfr(l2, ~ mutate(.x, across(everything(), as.character)))
l5 <- as_tibble(l3)
#   var   b     c     d    
#   <chr> <chr> <chr> <chr>
# 1 bb    mod   3     5    
# 2 a     mod2  2     4    
# 3 hh    mod2  2     4 

我想针对列 var 和 b 取值都相同的行,计算 l4 与 l5 中列 c 的差值(l4 减 l5)。

所需输出

var b   c
bb mod -1    
hh mod2 0

您可以先按列 var 和 b 对两个 tibble 进行 inner_join,然后计算它们的差值,最后用 select 只保留相关的列。

在 inner_join 中,您可以通过 suffix = c("_l4", "_l5") 指定后缀,以清楚地显示值的来源。

library(tidyverse)

# Keep only the (var, b) pairs that occur in both tables. The non-key columns
# shared by both inputs (c, d) get the "_l4" / "_l5" suffix, which shows
# which table each value came from.
inner_join(l4, l5, by = c("var", "b"), suffix = c("_l4", "_l5")) %>%
  # c is stored as character, so convert before subtracting (l4 minus l5).
  mutate(c = as.numeric(c_l4) - as.numeric(c_l5)) %>%
  select(var, b, c)
# # A tibble: 2 × 3
#   var   b         c
#   <chr> <chr> <dbl>
# 1 hh    mod2      0
# 2 bb    mod      -1

数据
# Reproducible copies of the two input tables (all columns are character).
# The class vector and compact row names are set by hand, so this part runs
# with base R alone.
l4 <- structure(
  list(
    var = c("bb", "hh", "bb", "bb"),
    b = c("mod1", "mod2", "mod2", "mod"),
    c = c("3", "2", "2", "2"),
    d = c("5", "4", "4", "4")
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -4L)
)

l5 <- structure(
  list(
    var = c("bb", "a", "hh"),
    b = c("mod", "mod2", "mod2"),
    c = c("3", "2", "2"),
    d = c("5", "4", "4")
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -3L)
)

也可以先按行合并(bind_rows)两个 tibble,然后按组汇总:

library(dplyr)

# Stack l5 on top of l4 (l5 first, so diff() below yields l4 minus l5) and
# record in `src` which table each row came from.
bind_rows(l5 = l5, l4 = l4, .id = "src") %>%
  group_by(var, b) %>%
  # Keep only keys that appear once in each table. n() == 2 alone would also
  # accept a key that occurs twice within a single table.
  filter(n() == 2, n_distinct(src) == 2) %>%
  # c is stored as character; .groups = "drop" returns an ungrouped tibble.
  summarise(c = diff(as.numeric(c)), .groups = "drop")
# # A tibble: 2 × 3
#   var   b         c
#   <chr> <chr> <dbl>
# 1 bb    mod      -1
# 2 hh    mod2      0

相关内容

  • 没有找到相关文章

最新更新