如果我有这两个数据:
l <- list(list(var = "bb",b = "mod1", c = 3, d = 5), list(var = "hh", b = "mod2", c = 2, d = 4), list(var = "bb", b = "mod2", c = 2, d = 4),list(var = "bb", b = "mod", c = 2, d = 4))
l2 <- map(l, ~ data.frame(.))
l3 <- map_dfr(l2, ~ mutate_all(., as.character))
l4 <- as_tibble(l3)
# var b c d
# <chr> <chr> <chr> <chr>
# 1 bb mod1 3 5
# 2 hh mod2 2 4
# 3 bb mod2 2 4
# 4 bb mod 2 4
和
l <- list(list(var = "bb",b = "mod", c = 3, d = 5), list(var = "a", b = "mod2", c = 2, d = 4), list(var = "hh", b = "mod2", c = 2, d = 4))
l2 <- map(l, ~ data.frame(.))
l3 <- map_dfr(l2, ~ mutate_all(., as.character))
l5 <- as_tibble(l3)
# var b c d
# <chr> <chr> <chr> <chr>
# 1 bb mod 3 5
# 2 a mod2 2 4
# 3 hh mod2 2 4
我想计算l4
和l5
的差值对于那些在列var和b中有相似输入的列c的值
所需输出
var b c
bb mod -1
hh mod2 0
您可以先按列var
和b
对两个标题进行inner_join
,然后计算它们的差值。只保留使用select
的相关列。
在inner_join
中,您可以通过suffix = c("_l4", "_l5")
指定后缀,以清楚地显示值的来源。
library(tidyverse)
inner_join(l4, l5, by = c("var", "b"), suffix = c("_l4", "_l5")) %>%
mutate(c = as.numeric(c_l4) - as.numeric(c_l5)) %>%
select(var, b, c)
# A tibble: 2 × 3
var b c
<chr> <chr> <dbl>
1 hh mod2 0
2 bb mod -1
数据l4 <- structure(list(var = c("bb", "hh", "bb", "bb"), b = c("mod1",
"mod2", "mod2", "mod"), c = c("3", "2", "2", "2"), d = c("5",
"4", "4", "4")), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-4L))
l5 <- structure(list(var = c("bb", "a", "hh"), b = c("mod", "mod2",
"mod2"), c = c("3", "2", "2"), d = c("5", "4", "4")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -3L))
可以先行绑定,然后按组汇总:
library(dplyr)
bind_rows(l5, l4) %>%
group_by(var, b) %>%
filter(n() == 2) %>%
summarise(c = diff(as.numeric(c))) %>%
ungroup()
# # A tibble: 2 × 3
# var b c
# <chr> <chr> <dbl>
# 1 bb mod -1
# 2 hh mod2 0