假设A和B是两个dataframes,查找A>0且B<0的位置(即在B小于零的条件下A大于零),最快的方法是什么?
两个dataframes具有相同的尺寸,并且非常大(>70GB),比较按对应的列进行:A中的column1对B中的column1,依此类推。
# Sample data: two numeric vectors of equal length, compared element-wise.
A <- c(-31.621, -40.149, -21.519, -33.773, -34.023, -38.345, -39.238,
       -30.221, -22.278, 27.055)
B <- c(-29.321, -26.398, -25.663, -26.423, -23.721, -17.418, -15.979,
       -21.927, -16.398, -21.084)
这就是您想要的吗?
选项1-先连接,再处理条件
library(tibble)
library(dplyr)

# enframe() turns each vector into a two-column tibble: ID and value.
A <- c(-31.621, -40.149, -21.519, -33.773, -34.023, -38.345, -39.238,
       -30.221, -22.278, 27.055) %>%
  enframe(name = "ID")
B <- c(-29.321, -26.398, -25.663, -26.423, -23.721, -17.418, -15.979,
       -21.927, -16.398, -21.084) %>%
  enframe(name = "ID")

# Pair the two tables by ID; the suffixes yield value_A and value_B.
# `condition` is TRUE only where value_A > 0 and value_B < 0; the final
# case_when() branch maps every remaining row to FALSE.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
flagged <- A %>%
  left_join(B, by = "ID", suffix = c("_A", "_B")) %>%
  mutate(condition = case_when(
    value_A > 0 & value_B < 0 ~ TRUE,
    TRUE ~ FALSE
  ))
flagged
#> # A tibble: 10 x 4
#> ID value_A value_B condition
#> <int> <dbl> <dbl> <lgl>
#> 1 1 -31.6 -29.3 FALSE
#> 2 2 -40.1 -26.4 FALSE
#> 3 3 -21.5 -25.7 FALSE
#> 4 4 -33.8 -26.4 FALSE
#> 5 5 -34.0 -23.7 FALSE
#> 6 6 -38.3 -17.4 FALSE
#> 7 7 -39.2 -16.0 FALSE
#> 8 8 -30.2 -21.9 FALSE
#> 9 9 -22.3 -16.4 FALSE
#> 10 10 27.1 -21.1 TRUE
选项2-先处理条件,再连接
library(tibble)
library(dplyr)

# Keep only the strictly positive entries of A ...
A <- c(-31.621, -40.149, -21.519, -33.773, -34.023, -38.345, -39.238,
       -30.221, -22.278, 27.055) %>%
  enframe(name = "ID") %>%
  filter(value > 0)
# ... and only the strictly negative entries of B.
B <- c(-29.321, -26.398, -25.663, -26.423, -23.721, -17.418, -15.979,
       -21.927, -16.398, -21.084) %>%
  enframe(name = "ID") %>%
  filter(value < 0)

# inner_join() keeps an ID only when it survived both filters. A left join
# would also return IDs from A with no negative partner in B (value_B = NA),
# which does not satisfy "A > 0 and B < 0".
matched <- A %>%
  inner_join(B, by = "ID", suffix = c("_A", "_B"))
matched
#> # A tibble: 1 x 3
#> ID value_A value_B
#> <int> <dbl> <dbl>
#> 1 10 27.1 -21.1
选项3-处理条件并查找与条件匹配的索引
library(tibble)
library(dplyr)

# Rows of A whose value is strictly positive, keyed by ID.
A <- filter(
  enframe(
    c(-31.621, -40.149, -21.519, -33.773, -34.023, -38.345, -39.238,
      -30.221, -22.278, 27.055),
    name = "ID"
  ),
  value > 0
)

# Rows of B whose value is strictly negative, keyed by ID.
B <- filter(
  enframe(
    c(-29.321, -26.398, -25.663, -26.423, -23.721, -17.418, -15.979,
      -21.927, -16.398, -21.084),
    name = "ID"
  ),
  value < 0
)

# IDs present in both filtered tables are the positions meeting both tests.
intersect(A[["ID"]], B[["ID"]])
#> [1] 10
由reprex包(v0.3.0)于2020-02-25创建