我有df1和df2,我想检查一下df1和df2是否都包含在df3$lbtest中。检查并标记包含的和缺失的是一个好方法吗?
df1<- tructure(list(CHMTEST = c("Sodium", "Potassium", "Chloride",
"Bicarbonate (CO2)", "Calcium", "Glucose")), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
df2<-structure(list(COATEST = c("PT", "aPTT")), row.names = c(NA,
-2L), class = c("tbl_df", "tbl", "data.frame"))
df3<- structure(list(source = c("LBCH", "LBCH", "LBCH", "LBCH", "LBCO",
"LBHE", "LBUA"), lbcat = c("Chemistry", "Chemistry", "Chemistry",
"Chemistry", "Coagulation", "Hematology", "Urinalysis"), lbtest = c("Bicarbonate (CO2)",
"Glucose", "Potassium", "Sodium", "PT", "WBC", "Glucose")), row.names = c(NA,
-7L), class = c("tbl_df", "tbl", "data.frame"))
将从'df1', 'df2'数据集中提取的列连接到一个向量中,使用%in%
检查元素是否存在于'df3$lbtest'列中,并使用all
包装
all(c(df1$CHMTEST, df2$COATEST) %in% df3$lbtest)
[1] FALSE
返回FALSE
,因为某些元素仍然不存在,即
c(df1$CHMTEST, df2$COATEST) %in% df3$lbtest
[1] TRUE TRUE FALSE TRUE FALSE TRUE TRUE FALSE
如果要返回存在而不存在的元素,则创建单个向量,生成逻辑索引并将其用于
子集v1 <- c(df1$CHMTEST, df2$COATEST)
i1 <- v1 %in% df3$lbtest
v1[i1] # present
[1] "Sodium" "Potassium" "Bicarbonate (CO2)" "Glucose" "PT"
v1[!i1] # not present
[1] "Chloride" "Calcium" "aPTT"
如果我们想从'df3'检查这一点,使用subset
或dplyr::filter
library(dplyr)
df3 %>%
filter(lbtest %in% c(df1$CHMTEST, df2$COATEST))
# A tibble: 6 x 3
source lbcat lbtest
<chr> <chr> <chr>
1 LBCH Chemistry Bicarbonate (CO2)
2 LBCH Chemistry Glucose
3 LBCH Chemistry Potassium
4 LBCH Chemistry Sodium
5 LBCO Coagulation PT
6 LBUA Urinalysis Glucose
df3 %>%
filter(!lbtest %in% c(df1$CHMTEST, df2$COATEST))
# A tibble: 1 x 3
source lbcat lbtest
<chr> <chr> <chr>
1 LBHE Hematology WBC
或者另一种选择是使列名相同,将'df1', 'df2'绑定到单个数据集,并使用inner_join/anti_join
library(purrr)
> list(df1, df2) %>%
map_dfr(~ .x %>%
rename_with(~ 'lbtest', 1)) %>%
inner_join(df3, .)
Joining, by = "lbtest"
# A tibble: 6 x 3
source lbcat lbtest
<chr> <chr> <chr>
1 LBCH Chemistry Bicarbonate (CO2)
2 LBCH Chemistry Glucose
3 LBCH Chemistry Potassium
4 LBCH Chemistry Sodium
5 LBCO Coagulation PT
6 LBUA Urinalysis Glucose
> list(df1, df2) %>%
map_dfr(~ .x %>%
rename_with(~ 'lbtest', 1)) %>%
anti_join(df3, .)
Joining, by = "lbtest"
# A tibble: 1 x 3
source lbcat lbtest
<chr> <chr> <chr>
1 LBHE Hematology WBC