R:以某种方式识别两个数据框中的值



我有两个看起来几乎相同的数据框,我想以某种方式找出Data_1中有而Data_2中没有的值。这两个数据框都比较大,示例如下:

Dataframe 1:

# Build the first example data frame: one row per animal, together with the
# season and room recorded for it.
Animal <- c(
  "bird", "Blue Catfish", "Cat", "Buffalo", "Lion", "Monkey", "Horse",
  "Butterfly", "Ant", "elephant", "Snake", "Chameloen", "Cow"
)
season <- c(
  "S1", "S1", "S2", "S2", "S3", "S4", "S4", "S15", "S3", "S2", "S3", "S1", "S3"
)
ROOM <- c(111, 222, 444, 222, 111, 444, 222, 111, 222, 111, 444, 222, 111)
Data_1 <- data.frame(Animal = Animal, season = season, ROOM = ROOM)
> Data_1
Animal season ROOM
1          bird     S1  111
2  Blue Catfish     S1  222
3           Cat     S2  444
4       Buffalo     S2  222
5          Lion     S3  111
6        Monkey     S4  444
7         Horse     S4  222
8     Butterfly    S15  111
9           Ant     S3  222
10     elephant     S2  111
11        Snake     S3  444
12    Chameloen     S1  222
13          Cow     S3  111

Dataframe 2:

# Build the second example data frame with the same three columns; it has
# fewer rows and some different animals than Data_1.
Animal <- c(
  "bird", "Mouse", "Cat", "Zebra", "Lion", "Monkey", "Horse",
  "Leopard", "Ant", "elephant", "Bison"
)
season <- c("S1", "S1", "S2", "S2", "S3", "S4", "S4", "S15", "S3", "S2", "S3")
ROOM <- c(111, 222, 444, 222, 111, 444, 222, 111, 222, 111, 444)
Data_2 <- data.frame(Animal = Animal, season = season, ROOM = ROOM)
> Data_2
Animal season ROOM
1      bird     S1  111
2     Mouse     S1  222
3       Cat     S2  444
4     Zebra     S2  222
5      Lion     S3  111
6    Monkey     S4  444
7     Horse     S4  222
8   Leopard    S15  111
9       Ant     S3  222
10 elephant     S2  111
11    Bison     S3  444

我想比较这两个数据框,并找出Data_1中有而Data_2中没有的动物名称。比较应按每个季节(season)、每个房间(ROOM)分别进行。例如,两个数据框中S2季节222房间的动物不一致,此时应返回该动物的名称。有什么建议吗?

我们可以用anti_join

library(dplyr)
# Keep the rows of Data_1 that have no counterpart in Data_2. ROOM is part of
# the key because the comparison is per season *and* per room; without it an
# animal that appears in the same season but in a different room would be
# treated as present in Data_2.
anti_join(Data_1, Data_2, by = c("Animal", "season", "ROOM"))
Animal season ROOM
1 Blue Catfish     S1  222
2      Buffalo     S2  222
3    Butterfly    S15  111
4        Snake     S3  444
5    Chameloen     S1  222
6          Cow     S3  111

您也可以使用left_join()来检查。

# Recreate both example data frames so this snippet can be run on its own.

# First data frame: 13 animals.
Animal <- c(
  "bird", "Blue Catfish", "Cat", "Buffalo", "Lion", "Monkey", "Horse",
  "Butterfly", "Ant", "elephant", "Snake", "Chameloen", "Cow"
)
season <- c(
  "S1", "S1", "S2", "S2", "S3", "S4", "S4", "S15", "S3", "S2", "S3", "S1", "S3"
)
ROOM <- c(111, 222, 444, 222, 111, 444, 222, 111, 222, 111, 444, 222, 111)
Data_1 <- data.frame(Animal = Animal, season = season, ROOM = ROOM)

# Second data frame: 11 animals, built from the same (reused) vector names.
Animal <- c(
  "bird", "Mouse", "Cat", "Zebra", "Lion", "Monkey", "Horse",
  "Leopard", "Ant", "elephant", "Bison"
)
season <- c("S1", "S1", "S2", "S2", "S3", "S4", "S4", "S15", "S3", "S2", "S3")
ROOM <- c(111, 222, 444, 222, 111, 444, 222, 111, 222, 111, 444)
Data_2 <- data.frame(Animal = Animal, season = season, ROOM = ROOM)
# Pair every animal in Data_1 with the Data_2 animal recorded for the same
# season and room, and keep only the pairs where the two animals differ.
# NOTE(review): if Data_2 can hold several animals for one season/room, each
# Data_1 animal there is paired with all of them and will be reported even
# when it is present; an anti_join() on all three columns avoids that.
Data_1 %>%
  left_join(Data_2, by = c("season", "ROOM"), suffix = c("_1", "_2")) %>%
  # filter() drops rows whose condition evaluates to NA, so a season/room
  # that does not exist in Data_2 (Animal_2 is NA) must be kept explicitly.
  filter(is.na(Animal_2) | Animal_1 != Animal_2)

输出
Animal_1 season ROOM Animal_2
1 Blue Catfish     S1  222    Mouse
2      Buffalo     S2  222    Zebra
3    Butterfly    S15  111  Leopard
4        Snake     S3  444    Bison
5    Chameloen     S1  222    Mouse
6          Cow     S3  111     Lion

使用 data.table:

library(data.table)
# Convert both data frames to data.tables in place, then key them on the same
# three columns (keying also sorts each table by those columns).
setDT(Data_1)
setDT(Data_2)
setkeyv(Data_1, c("season", "ROOM", "Animal"))
setkeyv(Data_2, c("season", "ROOM", "Animal"))
# Anti-join on the shared key: rows of Data_1 that have no match in Data_2.
Data_1[!Data_2]
##          Animal season ROOM
## 1: Blue Catfish     S1  222
## 2:    Chameloen     S1  222
## 3:    Butterfly    S15  111
## 4:      Buffalo     S2  222
## 5:          Cow     S3  111
## 6:        Snake     S3  444

最新更新