将表与R中另一个表的引用结合起来



我有一个如下的df:

> df
US  Canada Japan
food1 "1" "5"    "7"  
food2 "1" "9"    "10" 
food3 "0" "6"    "5"  

以及其他3个充当翻译器的dfs:

> US
Name   Corr. Name
1 food1    Pink Lady
2 food2 Blood Orange
3 food3   Asian Pear
> Canada
Name   Corr. Name
1 food1   Honeycrisp
2 food2 Navel Orange
3 food3 Callery Pear
> Japan
Name   Corr. Name
1 food1         Gala
2 food2 Navel Orange
3 food3   Bosc Pears

我想根据其他3个dfs将第一个df翻译为以下内容:

> df
US  Canada Japan
Pink Lady      1    NA     NA  
Honeycrisp     NA   5      NA  
Gala           NA   NA     7  
Blood Orange   1    NA     NA
Navel Orange   NA   9      10 
Asian Pear     0    NA     NA
Callery Pear   NA   6      NA 
Bosc Pears     NA   NA     5  

以下是可以重现表格的代码:

df <- structure(c("1", "1", "0", "5", "9", "6", "7", "10", "5"), .Dim = c(3L, 
3L), .Dimnames = list(c("food1", "food2", "food3"), c("US", "Canada", 
"Japan")))
US <- structure(list(Name = structure(1:3, .Label = c("food1", "food2", 
"food3"), class = "factor"), Corr..Name = structure(3:1, .Label = c("Asian Pear", 
"Blood Orange", "Pink Lady"), class = "factor")), class = "data.frame", row.names = c(NA, 
3L))
Canada <- structure(list(Name = structure(1:3, .Label = c("food1", "food2", 
"food3"), class = "factor"), Corr..Name = structure(4:6, .Label = c("Asian Pear", 
"Blood Orange", "Pink Lady", "Honeycrisp", "Navel Orange", "Callery Pear"
), class = "factor")), class = "data.frame", row.names = c(NA, 
3L))
Japan <- structure(list(Name = structure(1:3, .Label = c("food1", "food2", 
"food3"), class = "factor"), Corr..Name = structure(c(4L, 7L, 
6L), .Label = c("Asian Pear", "Blood Orange", "Pink Lady", "Gala", 
"Mandarin Orange", "Bosc Pears", "Navel Orange"), class = "factor")), class = "data.frame", row.names = c(NA, 
3L))

编辑以获取更多问题:如果我重复了那个国家呢?示例:

> df
col1   col2  col3 col4
country   US Canada Japan   US
food1      1      5     7    3
food2      1      9    10    2
food3      0      6     5    4

再现代码

> dput(df)
structure(list(col1 = structure(c(3L, 2L, 2L, 1L), .Label = c("0", 
"1", "US"), class = "factor"), col2 = structure(c(4L, 1L, 3L, 
2L), .Label = c("5", "6", "9", "Canada"), class = "factor"), 
col3 = structure(c(4L, 3L, 1L, 2L), .Label = c("10", "5", 
"7", "Japan"), class = "factor"), col4 = c("US", "3", "2", 
"4")), row.names = c("country", "food1", "food2", "food3"
), class = "data.frame")

所需输出:

> df
country        US  Canada Japan US 
Pink Lady      1    NA     NA   3
Honeycrisp     NA   5      NA   NA  
Gala           NA   NA     7    NA 
Blood Orange   1    NA     NA   2
Navel Orange   NA   9      10   NA
Asian Pear     0    NA     NA   4
Callery Pear   NA   6      NA   NA
Bosc Pears     NA   NA     5    NA

如果有什么不清楚的地方,请告诉我

使用data.table:的选项

library(data.table)
ctries <- list(US=US, Canada=Canada, Japan=Japan)
rowsbind <- rbindlist(ctries, idcol="ctry")[
setDT(reshape2::melt(df)), on=.(Name=Var1, ctry=Var2), value := value]
ans <- dcast(rowsbind,  Name + Corr..Name ~ ctry, value.var="value")
setcolorder(ans, c("Name", "Corr..Name", names(ctries)))

输出:

Name   Corr..Name   US Canada Japan
1: food1    Pink Lady    1   <NA>  <NA>
2: food1   Honeycrisp <NA>      5  <NA>
3: food1         Gala <NA>   <NA>     7
4: food2 Blood Orange    1   <NA>  <NA>
5: food2 Navel Orange <NA>      9    10
6: food3   Asian Pear    0   <NA>  <NA>
7: food3 Callery Pear <NA>      6  <NA>
8: food3   Bosc Pears <NA>   <NA>     5

编辑新df:

library(data.table)
ctries <- list(US=US, Canada=Canada, Japan=Japan)
mDT <- melt(setDT(df[-1L], keep.rownames=TRUE), id.vars="rn")[, 
ctry := gsub("\.(.*)", "", variable)]
mDT[rbindlist(ctries, idcol="ctry"), on=.(rn=Name, ctry), Corr..Name := Corr..Name]
ans <- dcast(mDT,  rn + Corr..Name ~ variable, value.var="value")

输出:

rn   Corr..Name   US Canada Japan US.1
1: food1    Pink Lady    1   <NA>  <NA>    3
2: food1   Honeycrisp <NA>      5  <NA> <NA>
3: food1         Gala <NA>   <NA>     7 <NA>
4: food2 Blood Orange    1   <NA>  <NA>    2
5: food2 Navel Orange <NA>      9    10 <NA>
6: food3   Asian Pear    0   <NA>  <NA>    4
7: food3 Callery Pear <NA>      6  <NA> <NA>
8: food3   Bosc Pears <NA>   <NA>     5 <NA>

这里有一个有趣的解决方案:

library(tidyverse)
df_long <- df %>%
as_tibble(rownames = "Name") %>%
pivot_longer(-Name, names_to = "country", values_to = "score")
list(US = US, Canada = Canada, Japan = Japan) %>%
bind_rows(.id = "country") %>%
left_join(df_long, by = c("country", "Name")) %>%
arrange(Name) %>%
pivot_wider(names_from = country, values_from = score) %>%
select(-Name)
#> # A tibble: 8 x 4
#>   Corr..Name   US    Canada Japan
#>   <fct>        <chr> <chr>  <chr>
#> 1 Pink Lady    1     <NA>   <NA> 
#> 2 Honeycrisp   <NA>  5      <NA> 
#> 3 Gala         <NA>  <NA>   7    
#> 4 Blood Orange 1     <NA>   <NA> 
#> 5 Navel Orange <NA>  9      10   
#> 6 Asian Pear   0     <NA>   <NA> 
#> 7 Callery Pear <NA>  6      <NA> 
#> 8 Bosc Pears   <NA>  <NA>   5

由reprex包(v0.3.0(于2020-07-07创建

这里有另一种只使用基本R的方法。

# Make a list of dataframes for each country and name the elements respectively
x <- list(US, Canada, Japan)
names(x) <- c("US", "Canada", "Japan")
# Merge "df" to each dataframe independently using lapply
x <- lapply(1:length(x), function(i){ # length of x is 3
# which country?
country <- names(x)[i] 
# merge Corr.Name column to respective food counts - assumes row order is the same
df2 <- cbind(x[[i]]$Corr..Name, data.frame(df[,country])) 
# rename the column names
colnames(df2) <- c("Corr..Name", country)
df2 
}) 
# Convert list of dataframes to one dataframes, merging by "Corr.Name"
x <- Reduce(function(df1, df2) merge(df1, df2, by = "Corr..Name", all = TRUE), x)
x # Result
#  Corr..Name   US Canada Japan
# 1   Asian Pear    0   <NA>  <NA>
# 2 Blood Orange    1   <NA>  <NA>
# 3    Pink Lady    1   <NA>  <NA>
# 4   Honeycrisp <NA>      5  <NA>
# 5 Navel Orange <NA>      9    10
# 6 Callery Pear <NA>      6  <NA>
# 7         Gala <NA>   <NA>     7
# 8   Bosc Pears <NA>   <NA>     5

一个简单而基本的R解决方案:

### combine the 3 country dataframes into one dataframe, to make lookup easier
# add column to code for country
US$country     <- "US"
Canada$country <- "Canada"
Japan$country  <- "Japan"
# combine usinmg rbind()
df2 <- rbind(US, Canada, Japan); str(df2)

### create desired dataframe
# unique apple types (If order is important, define how to handle duplicates) 
df3 <- data.frame(Corr..Name = unique(df2$Corr..Name))
# lookup:
#   a) match: matches the apple type to the line in the initial country df
#   b) lookup in "df" using "[" operator
df3$US     <- as.numeric(sapply(X = df3$Corr..Name, FUN = function(x) df[match(x = x, table = US$Corr..Name), "US"]))
df3$Canada <- as.numeric(sapply(X = df3$Corr..Name, FUN = function(x) df[match(x = x, table = Canada$Corr..Name), "Canada"]))
df3$Japan  <- as.numeric(sapply(X = df3$Corr..Name, FUN = function(x) df[match(x = x, table = Japan$Corr..Name), "Japan"]))
# show result
df3

结果:

Corr..Name US Canada Japan
1    Pink Lady  1     NA    NA
2 Blood Orange  1     NA    NA
3   Asian Pear  0     NA    NA
4   Honeycrisp NA      5    NA
5 Navel Orange NA      9    10
6 Callery Pear NA      6    NA
7         Gala NA     NA     7
8   Bosc Pears NA     NA     5

最新更新