r-从多列中选择组中的值

  • 本文关键字:选择 r dplyr data.table
  • 更新时间 :
  • 英文 :


我的数据具有以下结构。对于每个国家和id,从装运栏(中国和香港(,如果一个集团同时拥有中国和香港,那么新的栏装运应该有香港如果一个集团有中国,那么新的栏装运应该有中国如果一个组有HK,则新的列装运应该有HK如果为NA,则应为NA

类似于M、Q和Y列。根据哪一列具有给定组的值,它应该在单列句点下。

我在下面列出了我的尝试。但想知道是否有更好的方法来做到这一点。如何包含shipment_hkshipment_china,而不是true或false。以便用户可读。此外,季度价值在ifelse条件下似乎不起作用。

library(dplyr)
sample_df %>% tibble::as.tibble() %>% 
dplyr::group_by(country,id) %>% 
dplyr::mutate(periods = ifelse(Monthly == "Monthly", "monthly", ifelse(Quarterly == "Quarterly", "quarterly", ifelse(Yearly == "Yearly", "yearly", "")))) %>%
dplyr::mutate(shipment = any(shipment_hk %in% "send to hk")) %>% 
dplyr::select(country,id, type,periods,shipment)
#> # A tibble: 20 x 5
#> # Groups:   country, id [7]
#>    country id     type        periods shipment
#>    <chr>   <chr>  <chr>       <chr>   <lgl>   
#>  1 group_1 2.1    ""          <NA>    FALSE   
#>  2 group_1 2.1    "bar"       monthly FALSE   
#>  3 group_1 2.1    "chocolate" monthly FALSE   
#>  4 group_1 2.17   ""          <NA>    FALSE   
#>  5 group_1 2.17   "bar"       monthly FALSE   
#>  6 group_1 2.17   "chocolate" monthly FALSE   
#>  7 group_1 2.2    ""          <NA>    TRUE    
#>  8 group_1 2.2    ""          <NA>    TRUE    
#>  9 group_1 2.2    "bar"       <NA>    TRUE    
#> 10 group_1 2.2    "chocolate" <NA>    TRUE    
#> 11 group_2 1      ""          <NA>    TRUE    
#> 12 group_2 1      ""          <NA>    TRUE    
#> 13 group_2 1      "bar"       monthly TRUE    
#> 14 group_2 2.1    ""          <NA>    FALSE   
#> 15 group_2 2.1    "bar"       monthly FALSE   
#> 16 group_2 2.12.1 ""          <NA>    TRUE    
#> 17 group_2 2.12.1 ""          <NA>    TRUE    
#> 18 group_2 2.12.1 "donut"     <NA>    TRUE    
#> 19 group_2 2.12.2 ""          <NA>    FALSE   
#> 20 group_2 2.12.2 "bar"       <NA>    FALSE

由reprex包(v0.3.0(于2020-11-03创建


dput(sample_df)
structure(list(country = c("group_1", "group_1", "group_1", "group_1", 
"group_1", "group_1", "group_1", "group_1", "group_1", "group_1", 
"group_2", "group_2", "group_2", "group_2", "group_2", "group_2", 
"group_2", "group_2", "group_2", "group_2", "group_3", "group_3", 
"group_3", "group_3", "group_3", "group_3"), id = c("2.1", "2.1", 
"2.1", "2.17", "2.17", "2.17", "2.2", "2.2", "2.2", "2.2", "1", 
"1", "1", "2.1", "2.1", "2.12.1", "2.12.1", "2.12.1", "2.12.2", 
"2.12.2", "2.17", "2.17", "2.17", "2.18", "2.18", "2.18"), type = c("", 
"bar", "chocolate", "", "bar", "chocolate", "", "", "bar", "chocolate", 
"", "", "bar", "", "bar", "", "", "donut", "", "bar", "tiles", 
"tiles", "tiles", "tiles", "tiles", "tiles"), shipment_china = c("send to china", 
NA, NA, "send to china", NA, NA, "send to china", NA, NA, NA, 
"send to china", NA, NA, "send to china", NA, "send to china", 
NA, NA, "send to china", NA, NA, NA, NA, NA, NA, NA), shipment_hk = c(NA, 
NA, NA, NA, NA, NA, NA, "send to hk", NA, NA, NA, "send to hk", 
NA, NA, NA, NA, "send to hk", NA, NA, NA, NA, NA, NA, NA, NA, 
NA), Monthly = c(NA, "Monthly", "Monthly", NA, "Monthly", "Monthly", 
NA, NA, NA, NA, NA, NA, "Monthly", NA, "Monthly", NA, NA, NA, 
NA, NA, NA, "Monthly", NA, NA, NA, NA), Quarterly = c(NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Quarterly", 
NA, "Quarterly", NA, NA, NA, "Quarterly", NA, NA), Yearly = c(NA, 
NA, NA, NA, NA, NA, NA, NA, "Yearly", "Yearly", NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), class = "data.frame", row.names = c(NA, 
-26L))

一种使用coalesce和使用case_when而不是嵌套ifelse:的方法

library(dplyr)
sample_df %>%
group_by(country, id) %>%
mutate(periods = coalesce(Monthly, Quarterly, Yearly),
shipment = case_when(
any(shipment_hk == "send to hk") ~ "send to hk",
any(shipment_china == "send to china") ~ "send to china",
TRUE ~ NA_character_
))

在新的shipment列中,这将使shipment_hk优先于shipment_china

最新更新