我的数据具有以下结构。对于每个国家和id,从装运栏(中国和香港(,如果一个集团同时拥有中国和香港,那么新的栏装运应该有香港如果一个集团有中国,那么新的栏装运应该有中国如果一个组有HK,则新的列装运应该有HK如果为NA,则应为NA
类似于M、Q和Y列。根据哪一列具有给定组的值,它应该在单列句点下。
我在下面列出了我的尝试。但想知道是否有更好的方法来做到这一点。如何包含shipment_hk
或shipment_china
,而不是true或false。以便用户可读。此外,季度价值在ifelse条件下似乎不起作用。
library(dplyr)
sample_df %>% tibble::as.tibble() %>%
dplyr::group_by(country,id) %>%
dplyr::mutate(periods = ifelse(Monthly == "Monthly", "monthly", ifelse(Quarterly == "Quarterly", "quarterly", ifelse(Yearly == "Yearly", "yearly", "")))) %>%
dplyr::mutate(shipment = any(shipment_hk %in% "send to hk")) %>%
dplyr::select(country,id, type,periods,shipment)
#> # A tibble: 20 x 5
#> # Groups: country, id [7]
#> country id type periods shipment
#> <chr> <chr> <chr> <chr> <lgl>
#> 1 group_1 2.1 "" <NA> FALSE
#> 2 group_1 2.1 "bar" monthly FALSE
#> 3 group_1 2.1 "chocolate" monthly FALSE
#> 4 group_1 2.17 "" <NA> FALSE
#> 5 group_1 2.17 "bar" monthly FALSE
#> 6 group_1 2.17 "chocolate" monthly FALSE
#> 7 group_1 2.2 "" <NA> TRUE
#> 8 group_1 2.2 "" <NA> TRUE
#> 9 group_1 2.2 "bar" <NA> TRUE
#> 10 group_1 2.2 "chocolate" <NA> TRUE
#> 11 group_2 1 "" <NA> TRUE
#> 12 group_2 1 "" <NA> TRUE
#> 13 group_2 1 "bar" monthly TRUE
#> 14 group_2 2.1 "" <NA> FALSE
#> 15 group_2 2.1 "bar" monthly FALSE
#> 16 group_2 2.12.1 "" <NA> TRUE
#> 17 group_2 2.12.1 "" <NA> TRUE
#> 18 group_2 2.12.1 "donut" <NA> TRUE
#> 19 group_2 2.12.2 "" <NA> FALSE
#> 20 group_2 2.12.2 "bar" <NA> FALSE
由reprex包(v0.3.0(于2020-11-03创建
dput(sample_df)
structure(list(country = c("group_1", "group_1", "group_1", "group_1",
"group_1", "group_1", "group_1", "group_1", "group_1", "group_1",
"group_2", "group_2", "group_2", "group_2", "group_2", "group_2",
"group_2", "group_2", "group_2", "group_2", "group_3", "group_3",
"group_3", "group_3", "group_3", "group_3"), id = c("2.1", "2.1",
"2.1", "2.17", "2.17", "2.17", "2.2", "2.2", "2.2", "2.2", "1",
"1", "1", "2.1", "2.1", "2.12.1", "2.12.1", "2.12.1", "2.12.2",
"2.12.2", "2.17", "2.17", "2.17", "2.18", "2.18", "2.18"), type = c("",
"bar", "chocolate", "", "bar", "chocolate", "", "", "bar", "chocolate",
"", "", "bar", "", "bar", "", "", "donut", "", "bar", "tiles",
"tiles", "tiles", "tiles", "tiles", "tiles"), shipment_china = c("send to china",
NA, NA, "send to china", NA, NA, "send to china", NA, NA, NA,
"send to china", NA, NA, "send to china", NA, "send to china",
NA, NA, "send to china", NA, NA, NA, NA, NA, NA, NA), shipment_hk = c(NA,
NA, NA, NA, NA, NA, NA, "send to hk", NA, NA, NA, "send to hk",
NA, NA, NA, NA, "send to hk", NA, NA, NA, NA, NA, NA, NA, NA,
NA), Monthly = c(NA, "Monthly", "Monthly", NA, "Monthly", "Monthly",
NA, NA, NA, NA, NA, NA, "Monthly", NA, "Monthly", NA, NA, NA,
NA, NA, NA, "Monthly", NA, NA, NA, NA), Quarterly = c(NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Quarterly",
NA, "Quarterly", NA, NA, NA, "Quarterly", NA, NA), Yearly = c(NA,
NA, NA, NA, NA, NA, NA, NA, "Yearly", "Yearly", NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), class = "data.frame", row.names = c(NA,
-26L))
一种使用coalesce
和使用case_when
而不是嵌套ifelse
:的方法
library(dplyr)
sample_df %>%
group_by(country, id) %>%
mutate(periods = coalesce(Monthly, Quarterly, Yearly),
shipment = case_when(
any(shipment_hk == "send to hk") ~ "send to hk",
any(shipment_china == "send to china") ~ "send to china",
TRUE ~ NA_character_
))
在新的shipment
列中,这将使shipment_hk
优先于shipment_china
。