r语言 - 如何运行dplyr::case_when()迭代时,条件表达式存储在一个数据框列?



我需要构建一个表,该表根据许多行条件改变其他列。我试图编写所需的非常少的代码,因此以某种方式解决。我预先将所有条件存储在一个表中,然后从表中绘制所有内容。但是,当接近突变阶段时,我不知道如何迭代地圈过表中的条件。

假设我想根据几何形状的性质对它们进行分类。我可以一次指定它,而不是先写出创建属性表的代码,然后再为条件编写非常相似的代码。

library(tibble)
my_table <- 
tribble(~shape_id, ~string_of_conditions, ~shape_classification,
"a", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "TRUE" & n_of_sides == "4" & all_sides_equal == "FALSE" & is_curve == "FALSE""), "rectangle",
"b", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "TRUE" & n_of_sides == "4" & all_sides_equal == "TRUE" & is_curve == "FALSE""), "square",
"c", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "FALSE" & n_of_sides == "4" & all_sides_equal == "TRUE" & is_curve == "FALSE""), "rhombus",
"d", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "FALSE" & n_of_sides == "4" & all_sides_equal == "FALSE" & is_curve == "FALSE""), "trapezoid",
"e", c("is_curve == "TRUE" & equidistant_from_center == "TRUE""), "circle",
"f", c("is_curve == "TRUE" & equidistant_from_center == "FALSE""), "ellipse")
my_table
#> # A tibble: 6 x 3
#>   shape_id string_of_conditions                               shape_classificat~
#>   <chr>    <chr>                                              <chr>             
#> 1 a        "n_of_vertices == "4" & n_of_angles == "4" & ~ rectangle         
#> 2 b        "n_of_vertices == "4" & n_of_angles == "4" & ~ square            
#> 3 c        "n_of_vertices == "4" & n_of_angles == "4" & ~ rhombus           
#> 4 d        "n_of_vertices == "4" & n_of_angles == "4" & ~ trapezoid         
#> 5 e        "is_curve == "TRUE" & equidistant_from_center =~ circle            
#> 6 f        "is_curve == "TRUE" & equidistant_from_center =~ ellipse

由reprex包(v0.3.0)创建于2021-02-19

然后我可以基于my_table构建数据集:

library(rlang)
library(dplyr)
library(purrr)
convert_to_named_vector <- function(x) {
gsub("==", "=", x) %>%
gsub("&", ",", .) %>%
paste0("c(", ., ")") %>%
parse_expr(.) %>%
eval(.)
}
new_data <- 
my_table %>%
mutate(as_named_vec = map(.x = string_of_conditions, .f = convert_to_named_vector)) %>%
pull(as_named_vec) %>%
bind_rows()
> new_data
## # A tibble: 6 x 7
##   n_of_vertices n_of_angles all_angles_right n_of_sides all_sides_equal is_curve equidistant_from_center
##   <chr>         <chr>       <chr>            <chr>      <chr>           <chr>    <chr>                  
## 1 4             4           TRUE             4          FALSE           FALSE    NA                     
## 2 4             4           TRUE             4          TRUE            FALSE    NA                     
## 3 4             4           FALSE            4          TRUE            FALSE    NA                     
## 4 4             4           FALSE            4          FALSE           FALSE    NA                     
## 5 NA            NA          NA               NA         NA              TRUE     TRUE                   
## 6 NA            NA          NA               NA         NA              TRUE     FALSE  

到目前为止一切顺利。
这是我的问题:对于new_data中的每一行,我想改变一个新列,该列将保存该行的分类标签。我想使用case_when,其中LHS将基于my_table["string_of_conditions"]中存储的条件,RHS将基于my_table["shape_classification"]中的值。

如何迭代运行my_table&new_data使用case_when?

否则,一个冗长的不需要的1 × 1 =

cond_shape_a <-
my_table %>%
slice(1) %>%
pull(string_of_conditions) %>%
parse_expr()
classification_shape_a <-
my_table %>%
slice(1) %>%
pull(shape_classification)
new_data %>%
mutate(figured_out_shape_class = case_when(!!cond_shape_a ~ classification_shape_a))
## # A tibble: 6 x 8
##   n_of_vertices n_of_angles all_angles_right n_of_sides all_sides_equal is_curve equidistant_from_center figured_out_shape_class
##   <chr>         <chr>       <chr>            <chr>      <chr>           <chr>    <chr>                   <chr>                  
## 1 4             4           TRUE             4          FALSE           FALSE    NA                      rectangle              
## 2 4             4           TRUE             4          TRUE            FALSE    NA                      NA                     
## 3 4             4           FALSE            4          TRUE            FALSE    NA                      NA                     
## 4 4             4           FALSE            4          FALSE           FALSE    NA                      NA                     
## 5 NA            NA          NA               NA         NA              TRUE     TRUE                    NA                     
## 6 NA            NA          NA               NA         NA              TRUE     FALSE                   NA     

是否有一种方法可以很容易地使用purrr来实现这一点?

我知道这个问题可能看起来很愚蠢,因为我可以简单地从一开始就使用tribble写出所需的输出。但在我的实际数据中,这很快就会变得很麻烦,我需要一个程序化的解决方案。


编辑

@Jon Spring在评论中补充说,如果我在string_of_conditions列中包含shape_classification而不是专门的列,我可以完全避免case_when()

library(tibble)
library(purrr)
library(rlang)
library(dplyr)
my_table_2 <- 
tribble(~shape_id, ~string_of_conditions,
"a", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "TRUE" & n_of_sides == "4" & all_sides_equal == "FALSE" & is_curve == "FALSE" , shape_classification == "rectangle""),
"b", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "TRUE" & n_of_sides == "4" & all_sides_equal == "TRUE" & is_curve == "FALSE", shape_classification == "square""),
"c", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "FALSE" & n_of_sides == "4" & all_sides_equal == "TRUE" & is_curve == "FALSE", shape_classification == "rhombus""),
"d", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "FALSE" & n_of_sides == "4" & all_sides_equal == "FALSE" & is_curve == "FALSE" , shape_classification == "trapezoid""),
"e", c("is_curve == "TRUE" & equidistant_from_center == "TRUE", shape_classification == "circle""),
"f", c("is_curve == "TRUE" & equidistant_from_center == "FALSE", shape_classification == "ellipse""))
my_table_2
#> # A tibble: 6 x 2
#>   shape_id string_of_conditions                                                 
#>   <chr>    <chr>                                                                
#> 1 a        "n_of_vertices == "4" & n_of_angles == "4" & all_angles_right ==~
#> 2 b        "n_of_vertices == "4" & n_of_angles == "4" & all_angles_right ==~
#> 3 c        "n_of_vertices == "4" & n_of_angles == "4" & all_angles_right ==~
#> 4 d        "n_of_vertices == "4" & n_of_angles == "4" & all_angles_right ==~
#> 5 e        "is_curve == "TRUE" & equidistant_from_center == "TRUE", shape_c~
#> 6 f        "is_curve == "TRUE" & equidistant_from_center == "FALSE", shape_~
convert_to_named_vector <- function(x) {
gsub("==", "=", x) %>%
gsub("&", ",", .) %>%
paste0("c(", ., ")") %>%
parse_expr(.) %>%
eval(.)
}
new_data_2 <- 
my_table_2 %>%
mutate(as_named_vec = map(.x = string_of_conditions, .f = convert_to_named_vector)) %>%
pull(as_named_vec) %>%
bind_rows()
new_data_2
#> # A tibble: 6 x 8
#>   n_of_vertices n_of_angles all_angles_right n_of_sides all_sides_equal is_curve
#>   <chr>         <chr>       <chr>            <chr>      <chr>           <chr>   
#> 1 4             4           TRUE             4          FALSE           FALSE   
#> 2 4             4           TRUE             4          TRUE            FALSE   
#> 3 4             4           FALSE            4          TRUE            FALSE   
#> 4 4             4           FALSE            4          FALSE           FALSE   
#> 5 <NA>          <NA>        <NA>             <NA>       <NA>            TRUE    
#> 6 <NA>          <NA>        <NA>             <NA>       <NA>            TRUE    
#> # ... with 2 more variables: shape_classification <chr>,
#> #   equidistant_from_center <chr>

由reprex包(v0.3.0)创建于2021-02-19

这种解决方案的问题是,它可以很好地处理简单的shape_classification输出,如字符串(例如,"square", "rectangle"等),但如果我们希望shape_classification是复杂的结构,如列表嵌套列表等,则无法扩展。

在这种情况下,我们必须使用专用列来输出:

library(tibble)
my_table_3 <- 
tribble(~shape_id, ~string_of_conditions, ~shape_classification,
"a", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "TRUE" & n_of_sides == "4" & all_sides_equal == "FALSE" & is_curve == "FALSE""), list(list(shape_name = "rectangle", shape_aspects = tribble(~aspect, ~value,
                                                                                                                                                                                 "type_of_geometry", "Euclidean plane geometry",
                                                                                                                                                                                 "sum_of_angles", "360",
                                                                                                                                                                                 "other_names", "oblong"))),
"b", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "TRUE" & n_of_sides == "4" & all_sides_equal == "TRUE" & is_curve == "FALSE""), list(list(shape_name = "square", shape_aspects = tribble(~aspect, ~value,
                                                                                                                                                                    "type_of_geometry", "Euclidean plane geometry",
                                                                                                                                                                    "sum_of_angles", "360",
                                                                                                                                                                    "n_of_diagonals", "2"))),
"c", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "FALSE" & n_of_sides == "4" & all_sides_equal == "TRUE" & is_curve == "FALSE""), list(list(shape_name = "rhombus", shape_aspects = tribble(~aspect, ~value,
                                                                                                                                                                  "type_of_geometry", "Euclidean plane geometry",
                                                                                                                                                                  "plural_form", "rhombi"))),
"d", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "FALSE" & n_of_sides == "4" & all_sides_equal == "FALSE" & is_curve == "FALSE""), list(list(shape_name = "trapezoid", shape_aspects = tribble(~aspect, ~value,
                                                                                                                                                                      "sum_of_angles", "360",
                                                                                                                                                                      "n_of_diagonals", "2"))),
"e", c("is_curve == "TRUE" & equidistant_from_center == "TRUE""), list(c("circle")),
"f", c("is_curve == "TRUE" & equidistant_from_center == "FALSE""), list(c("ellipse")))
my_table_3
#> # A tibble: 6 x 3
#>   shape_id string_of_conditions                               shape_classificat~
#>   <chr>    <chr>                                              <list>            
#> 1 a        "n_of_vertices == "4" & n_of_angles == "4" & ~ <list [1]>        
#> 2 b        "n_of_vertices == "4" & n_of_angles == "4" & ~ <list [1]>        
#> 3 c        "n_of_vertices == "4" & n_of_angles == "4" & ~ <list [1]>        
#> 4 d        "n_of_vertices == "4" & n_of_angles == "4" & ~ <list [1]>        
#> 5 e        "is_curve == "TRUE" & equidistant_from_center =~ <list [1]>        
#> 6 f        "is_curve == "TRUE" & equidistant_from_center =~ <list [1]>

由reprex包(v0.3.0)创建于2021-02-19

也许这有帮助

library(dplyr)
library(purrr)
bind_cols(my_table, new_data) %>% 
transmute(tmp = map2(string_of_conditions,
shape_classification,  ~  
case_when(eval(parse_expr(.x)) ~ .y))) %>% 
pull(tmp) %>%
reduce(coalesce) %>% 
bind_cols(new_data, figured_out_shape_class = .)

与产出

# A tibble: 6 x 8
#  n_of_vertices n_of_angles all_angles_right n_of_sides all_sides_equal is_curve equidistant_from_center figured_out_shape_class
#  <chr>         <chr>       <chr>            <chr>      <chr>           <chr>    <chr>                   <chr>                  
#1 4             4           TRUE             4          FALSE           FALSE    <NA>                    rectangle              
#2 4             4           TRUE             4          TRUE            FALSE    <NA>                    square                 
#3 4             4           FALSE            4          TRUE            FALSE    <NA>                    rhombus                
#4 4             4           FALSE            4          FALSE           FALSE    <NA>                    trapezoid              
#5 <NA>          <NA>        <NA>             <NA>       <NA>            TRUE     TRUE                    circle                 
#6 <NA>          <NA>        <NA>             <NA>       <NA>            TRUE     FALSE                   ellipse     

最新更新