我需要构建一个表,该表根据许多行条件改变其他列。我试图编写所需的非常少的代码,因此以某种方式解决。我预先将所有条件存储在一个表中,然后从表中绘制所有内容。但是,当接近突变阶段时,我不知道如何迭代地圈过表中的条件。
假设我想根据几何形状的性质对它们进行分类。我可以一次指定它,而不是先写出创建属性表的代码,然后再为条件编写非常相似的代码。
library(tibble)
my_table <-
tribble(~shape_id, ~string_of_conditions, ~shape_classification,
"a", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "TRUE" & n_of_sides == "4" & all_sides_equal == "FALSE" & is_curve == "FALSE""), "rectangle",
"b", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "TRUE" & n_of_sides == "4" & all_sides_equal == "TRUE" & is_curve == "FALSE""), "square",
"c", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "FALSE" & n_of_sides == "4" & all_sides_equal == "TRUE" & is_curve == "FALSE""), "rhombus",
"d", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "FALSE" & n_of_sides == "4" & all_sides_equal == "FALSE" & is_curve == "FALSE""), "trapezoid",
"e", c("is_curve == "TRUE" & equidistant_from_center == "TRUE""), "circle",
"f", c("is_curve == "TRUE" & equidistant_from_center == "FALSE""), "ellipse")
my_table
#> # A tibble: 6 x 3
#> shape_id string_of_conditions shape_classificat~
#> <chr> <chr> <chr>
#> 1 a "n_of_vertices == "4" & n_of_angles == "4" & ~ rectangle
#> 2 b "n_of_vertices == "4" & n_of_angles == "4" & ~ square
#> 3 c "n_of_vertices == "4" & n_of_angles == "4" & ~ rhombus
#> 4 d "n_of_vertices == "4" & n_of_angles == "4" & ~ trapezoid
#> 5 e "is_curve == "TRUE" & equidistant_from_center =~ circle
#> 6 f "is_curve == "TRUE" & equidistant_from_center =~ ellipse
由reprex包(v0.3.0)创建于2021-02-19
然后我可以基于my_table
构建数据集:
library(rlang)
library(dplyr)
library(purrr)
convert_to_named_vector <- function(x) {
gsub("==", "=", x) %>%
gsub("&", ",", .) %>%
paste0("c(", ., ")") %>%
parse_expr(.) %>%
eval(.)
}
new_data <-
my_table %>%
mutate(as_named_vec = map(.x = string_of_conditions, .f = convert_to_named_vector)) %>%
pull(as_named_vec) %>%
bind_rows()
> new_data
## # A tibble: 6 x 7
## n_of_vertices n_of_angles all_angles_right n_of_sides all_sides_equal is_curve equidistant_from_center
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 4 4 TRUE 4 FALSE FALSE NA
## 2 4 4 TRUE 4 TRUE FALSE NA
## 3 4 4 FALSE 4 TRUE FALSE NA
## 4 4 4 FALSE 4 FALSE FALSE NA
## 5 NA NA NA NA NA TRUE TRUE
## 6 NA NA NA NA NA TRUE FALSE
到目前为止一切顺利。
这是我的问题:对于new_data
中的每一行,我想改变一个新列,该列将保存该行的分类标签。我想使用case_when
,其中LHS将基于my_table["string_of_conditions"]
中存储的条件,RHS将基于my_table["shape_classification"]
中的值。
如何迭代运行my_table
&new_data
使用case_when
?
否则,一个冗长的不需要的1 × 1 =
cond_shape_a <-
my_table %>%
slice(1) %>%
pull(string_of_conditions) %>%
parse_expr()
classification_shape_a <-
my_table %>%
slice(1) %>%
pull(shape_classification)
new_data %>%
mutate(figured_out_shape_class = case_when(!!cond_shape_a ~ classification_shape_a))
## # A tibble: 6 x 8
## n_of_vertices n_of_angles all_angles_right n_of_sides all_sides_equal is_curve equidistant_from_center figured_out_shape_class
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 4 4 TRUE 4 FALSE FALSE NA rectangle
## 2 4 4 TRUE 4 TRUE FALSE NA NA
## 3 4 4 FALSE 4 TRUE FALSE NA NA
## 4 4 4 FALSE 4 FALSE FALSE NA NA
## 5 NA NA NA NA NA TRUE TRUE NA
## 6 NA NA NA NA NA TRUE FALSE NA
是否有一种方法可以很容易地使用purrr
来实现这一点?
我知道这个问题可能看起来很愚蠢,因为我可以简单地从一开始就使用tribble
写出所需的输出。但在我的实际数据中,这很快就会变得很麻烦,我需要一个程序化的解决方案。
编辑
@Jon Spring在评论中补充说,如果我在string_of_conditions
列中包含shape_classification
而不是专门的列,我可以完全避免case_when()
。
library(tibble)
library(purrr)
library(rlang)
library(dplyr)
my_table_2 <-
tribble(~shape_id, ~string_of_conditions,
"a", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "TRUE" & n_of_sides == "4" & all_sides_equal == "FALSE" & is_curve == "FALSE" , shape_classification == "rectangle""),
"b", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "TRUE" & n_of_sides == "4" & all_sides_equal == "TRUE" & is_curve == "FALSE", shape_classification == "square""),
"c", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "FALSE" & n_of_sides == "4" & all_sides_equal == "TRUE" & is_curve == "FALSE", shape_classification == "rhombus""),
"d", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "FALSE" & n_of_sides == "4" & all_sides_equal == "FALSE" & is_curve == "FALSE" , shape_classification == "trapezoid""),
"e", c("is_curve == "TRUE" & equidistant_from_center == "TRUE", shape_classification == "circle""),
"f", c("is_curve == "TRUE" & equidistant_from_center == "FALSE", shape_classification == "ellipse""))
my_table_2
#> # A tibble: 6 x 2
#> shape_id string_of_conditions
#> <chr> <chr>
#> 1 a "n_of_vertices == "4" & n_of_angles == "4" & all_angles_right ==~
#> 2 b "n_of_vertices == "4" & n_of_angles == "4" & all_angles_right ==~
#> 3 c "n_of_vertices == "4" & n_of_angles == "4" & all_angles_right ==~
#> 4 d "n_of_vertices == "4" & n_of_angles == "4" & all_angles_right ==~
#> 5 e "is_curve == "TRUE" & equidistant_from_center == "TRUE", shape_c~
#> 6 f "is_curve == "TRUE" & equidistant_from_center == "FALSE", shape_~
convert_to_named_vector <- function(x) {
gsub("==", "=", x) %>%
gsub("&", ",", .) %>%
paste0("c(", ., ")") %>%
parse_expr(.) %>%
eval(.)
}
new_data_2 <-
my_table_2 %>%
mutate(as_named_vec = map(.x = string_of_conditions, .f = convert_to_named_vector)) %>%
pull(as_named_vec) %>%
bind_rows()
new_data_2
#> # A tibble: 6 x 8
#> n_of_vertices n_of_angles all_angles_right n_of_sides all_sides_equal is_curve
#> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 4 4 TRUE 4 FALSE FALSE
#> 2 4 4 TRUE 4 TRUE FALSE
#> 3 4 4 FALSE 4 TRUE FALSE
#> 4 4 4 FALSE 4 FALSE FALSE
#> 5 <NA> <NA> <NA> <NA> <NA> TRUE
#> 6 <NA> <NA> <NA> <NA> <NA> TRUE
#> # ... with 2 more variables: shape_classification <chr>,
#> # equidistant_from_center <chr>
由reprex包(v0.3.0)创建于2021-02-19
这种解决方案的问题是,它可以很好地处理简单的shape_classification
输出,如字符串(例如,"square", "rectangle"等),但如果我们希望shape_classification
是复杂的结构,如列表嵌套列表等,则无法扩展。
在这种情况下,我们必须使用专用列来输出:
library(tibble)
my_table_3 <-
tribble(~shape_id, ~string_of_conditions, ~shape_classification,
"a", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "TRUE" & n_of_sides == "4" & all_sides_equal == "FALSE" & is_curve == "FALSE""), list(list(shape_name = "rectangle", shape_aspects = tribble(~aspect, ~value,
"type_of_geometry", "Euclidean plane geometry",
"sum_of_angles", "360",
"other_names", "oblong"))),
"b", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "TRUE" & n_of_sides == "4" & all_sides_equal == "TRUE" & is_curve == "FALSE""), list(list(shape_name = "square", shape_aspects = tribble(~aspect, ~value,
"type_of_geometry", "Euclidean plane geometry",
"sum_of_angles", "360",
"n_of_diagonals", "2"))),
"c", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "FALSE" & n_of_sides == "4" & all_sides_equal == "TRUE" & is_curve == "FALSE""), list(list(shape_name = "rhombus", shape_aspects = tribble(~aspect, ~value,
"type_of_geometry", "Euclidean plane geometry",
"plural_form", "rhombi"))),
"d", c("n_of_vertices == "4" & n_of_angles == "4" & all_angles_right == "FALSE" & n_of_sides == "4" & all_sides_equal == "FALSE" & is_curve == "FALSE""), list(list(shape_name = "trapezoid", shape_aspects = tribble(~aspect, ~value,
"sum_of_angles", "360",
"n_of_diagonals", "2"))),
"e", c("is_curve == "TRUE" & equidistant_from_center == "TRUE""), list(c("circle")),
"f", c("is_curve == "TRUE" & equidistant_from_center == "FALSE""), list(c("ellipse")))
my_table_3
#> # A tibble: 6 x 3
#> shape_id string_of_conditions shape_classificat~
#> <chr> <chr> <list>
#> 1 a "n_of_vertices == "4" & n_of_angles == "4" & ~ <list [1]>
#> 2 b "n_of_vertices == "4" & n_of_angles == "4" & ~ <list [1]>
#> 3 c "n_of_vertices == "4" & n_of_angles == "4" & ~ <list [1]>
#> 4 d "n_of_vertices == "4" & n_of_angles == "4" & ~ <list [1]>
#> 5 e "is_curve == "TRUE" & equidistant_from_center =~ <list [1]>
#> 6 f "is_curve == "TRUE" & equidistant_from_center =~ <list [1]>
由reprex包(v0.3.0)创建于2021-02-19
也许这有帮助
library(dplyr)
library(purrr)
bind_cols(my_table, new_data) %>%
transmute(tmp = map2(string_of_conditions,
shape_classification, ~
case_when(eval(parse_expr(.x)) ~ .y))) %>%
pull(tmp) %>%
reduce(coalesce) %>%
bind_cols(new_data, figured_out_shape_class = .)
与产出
# A tibble: 6 x 8
# n_of_vertices n_of_angles all_angles_right n_of_sides all_sides_equal is_curve equidistant_from_center figured_out_shape_class
# <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#1 4 4 TRUE 4 FALSE FALSE <NA> rectangle
#2 4 4 TRUE 4 TRUE FALSE <NA> square
#3 4 4 FALSE 4 TRUE FALSE <NA> rhombus
#4 4 4 FALSE 4 FALSE FALSE <NA> trapezoid
#5 <NA> <NA> <NA> <NA> <NA> TRUE TRUE circle
#6 <NA> <NA> <NA> <NA> <NA> TRUE FALSE ellipse