基于 R 数据帧中的静态函数的逻辑函数



我有一个非常大的Excel电子表格,其中包含许多对观察结果的"检查"(300+列)。检查由布尔运算符(大于、等于)和一些求和/减法组成:

# Example input: one check per row, stored as alternating operand / operator
# columns; unused trailing parts of shorter checks are NA.
df <- data.frame(
  checkID    = c(1, 2, 3, 4),
  checkpart1 = c(50, 70, 111, 320),
  checkpart2 = c("+", "==", "*", ">"),
  checkpart3 = c(18, 17, 6, 3),
  checkpart4 = c("==", NA, "-", NA),
  checkpart5 = c(80, NA, 76, NA),
  checkpart6 = c(NA, NA, "==", NA),
  checkpart7 = c(NA, NA, 590, NA)
)

head(df) ##this is the input
#checkID checkpart1 checkpart2 checkpart3 checkpart4 checkpart5 checkpart6 checkpart7
#1           50          +         18         ==         80       <NA>         NA
#2           70         ==         17       <NA>         NA       <NA>         NA
#3          111          *          6          -         76         ==        590
#4          320          >          3       <NA>         NA       <NA>         NA
INSERT CODE THAT MAKES THE EXCEL FUNCTIONS COME TO LIFE HERE. 
Mind you that some rows have much longer checks than others, so you can't rely on the column names. 
#outcome data frame should look like this, where the checks have been conducted:
View(outputchecks)
#checkID
#1   FALSE      
#2   FALSE
#3   TRUE        
#4   TRUE   

有谁知道R中的一些tidyr/dplyr/其他应用程序可以在数据帧中执行这些"静态函数"?

谢谢!

使用pmap

# Example input: one check per row, stored as alternating operand / operator
# columns; unused trailing parts of shorter checks are NA.
df <- data.frame(
  checkID    = c(1, 2, 3, 4),
  checkpart1 = c(50, 70, 111, 320),
  checkpart2 = c("+", "==", "*", ">"),
  checkpart3 = c(18, 17, 6, 3),
  checkpart4 = c("==", NA, "-", NA),
  checkpart5 = c(80, NA, 76, NA),
  checkpart6 = c(NA, NA, "==", NA),
  checkpart7 = c(NA, NA, 590, NA)
)

library(tidyverse)
# Row by row: drop the NA parts, glue the remaining parts into one expression
# string, then parse and evaluate it. The first column (checkID) is left out.
df %>%
  mutate(
    exp = pmap_lgl(
      df[-1],
      ~ eval(parse(text = paste(na.omit(c(...)), collapse = "")))
    )
  )
#>   checkID checkpart1 checkpart2 checkpart3 checkpart4 checkpart5 checkpart6
#> 1       1         50          +         18         ==         80       <NA>
#> 2       2         70         ==         17       <NA>         NA       <NA>
#> 3       3        111          *          6          -         76         ==
#> 4       4        320          >          3       <NA>         NA       <NA>
#>   checkpart7   exp
#> 1         NA FALSE
#> 2         NA FALSE
#> 3        590  TRUE
#> 4         NA  TRUE

创建于 2021-07-04 由 reprex 软件包 (v2.0.0)

<hr />
# Example input: one check per row, stored as alternating operand / operator
# columns; unused trailing parts of shorter checks are NA.
df <- data.frame(
  checkID    = c(1, 2, 3, 4),
  checkpart1 = c(50, 70, 111, 320),
  checkpart2 = c("+", "==", "*", ">"),
  checkpart3 = c(18, 17, 6, 3),
  checkpart4 = c("==", NA, "-", NA),
  checkpart5 = c(80, NA, 76, NA),
  checkpart6 = c(NA, NA, "==", NA),
  checkpart7 = c(NA, NA, 590, NA)
)

library(tidyverse)
# Convert every check part to text (NA becomes ""), then evaluate each row's
# concatenated parts. checkID is the grouping column, so across() and
# c_across() leave it alone.
df %>%
  group_by(checkID) %>%
  mutate(
    across(everything(), ~ ifelse(is.na(.x), "", as.character(.x)))
  ) %>%
  rowwise() %>%
  mutate(
    exp = eval(parse(text = paste(c_across(everything()), collapse = "")))
  )
# A tibble: 4 x 9
# Rowwise:  checkID
checkID checkpart1 checkpart2 checkpart3 checkpart4 checkpart5 checkpart6 checkpart7 exp  
<dbl> <chr>      <chr>      <chr>      <chr>      <chr>      <chr>      <chr>      <lgl>
1       1 50         +          18         "=="       "80"       ""         ""         FALSE
2       2 70         ==         17         ""         ""         ""         ""         FALSE
3       3 111        *          6          "-"        "76"       "=="       "590"      TRUE 
4       4 320        >          3          ""         ""         ""         ""         TRUE 
<hr />

transmute将产生

# Same evaluation as with mutate(), but transmute() keeps only the grouping
# column (checkID) and the computed result.
df %>%
  group_by(checkID) %>%
  mutate(
    across(everything(), ~ ifelse(is.na(.x), "", as.character(.x)))
  ) %>%
  rowwise() %>%
  transmute(
    exp = eval(parse(text = paste(c_across(everything()), collapse = "")))
  )
# A tibble: 4 x 2
# Rowwise:  checkID
checkID exp  
<dbl> <lgl>
1       1 FALSE
2       2 FALSE
3       3 TRUE 
4       4 TRUE 

使用summarise也会删除组

# Same evaluation again; summarise(.groups = "drop") returns one row per check
# and also removes the row-wise grouping from the result.
df %>%
  group_by(checkID) %>%
  mutate(
    across(everything(), ~ ifelse(is.na(.x), "", as.character(.x)))
  ) %>%
  rowwise() %>%
  summarise(
    exp = eval(parse(text = paste(c_across(everything()), collapse = ""))),
    .groups = "drop"
  )
# A tibble: 4 x 2
checkID exp  
<dbl> <lgl>
1       1 FALSE
2       2 FALSE
3       3 TRUE 
4       4 TRUE 

这是eval/parse的一种方法。首先使用操作形成一个字符串,然后计算表达式。

# Build one expression string per row from all columns except the first
# (checkID). Each cell is converted with as.character() on its own: apply()
# would first coerce the mixed data frame to a character matrix, and that
# coercion runs format() on the numeric columns, which rounds to 7 significant
# digits (1234567.4 becomes "1234567") and can silently change the outcome of
# a check.
txt <- vapply(seq_len(nrow(df)), function(i) {
  x <- vapply(df[-1], function(col) as.character(col[i]), character(1),
              USE.NAMES = FALSE)
  # Shorter checks leave their trailing parts as NA; drop those.
  paste(trimws(x[!is.na(x)]), collapse = "")
}, character(1))
# Parse and evaluate every string. The result is a logical vector named by the
# expressions; vapply() keeps that type stable (logical(0) for zero rows) and
# fails loudly if a check does not evaluate to a single TRUE/FALSE.
vapply(txt, function(x) eval(parse(text = x)), logical(1))
#    50+18==80        70==17 111*6-76==590         320>3 
#        FALSE         FALSE          TRUE          TRUE 

您还可以使用以下解决方案:

  • 我使用pmap函数将数据集中的每一行捕获为字符串,省略第一个变量(checkID)
  • 然后我去掉了每行中的所有NA
  • 之后,为了计算我们的公式(现在以字符串的形式),我们首先需要将它们折叠成长度为1的字符串
  • 然后我使用 rlang 中的 parse_expr(相当于基础 R 中的 parse(text = ...))将字符串转换为表达式
  • 最后,我使用了 eval_tidy(它相当于基础 R 中的 eval 函数)来计算我们的表达式

这里不需要使用rlang,因为您可以非常轻松地使用基本 R 函数,但我的意思是向您展示替代方案。

library(dplyr) # mutate(), select() and cur_data() below come from dplyr
library(purrr)
library(rlang)
# For every row: take the check parts (all columns except checkID), drop the
# NA entries, join the rest into one expression string, parse it with
# parse_expr() and evaluate it with eval_tidy() to a single logical value.
# NOTE: cur_data() is deprecated as of dplyr 1.1.0; pick(!checkID) is its
# replacement there.
df %>%
  mutate(
    output = pmap_lgl(select(cur_data(), !checkID), ~ {
      parts <- c(...)
      parts <- parts[!is.na(parts)]
      parse_expr(paste(parts, collapse = " ")) %>%
        eval_tidy()
    })
  )
checkID checkpart1 checkpart2 checkpart3 checkpart4 checkpart5 checkpart6 checkpart7 output
1       1         50          +         18         ==         80       <NA>         NA  FALSE
2       2         70         ==         17       <NA>         NA       <NA>         NA  FALSE
3       3        111          *          6          -         76         ==        590   TRUE
4       4        320          >          3       <NA>         NA       <NA>         NA   TRUE

这是一个使用 tidyr 和 dplyr 的可能方案:

library(tidyr)
library(dplyr)
# Collapse all checkpart* columns into a single space-separated expression
# string per row (NA parts are dropped by na.rm = TRUE), then evaluate that
# string row by row and remove the row-wise grouping again.
df %>%
  tibble() %>%
  unite(
    check,
    starts_with("checkpart"),
    sep = " ",
    na.rm = TRUE
  ) %>%
  rowwise() %>%
  mutate(check = eval(str2expression(check))) %>%
  ungroup()

返回

# A tibble: 4 x 2
checkID check
<dbl> <lgl>
1       1 FALSE
2       2 FALSE
3       3 TRUE 
4       4 TRUE 

相关内容

  • 没有找到相关文章

最新更新