r-哪些tidyverse函数返回tibbles



一些 tidyverse 函数返回数据框（data.frame），另一些则返回 tibble。我找不到任何说明哪些函数返回哪种数据类型的资料，也看不出任何真正可预测的规律。

library(tidyverse)
# These verbs hand back a plain data.frame when given one
# (class() output for each result is shown right after the calls).
df1 <- mutate(iris, newcol = mean(Sepal.Length))
df2 <- filter(iris, Sepal.Length > 5)
df3 <- select(iris, Sepal.Length, Species)
class(df1)
class(df2)
class(df3)
[1] "data.frame"
[1] "data.frame"
[1] "data.frame"
# These verbs hand back a tibble even though the input is a plain data.frame
# (class() output for each result is shown right after the calls).
df4 <- add_count(iris, Species)
df5 <- pivot_longer(iris, cols = -Species)
# Grouping is kept on purpose: the resulting class vector is part of the demo.
df6 <- iris %>%
  group_by(Species) %>%
  mutate(newcol = mean(Sepal.Length))
class(df4)
class(df5)
class(df6)
[1] "tbl_df"     "tbl"        "data.frame"
[1] "tbl_df"     "tbl"        "data.frame"
[1] "grouped_df" "tbl_df"     "tbl"        "data.frame"

有没有办法判断一个函数的返回值类型？或者是否有资料说明 tidyverse 中哪些函数返回哪种数据类型？许多 R 代码在 tibble 和数据框上的运行结果相同，但两者也有一些重要区别，例如提取列时的默认行为：tibble 默认使用 drop = FALSE。我知道可以随时手动转换类型，但在处理大量遗留代码时，事先了解这一点会非常有用。

我用占位（dummy）参数逐一调用了这些 tidyverse 函数，以查看返回结果的类。

# Probe each tidyverse verb with throwaway arguments and record the class
# vector of whatever it returns. Every input is a plain data.frame (iris,
# mtcars, or an inline data.frame()), so the resulting table shows which
# verbs keep the data.frame class and which hand back a tibble.
lst(
  "dplyr::filter" = dplyr::filter(iris, TRUE) %>% class(),
  "dplyr::filter_all" = dplyr::filter_all(mtcars, all_vars(. > 1)) %>% class(),
  "dplyr::group_by" = dplyr::group_by(iris, Species) %>% class(),
  "dplyr::group_by_all" = dplyr::group_by_all(iris, function(...) TRUE) %>% class(),
  "dplyr::group_keys" = dplyr::group_keys(iris) %>% class(),
  "dplyr::group_modify" = dplyr::group_modify(iris, function(x, ...) identity(x)) %>% class(),
  "dplyr::mutate" = dplyr::mutate(iris) %>% class(),
  "dplyr::mutate_all" = dplyr::mutate_all(iris, identity) %>% class(),
  "dplyr::rowwise" = dplyr::rowwise(iris) %>% class(),
  "dplyr::select" = dplyr::select(iris) %>% class(),
  "dplyr::select_all" = dplyr::select_all(iris) %>% class(),
  "dplyr::slice" = dplyr::slice(iris) %>% class(),
  "dplyr::summarise" = dplyr::summarise(iris) %>% class(),
  "dplyr::summarise_all" = dplyr::summarise_all(iris, ~.) %>% class(),
  "modelr::add_predictions" = modelr::add_predictions(mtcars, lm(cyl ~ hp, data = mtcars)) %>% class(),
  "modelr::add_residuals" = modelr::add_residuals(mtcars, lm(cyl ~ hp, data = mtcars)) %>% class(),
  "modelr::bootstrap" = modelr::bootstrap(mtcars, 1) %>% class(),
  "modelr::crossv_mc" = modelr::crossv_mc(iris, 1) %>% class(),
  "modelr::data_grid" = modelr::data_grid(mtcars, vs, am) %>% class(),
  "modelr::model_matrix" = modelr::model_matrix(mtcars, am ~ disp) %>% class(),
  "modelr::permute" = modelr::permute(iris, 1) %>% class(),
  "tibble::add_column" = tibble::add_column(iris) %>% class(),
  "tibble::add_row" = tibble::add_row(iris) %>% class(),
  "tidyr::complete" = tidyr::complete(iris) %>% class(),
  "tidyr::drop_na" = tidyr::drop_na(iris) %>% class(),
  "tidyr::expand" = tidyr::expand(iris) %>% class(),
  "tidyr::extract" = tidyr::extract(data.frame(x = c(NA, "a-b", "a-d", "b-c", "d-e")), x, "A") %>% class(),
  "tidyr::fill" = tidyr::fill(iris) %>% class(),
  "tidyr::gather" = tidyr::gather(iris) %>% class(),
  "tidyr::nest" = tidyr::nest(iris) %>% class(),
  "tidyr::replace_na" = tidyr::replace_na(iris) %>% class(),
  "tidyr::separate" = tidyr::separate(data.frame(x = c(NA, "a.b", "a.d", "b.c")), x, c("A", "B")) %>% class(),
  "tidyr::separate_rows" = tidyr::separate_rows(iris) %>% class(),
  "tidyr::spread" = tidyr::spread(data.frame(x = c("a", "b"), y = c(3, 4), z = c(5, 6)), x, y) %>% class(),
  "tidyr::uncount" = tidyr::uncount(data.frame(x = c("a", "b"), n = c(1, 2)), n) %>% class(),
  "tidyr::unite" = tidyr::unite(iris, "z", 1:2) %>% class(),
  "tidyr::unnest" = tidyr::unnest(iris) %>% class()
) %>%
  # One row per probed function: `name` is the label, `value` the class vector.
  enframe() %>%
  # Collapse each class vector to a single label. A result counts as a tibble
  # when "tbl_df" appears anywhere in its class vector (grouped tibbles
  # included); anything else is labelled "data.frame". This is an explicit
  # replacement for picking the alphabetical max() of the class names, which
  # gave the same labels only because "tbl_df" happens to sort last.
  mutate(
    value = map_chr(
      value,
      function(cls) if ("tbl_df" %in% cls) "tbl_df" else "data.frame"
    )
  ) %>%
  # Back to a base data.frame -- presumably so every row is printed.
  data.frame()
#>                       name      value
#> 1            dplyr::filter data.frame
#> 2        dplyr::filter_all data.frame
#> 3          dplyr::group_by     tbl_df
#> 4      dplyr::group_by_all     tbl_df
#> 5        dplyr::group_keys data.frame
#> 6      dplyr::group_modify data.frame
#> 7            dplyr::mutate data.frame
#> 8        dplyr::mutate_all data.frame
#> 9           dplyr::rowwise     tbl_df
#> 10           dplyr::select data.frame
#> 11       dplyr::select_all data.frame
#> 12            dplyr::slice data.frame
#> 13        dplyr::summarise data.frame
#> 14    dplyr::summarise_all data.frame
#> 15 modelr::add_predictions data.frame
#> 16   modelr::add_residuals data.frame
#> 17       modelr::bootstrap     tbl_df
#> 18       modelr::crossv_mc     tbl_df
#> 19       modelr::data_grid     tbl_df
#> 20    modelr::model_matrix     tbl_df
#> 21         modelr::permute     tbl_df
#> 22      tibble::add_column data.frame
#> 23         tibble::add_row data.frame
#> 24         tidyr::complete data.frame
#> 25          tidyr::drop_na data.frame
#> 26           tidyr::expand     tbl_df
#> 27          tidyr::extract data.frame
#> 28             tidyr::fill data.frame
#> 29           tidyr::gather data.frame
#> 30             tidyr::nest     tbl_df
#> 31       tidyr::replace_na data.frame
#> 32         tidyr::separate data.frame
#> 33    tidyr::separate_rows     tbl_df
#> 34           tidyr::spread data.frame
#> 35          tidyr::uncount data.frame
#> 36            tidyr::unite data.frame
#> 37           tidyr::unnest     tbl_df

候选函数是通过查找第一个参数名为 ".data"、".tbl" 或 "data" 的 tidyverse 函数来确定的。

# Build the table of candidate verbs: functions that collidr::CRANdf lists for
# a tidyverse package and whose first formal argument is named ".data",
# ".tbl" or "data".
df <- collidr::CRANdf %>%
  # Keep only rows belonging to tidyverse packages.
  filter(package_names %in% tidyverse::tidyverse_packages()) %>%
  # Look every function up in its namespace; a failed lookup yields NA.
  mutate(
    f = map2(
      function_names,
      package_names,
      possibly(getFromNamespace, otherwise = NA)
    )
  ) %>%
  # Discard anything that is not a function, then primitives, before the
  # formals are inspected.
  filter(map_lgl(f, is_function)) %>%
  filter(!map_lgl(f, rlang::is_primitive)) %>%
  # Name of the first formal argument (kept as a list column).
  mutate(
    first_arg = map(f, function(fn) first(names(rlang::fn_fmls(fn))))
  ) %>%
  filter(first_arg %in% c(".data", ".tbl", "data")) %>%
  select(package_names, function_names, first_arg)
df
#>    package_names       function_names first_arg
#> 1            cli                 tree      data
#> 2         dbplyr     arrange.tbl_lazy     .data
#> 3         dbplyr           do.tbl_sql     .data
#> 4         dbplyr         window_order     .data
#> 5          dplyr              arrange     .data
#> 6          dplyr          arrange_all      .tbl
#> 7          dplyr             distinct     .data
#> 8          dplyr         distinct_all      .tbl
#> 9          dplyr                   do     .data
#> 10         dplyr               filter     .data
#> 11         dplyr           filter_all      .tbl
#> 12         dplyr             group_by     .data
#> 13         dplyr         group_by_all      .tbl
#> 14         dplyr           group_keys      .tbl
#> 15         dplyr            group_map     .data
#> 16         dplyr           group_rows     .data
#> 17         dplyr           group_trim      .tbl
#> 18         dplyr               mutate     .data
#> 19         dplyr           mutate_all      .tbl
#> 20         dplyr                 pull     .data
#> 21         dplyr              rowwise      data
#> 22         dplyr               select     .data
#> 23         dplyr           select_all      .tbl
#> 24         dplyr                slice     .data
#> 25         dplyr            summarise     .data
#> 26         dplyr        summarise_all      .tbl
#> 27       ggplot2               ggplot      data
#> 28        modelr      add_predictions      data
#> 29        modelr        add_residuals      data
#> 30        modelr            bootstrap      data
#> 31        modelr            crossv_mc      data
#> 32        modelr            data_grid      data
#> 33        modelr             fit_with      data
#> 34        modelr         model_matrix      data
#> 35        modelr              permute      data
#> 36        modelr             resample      data
#> 37        modelr   resample_bootstrap      data
#> 38        modelr   resample_partition      data
#> 39        modelr resample_permutation      data
#> 40         rlang         as_data_mask      data
#> 41        tibble           add_column     .data
#> 42        tibble              add_row     .data
#> 43         tidyr             complete      data
#> 44         tidyr              drop_na      data
#> 45         tidyr               expand      data
#> 46         tidyr              extract      data
#> 47         tidyr                 fill      data
#> 48         tidyr               gather      data
#> 49         tidyr                 nest     .data
#> 50         tidyr           replace_na      data
#> 51         tidyr             separate      data
#> 52         tidyr        separate_rows      data
#> 53         tidyr               spread      data
#> 54         tidyr              uncount      data
#> 55         tidyr                unite      data
#> 56         tidyr               unnest      data

最新更新