r-使用purrr::map2()或purrr::imap()提取数据集的名称



我正试图使用purrr::map2()或purrr::imap(),从大量数据集中找出包含给定变量的数据集。从本质上讲,我想遍历数据集列表,只输出包含目标变量的数据集的名称。但当我使用purrr::map()时,得到的数据集名称不是原来的名称,而是".x[[i]]"。如有任何帮助,我将不胜感激。谢谢!

#load packages
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(purrr)
# Build three toy datasets; each one has its own suffixed score/sex columns
df1 <- tibble(
  score_a = seq_len(20),
  sex_a = rep(c("M", "F"), times = 10)
)
df2 <- tibble(
  score_b = seq_len(20),
  sex_b = rep(c("M", "F"), times = 10)
)
df3 <- tibble(
  score_c = seq_len(20),
  sex_c = rep(c("M", "F"), times = 10)
)
# Return the name of a dataset if it contains a given variable.
#
# data:     a data frame / tibble whose column names are inspected.
# contains: a single column name to look for.
#
# Returns the text of the expression supplied as `data` (e.g. "df3") when
# the column is present; otherwise the result is an invisible NULL.
get_dataset_name <- function(data, contains) {
  # Capture the caller's expression for `data` as text. This is the variable
  # name only when the function is called directly with a named object.
  label <- deparse(substitute(data))

  if (contains %in% colnames(data)) {
    label
  }
}
# Test the function on a single dataset: a direct call reports the
# variable name that was passed as `data`
get_dataset_name(data=df3, contains="score_c")
#> [1] "df3"

# Collect all datasets in a list (note: the list elements are unnamed)
data_list <- list(df1, df2, df3)
# Loop over the list of datasets to find the dataset
# that includes the given variable
map(data_list, get_dataset_name, contains="score_c")
#> [[1]]
#> NULL
#>
#> [[2]]
#> NULL
#>
#> [[3]]
#> [1] ".x[[i]]"
# The match is found in the third element, but the reported name is the
# expression the function was called with (".x[[i]]"), not "df3".
# I was hoping to obtain "df3" instead of ".x[[i]]".
# I read that purrr::map2() or purrr::imap() could solve
# the issue but I am not sure how to set it up.
# Any help would be appreciated.

# Attempt with map2() (not working; left commented out):
# map2(.x=data_list,
#      .y=names(data_list),
#      ~get_dataset_name(data=.x, contains="score_c"),
#      nest(.x, name=.y)
# )

# Attempt with imap() (left commented out):
#imap(data_list, get_dataset_name, contains="score_c")

由reprex包(v2.0.0)创建于2022-09-26

尝试返回数据集在列表中的索引,而不是依赖deparse/substitute来获取名称。索引可以是逻辑索引,也可以是数字索引。如果列表是命名列表,则可以用这些名称来索引列表,此时第二个函数iget_dataset_name返回的就是这些名称。

# load packages
suppressPackageStartupMessages({
library(dplyr)
library(purrr)
})
# Build three toy datasets; each one has its own suffixed score/sex columns
df1 <- tibble(
  score_a = seq_len(20),
  sex_a = rep(c("M", "F"), times = 10)
)
df2 <- tibble(
  score_b = seq_len(20),
  sex_b = rep(c("M", "F"), times = 10)
)
df3 <- tibble(
  score_c = seq_len(20),
  sex_c = rep(c("M", "F"), times = 10)
)
# create two functions that return indices to the
# data sets that contain the wanted name
# Logical test: does `data` have the column(s) named in `contains`?
#
# data:     a data frame / tibble whose column names are inspected.
# contains: character vector of column names to look for.
#
# Returns a logical vector with one element per entry of `contains`.
get_dataset_name <- function(data, contains) {
  # Position of each wanted name among the columns; 0 marks "not found"
  hit_pos <- match(contains, colnames(data), nomatch = 0L)
  hit_pos > 0L
}
# Return the list identifier of a dataset that has a wanted column.
#
# Intended as the callback for purrr::imap(), which calls it with each list
# element as `data` and that element's name (or its integer position when
# the list is unnamed) as `col_names`.
#
# data:      a data frame / tibble whose column names are inspected.
# col_names: identifier of `data` within the list (name or position).
# contains:  character vector of column names to look for.
#
# Returns the first element of `col_names` when at least one name in
# `contains` is a column of `data`; otherwise a zero-length vector of the
# same type as `col_names`.
iget_dataset_name <- function(data, col_names, contains) {
  data_var_names <- colnames(data)
  # Collapse the per-name test to a single flag. Indexing `col_names` with
  # positions inside `contains` would yield NA whenever `contains` has more
  # than one element, because `col_names` identifies one dataset only.
  found <- any(contains %in% data_var_names)
  # which(TRUE) is 1 and which(FALSE) is empty, so this selects either the
  # identifier or nothing.
  col_names[which(found)]
}
# Test the logical version on a single dataset
get_dataset_name(data=df3, contains="score_c")
#> [1] TRUE

# Collect all datasets in a list (unnamed at this point)
data_list <- list(df1, df2, df3)
# Loop over the list of datasets to flag the dataset
# that includes the given variable
map(data_list, get_dataset_name, contains="score_c")
#> [[1]]
#> [1] FALSE
#> 
#> [[2]]
#> [1] FALSE
#> 
#> [[3]]
#> [1] TRUE
# With an unnamed list, imap() hands the element's position to the
# function, so the match comes back as the integer index 3
imap(data_list, iget_dataset_name, contains="score_c")
#> [[1]]
#> integer(0)
#> 
#> [[2]]
#> integer(0)
#> 
#> [[3]]
#> [1] 3
# Give the list a names attribute so the names can be returned instead
names(data_list) <- c("df1", "df2", "df3")
# Retest both functions on the named list
map(data_list, get_dataset_name, contains="score_c")
#> $df1
#> [1] FALSE
#> 
#> $df2
#> [1] FALSE
#> 
#> $df3
#> [1] TRUE
# Now imap() passes each element's name, so the match is reported as "df3"
imap(data_list, iget_dataset_name, contains="score_c")
#> $df1
#> character(0)
#> 
#> $df2
#> character(0)
#> 
#> $df3
#> [1] "df3"

创建于2022-09-27,reprex v2.0.2

最新更新