背景
我有一个简单的脚本,它遍历目录中可用的CSV文件,并分别使用read_csv
和map_dfr
函数将它们读取到一个数据帧中。脚本可能会遇到缺少列的文件。在这种情况下,read_csv
将生成一个警告,因为我通过cols_only
指定列名,我希望保留它。
期望的结果
我希望运行此脚本时能够选择是否显示警告。我试图通过使用withCallingHandlers
来捕获此输出,然后,根据函数参数的值,显示警告或不处理它并继续。
问题
即使show_warnings
是TRUE
,也不会显示警告。似乎没有对withCallingHandlers的warning
元素求值。
示例
# Notes
# Conditionally import files and glide over files with missing columns
# Settings ----------------------------------------------------------------
# Libs
library("tidyverse")
library("fs")
# Constants
pth_tmp_files <- path_temp("temp_csvs")
dir_create(pth_tmp_files)

# Files -------------------------------------------------------------------
# Keep writing intact copies of mtcars until the directory holds 11 files
repeat {
  if (length(dir_ls(pth_tmp_files)) >= 11) {
    break
  }
  write_csv(
    x = mtcars,
    file = file_temp(
      pattern = "sample_csv_",
      tmp_dir = pth_tmp_files,
      ext = "csv"
    )
  )
}

# Add one 'damaged' file: the same data with the cyl column dropped
write_csv(
  x = mtcars[, names(mtcars) != "cyl"],
  file = file_temp(
    pattern = "broken_sample_csv_",
    tmp_dir = pth_tmp_files,
    ext = "csv"
  )
)
# Importer ----------------------------------------------------------------
# Read every CSV in `files_list`, keeping only the `cyl` and `am` columns, and
# row-bind the results into one tibble with an `origin_file` column.
# `show_warnings` is intended to switch read_csv() warnings on or off.
# NOTE(review): this is the version that does not work as intended -- no
# warning is shown even when show_warnings = TRUE.
append_files <- function(files_list, show_warnings) {
# NOTE(review): the default `show_warnings = show_warnings` refers to the
# parameter itself rather than to the outer function's argument.
csv_reader <- function(import_file, show_warnings = show_warnings) {
withCallingHandlers(
# NOTE(review): suppressWarnings() wraps read_csv() inside
# withCallingHandlers(), so the warning is muffled before the `warning`
# handler below ever gets to see it.
suppressWarnings(expr = {
read_csv(
file = import_file,
col_types = cols_only(cyl = col_integer(),
am = col_integer())
)
}),
# Intended to re-emit the warning only when the caller asked for it.
warning = function(w) {
if (show_warnings) {
warning(w, immediate. = TRUE)
}
}
)
}
# Import each file; `.id` adds an `origin_file` column identifying the source.
# Only `import_file` is passed here, so `csv_reader` uses its own default
# for `show_warnings`.
imported_files <- map_dfr(.x = files_list,
.f = ~ csv_reader(import_file = .x),
.id = "origin_file")
# Reduce `origin_file` to the bare file name (no directory, no extension)
# and sort the rows by it in descending order.
mutate(imported_files, origin_file = path_ext_remove(path_file(origin_file))) %>%
arrange(desc(origin_file))
}
# Tests -------------------------------------------------------------------
# Expected: returns a data.frame (tibble) object and emits no warnings
append_files(dir_ls(pth_tmp_files), show_warnings = FALSE)
# Observed: correct, behaves as expected
# Expected: shows the warnings and returns an identical object
append_files(dir_ls(pth_tmp_files), show_warnings = TRUE)
# Observed: the object is returned but no warnings are shown (the problem)
备注
- 我最感兴趣的是正确使用
withCallingHandlers
和/或invokeRestart
函数。我不想要使用tryCatch
的解决方案,因为我最感兴趣的是学习其他错误处理方法
问题在于你在expr =
参数中使用了suppressWarnings
,因此withCallingHandlers
包装器收不到任何需要处理的警告(实际上suppressWarnings
内部已经使用了withCallingHandlers
,因此它还多加了一层冗余)。
关键在于:当发出了警告消息,并且用户要求show_warnings = FALSE
时,调用名为"muffleWarning"的重启(restart)来抑制该警告。
顺便说一句,您需要在内部函数中更改show_warnings
参数的名称,或者将外部函数的show_warnings
参数显式传递给map
调用,否则R无法确定它正在处理哪个promise对象。
# Read every CSV in `files_list`, keeping only the `cyl` and `am` columns, and
# row-bind the results into a single tibble.
#
# Args:
#   files_list:    Vector of CSV file paths. map_dfr(.id = ) records which
#                  element each row came from in the `origin_file` column.
#   show_warnings: TRUE to let read_csv() warnings (e.g. a requested column
#                  missing from a file) reach the user, FALSE to muffle them.
#
# Returns:
#   A tibble with `origin_file` reduced to the bare file name (no directory,
#   no extension), sorted by `origin_file` in descending order.
append_files <- function(files_list, show_warnings) {
  # `show_warnings` deliberately has no default here: a default written as
  # `show_warnings = show_warnings` refers to itself and fails with
  # "promise already under evaluation" as soon as it is forced. The caller
  # below always passes the outer value explicitly instead.
  csv_reader <- function(import_file, show_warnings) {
    withCallingHandlers(
      expr = read_csv(
        file = import_file,
        col_types = cols_only(cyl = col_integer(), am = col_integer())
      ),
      warning = function(w) {
        # Returning normally lets the warning carry on to the default
        # handler (so it is displayed); invoking the "muffleWarning"
        # restart silences it without interrupting read_csv().
        if (!show_warnings) {
          tryInvokeRestart("muffleWarning")
        }
      }
    )
  }

  imported_files <- map_dfr(
    .x = files_list,
    .f = ~ csv_reader(import_file = .x, show_warnings = show_warnings),
    .id = "origin_file"
  )

  mutate(
    imported_files,
    origin_file = path_ext_remove(path_file(origin_file))
  ) %>%
    arrange(desc(origin_file))
}
所以现在我们有了:
# show_warnings = FALSE: the tibble is printed and no warning follows it
append_files(dir_ls(pth_tmp_files), show_warnings = FALSE)
#> # A tibble: 384 x 3
#> origin_file am cyl
#> <chr> <int> <int>
#> 1 sample_csv_1950b846938 1 6
#> 2 sample_csv_1950b846938 1 6
#> 3 sample_csv_1950b846938 1 4
#> 4 sample_csv_1950b846938 0 6
#> 5 sample_csv_1950b846938 0 8
#> 6 sample_csv_1950b846938 0 6
#> 7 sample_csv_1950b846938 0 8
#> 8 sample_csv_1950b846938 0 4
#> 9 sample_csv_1950b846938 0 4
#> 10 sample_csv_1950b846938 0 6
#> # ... with 374 more rows
和
# show_warnings = TRUE: same tibble, followed by the read_csv() warning
append_files(dir_ls(pth_tmp_files), show_warnings = TRUE)
#> # A tibble: 384 x 3
#> origin_file am cyl
#> <chr> <int> <int>
#> 1 sample_csv_1950b846938 1 6
#> 2 sample_csv_1950b846938 1 6
#> 3 sample_csv_1950b846938 1 4
#> 4 sample_csv_1950b846938 0 6
#> 5 sample_csv_1950b846938 0 8
#> 6 sample_csv_1950b846938 0 6
#> 7 sample_csv_1950b846938 0 8
#> 8 sample_csv_1950b846938 0 4
#> 9 sample_csv_1950b846938 0 4
#> 10 sample_csv_1950b846938 0 6
#> # ... with 374 more rows
#> Warning message:
#> The following named parsers don't match the column names: cyl