R download.file()返回403禁止错误



下面的代码以前可以正常运行,但我要下载文件的网站新增了用户验证步骤,现在 download.file() 会返回 403 错误。我尝试了一些方法,包括让代码在循环的每一步中休眠,但到目前为止都没有效果。有什么建议吗?

library(tidyverse)
library(rvest)

# Fetch the document listing page once; names and links are scraped from it.
page <- read_html("https://burnsville.civicweb.net/filepro/documents/25657/")

# Select the document anchors a single time so that the name and link columns
# are guaranteed to come from the same nodes, in the same order.
doc_nodes <- html_elements(page, ".document-link")

# One row per listed document:
#   names1 - the link text, cleaned up for use as a file name stem
#   links  - the download URL, built by prefixing the site host to the href
df <- tibble(
  names1 = doc_nodes %>%
    html_text2() %>%
    # Strip carriage returns ("\r"). The previous pattern "r" removed every
    # letter r from the names instead.
    str_remove_all("\r") %>%
    str_squish(),
  links = doc_nodes %>%
    html_attr("href") %>%
    # NOTE(review): assumes every href is site-relative (starts with "/");
    # an absolute href would end up with the host duplicated.
    paste0("https://burnsville.civicweb.net", .)
)
# Download every listed PDF, one request per document.
#
# Each link in df$links is saved as "<names1>.pdf" in the working directory.
# mode = "wb" keeps the PDF bytes intact on Windows. A failed download is
# reported and skipped so that one bad link (for example an HTTP 403) does not
# stop the remaining files.
#
# This replaces three earlier problems: an unused `destfile` variable, a
# map() over the tibble's columns that passed the whole link vector to
# download.file(), and a loop that passed the base function `url` instead of
# the loop variable while silently discarding the resulting errors.
for (i in seq_along(df$links)) {
  pdf_url <- df$links[[i]]
  pdf_path <- paste0(df$names1[[i]], ".pdf")

  tryCatch(
    download.file(pdf_url, destfile = pdf_path, mode = "wb", quiet = TRUE),
    error = function(e) {
      warning(
        "Failed to download ", pdf_url, ": ", conditionMessage(e),
        call. = FALSE,
        immediate. = TRUE
      )
    }
  )
}

提前感谢!

library(tidyverse)
library(rvest)
# Fetch the document listing page once; all columns below are scraped from it.
page <- read_html("https://burnsville.civicweb.net/filepro/documents/25657/")

# Select the document anchors a single time so that names and links are
# guaranteed to come from the same nodes, in the same order.
doc_nodes <- html_elements(page, ".document-link")

# One row per listed document:
#   names - the cleaned link text
#   links - the download URL, built by prefixing the site host to the href
#   file  - the last path segment of the link, used as the file name stem
docs <- tibble(
  names = doc_nodes %>%
    html_text2() %>%
    # Strip carriage returns ("\r"). The previous pattern "r" removed every
    # letter r from the names instead.
    str_remove_all("\r") %>%
    str_squish(),
  links = doc_nodes %>%
    html_attr("href") %>%
    # NOTE(review): assumes every href is site-relative (starts with "/").
    paste0("https://burnsville.civicweb.net", .),
  # Everything after the final "/"; empty if a link ends with a slash.
  file = str_extract(links, "[^/]*$")
)
# Download each document to "<file>.pdf" in the working directory.
# walk2() is used because only the side effect (the written file) matters.
# mode = "wb" keeps the PDF bytes intact on Windows. A failed download is
# reported and skipped so that one rejected request (for example an HTTP 403)
# does not abort the remaining files.
walk2(docs$links, docs$file, function(link, stem) {
  tryCatch(
    download.file(url = link, destfile = str_c(stem, ".pdf"), mode = "wb"),
    error = function(e) {
      warning(
        "Failed to download ", link, ": ", conditionMessage(e),
        call. = FALSE,
        immediate. = TRUE
      )
    }
  )
})

最新更新