使用 R 语言抓取需要登录的网页



我想抓取一些历史报纸的网页。然而,我必须先通过图书馆的网站登录,而我无法通过登录页面。我正在使用 rvest,并已将用户名和密码替换为 ****。提前感谢。下面是我使用的代码:

# Submit the login form of the LSE authentication page with rvest, then
# print the text of every <p> element in the response to the submission.
library(rvest)

login_url <- "https://auth.lse.ac.uk/auth/login?service=https%3A%2F%2Fgate.library.lse.ac.uk%2Fidp%2FAuthn%2FRemoteUser"

# Open a session on the login page and take the first form found on it.
login_session <- html_session(login_url)
login_form <- html_form(login_session)[[1]]

# Fill in the credentials (masked here) and submit the form in that session.
login_form <- set_values(login_form, username = "****", password = "****")
after_login <- submit_form(login_session, login_form)

# Text content of all paragraph nodes in the page returned by the submission.
html_text(html_nodes(after_login, "p"))

您可以考虑以下方法:

# Log in through the LSE authentication page by driving a Firefox browser
# that runs inside a Selenium Docker container, using RSelenium.
library(RSelenium)

# Start a standalone Firefox Selenium server in the background, mapping the
# container's port 4444 to local port 4445. system() is used rather than
# shell() because shell() is only available on Windows builds of R.
system("docker run -d -p 4445:4444 selenium/standalone-firefox")

# `docker run -d` returns immediately; give the Selenium server a moment to
# start listening before connecting (increase the delay on slow machines).
Sys.sleep(5)

remDr <- remoteDriver(
  remoteServerAddr = "localhost",
  port = 4445L,
  browserName = "firefox"
)
remDr$open()
remDr$navigate("https://auth.lse.ac.uk/auth/login?service=https%3A%2F%2Fgate.library.lse.ac.uk%2Fidp%2FAuthn%2FRemoteUser")

# Type the user name; the screenshot lets you check the page state visually.
web_Obj_Username <- remDr$findElement("id", "username")
web_Obj_Username$sendKeysToElement(list("my_user_name"))
remDr$screenshot(display = TRUE, useViewer = TRUE)

# Type the password (kept in its own variable instead of reusing the
# user-name variable).
web_Obj_Password <- remDr$findElement("id", "password")
web_Obj_Password$sendKeysToElement(list("my_password"))
remDr$screenshot(display = TRUE, useViewer = TRUE)

# Click the first element with class "button" to submit the login form.
web_Obj_Button <- remDr$findElement("class name", "button")
web_Obj_Button$clickElement()
remDr$screenshot(display = TRUE, useViewer = TRUE)

您还可以考虑以下方法:

# Log in through the LSE authentication page by automating Internet Explorer
# over COM with RDCOMClient (Windows only).
library(RDCOMClient)

url <- "https://auth.lse.ac.uk/auth/login?service=https%3A%2F%2Fgate.library.lse.ac.uk%2Fidp%2FAuthn%2FRemoteUser"

IEApp <- COMCreate("InternetExplorer.Application")
IEApp[["Visible"]] <- TRUE
IEApp$Navigate(url)

# Navigate() returns before the page has loaded; wait until the browser is
# no longer busy so the form elements exist when they are looked up.
while (isTRUE(IEApp[["Busy"]])) {
  Sys.sleep(0.5)
}

doc <- IEApp$Document()

# Fill in the user name field.
web_Obj_Username <- doc$getElementByID("username")
web_Obj_Username$Click()
web_Obj_Username$Focus()
web_Obj_Username[["Value"]] <- "my_user_name"

# Fill in the password field (kept in its own variable instead of reusing
# the user-name variable).
web_Obj_Password <- doc$getElementByID("password")
web_Obj_Password$Click()
web_Obj_Password$Focus()
web_Obj_Password[["Value"]] <- "my_password"

# Build a synthetic click event and dispatch it on the first element with
# class "button" to submit the login form.
clickEvent <- doc$createEvent("MouseEvent")
clickEvent$initEvent("click", TRUE, FALSE)
web_Obj_Login <- doc$getElementsByClassName("button")
web_Obj_Login$item(0)$dispatchEvent(clickEvent)

最新更新