我想用 RStudio 通过 Guardian API 获取所有关于基因组编辑的文章。我尝试了一些代码,但出现了几个错误。
# First request: parse one page of search results so the reply's page count
# can be read.
# NOTE(review): as written, the URL literal continues across a line break (so
# the string contains a newline) and has a doubled `&&` before `api-key` --
# presumably related to the HTTP 400 reported below; confirm against the API.
data <- fromJSON("https://content.guardianapis.com/search?q=genome%20editing&show-
blocks=body&&api-key=myapikey&page-size=200")
# `response$pages` from the reply is used as the number of requests to make.
number_calls <- data$response$pages
# One request URL per page, built by appending the page number to a fixed
# prefix.
# NOTE(review): there is no `&` between `size=200` and `page=`, so the page
# number is glued onto the page-size value instead of being its own parameter;
# this literal also spans line breaks and therefore contains newlines.
api_calls <- paste("https://content.guardianapis.com/search?q=genome%20editing&show-
blocks=body&api-key=myapikey&page-
size=200page=", as.character(c(1:number_calls)), sep = "")
# Parses the JSON at `x` and collects the `bodyTextSummary` field of each
# entry of `response$results$blocks$body` into `textContent`.
# NOTE(review): the last expression is the `for` loop, so the function returns
# NULL (invisibly); `textContent` is built but never returned.
DatafromCall <- function(x) {
data <- fromJSON(x)
body <- data$response$results$blocks$body
# Starts empty and grows by one element per entry.
textContent <- vector()
for (i in seq_along(body)) {
# NA when the entry has no `bodyTextSummary`; otherwise its first element
# (`ifelse()` with a length-one test yields a length-one result).
textContent[i] <- ifelse(is.null(body[[i]]$bodyTextSummary), NA,
body[[i]]$bodyTextSummary)
}
}
# Intended to call the page fetcher once per page and stack the results.
# NOTE(review): `DataframeFromCall` is not defined in the code shown (the
# function defined above is `DatafromCall`), and `rbind()` is given a single
# argument, so `all.data` is overwritten on every iteration rather than
# accumulated.
for (i in 1:number_calls) {
all.data <- rbind(DataframeFromCall(api_calls[i]))
}
运行后出现错误:Error in open.connection(con, "rb") : HTTP error 400
# Second attempt, using the guardianapi package instead of hand-built URLs.
# NOTE(review): the query is passed already percent-encoded (`%20`); the HTTP
# 403 reported below presumably means no valid API key was supplied -- verify
# how guardianapi expects the key to be registered.
library(guardianapi)
Genomeediting <- gu_content(query= "genome%20editing")
这次出现的错误是:Error in open.connection(con, "rb") : HTTP error 403
有什么建议吗?
根据我用 R 访问 API 的经验,这类错误通常意味着请求地址中有拼写错误。我没有用过卫报的 API,所以无法指出你的错误具体在哪里。
我用下面的代码解决了这个问题:
library(jsonlite)
library(tidyverse)
library(httr)
library(stringr)
# APIs
# Base endpoint and query fragments for the Guardian Content API search.
# The API key is read from the GUARDIAN_API_KEY environment variable when it
# is set; the previously hard-coded key remains only as a fallback so existing
# runs keep working.
# NOTE(review): a key committed to source is effectively public -- rotate it
# and drop the fallback once the environment variable is in place.
api_key <- Sys.getenv(
  "GUARDIAN_API_KEY",
  unset = "c3c4b8ff-bf4b-473f-832f-0440850a95be"
)
# Every URL piece is a single-line literal: a literal that wraps onto the next
# source line embeds a newline character and corrupts the request URL.
url1 <- paste0("https://content.guardianapis.com/search?api-key=", api_key)
query1 <- "&q=genome%20editing&show-blocks=body&page-size=1&page=1"
query2 <- "&q=genome%20editing&show-blocks=body&page-size=100&page="

### How many API calls
# The probe request uses page-size=1, so the reported page count is presumably
# the number of matching articles; dividing by 100 then gives the number of
# 100-article pages to download.
first_response <- GET(paste0(url1, query1))
# Fail with the HTTP status instead of continuing with an error payload.
stop_for_status(first_response)
firstCall <- content(first_response)
number_calls <- ceiling(firstCall$response$pages / 100)
# seq_len() gives no pages for a zero count (1:0 would give c(1, 0)), and
# sprintf() then returns character(0) where paste0() would still emit one URL.
api_calls <- sprintf("%s%s%d", url1, query2, seq_len(number_calls))
# Fetch one page of search results and return the article body summaries.
#
# x: URL of a single Guardian Content API search request (anything
#    `fromJSON()` accepts).
# Returns a data frame with one character column, `textContent`, holding one
# row per entry of `response$results$blocks$body`. Entries without a
# `bodyTextSummary` yield NA; when an entry carries several summaries only the
# first is kept.
DataFrameFromCall <- function(x) {
  data <- fromJSON(x)
  body <- data$response$results$blocks$body
  # vapply() pins the result type to character, so an empty or all-NA page
  # still produces a character column that row-binds cleanly with other pages.
  textContent <- vapply(
    body,
    function(block) {
      summary_text <- block$bodyTextSummary
      if (length(summary_text) == 0L) {
        NA_character_
      } else {
        as.character(summary_text[[1L]])
      }
    },
    character(1),
    USE.NAMES = FALSE
  )
  # Explicit stringsAsFactors keeps the column character on R < 4.0 as well.
  data.frame(textContent, stringsAsFactors = FALSE)
}
# Download every page and stack the per-page data frames into `all.data`.
# The page fetcher defined in this script is `DataFrameFromCall`; each element
# of `api_calls` is one request URL.
page_frames <- lapply(api_calls, DataFrameFromCall)
# The empty seed frame keeps `all.data` a one-column character data frame even
# when there are no pages to fetch; binding once avoids re-copying the
# accumulated rows on every iteration.
all.data <- do.call(
  rbind,
  c(
    list(data.frame(textContent = character(), stringsAsFactors = FALSE)),
    page_frames
  )
)
tibbleData <- as_tibble(all.data)
# Space-separated text file with a header row and row names.
write.table(tibbleData, "GuardianGenomeEditing.txt", append = FALSE,
  sep = " ", dec = ".", row.names = TRUE, col.names = TRUE)