我正在与Discogs API合作,试图为我的收藏提取社区信息(拥有和想要、最低价格等(。
不幸的是,它的速率限制为每分钟25,我无法找到将该限制应用到当前代码中的方法(见下文(。我可以使用sys.sleep((,但我不确定它在代码中的位置。
communityData <- lapply(as.list(collection$release_id), function(obj){
url <- httr::GET(paste0("https://api.discogs.com/releases/", obj))
url <- rjson::fromJSON(rawToChar(url$content))
data.frame(release_id = obj,
label = url$label[[1]]$name %||% NA,
year = url$year %||% NA,
title = url$title %||% NA,
artist_name = url$artist[[1]]$name %||% NA,
styles = url$styles[[1]] %||% NA,
genre = url$genre[[1]] %||% NA,
average_note = url$community$rating$average %||% NA,
votes = url$community$rating$count %||% NA,
want = url$community$want %||% NA,
have = url$community$have %||% NA,
lowest_price = url$lowest_price %||% NA,
country = url$country %||% NA)
}) %>% do.call(rbind, .) %>%
unique()
如有任何帮助,我们将不胜感激!
在返回值之前直接插入sleep命令应该可以正常工作。这将为您提供查询url、提取信息、睡眠、返回值、重复的模式。阿卡,像这样编辑上面的代码块:
communityData <- lapply(as.list(collection$release_id), function(obj){
url <- httr::GET(paste0("https://api.discogs.com/releases/", obj))
url <- rjson::fromJSON(rawToChar(url$content))
# 1 minute for 25 requests -- ~ 2.4 seconds of sleep between each request
Sys.sleep(2.4)
data.frame(release_id = obj,
label = url$label[[1]]$name %||% NA,
year = url$year %||% NA,
title = url$title %||% NA,
artist_name = url$artist[[1]]$name %||% NA,
styles = url$styles[[1]] %||% NA,
genre = url$genre[[1]] %||% NA,
average_note = url$community$rating$average %||% NA,
votes = url$community$rating$count %||% NA,
want = url$community$want %||% NA,
have = url$community$have %||% NA,
lowest_price = url$lowest_price %||% NA,
country = url$country %||% NA)
}) %>% do.call(rbind, .) %>%
unique()