我的代码看起来像这样:
### Loading needed packages
install.packages(c("tidyverse", "httr2"))
library(tidyverse)
library(httr2)
### Function which I use
get_earnings <- function(from_date, to_date) {
str_c(
"https://api.benzinga.com/api/v2.1/calendar/earnings?token=1c2735820e984715bc4081264135cb90¶meters[date_from]=",
from_date,
"¶meters[date_to]=",
to_date,
"¶meters[tickers]=&pagesize=1000"
) %>%
request() %>%
req_headers(accept = "application/json") %>%
req_perform() %>%
resp_body_json(simplifyVector = TRUE) %>%
pluck("earnings") %>%
as_tibble() %>%
type_convert()
}
### Use function for one month / the function does not work for more than 10 days
### -> splitting the month in three parts
df1<- get_earnings(from_date = "2022-12-21", to_date = "2022-12-31")
df2<- get_earnings(from_date = "2022-12-11", to_date = "2022-12-20")
df2<- get_earnings(from_date = "2022-11-01", to_date = "2022-12-10")
### I need more than one year with this function
### I could do it by copy and pasting -> huge chance of error
### So i want to write a function which does it on anually base
### Define function
extract_data_year <- function(year) {
year <- as.character(year)
df_whole <- data.frame()
for ( i in 1:9) {
# Months 1 - 9
if ( i <= 9) {
i_1 <- paste0(0,i)
# Month Part I
from_date = paste0(year,"-",i_1,"-","01")
to_date = paste0(year,"-",i_1,"-","10")
df_1 <-get_earnings(from_date = from_date, to_date = to_date)
# Month Part II
from_date = paste0(year,"-",i_1,"-","11")
to_date = paste0(year,"-",i_1,"-","20")
df_2 <-get_earnings(from_date = from_date, to_date = to_date)
# Month Part III
from_date = paste0(year,"-",i_1,"-","21")
to_date = paste0(year,"-",i_1,"-","28")
df_3 <-get_earnings(from_date = from_date, to_date = to_date)
df <- rbind(df_1,df_2,df_3)
}
else if(i > 9) { # Months 10-12
# i defined as in for loop
# Month Part I
from_date = paste0(year,"-",i,"-","01")
to_date = paste0(year,"-",i,"-","10")
df_1 <-get_earnings(from_date = from_date, to_date = to_date)
# Month Part II
from_date = paste0(year,"-",i,"-","11")
to_date = paste0(year,"-",i,"-","20")
df_2 <-get_earnings(from_date = from_date, to_date = to_date)
# Month Part III
from_date = paste0(year,"-",i,"-","21")
to_date = paste0(year,"-",i,"-","28")
df_3 <-get_earnings(from_date = from_date, to_date = to_date)
df <- rbind(df_1,df_2,df_3)
}
df_whole <- rbind(df,df_whole)
}
return(df_whole)
}
### Works for Months 1-9 but doesnt work for 10-12
df1 <- extract_data_year(2022)
我的问题:
为什么不下载月10-12?
如您所见,由于二月的关系,最大日期是28。如果函数在2月30日或31日得到输入,它将无法工作。有没有可能把每个月的月底改成28、30、31号?
谢谢
结合下载
适应每个月的正确结束
函数部分不起作用
您可以考虑以下方法:
library(tidyverse)
library(httr2)
get_earnings <- function(from_date, to_date) {
str_c(
"https://api.benzinga.com/api/v2.1/calendar/earnings?token=1c2735820e984715bc4081264135cb90¶meters[date_from]=",
from_date,
"¶meters[date_to]=",
to_date,
"¶meters[tickers]=&pagesize=1000"
) %>%
request() %>%
req_headers(accept = "application/json") %>%
req_perform() %>%
resp_body_json(simplifyVector = TRUE) %>%
pluck("earnings") %>%
as_tibble() %>%
type_convert()
}
date_Init <- as.Date("2022-01-01")
date_End <- as.Date("2023-01-01")
month_Vec <- seq(from = date_Init, to = date_End, by = "months")
ten_Days_Chunk <- seq(from = month_Vec[1], to = month_Vec[2] - 1, length.out = 3)
for(i in 2 : 12)
{
ten_Days_Chunk <- c(ten_Days_Chunk, seq(from = month_Vec[i], to = month_Vec[i + 1] - 1, length.out = 3))
}
list_DF <- list()
for(i in 1 : (length(ten_Days_Chunk) - 1))
{
print(i)
list_DF[[i]] <- get_earnings(from_date = ten_Days_Chunk[i], to_date = ten_Days_Chunk[i + 1])
}
list_DF[[35]]
# A tibble: 100 x 25
currency date date_con~1 eps eps_est eps_p~2 eps_s~3 eps_s~4 eps_t~5 excha~6 id impor~7 name notes period perio~8 revenue reven~9
<chr> <date> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <int> <chr> <chr> <chr> <int> <dbl> <dbl>
1 USD 2022-12-30 1 0 NA NA NA NA GAAP NASDAQ 628b~ 0 OKYO~ FXC H1 2022 0 NA
2 USD 2022-12-30 1 -0.3 NA -0.97 NA NA GAAP NASDAQ 60a3~ 0 MEDI~ FXC H1 2022 2.57e7 NA
3 USD 2022-12-30 1 -0.2 NA NA NA NA GAAP NYSE 611c~ 0 Onio~ NA H1 2022 1.27e8 NA
4 USD 2022-12-30 1 -0.75 NA NA NA NA GAAP NASDAQ 6093~ 0 Blue~ NA H1 2022 1.99e6 NA
5 USD 2022-12-30 1 -1.73 NA -0.9 NA NA GAAP NASDAQ 6193~ 0 Seco~ Earn~ H1 2022 1.74e8 NA
6 USD 2022-12-30 1 -0.28 NA -0.3 NA NA Adj NASDAQ 61b9~ 0 Uxin Earn~ Q2 2023 8.70e7 NA
7 USD 2022-12-29 1 -0.22 NA NA NA NA Adj NASDAQ 6127~ 0 Miss~ NA H1 2022 3.98e8 NA
8 USD 2022-12-29 1 0 NA NA NA NA GAAP NASDAQ 61ce~ 0 Merc~ NA H1 2022 7.83e5 NA
9 USD 2022-12-29 1 -0.01 NA -0.01 NA NA GAAP OTC 6285~ 0 Timb~ NA Q4 2022 0 NA
10 USD 2022-12-29 1 -0.01 NA 0.16 NA NA GAAP AMEX 61c1~ 0 Barn~ NA Q4 2022 8.38e6 NA
# ... with 90 more rows, 7 more variables: revenue_prior <dbl>, revenue_surprise <dbl>, revenue_surprise_percent <dbl>, revenue_type <chr>,
# ticker <chr>, time <time>, updated <int>, and abbreviated variable names 1: date_confirmed, 2: eps_prior, 3: eps_surprise,
# 4: eps_surprise_percent, 5: eps_type, 6: exchange, 7: importance, 8: period_year, 9: revenue_est
# i Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names