r语言 - For循环遍历日期以下载表格并将它们合并到一个函数中



我的代码看起来像这样:


### Loading needed packages 
install.packages(c("tidyverse", "httr2"))
library(tidyverse)
library(httr2)
### Function which I use
get_earnings <- function(from_date, to_date) {
str_c(
"https://api.benzinga.com/api/v2.1/calendar/earnings?token=1c2735820e984715bc4081264135cb90&parameters[date_from]=",
from_date,
"&parameters[date_to]=",
to_date,
"&parameters[tickers]=&pagesize=1000"
) %>%
request() %>%
req_headers(accept = "application/json") %>%
req_perform() %>%
resp_body_json(simplifyVector = TRUE) %>%
pluck("earnings") %>%
as_tibble() %>%
type_convert()
}

### Use function for one month / the function does not work for more than 10 days
### -> splitting the month in three parts 
df1<- get_earnings(from_date = "2022-12-21", to_date = "2022-12-31")
df2<- get_earnings(from_date = "2022-12-11", to_date = "2022-12-20")
df2<- get_earnings(from_date = "2022-11-01", to_date = "2022-12-10")
### I need more than one year with this function 
### I could do it by copy and pasting -> huge chance of error
### So i want to write a function which does it on anually base 
### Define function
extract_data_year <- function(year) {
year <- as.character(year)
df_whole <- data.frame()
for ( i in 1:9) {

# Months 1 - 9
if ( i <= 9) {
i_1 <- paste0(0,i)

# Month Part I
from_date = paste0(year,"-",i_1,"-","01") 
to_date = paste0(year,"-",i_1,"-","10") 
df_1 <-get_earnings(from_date = from_date, to_date = to_date)

# Month Part II
from_date = paste0(year,"-",i_1,"-","11") 
to_date = paste0(year,"-",i_1,"-","20") 
df_2 <-get_earnings(from_date = from_date, to_date = to_date)

# Month Part III
from_date = paste0(year,"-",i_1,"-","21") 
to_date = paste0(year,"-",i_1,"-","28") 
df_3 <-get_earnings(from_date = from_date, to_date = to_date)


df <- rbind(df_1,df_2,df_3)
}
else if(i > 9) { # Months 10-12
# i defined as in for loop

# Month Part I
from_date = paste0(year,"-",i,"-","01") 
to_date = paste0(year,"-",i,"-","10") 
df_1 <-get_earnings(from_date = from_date, to_date = to_date)

# Month Part II
from_date = paste0(year,"-",i,"-","11") 
to_date = paste0(year,"-",i,"-","20") 
df_2 <-get_earnings(from_date = from_date, to_date = to_date)

# Month Part III
from_date = paste0(year,"-",i,"-","21") 
to_date = paste0(year,"-",i,"-","28") 
df_3 <-get_earnings(from_date = from_date, to_date = to_date)


df <- rbind(df_1,df_2,df_3)

}
df_whole <- rbind(df,df_whole)
}
return(df_whole)
}
### Works for Months 1-9 but doesnt work for 10-12
df1 <- extract_data_year(2022)

我的问题:

为什么不下载月10-12?

如您所见,由于二月的关系,最大日期是28。如果函数在2月30日或31日得到输入,它将无法工作。有没有可能把每个月的月底改成28、30、31号?

谢谢

结合下载

适应每个月的正确结束

函数部分不起作用

您可以考虑以下方法:

library(tidyverse)
library(httr2)
get_earnings <- function(from_date, to_date) {
str_c(
"https://api.benzinga.com/api/v2.1/calendar/earnings?token=1c2735820e984715bc4081264135cb90&parameters[date_from]=",
from_date,
"&parameters[date_to]=",
to_date,
"&parameters[tickers]=&pagesize=1000"
) %>%
request() %>%
req_headers(accept = "application/json") %>%
req_perform() %>%
resp_body_json(simplifyVector = TRUE) %>%
pluck("earnings") %>%
as_tibble() %>%
type_convert()
}
date_Init <- as.Date("2022-01-01")
date_End <- as.Date("2023-01-01")
month_Vec <- seq(from = date_Init, to = date_End, by = "months")
ten_Days_Chunk <- seq(from = month_Vec[1], to = month_Vec[2] - 1, length.out = 3)
for(i in 2 : 12)
{
ten_Days_Chunk <- c(ten_Days_Chunk, seq(from = month_Vec[i], to = month_Vec[i + 1] - 1, length.out = 3))
}
list_DF <- list()
for(i in 1 : (length(ten_Days_Chunk) - 1))
{
print(i)
list_DF[[i]] <- get_earnings(from_date = ten_Days_Chunk[i], to_date = ten_Days_Chunk[i + 1])
}
list_DF[[35]]
# A tibble: 100 x 25
currency date       date_con~1   eps eps_est eps_p~2 eps_s~3 eps_s~4 eps_t~5 excha~6 id    impor~7 name  notes period perio~8 revenue reven~9
<chr>    <date>          <int> <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <chr>   <chr>   <chr>   <int> <chr> <chr> <chr>    <int>   <dbl>   <dbl>
1 USD      2022-12-30          1  0         NA   NA         NA      NA GAAP    NASDAQ  628b~       0 OKYO~ FXC   H1        2022  0           NA
2 USD      2022-12-30          1 -0.3       NA   -0.97      NA      NA GAAP    NASDAQ  60a3~       0 MEDI~ FXC   H1        2022  2.57e7      NA
3 USD      2022-12-30          1 -0.2       NA   NA         NA      NA GAAP    NYSE    611c~       0 Onio~ NA    H1        2022  1.27e8      NA
4 USD      2022-12-30          1 -0.75      NA   NA         NA      NA GAAP    NASDAQ  6093~       0 Blue~ NA    H1        2022  1.99e6      NA
5 USD      2022-12-30          1 -1.73      NA   -0.9       NA      NA GAAP    NASDAQ  6193~       0 Seco~ Earn~ H1        2022  1.74e8      NA
6 USD      2022-12-30          1 -0.28      NA   -0.3       NA      NA Adj     NASDAQ  61b9~       0 Uxin  Earn~ Q2        2023  8.70e7      NA
7 USD      2022-12-29          1 -0.22      NA   NA         NA      NA Adj     NASDAQ  6127~       0 Miss~ NA    H1        2022  3.98e8      NA
8 USD      2022-12-29          1  0         NA   NA         NA      NA GAAP    NASDAQ  61ce~       0 Merc~ NA    H1        2022  7.83e5      NA
9 USD      2022-12-29          1 -0.01      NA   -0.01      NA      NA GAAP    OTC     6285~       0 Timb~ NA    Q4        2022  0           NA
10 USD      2022-12-29          1 -0.01      NA    0.16      NA      NA GAAP    AMEX    61c1~       0 Barn~ NA    Q4        2022  8.38e6      NA
# ... with 90 more rows, 7 more variables: revenue_prior <dbl>, revenue_surprise <dbl>, revenue_surprise_percent <dbl>, revenue_type <chr>,
#   ticker <chr>, time <time>, updated <int>, and abbreviated variable names 1: date_confirmed, 2: eps_prior, 3: eps_surprise,
#   4: eps_surprise_percent, 5: eps_type, 6: exchange, 7: importance, 8: period_year, 9: revenue_est
# i Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names

最新更新