r语言 - 使用ARIMA和虚拟变量进行预测



我想加入一个标记疫情(大流行)开始的虚拟回归变量,并拟合一个带有ARIMA误差项的回归模型。

我的数据集是2014年至2021年多伦多的破门入室(break & enter)案件数据。问题是,受covid-19影响,该序列的趋势在2020年前后发生了转变。

auto.arima给出的是ARIMA(1,0,1)模型,因为它没有考虑covid-19的影响,其预测只是隐含地回归到序列均值。

当我尝试加入一个标记疫情开始的虚拟回归变量并拟合带有ARIMA误差项的回归时,得到以下错误:

In ifelse(time(BEDATA_GROUPEDtsssarima) >= yearmonth("2020-03"),  :
Incompatible methods ("Ops.ts", ">=.vctrs_vctr") for ">="

代码:

# Question script: convert the monthly data to a ts object, build a pandemic
# step dummy, fit auto.arima() with the dummy as an external regressor, and
# attempt a forecast. Kept as posted, since it reproduces the error quoted
# above.
# Create a binary time series that indicates the start of the pandemic
library(fpp3)
library(forecast)
library(zoo)
# Check if timeseries
class(BEDATA_GROUPED)
#Convert timeseries
# Column 2 is converted to a monthly ts (frequency = 12) starting January 2014.
BEDATA_GROUPEDtsssarima <- ts(BEDATA_GROUPED[,2], frequency = 12, start = c(2014, 1))
class(BEDATA_GROUPEDtsssarima)
#Plot
forecast::autoplot(BEDATA_GROUPEDtsssarima)
# Assume that the pandemic began in March 2020
# NOTE(review): this comparison is the one named in the reported error: the
# ts result of time() ("Ops.ts") and the yearmonth value (">=.vctrs_vctr")
# have incompatible `>=` methods.
pandemic_dummy <- ifelse(time(BEDATA_GROUPEDtsssarima) >= yearmonth("2020-03"), 1, 0)
# Use auto.arima() to fit an ARIMA model with the dummy variable as an exogenous variable
beddatamodel <- auto.arima(BEDATA_GROUPEDtsssarima, xreg = pandemic_dummy, ic="aic", trace = TRUE)
# NOTE(review): the next two statements repeat the dummy creation and model
# fit above verbatim.
# Create a binary time series that indicates the start of the pandemic
# In this example, we will assume that the pandemic began in March 2020
pandemic_dummy <- ifelse(time(BEDATA_GROUPEDtsssarima) >= yearmonth("2020-03"), 1, 0)
# Use auto.arima() to fit an ARIMA model with the dummy variable as an exogenous variable
beddatamodel <- auto.arima(BEDATA_GROUPEDtsssarima, xreg = pandemic_dummy, ic="aic", trace = TRUE)
# Create a binary time series for the forecast period that includes the pandemic dummy variable
# NOTE(review): this indexes the time vector with a `:` range between two
# character strings; presumably meant to select 2022-2023 periods, which the
# 2014-2021 series does not contain — confirm intent.
forecast_period <- time(BEDATA_GROUPEDtsssarima)["2022/01/01/":"2023/12/31/"]
pandemic_dummy_forecast <- ifelse(forecast_period >= yearmonth("2020-03"), 1, 0)
# Use the forecast()
# NOTE(review): forecast() is called on the dummy vector, not on beddatamodel;
# presumably forecast(beddatamodel, xreg = pandemic_dummy_forecast) was
# intended — confirm.
forecast(pandemic_dummy_forecast)

数据集:

structure(list(occurrence_yrmn = c("2014-January", "2014-February", 
"2014-March", "2014-April", "2014-May", "2014-June", "2014-July", 
"2014-August", "2014-September", "2014-October", "2014-November", 
"2014-December", "2015-January", "2015-February", "2015-March", 
"2015-April", "2015-May", "2015-June", "2015-July", "2015-August", 
"2015-September", "2015-October", "2015-November", "2015-December", 
"2016-January", "2016-February", "2016-March", "2016-April", 
"2016-May", "2016-June", "2016-July", "2016-August", "2016-September", 
"2016-October", "2016-November", "2016-December", "2017-January", 
"2017-February", "2017-March", "2017-April", "2017-May", "2017-June", 
"2017-July", "2017-August", "2017-September", "2017-October", 
"2017-November", "2017-December", "2018-January", "2018-February", 
"2018-March", "2018-April", "2018-May", "2018-June", "2018-July", 
"2018-August", "2018-September", "2018-October", "2018-November", 
"2018-December", "2019-January", "2019-February", "2019-March", 
"2019-April", "2019-May", "2019-June", "2019-July", "2019-August", 
"2019-September", "2019-October", "2019-November", "2019-December", 
"2020-January", "2020-February", "2020-March", "2020-April", 
"2020-May", "2020-June", "2020-July", "2020-August", "2020-September", 
"2020-October", "2020-November", "2020-December", "2021-January", 
"2021-February", "2021-March", "2021-April", "2021-May", "2021-June", 
"2021-July", "2021-August", "2021-September", "2021-October", 
"2021-November", "2021-December"), MCI = c(586, 482, 567, 626, 
625, 610, 576, 634, 636, 663, 657, 556, 513, 415, 510, 542, 549, 
618, 623, 666, 641, 632, 593, 617, 541, 523, 504, 536, 498, 552, 
522, 519, 496, 541, 602, 570, 571, 492, 560, 525, 507, 523, 593, 
623, 578, 657, 683, 588, 664, 582, 619, 512, 630, 644, 563, 654, 
635, 732, 639, 748, 719, 567, 607, 746, 739, 686, 805, 762, 696, 
777, 755, 675, 704, 617, 732, 609, 464, 487, 565, 609, 513, 533, 
505, 578, 526, 418, 428, 421, 502, 452, 509, 492, 478, 469, 457, 
457)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-96L))

我看到您已经加载了fpp3包,所以我改用tidyverts工具来实现。我尝试了三个模型:一个普通的ARIMA模型,一个只使用疫情虚拟变量的普通回归模型,以及一个同时使用ARIMA和虚拟变量的动态回归模型。希望这对你有帮助!:-)

library(tsibble)
library(fable)
library(fabletools)
library(feasts)
library(dplyr)

创建tsibble:

# Build a monthly tsibble: parse the "YYYY-Month" labels in occurrence_yrmn
# into a yearmonth column named Month, use it as the index, then plot.
BEDATA_GROUPED <- as_tsibble(
  mutate(BEDATA_GROUPED, Month = yearmonth(occurrence_yrmn)),
  index = Month
)
autoplot(BEDATA_GROUPED)

假设大流行始于2020年3月并创建一个虚拟变量:

# First month treated as belonging to the pandemic period.
pandemic_start <- yearmonth("2020-03-01")
# Step dummy: 1 for months at or after pandemic_start, 0 for earlier months.
BEDATA_GROUPED <- mutate(
  BEDATA_GROUPED,
  pandemic_dummy = ifelse(Month >= pandemic_start, 1, 0)
)

创建一个简单的ARIMA:

# Fit an ARIMA model to MCI with the stepwise order search switched off.
BEDATA_GROUPED_arima <- model(BEDATA_GROUPED, ARIMA(MCI, stepwise = FALSE))
# Residual diagnostics for the fitted model.
gg_tsresiduals(BEDATA_GROUPED_arima)
# Forecast 5 periods ahead and plot the result.
autoplot(forecast(BEDATA_GROUPED_arima, h = 5))

建立一个简单的回归:

# Regress MCI on the pandemic dummy alone and print the model report.
# The value of report() is what gets stored, exactly as in the piped form.
BEDATA_GROUPED_TSLM <- report(
  model(BEDATA_GROUPED, TSLM(MCI ~ pandemic_dummy))
)
# Residual diagnostics for the regression.
gg_tsresiduals(BEDATA_GROUPED_TSLM)

创建要预测的数据集:

# Future periods to forecast: the 5 months that follow the last observation
# in BEDATA_GROUPED, all flagged as pandemic months (pandemic_dummy = 1).
# Built with tsibble::new_data() rather than a pasted dput() dump, so the
# tsibble's key/index/interval attributes are generated by the installed
# tsibble version and the dates stay aligned with the end of the series.
new_data <- tsibble::new_data(BEDATA_GROUPED, n = 5) |>
  mutate(pandemic_dummy = 1)

用简单回归进行预测:

# Forecast the dummy-only regression over the rows of new_data and plot it.
autoplot(
  forecast(BEDATA_GROUPED_TSLM, new_data = new_data)
)

建立一个动态回归,使用ARIMA和流行病虚拟变量:

# Dynamic regression: MCI regressed on the pandemic dummy, fitted through
# ARIMA(); the model report is printed and its value stored.
BEDATA_GROUPED_dyn_ARIMA <- report(
  model(BEDATA_GROUPED, ARIMA(MCI ~ pandemic_dummy))
)
# Residual diagnostics for the dynamic regression.
gg_tsresiduals(BEDATA_GROUPED_dyn_ARIMA)
# Forecast over the rows of new_data (which supplies pandemic_dummy) and plot.
autoplot(
  forecast(BEDATA_GROUPED_dyn_ARIMA, new_data = new_data)
)

相关内容

  • 没有找到相关文章

最新更新