比较不同 R 包中使用 MLE 拟合的 GPD 的返回水平



这个问题与这篇文章有关:根据不同 R 包中的 GPD 计算回报水平

我想将"pot值"数据限制在 6 年内,即每年 16 次观测,因为样本数量为 96。我想使用 extremes 包计算返回水平(mle 参数估计和 GP 模型(,并将结果与 extremeStat 包进行比较。

请注意参数truncate=0.4956645,以获得恰好阈值 50。

为什么第 51 行的结果 (d$quant[28, ,drop=FALSE]( 不完全等于第 45 行的结果 (rl.extremes2(,如果 extremeStat 包使用相同的包极值与 MLE 和 GP 进行计算?

th <- 50
# sample data:
potvalues <- c(
58.5,44.2,49.6,59.3,48.3,60.9,94.5,47.1,45.3,57.6,48.2,46.2,44.2,50.6,42.1,52.7,80.9,
58.5,51.3,48.4,51.7,71.9,60.1,64.4,43.5,55.5,49.3,58.2,47.5,43.7,45.2,52.8,42.2,46.4,
96.1,47.5,50.1,42.4,60.9,72.6,51.6,59.4,80.5,63.7,59.9,45.0,66.7,47.6,53.3,43.1,51.0,
46.2,53.6,59.8,51.7,46.7,42.6,44.5,45.0,50.0,44.0,89.9,44.2,47.8,53.3,43.0,55.7,44.6,
44.6,54.9,45.1,43.9,78.7,45.5,64.0,42.7,47.4,57.0,105.4,64.3,43.2,50.4,80.2,49.9,71.6,
47.4,44.1,47.6,55.2,44.4,78.6,50.8,42.4,47.1,43.5,51.4)
#------------------------------------------------------------------------------------------#
#Count events over threshold
excesses = potvalues > th
sum(excesses)
# Data corresponding to a period of 6 years
#-------------------------------------------------------------------------------------------#
# MLE Fitting of GPD - package extRemes
# If fit period is 6 years, then I have 16 obs by year
pot.ext2 <- extRemes::fevd(potvalues, method = "MLE", type="GP", threshold=th, 
time.units="16/year")

npy2=16  #pot.ext2$npy
span2=5.9375 #pot.ext2$span
w2 = 96/npy2   #Duration of the fit period (6 years)
lambda2 = sum(excesses)/w2
Tr=c(2,5,10,20,50,100)
myp2 = (1 - (1/(lambda2*Tr)))
myp2 = myp2[myp2>0]
#Get return level using quantile function!
vel1 = extRemes::qevd(myp2, loc = pot.ext2$threshold, scale = pot.ext2$results$par[1], 
shape = pot.ext2$results$par[2], 
threshold = pot.ext2$threshold, type = "GP")
vel1
# return levels with 6 years, 16 obs, using return.level function
rl.extremes2 <-  extRemes::return.level(pot.ext2, conf = 0.05,
return.period= c(2,5,10,20,50,100))
rl.extremes2 <- as.numeric(rl.extremes2)
rl.extremes2
#------------------------------------------------------------------------------------------#
npy=16
Tr=c(2,5,10,20,50,100)
p = (1 - (1/(npy*Tr)))
d <- extremeStat::distLquantile(potvalues, truncate=0.4956645, probs=p, quiet=TRUE, list=TRUE)
d$quant[28, ,drop=FALSE]
dlf <- extremeStat::distLextreme(potvalues, quiet=TRUE, npy=16, truncate=0.4956645)
dlf$returnlev["threshold",1]
dlf$returnlev[28, , drop=FALSE]

这里是极端统计作者的答案

"由于时间单位的原因,估计分布函数的形状和比例是不同的"......"概率略有不同,但这不会影响结果(参见注释测试(">

下面的代码显示了 1( extremeStat 如何使用 extreme 计算返回水平,以及 2( 如何使用两种方法直接使用 extremes::qevd 和 extremes::return.level

也许该主题的专家可以决定两种方法中的哪一种是计算回报水平的"更合适"或"正确"方法:bb_extRemes还是alexys_exRtemes?...无论如何,结果是相似的。

# CLEAN ------------------------------------------------------------------------
rm(list=ls())
potvalues <- c(
58.5,44.2,49.6,59.3,48.3,60.9,94.5,47.1,45.3,57.6,48.2,46.2,44.2,50.6,42.1,52.7,80.9,
58.5,51.3,48.4,51.7,71.9,60.1,64.4,43.5,55.5,49.3,58.2,47.5,43.7,45.2,52.8,42.2,46.4,
96.1,47.5,50.1,42.4,60.9,72.6,51.6,59.4,80.5,63.7,59.9,45.0,66.7,47.6,53.3,43.1,51.0,
46.2,53.6,59.8,51.7,46.7,42.6,44.5,45.0,50.0,44.0,89.9,44.2,47.8,53.3,43.0,55.7,44.6,
44.6,54.9,45.1,43.9,78.7,45.5,64.0,42.7,47.4,57.0,105.4,64.3,43.2,50.4,80.2,49.9,71.6,
47.4,44.1,47.6,55.2,44.4,78.6,50.8,42.4,47.1,43.5,51.4)
options(scipen=10) # nicer printing of 0.00000001

bb_extRemes <- function(x, truncate, RPs=c(2,5,10,20,50), npy)
{
normalthr <- berryFunctions::quantileMean(x[is.finite(x)], truncate)
z <- extRemes::fevd(x, method="MLE", type="GP", threshold=normalthr)
scale <- z$results$par["scale"]
shape <- z$results$par["shape"]
probs <- 1-1/(RPs*npy)
probs2 <- (probs-truncate)/(1-truncate) # correct probabilities for smaller sample proportion
probs2[probs < truncate] <- 0   # avoid negative values
probs2[probs2==0] <- NA
probs2[probs2==1] <- NA
#alexys
#excesses <- x > normalthr
#w2 <- length(x)/npy # Duration of the fit period (6 years)
#lambda2 <- sum(excesses)/w2
#probs <- 1 - 1/(lambda2*RPs)
#probs[probs<0] <- NA  
#alexys
out <- extRemes::qevd(p=probs2, scale=scale, shape=shape, threshold=z$threshold, type="GP")
names(out) <- paste0("RP.", RPs)
out <- list(RL=out, PAR=c(thr=normalthr, scale=scale, shape=shape, probs=probs2))
out
}
alexys_exRtemes <- function(x, threshold, RPs=c(2,5,10,20,50), npy)
{
unit <- paste0(npy,"/year")
z <- extRemes::fevd(x, method="MLE", type="GP", threshold=threshold, time.units=unit)
excesses <- x > threshold
w2 <- length(x)/npy # Duration of the fit period (6 years)
lambda2 <- sum(excesses)/w2
probs <- 1 - 1/(lambda2*RPs)
probs[probs<0] <- NA
### probs     0.9375000         0.9750000         0.9875000         0.9937500         0.9975000 
###probs <- c(0.93803727875591, 0.97521491150236, 0.98760745575118, 0.99380372787559, 0.99752149115024)
scale <- z$results$par[1]
shape <- z$results$par[2]
vel1 <- extRemes::qevd(probs, loc=z$threshold, scale=scale, shape=shape, threshold=z$threshold, type="GP")
out <- extRemes::return.level(z, return.period=RPs)
n <- names(out)
out <- as.numeric(out)
names(out) <- n
out <- list(RL=out, PAR=c(thr=z$threshold, scale=scale, shape=shape, probs=probs, lambda2=lambda2))
out
}
bb_extRemes(potvalues, truncate=0.4956645, npy=16)
alexys_exRtemes(potvalues, threshold=50, npy=16)

最新更新