r-如何从plm-FE回归中获得介于和总体R2之间的值



有没有办法让plm()为我计算R2和整个R2之间的值,并将它们包含在summary()输出中?

为了澄清我对R2之间、总体上和内部的理解,请参阅StackExchange上的答案。

我的理解是plm只在R2范围内计算。我正在运行模型内的Twoways效果。

一个随机的例子(改编自这里(:

library(plm)
# Create some random data
set.seed(1) 
x=rnorm(100); fe=rep(rnorm(10),each=10); id=rep(1:10,each=10); ti=rep(1:10,10); e=rnorm(100)
y=x+fe+e
data=data.frame(y,x,id,ti)
# Get plm within R2
reg=plm(y~x,model="within",index=c("id","ti"), effect = "twoways", data=data)
summary(reg)$r.squared

我现在还想得到整体和R2:之间

# Pooled Version (overall R2)
reg1=lm(y~x)
summary(reg1)$r.squared
# Between R2
y.means=tapply(y,id,mean)[id]
x.means=tapply(x,id,mean)[id]
reg2=lm(y.means~x.means)
summary(reg3)$r.squared

{plm}似乎无法报告整体或引号内的R平方值。你可以通过创建一个自定义的summaryprint方法来破解它:

summary.plm.full <- function (object, vcov = NULL, ...) 
{
vcov_arg <- vcov
#add plm::: for plm functions so they are calllex correctly
model <- plm:::describe(object, "model")
effect <- plm:::describe(object, "effect")
random.method <- plm:::describe(object, "random.method")
object$r.squared <- c(rsq = r.squared(object), 
adjrsq = r.squared(object, dfcor = TRUE),
# add the two new r squared terms here
rsq_overall = r.squared(object, model = "pooled"),
rsq_btw = r.squared(update(object, effect = "individual", model = "between")))
use.norm.chisq <- FALSE
if (model == "random") 
use.norm.chisq <- TRUE
if (length(formula(object))[2] >= 2) 
use.norm.chisq <- TRUE
if (model == "ht") 
use.norm.chisq <- TRUE
object$fstatistic <- pwaldtest(object, test = ifelse(use.norm.chisq, 
"Chisq", "F"), vcov = vcov_arg)
if (!is.null(vcov_arg)) {
if (is.matrix(vcov_arg)) 
rvcov <- vcov_arg
if (is.function(vcov_arg)) 
rvcov <- vcov_arg(object)
std.err <- sqrt(diag(rvcov))
}
else {
std.err <- sqrt(diag(stats::vcov(object)))
}
b <- coefficients(object)
z <- b/std.err
p <- if (use.norm.chisq) {
2 * pnorm(abs(z), lower.tail = FALSE)
}
else {
2 * pt(abs(z), df = object$df.residual, lower.tail = FALSE)
}
object$coefficients <- cbind(b, std.err, z, p)
colnames(object$coefficients) <- if (use.norm.chisq) {
c("Estimate", "Std. Error", "z-value", "Pr(>|z|)")
}
else {
c("Estimate", "Std. Error", "t-value", "Pr(>|t|)")
}
if (!is.null(vcov_arg)) {
object$rvcov <- rvcov
rvcov.name <- paste0(deparse(substitute(vcov)))
attr(object$rvcov, which = "rvcov.name") <- rvcov.name
}
object$df <- c(length(b), object$df.residual, length(object$aliased))
class(object) <- c("summary.plm.full", "plm", "panelmodel")
object
}

用于打印:

print.summary.plm.full <- function (x, digits = max(3, getOption("digits") - 2), width = getOption("width"), 
subset = NULL, ...) 
{
formula <- formula(x)
has.instruments <- (length(formula)[2] >= 2)
effect <- plm:::describe(x, "effect")
model <- plm:::describe(x, "model")
if (model != "pooling") {
cat(paste(plm:::effect.plm.list[effect], " ", sep = ""))
}
cat(paste(plm:::model.plm.list[model], " Model", sep = ""))
if (model == "random") {
ercomp <- describe(x, "random.method")
cat(paste(" n   (", random.method.list[ercomp], "'s transformation)n", 
sep = ""))
}
else {
cat("n")
}
if (has.instruments) {
cat("Instrumental variable estimationn")
if (model != "within") {
ivar <- plm:::describe(x, "inst.method")
cat(paste0("   (", plm:::inst.method.list[ivar], "'s transformation)n"))
}
}
if (!is.null(x$rvcov)) {
cat("nNote: Coefficient variance-covariance matrix supplied: ", 
attr(x$rvcov, which = "rvcov.name"), "n", sep = "")
}
cat("nCall:n")
print(x$call)
cat("n")
pdim <- pdim(x)
print(pdim)
if (model %in% c("fd", "between")) {
cat(paste0("Observations used in estimation: ", nobs(x), 
"n"))
}
if (model == "random") {
cat("nEffects:n")
print(x$ercomp)
}
cat("nResiduals:n")
df <- x$df
rdf <- df[2L]
if (rdf > 5L) {
save.digits <- unlist(options(digits = digits))
on.exit(options(digits = save.digits))
print(plm:::sumres(x))
}
else if (rdf > 0L) 
print(residuals(x), digits = digits)
if (rdf == 0L) {
cat("ALL", x$df[1L], "residuals are 0: no residual degrees of freedom!")
cat("n")
}
if (any(x$aliased, na.rm = TRUE)) {
naliased <- sum(x$aliased, na.rm = TRUE)
cat("nCoefficients: (", naliased, " dropped because of singularities)n", 
sep = "")
}
else cat("nCoefficients:n")
if (is.null(subset)) 
printCoefmat(coef(x), digits = digits)
else printCoefmat(coef(x)[subset, , drop = FALSE], digits = digits)
cat("n")
cat(paste("Total Sum of Squares:    ", signif(plm:::tss.plm(x), digits), 
"n", sep = ""))
cat(paste("Residual Sum of Squares: ", signif(deviance(x), 
digits), "n", sep = ""))
cat(paste("R-Squared:      ", signif(x$r.squared[1], digits), 
"n", sep = ""))
cat(paste("Adj. R-Squared: ", signif(x$r.squared[2], digits), 
"n", sep = ""))
# add the new r squared terms here
cat(paste("Overall R-Squared:      ", signif(x$r.squared[3], digits), 
"n", sep = ""))
cat(paste("Between R-Squared:      ", signif(x$r.squared[4], digits), 
"n", sep = ""))
fstat <- x$fstatistic
if (names(fstat$statistic) == "F") {
cat(paste("F-statistic: ", signif(fstat$statistic), " on ", 
fstat$parameter["df1"], " and ", fstat$parameter["df2"], 
" DF, p-value: ", format.pval(fstat$p.value, digits = digits), 
"n", sep = ""))
}
else {
cat(paste("Chisq: ", signif(fstat$statistic), " on ", 
fstat$parameter, " DF, p-value: ", format.pval(fstat$p.value, 
digits = digits), "n", sep = ""))
}
invisible(x)
}

现在,如果我们使用自定义功能:

library(plm)
# Create some random data
set.seed(1) 
x=rnorm(100); fe=rep(rnorm(10),each=10); id=rep(1:10,each=10); ti=rep(1:10,10); e=rnorm(100)
y=x+fe+e
data=data.frame(y,x,id,ti)
# Get plm within R2
reg=plm(y~x,model="within",index=c("id","ti"), effect = "twoways", data=data)
summary.plm.full(reg)

打印:

Twoways effects Within Model
Call:
plm(formula = y ~ x, data = data, effect = "twoways", model = "within", 
index = c("id", "ti"))
Balanced Panel: n = 10, T = 10, N = 100
Residuals:
Min.  1st Qu.   Median  3rd Qu.     Max. 
-2.36060 -0.56664 -0.11085  0.56070  2.00869 
Coefficients:
Estimate Std. Error t-value  Pr(>|t|)    
x  1.12765    0.11306  9.9741 1.086e-15 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Total Sum of Squares:    157.21
Residual Sum of Squares: 70.071
R-Squared:      0.55428
Adj. R-Squared: 0.44842
Overall R-Squared:      0.33672
Between R-Squared:      0.17445
F-statistic: 99.4829 on 1 and 80 DF, p-value: 1.0856e-15

"内部"估计器等效于最小二乘伪变量估计器,可以通过OLS进行估计。这报告了一个总体的r平方(……与上面paqmo函数给出的不一样——也许他们可以澄清?(

lsdv<-lm(y~-1+x+as.factor(id)+as.factor(ti),data=data)
summary(lsdv)

请注意,x的估计系数是相同的。

最新更新