data(mtcars)
head(mtcars)
Y = c("mpg")
X = c("cyl", "disp", "hp")
for(V in 1:length(X)){
MODEL=lm(Y~X[V],data=mtcars)
mtcars$paste(X[V], "residual")=MODEL$resid
}
我有数据mtcars,希望估计许多回归模型并将残差作为数据mtcars中的新变量存储。下面我提供例子。基本上,我希望在所有三个变量预测因子上分别回归"mpg"变量,从而为mtcars生成三个新变量,这是三个循环回归的模型残差,但我无法成功做到这一点。
按照您的方法,
for(V in X){
mtcars[,paste0(V,"_residual")] <- lm(paste("mpg", "~", V),mtcars)$resid
}
或与sapply
,
sapply(X, function(a) {
mtcars[,paste0(a,"_residual")] <<- lm(paste("mpg", "~", a),mtcars)$resid
})
我提出一个稍微不同的方法
library(tidyverse)
data(mtcars)
flm = function(data) lm(mpg~value, data)
fres = function(model) model$resid
mtcars %>% as_tibble() %>%
pivot_longer(cyl:carb) %>% #1
group_by(name) %>%
nest() %>% #2
mutate(model = map(data, ~flm(.x))) %>% #3
mutate(residuals = map(model, ~fres(.x))) %>% #4
unnest(residuals)
输出# A tibble: 320 x 4
# Groups: name [10]
name data model residuals
<chr> <list> <list> <dbl>
1 cyl <tibble [32 x 2]> <lm> 0.370
2 cyl <tibble [32 x 2]> <lm> 0.370
3 cyl <tibble [32 x 2]> <lm> -3.58
4 cyl <tibble [32 x 2]> <lm> 0.770
5 cyl <tibble [32 x 2]> <lm> 3.82
6 cyl <tibble [32 x 2]> <lm> -2.53
7 cyl <tibble [32 x 2]> <lm> -0.578
8 cyl <tibble [32 x 2]> <lm> -1.98
9 cyl <tibble [32 x 2]> <lm> -3.58
10 cyl <tibble [32 x 2]> <lm> -1.43
# ... with 310 more rows
由于这可能有点令人困惑,我将一步一步地向您展示这里发生了什么(参见注释号)。我们来看看第一步
之后是什么# A tibble: 320 x 3
mpg name value
<dbl> <chr> <dbl>
1 21 cyl 6
2 21 disp 160
3 21 hp 110
4 21 drat 3.9
5 21 wt 2.62
6 21 qsec 16.5
7 21 vs 0
8 21 am 1
9 21 gear 4
10 21 carb 4
# ... with 310 more rows
这很简单。我们只是把mtcars
变长了。
在第二步之后,我们得到了这样的内容
# A tibble: 10 x 2
# Groups: name [10]
name data
<chr> <list>
1 cyl <tibble [32 x 2]>
2 disp <tibble [32 x 2]>
3 hp <tibble [32 x 2]>
4 drat <tibble [32 x 2]>
5 wt <tibble [32 x 2]>
6 qsec <tibble [32 x 2]>
7 vs <tibble [32 x 2]>
8 am <tibble [32 x 2]>
9 gear <tibble [32 x 2]>
10 carb <tibble [32 x 2]>
可以看到,每个变量都有自己的tibble
,其中有两个变量mpg
和value
。第三步,我们添加lm
模型。
# A tibble: 10 x 3
# Groups: name [10]
name data model
<chr> <list> <list>
1 cyl <tibble [32 x 2]> <lm>
2 disp <tibble [32 x 2]> <lm>
3 hp <tibble [32 x 2]> <lm>
4 drat <tibble [32 x 2]> <lm>
5 wt <tibble [32 x 2]> <lm>
6 qsec <tibble [32 x 2]> <lm>
7 vs <tibble [32 x 2]> <lm>
8 am <tibble [32 x 2]> <lm>
9 gear <tibble [32 x 2]> <lm>
10 carb <tibble [32 x 2]> <lm>
另一方面,在这些模型的步骤四中,我们添加残数。
# A tibble: 10 x 4
# Groups: name [10]
name data model residuals
<chr> <list> <list> <list>
1 cyl <tibble [32 x 2]> <lm> <dbl [32]>
2 disp <tibble [32 x 2]> <lm> <dbl [32]>
3 hp <tibble [32 x 2]> <lm> <dbl [32]>
4 drat <tibble [32 x 2]> <lm> <dbl [32]>
5 wt <tibble [32 x 2]> <lm> <dbl [32]>
6 qsec <tibble [32 x 2]> <lm> <dbl [32]>
7 vs <tibble [32 x 2]> <lm> <dbl [32]>
8 am <tibble [32 x 2]> <lm> <dbl [32]>
9 gear <tibble [32 x 2]> <lm> <dbl [32]>
10 carb <tibble [32 x 2]> <lm> <dbl [32]>
这个解决方案非常快速和透明,并允许您保存所有可以用于进一步计算的模型。您可以根据需要自由调整解决方案,并对选定的变量执行计算。
这是sapply
的另一个选项。
e <- sapply(X, function(V){
fmla <- as.formula(paste(Y, V, sep = "~"))
model <- lm(fmla, data = mtcars)
resid(model)
})
colnames(e) <- paste(colnames(e), "residual", sep = "_")
mtcars <- cbind(mtcars, e)
head(mtcars)
# mpg cyl disp hp drat wt qsec vs am gear carb cyl_residual disp_residual hp_residual
#Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 0.3701643 -2.005436 -1.5937500
#Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 0.3701643 -2.005436 -1.5937500
#Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 -3.5814159 -2.348622 -0.9536307
#Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 0.7701643 2.433646 -1.1937500
#Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 3.8217446 3.937588 0.5410881
#Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 -2.5298357 -2.226453 -4.8348913
library(dplyr, warn.conflicts = FALSE)
Y = c("mpg")
X = c("cyl", "disp", "hp")
out <-
mtcars %>%
mutate(across(
all_of(X),
list(resid = ~ lm(reformulate(cur_column(), Y), data = mtcars)$resid)
))
head(out)
#> mpg cyl disp hp drat wt qsec vs am gear carb cyl_resid
#> Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 0.3701643
#> Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 0.3701643
#> Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 -3.5814159
#> Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 0.7701643
#> Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 3.8217446
#> Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 -2.5298357
#> disp_resid hp_resid
#> Mazda RX4 -2.005436 -1.5937500
#> Mazda RX4 Wag -2.005436 -1.5937500
#> Datsun 710 -2.348622 -0.9536307
#> Hornet 4 Drive 2.433646 -1.1937500
#> Hornet Sportabout 3.937588 0.5410881
#> Valiant -2.226453 -4.8348913
由reprex包(v2.0.1)在20121-10-09创建