R - 在同一张图中显示多个回归方程



我在一个图中有三个回归,我试图显示每个回归的方程。我一直在参照一个相关问题中的做法来尝试实现这一点。然而,过滤似乎没有任何作用,它把相同的方程显示了三次。

最终目标是比较cpue与veg的关系,同时控制位置(block),并获得三条回归线中每条回归线的斜率和r^2值。

数据

# Example data: a 76-row data frame with integer `lake` (values 1-3), numeric
# `veg` and `cpue`, and a three-level factor `block` (levels "1", "2", "3").
cpue<- structure(list(lake = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L), veg = c(254.8026498, 219.9422136, 450.9662078, 484.8605026, 
407.1662151, 286.7015617, 351.6441798, 179.9959443, 340.4276843, 
247.2907435, 502.4119071, 336.4259995, 349.1543197, 281.7493811, 
201.8284859, 325.6380404, 288.3855723, 230.8755861, 214.8890894, 
326.6376698, 214.7468224, 132.0511504, 335.2727641, 336.8727253, 
143.8923225, 277.3053436, 302.7005649, 355.0332852, 307.5736711, 
371.8407176, 168.7645221, 365.9156811, 349.205548, 273.8392697, 
171.4513348, 197.1067049, 350.5833827, 202.9605797, 365.3415045, 
413.2762633, 329.8539209, 377.1415341, 180.8524994, 217.4007852, 
258.5909286, 146.7092479, 258.7440138, 393.2014549, 492.6719497, 
208.5002392, 219.1466664, 182.1366352, 308.0534171, 317.6037795, 
131.7534807, 324.0011761, 469.5861988, 237.4492916, 318.6897863, 
47.94967582, 223.5382632, 386.2227607, 343.7657123, 493.6393726, 
204.2960349, 294.4218332, 178.7555635, 454.0358039, 207.1363947, 
364.6063223, 462.8508521, 292.8613255, 330.3893897, 209.1769838, 
237.4264742, 427.8856667), cpue = c(32.63512612, 47.98168449, 
33.26735173, 14.41435377, 30.94664495, 40.26817963, 41.26204388, 
31.63227286, 36.97932408, 21.54620143, 34.27556883, 6.506644061, 
32.24677471, 38.24536746, 30.95968644, 24.86408391, 31.15438304, 
21.69779047, 39.86223079, 27.92263229, 23.55684281, 34.6157024, 
42.06943746, 24.70597527, 28.36396188, 50.34591832, 55.06361184, 
48.69468021, 26.00084784, 44.77320597, 14.56328001, 33.29291085, 
21.55078237, 29.95980975, 40.61006429, 43.46931237, 26.26407484, 
15.87009067, 39.47297313, 20.50811378, 35.66157343, 35.64563497, 
44.47319537, 42.06574907, 40.16356125, 35.57462201, 32.10051291, 
34.1254268, 34.21084448, 28.18410732, 32.11249307, 38.39890418, 
31.24778375, 29.76951583, 41.52508487, 34.48914051, 28.30923803, 
29.33886042, 37.57268795, 59.29849175, 28.9317113, 41.27342427, 
38.44878019, 44.53768204, 44.48611219, 33.15553274, 34.48894561, 
34.86722967, 31.92515626, 50.04825584, 53.67528105, 37.53150868, 
33.16255301, 33.22374846, 28.28172263, 42.5795616), block = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("1", 
"2", "3"), class = "factor")), row.names = c(NA, -76L), class = "data.frame")

代码

# Make lm() with blocking variable----------
# Fits a model of cpue on veg with a random intercept per block, builds an
# equation label expression from it, and returns that label as a character
# string (the plotting code passes it to annotate() with parsing enabled).
# NOTE(review): the parameter `df2` is never referenced in the body; the model
# is fitted on the global data frame `cpue`, so the argument has no effect.
lm_eqn2 <- function(df2){
m2 <- lmer(cpue ~ veg + (1|block), cpue);
eq2 <- substitute(italic(CPUE) == a + b*","~~italic(r)^2~"="~r2, # Write CPUE = a+b, r^2 = x
list(a = format(unname(coef(m2)[1]), digits = 4), # define 'a'
b = format(unname(coef(m2)[2]), digits = 2), # define 'b'
# NOTE(review): `m` is not defined in this function; the fitted model is `m2`.
r2 = format(summary(m)$r.squared, digits = 3))) # define 'r2'
# NOTE(review): `eq` is not defined in this function; the expression built
# above is stored in `eq2`.
as.character(as.expression(eq)); # declare expression as a character
}

# Scatter plot of cpue against veg, coloured by block, with an `lm` smooth per
# colour group and one parsed equation label per block subset.
# TRUE/FALSE are spelled out because the T/F shorthands are ordinary variables
# that can be reassigned.
ggplot(cpue, aes(x = veg, y = cpue, col = block)) +
  geom_point() +
  geom_smooth(method = "lm", show.legend = FALSE, se = FALSE) +
  annotate(
    "text", x = 100, y = 20, parse = TRUE,
    label = lm_eqn2(cpue %>% filter(block == 1))
  ) +
  annotate(
    "text", x = 200, y = 30, parse = TRUE,
    label = lm_eqn2(cpue %>% filter(block == 2))
  ) +
  annotate(
    "text", x = 300, y = 40, parse = TRUE,
    label = lm_eqn2(cpue %>% filter(block == 3))
  )

当我试图用以下代码查看每一行的方程式时:

# Print the label string produced for the block-2 subset.
cpue %>%
  filter(block == 2) %>%
  lm_eqn2()

无论我按哪个block值进行过滤,它都返回相同的方程。这让我觉得我构建模型和方程的代码可能有问题。与关联问题唯一不同的是,我的模型有一个分块(blocking)变量,但我不确定这是否真的会有影响。

如有任何帮助,我们将不胜感激。

这里有一些问题。首先,数据帧和其中的向量使用相同的名称不是一种好的做法。这使得像 `lmer(cpue ~ veg + (1|block), cpue);` 和 `ggplot(cpue, aes(x=veg, y=cpue, col=block))+` 这样的行让许多人感到困惑。

但是,在函数内部直接使用数据帧cpue也意味着函数并不关心之后传递给它的内容。这使得 `m2 <- lmer(cpue ~ veg + (1|block), cpue);` 每次拟合的都是同一个模型——因此产生了相同的方程。由于函数体中没有用到df2,作为参数传入的 `cpue %>% filter(block==2)` 被忽略了。所以你需要这样的东西:

# Build an equation label for the mixed-model fit of cpue on veg.
#
# Fits `cpue ~ veg + (1 | block)` to `df2` and returns a string of the form
# "CPUE = a + b . veg, r^2 = r2" meant to be parsed as a plot label.
#
# df2: data frame with columns `cpue`, `veg` and `block`. `block` must have
#   more than one level present; otherwise lmer() stops with
#   "grouping factors must have > 1 sampled level".
# Returns: a length-one character vector holding the label expression.
lm_eqn2 <- function(df2) {
  m2 <- lme4::lmer(cpue ~ veg + (1 | block), data = df2) ## note the change to df2 here
  # coef() on an lmer fit gives per-group coefficient tables rather than a
  # plain vector, so take the fixed-effect intercept and slope from fixef().
  fixed <- lme4::fixef(m2)
  # summary() of an lmer fit has no `r.squared` component; use the squared
  # correlation between observed and fitted values as a simple pseudo-R^2.
  r_squared <- stats::cor(lme4::getME(m2, "y"), stats::fitted(m2))^2
  eq2 <- substitute(
    # The slope is multiplied by the predictor so the label is a full equation.
    italic(CPUE) == a + b %.% italic(veg) * "," ~ ~italic(r)^2 ~ "=" ~ r2,
    list(
      a = format(unname(fixed[1]), digits = 4),
      b = format(unname(fixed[2]), digits = 2),
      r2 = format(r_squared, digits = 3)
    )
  )
  as.character(as.expression(eq2))
}

另请注意:(在您的原始代码中)找不到 `m` 和 `eq`,所以我将它们分别更改为 `m2` 和 `eq2`。

这给出了错误:

Error: grouping factors must have > 1 sampled level

这是有道理的,因为您在模型代码中把block拟合为随机截距,但又按分块因子过滤了数据。因此在 `cpue %>% filter(block==1)`、`cpue %>% filter(block==2)` 和 `cpue %>% filter(block==3)` 中的每一个里,分块因子都只有一种取值。这意味着使用 `(1|block)` 不会向回归中添加任何信息,因为block现在是一个常数。

你可能需要说明一下你希望如何处理这个分块因子。一些相关帖子:https://stats.stackexchange.com/q/4700/238878 和 https://stats.stackexchange.com/q/31569/238878

最新更新