我在一张图中有三条回归线,并试图显示每条回归线的方程。我一直在参考一个相关问题中的做法来实现这一点。然而,过滤似乎没有任何作用,图中显示了三次相同的方程。
最终目标是比较cpue与veg的关系,同时控制位置(区组,即block),并获得三条回归线中每一条的斜率和r^2值。
数据
# Example data (dput output): a data.frame with 76 rows and four columns --
# lake (integer), veg (numeric), cpue (numeric) and block (factor with
# levels "1", "2", "3").
# NOTE(review): the data frame and its response column share the name `cpue`.
cpue<- structure(list(lake = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L), veg = c(254.8026498, 219.9422136, 450.9662078, 484.8605026,
407.1662151, 286.7015617, 351.6441798, 179.9959443, 340.4276843,
247.2907435, 502.4119071, 336.4259995, 349.1543197, 281.7493811,
201.8284859, 325.6380404, 288.3855723, 230.8755861, 214.8890894,
326.6376698, 214.7468224, 132.0511504, 335.2727641, 336.8727253,
143.8923225, 277.3053436, 302.7005649, 355.0332852, 307.5736711,
371.8407176, 168.7645221, 365.9156811, 349.205548, 273.8392697,
171.4513348, 197.1067049, 350.5833827, 202.9605797, 365.3415045,
413.2762633, 329.8539209, 377.1415341, 180.8524994, 217.4007852,
258.5909286, 146.7092479, 258.7440138, 393.2014549, 492.6719497,
208.5002392, 219.1466664, 182.1366352, 308.0534171, 317.6037795,
131.7534807, 324.0011761, 469.5861988, 237.4492916, 318.6897863,
47.94967582, 223.5382632, 386.2227607, 343.7657123, 493.6393726,
204.2960349, 294.4218332, 178.7555635, 454.0358039, 207.1363947,
364.6063223, 462.8508521, 292.8613255, 330.3893897, 209.1769838,
237.4264742, 427.8856667), cpue = c(32.63512612, 47.98168449,
33.26735173, 14.41435377, 30.94664495, 40.26817963, 41.26204388,
31.63227286, 36.97932408, 21.54620143, 34.27556883, 6.506644061,
32.24677471, 38.24536746, 30.95968644, 24.86408391, 31.15438304,
21.69779047, 39.86223079, 27.92263229, 23.55684281, 34.6157024,
42.06943746, 24.70597527, 28.36396188, 50.34591832, 55.06361184,
48.69468021, 26.00084784, 44.77320597, 14.56328001, 33.29291085,
21.55078237, 29.95980975, 40.61006429, 43.46931237, 26.26407484,
15.87009067, 39.47297313, 20.50811378, 35.66157343, 35.64563497,
44.47319537, 42.06574907, 40.16356125, 35.57462201, 32.10051291,
34.1254268, 34.21084448, 28.18410732, 32.11249307, 38.39890418,
31.24778375, 29.76951583, 41.52508487, 34.48914051, 28.30923803,
29.33886042, 37.57268795, 59.29849175, 28.9317113, 41.27342427,
38.44878019, 44.53768204, 44.48611219, 33.15553274, 34.48894561,
34.86722967, 31.92515626, 50.04825584, 53.67528105, 37.53150868,
33.16255301, 33.22374846, 28.28172263, 42.5795616), block = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("1",
"2", "3"), class = "factor")), row.names = c(NA, -76L), class = "data.frame")
代码
# Make lm() with blocking variable----------
# lm_eqn2: intended to build a plotmath label string of the form
# "CPUE = a + b, r^2 = r2" from a mixed model of cpue on veg with a random
# intercept for block.
#   df2: data frame intended as the model data.
# NOTE(review): df2 is never referenced in the body; the model is always
# fitted on the global `cpue` object, so the argument has no effect.
# NOTE(review): `m` and `eq` are not defined in this function (only `m2` and
# `eq2` are); they can only resolve to objects in an enclosing environment.
lm_eqn2 <- function(df2){
m2 <- lmer(cpue ~ veg + (1|block), cpue); # fitted on global `cpue`, not on df2
eq2 <- substitute(italic(CPUE) == a + b*","~~italic(r)^2~"="~r2, # Write CPUE = a+b, r^2 = x
list(a = format(unname(coef(m2)[1]), digits = 4), # define 'a'
b = format(unname(coef(m2)[2]), digits = 2), # define 'b'
r2 = format(summary(m)$r.squared, digits = 3))) # define 'r2' (reads `m`, not `m2`)
as.character(as.expression(eq)); # declare expression as a character (reads `eq`, not `eq2`)
}
# Scatter plot of cpue against veg, coloured by block, with a linear fit per
# colour group (no confidence band, no legend entry for the lines) and one
# parsed text annotation per block built by lm_eqn2().
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables rather than reserved words.
ggplot(cpue, aes(x=veg, y=cpue, col=block))+
geom_point()+
geom_smooth(method="lm", show.legend=FALSE, se=FALSE)+
annotate("text", x=100, y=20, label= lm_eqn2(cpue %>% filter(block==1)), parse=TRUE)+
annotate("text", x=200, y=30, label= lm_eqn2(cpue %>% filter(block==2)), parse=TRUE)+
annotate("text", x=300, y=40, label= lm_eqn2(cpue %>% filter(block==3)), parse=TRUE)
当我试图用以下代码查看每一行的方程式时:
# Inspect the label string produced for the rows where block == 2.
lm_eqn2(cpue %>% filter(block==2))
无论我按哪个block值进行过滤,它都返回相同的方程。这让我怀疑是我构建模型和方程的代码有问题。与关联问题唯一不同的是,我的模型有一个区组变量(blocking variable),但我不确定这是否真的会有影响。
如有任何帮助,我将不胜感激。
这里有一些问题。首先,对数据帧和其中的向量使用相同的名称不是一种好的做法。这使得像lmer(cpue ~ veg + (1|block), cpue);
和ggplot(cpue, aes(x=veg, y=cpue, col=block))+
这样的行让许多人感到困惑。
但是,在函数内部直接把cpue用作数据框,也意味着函数不在乎之后传递给它的内容。这使得`m2 <- lmer(cpue ~ veg + (1|block), cpue);`每次都是相同的——因此产生了相同的方程。由于函数体中从未使用df2,作为参数传入的`cpue %>% filter(block==2)`被忽略了。所以你需要类似这样的代码:
# lm_eqn2: fit cpue ~ veg with a random intercept for block on the data frame
# passed in, and return a plotmath label string built from that fit.
#   df2: data frame providing the cpue, veg and block columns.
# NOTE(review): coef() and summary()$r.squared are used as they would be for
# an lm fit; confirm what they return for an lmer fit before relying on the
# label values.
lm_eqn2 <- function(df2){
m2 <- lmer(cpue ~ veg + (1|block), df2); ## note the change to df2 here
eq2 <- substitute(italic(CPUE) == a + b*","~~italic(r)^2~"="~r2, # label template: CPUE = a + b, r^2 = r2
list(a = format(unname(coef(m2)[1]), digits = 4), # value substituted for 'a'
b = format(unname(coef(m2)[2]), digits = 2), # value substituted for 'b'
r2 = format(summary(m2)$r.squared, digits = 3))) # value substituted for 'r2'
as.character(as.expression(eq2)); # return the expression as a character string
}
**还要注意**:在您的原始代码中找不到`m`和`eq`,所以我将它们分别更改为`m2`和`eq2`。
这给出了错误:
Error: grouping factors must have > 1 sampled level
这是有道理的,因为您在模型代码中将block拟合为随机截距,但又按区组因子(blocking factor)过滤了数据。因此,在`cpue %>% filter(block==1)`、`cpue %>% filter(block==2)`和`cpue %>% filter(block==3)`的每一个中,区组因子都只有一种取值。这意味着使用`(1|block)`不会向回归中添加任何信息,因为block现在是一个常数。
你可能需要说明一下你希望如何处理这个区组因子(blocking factor)。一些相关帖子:https://stats.stackexchange.com/q/4700/238878 和 https://stats.stackexchange.com/q/31569/238878