我正在尝试 Hadley Wickham 在这个视频中介绍的 tidy(整洁)方法:https://www.youtube.com/watch?v=rz3_FDVt9eg&t=1902s 。只要整理后的数据帧只有 1 行,用这种方式提取某些统计量就很简单;但每个线性模型的斜率位于 broom 生成的 tidy 数据帧的第二行。我的代码与 Hadley 的代码非常相似,如下所示。
library(tidyverse)
# Nest each county's rows into a `data` list-column (one row per county).
corn_by_county <- corn_final_long %>%
  group_by(County) %>%
  nest()
# Fit a simple linear regression of Yield on Year for one county.
#
# df: a data frame with `Yield` and `Year` columns.
# Returns the fitted `lm` object.
corn_county <- function(df) {
  lm(Yield ~ Year, data = df)
}
# Fit one linear model per county and keep it in the `model` list-column.
corn_models <- corn_by_county %>% mutate(model = map(data, corn_county))
# Attach broom summaries as list-columns, then pull scalar statistics from them.
corn_output <- corn_models %>% mutate(tidy = map(model, broom::tidy),
glance = map(model, broom::glance),
augment = map(model, broom::augment),
# Each `glance` data frame has a single row, so `r.squared` is one value.
rsq = glance %>% map_dbl('r.squared'),
# Fails: each `tidy` data frame has two rows (intercept and Year), so
# `estimate` holds two values, but map_dbl() needs exactly one per element.
slope = tidy %>% map_dbl('estimate')) ## slope not working
"斜率"位于 `corn_output` 中嵌套的 `tidy` 数据帧的第二行。我试过下面这段代码:
# Also fails: `tidy` here is a list of data frames (one per model), so
# filter() is applied to the list itself instead of to each nested data frame.
slope = tidy %>% filter(term == 'Year') %>% map_dbl('estimate')
然而,这行不通。如何提取斜率?非常感谢。
这是我的数据样本。
# Sample data (dput output): a 100-row tibble with an integer `Year`
# (1979 and 1980), a character `County`, and a numeric `Yield` that
# contains some NA values.
corn_final_long <- structure(list(Year = c(1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1980L, 1980L, 1980L,
1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L,
1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L,
1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L,
1980L, 1980L, 1980L, 1980L), County = c("Aurora", "Beadle", "Bennett",
"Bon Homme", "Brookings", "Brown", "Brule", "Buffalo", "Butte",
"Campbell", "Charles Mix", "Clark", "Clay", "Codington", "Corson",
"Custer", "Davison", "Day", "Deuel", "Dewey", "Douglas", "Edmunds",
"Fall River", "Faulk", "Grant", "Gregory", "Haakon", "Hamlin",
"Hand", "Hanson", "Harding", "Hughes", "Hutchinson", "Hyde",
"Jackson", "Jerauld", "Jones", "Kingsbury", "Lake", "Lawrence",
"Lincoln", "Lyman", "Marshall", "Mccook", "Mcpherson", "Meade",
"Mellette", "Miner", "Minnehaha", "Moody", "Oglala Lakota", "Pennington",
"Perkins", "Potter", "Roberts", "Sanborn", "Spink", "Stanley",
"Sully", "Todd", "Tripp", "Turner", "Union", "Walworth", "Yankton",
"Ziebach", "Aurora", "Beadle", "Bennett", "Bon Homme", "Brookings",
"Brown", "Brule", "Buffalo", "Butte", "Campbell", "Charles Mix",
"Clark", "Clay", "Codington", "Corson", "Custer", "Davison",
"Day", "Deuel", "Dewey", "Douglas", "Edmunds", "Fall River",
"Faulk", "Grant", "Gregory", "Haakon", "Hamlin", "Hand", "Hanson",
"Harding", "Hughes", "Hutchinson", "Hyde"), Yield = c(47.3, 58.9,
103.8, 71.4, 71.7, 65.3, 53.9, 72.8, 84.8, 61, 59, 63.4, 92.4,
75.2, 41, 94.4, 62.7, 63.6, 74, 47.7, 57.7, 51.5, 102.1, 57.6,
72.4, 58, 39.1, 68.2, 68.6, 66, 73.3, 85, 78.8, 52.7, 45, 40.9,
76.7, 63.6, 80.6, 85, 96.3, 87, 65.8, 74.2, 55.9, 78.8, 47.8,
66.2, 92.6, 93.1, 60, 62.9, 53.5, 60.2, 70.5, 64.8, 68.9, 60,
59, 94.8, 42.2, 89.5, 105.1, 68.4, 78.9, 45, 25.4, 35.8, 43.5,
27.3, 63.2, 46, 32.3, NA, 83.3, 80.8, 34.2, 53.8, 68.1, 66.2,
16, 100, 26.3, 44.5, 70.6, 16.7, 27.2, 29.2, 93.7, 33.5, 64.4,
30.9, 30, 60.1, 30.7, 34.5, NA, 41.1, 38.9, 28.2)), row.names = c(NA,
-100L), class = c("tbl_df", "tbl", "data.frame"))
您只需从 broom 输出的 `estimate` 列中取出第二个元素:
# Add broom summaries as list-columns and extract R-squared and the slope.
corn_output <- corn_models %>%
  mutate(
    tidy = map(model, broom::tidy),
    glance = map(model, broom::glance),
    augment = map(model, broom::augment),
    rsq = map_dbl(glance, "r.squared"),
    # The slope is the second entry of `estimate` (the first is the intercept).
    slope = map_dbl(tidy, function(coef_tbl) coef_tbl$estimate[2])
  )
仅供参考,下面是使用 `filter` 和 `pull` 获取斜率的方法:
library(tidyverse)
library(broom)
# Nest each county's rows into a `data` list-column (one row per county).
corn_by_county <- corn_final_long %>%
  group_by(County) %>%
  nest()
# Fit a simple linear regression of Yield on Year for one county.
#
# df: a data frame with `Yield` and `Year` columns.
# Returns the fitted `lm` object.
corn_county <- function(df) {
  lm(Yield ~ Year, data = df)
}
# Fit one model per county, keeping each fit in the `model` list-column.
corn_models <- corn_by_county %>%
  mutate(model = map(data, corn_county))

# Add broom summaries as list-columns and extract R-squared and the slope.
corn_output <- corn_models %>%
  mutate(
    tidy = map(model, broom::tidy),
    glance = map(model, broom::glance),
    augment = map(model, broom::augment),
    rsq = map_dbl(glance, "r.squared"),
    # Keep only the `Year` row of each coefficient table and pull its
    # estimate, so the slope is selected by term name rather than by position.
    slope = map_dbl(
      tidy,
      function(coef_tbl) coef_tbl %>% filter(term == "Year") %>% pull(estimate)
    )
  )
head(corn_output)
#> # A tibble: 6 x 8
#> # Groups: County [6]
#> County data model tidy glance augment rsq slope
#> <chr> <list> <lis> <list> <list> <list> <dbl> <dbl>
#> 1 Aurora <tibble [2 ~ <lm> <tibble [2~ <tibble [1 ~ <tibble [2 ~ 1 -21.9
#> 2 Beadle <tibble [2 ~ <lm> <tibble [2~ <tibble [1 ~ <tibble [2 ~ 1 -23.1
#> 3 Bennett <tibble [2 ~ <lm> <tibble [2~ <tibble [1 ~ <tibble [2 ~ 1 -60.3
#> 4 Bon Hom~ <tibble [2 ~ <lm> <tibble [2~ <tibble [1 ~ <tibble [2 ~ 1 -44.1
#> 5 Brookin~ <tibble [2 ~ <lm> <tibble [2~ <tibble [1 ~ <tibble [2 ~ 1 -8.50
#> 6 Brown <tibble [2 ~ <lm> <tibble [2~ <tibble [1 ~ <tibble [2 ~ 1 -19.3