如何从用 R 的 broom 包生成的整洁数据框中提取斜率系数



我正在尝试 Hadley Wickham 在这段视频中介绍的 tidy 工作流程:https://www.youtube.com/watch?v=rz3_FDVt9eg&t=1902s。当整理后的数据框只有 1 行时,用这种方式提取统计量很简单;但每个线性模型的斜率位于 broom 生成的整洁数据框的第二行。我的代码与 Hadley 的代码非常相似,如下所示。

library(tidyverse)
# Nest the observations so that each county occupies a single row whose
# `data` list-column holds that county's records.
corn_by_county <- corn_final_long %>%
  group_by(County) %>%
  nest()

# Per-county model definition (applied to each nested data frame below).

#' Fit a linear trend of yield over time
#'
#' @param df A data frame with the columns `Yield` and `Year`.
#' @return The fitted `lm` object for `Yield ~ Year`.
corn_county <- function(df) {
  lm(Yield ~ Year, data = df)
}

# Fit one linear model per county and store it in the list-column `model`.
corn_models <- corn_by_county %>% mutate(model = map(data, corn_county))
# Add list-columns holding broom's tidy/glance/augment output for each model,
# then try to pull scalar summaries out of them.
corn_output <- corn_models %>% mutate(tidy = map(model, broom::tidy),
glance = map(model, broom::glance),
augment = map(model, broom::augment),
# r.squared is extracted by name from each element of `glance`
rsq = glance %>% map_dbl('r.squared'),
# NOTE(review): the question reports that the slope sits in the second row
# of each `tidy` data frame, so 'estimate' presumably yields more than one
# value per model and map_dbl() cannot reduce it to a single double.
slope = tidy %>% map_dbl('estimate')) ## slope not working

斜率位于 `corn_output` 中嵌套的 `tidy` 数据框的第二行。我试过下面这段代码:

# Attempted fix from the question (reported as not working): filter() is
# piped the whole `tidy` list-column here rather than each nested data frame.
slope = tidy %>% filter(term == 'Year') %>% map_dbl('estimate')

然而,这行不通。应该如何提取斜率?非常感谢。

这是我的数据样本。

# Sample of the input data as a dput()-style literal: a 100-row tibble with
# the columns Year (integer, 1979 and 1980), County (character) and Yield
# (numeric; contains some NA values).
corn_final_long <- structure(list(Year = c(1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1980L, 1980L, 1980L, 
1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 
1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 
1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 
1980L, 1980L, 1980L, 1980L), County = c("Aurora", "Beadle", "Bennett", 
"Bon Homme", "Brookings", "Brown", "Brule", "Buffalo", "Butte", 
"Campbell", "Charles Mix", "Clark", "Clay", "Codington", "Corson", 
"Custer", "Davison", "Day", "Deuel", "Dewey", "Douglas", "Edmunds", 
"Fall River", "Faulk", "Grant", "Gregory", "Haakon", "Hamlin", 
"Hand", "Hanson", "Harding", "Hughes", "Hutchinson", "Hyde", 
"Jackson", "Jerauld", "Jones", "Kingsbury", "Lake", "Lawrence", 
"Lincoln", "Lyman", "Marshall", "Mccook", "Mcpherson", "Meade", 
"Mellette", "Miner", "Minnehaha", "Moody", "Oglala Lakota", "Pennington", 
"Perkins", "Potter", "Roberts", "Sanborn", "Spink", "Stanley", 
"Sully", "Todd", "Tripp", "Turner", "Union", "Walworth", "Yankton", 
"Ziebach", "Aurora", "Beadle", "Bennett", "Bon Homme", "Brookings", 
"Brown", "Brule", "Buffalo", "Butte", "Campbell", "Charles Mix", 
"Clark", "Clay", "Codington", "Corson", "Custer", "Davison", 
"Day", "Deuel", "Dewey", "Douglas", "Edmunds", "Fall River", 
"Faulk", "Grant", "Gregory", "Haakon", "Hamlin", "Hand", "Hanson", 
"Harding", "Hughes", "Hutchinson", "Hyde"), Yield = c(47.3, 58.9, 
103.8, 71.4, 71.7, 65.3, 53.9, 72.8, 84.8, 61, 59, 63.4, 92.4, 
75.2, 41, 94.4, 62.7, 63.6, 74, 47.7, 57.7, 51.5, 102.1, 57.6, 
72.4, 58, 39.1, 68.2, 68.6, 66, 73.3, 85, 78.8, 52.7, 45, 40.9, 
76.7, 63.6, 80.6, 85, 96.3, 87, 65.8, 74.2, 55.9, 78.8, 47.8, 
66.2, 92.6, 93.1, 60, 62.9, 53.5, 60.2, 70.5, 64.8, 68.9, 60, 
59, 94.8, 42.2, 89.5, 105.1, 68.4, 78.9, 45, 25.4, 35.8, 43.5, 
27.3, 63.2, 46, 32.3, NA, 83.3, 80.8, 34.2, 53.8, 68.1, 66.2, 
16, 100, 26.3, 44.5, 70.6, 16.7, 27.2, 29.2, 93.7, 33.5, 64.4, 
30.9, 30, 60.1, 30.7, 34.5, NA, 41.1, 38.9, 28.2)), row.names = c(NA, 
-100L), class = c("tbl_df", "tbl", "data.frame"))

您只需选取 broom 输出中的 `estimate` 列(斜率是其中的第二个元素):

# Attach broom summaries to every county model, then reduce them to scalars:
# R-squared comes from glance(), the slope from the tidy() coefficient table.
corn_output <- corn_models %>%
  mutate(
    tidy = map(model, broom::tidy),
    glance = map(model, broom::glance),
    augment = map(model, broom::augment),
    rsq = map_dbl(glance, "r.squared"),
    # The slope is the second entry of each tidy() table's `estimate` column.
    slope = map_dbl(tidy, ~ .x$estimate[2])
  )

仅供参考,下面是使用 `filter()` 和 `pull()` 获得斜率的方法:

library(tidyverse)
library(broom)
# One row per county, with that county's records nested in `data`.
corn_by_county <- corn_final_long %>%
  group_by(County) %>%
  nest()
# Per-county model definition (applied to each nested data frame below).

#' Fit a linear trend of yield over time
#'
#' @param df A data frame with the columns `Yield` and `Year`.
#' @return The fitted `lm` object for `Yield ~ Year`.
corn_county <- function(df) {
  lm(Yield ~ Year, data = df)
}
# Fit one model per county and keep it alongside the nested data.
corn_models <- corn_by_county %>%
  mutate(model = map(data, corn_county))

# Summarise every model with broom. The slope is looked up by term name
# ("Year") inside each tidy() table rather than by row position.
corn_output <- corn_models %>%
  mutate(
    tidy = map(model, broom::tidy),
    glance = map(model, broom::glance),
    augment = map(model, broom::augment),
    rsq = map_dbl(glance, "r.squared"),
    slope = map_dbl(tidy, ~ pull(filter(.x, term == "Year"), estimate))
  )
head(corn_output)
#> # A tibble: 6 x 8
#> # Groups:   County [6]
#>   County   data         model tidy        glance       augment        rsq  slope
#>   <chr>    <list>       <lis> <list>      <list>       <list>       <dbl>  <dbl>
#> 1 Aurora   <tibble [2 ~ <lm>  <tibble [2~ <tibble [1 ~ <tibble [2 ~     1 -21.9 
#> 2 Beadle   <tibble [2 ~ <lm>  <tibble [2~ <tibble [1 ~ <tibble [2 ~     1 -23.1 
#> 3 Bennett  <tibble [2 ~ <lm>  <tibble [2~ <tibble [1 ~ <tibble [2 ~     1 -60.3 
#> 4 Bon Hom~ <tibble [2 ~ <lm>  <tibble [2~ <tibble [1 ~ <tibble [2 ~     1 -44.1 
#> 5 Brookin~ <tibble [2 ~ <lm>  <tibble [2~ <tibble [1 ~ <tibble [2 ~     1  -8.50
#> 6 Brown    <tibble [2 ~ <lm>  <tibble [2~ <tibble [1 ~ <tibble [2 ~     1 -19.3

最新更新