R - 为每个组计算其前一个组的平均值

  • 本文关键字:平均值 计算 r dplyr
  • 更新时间 :
  • 英文 :


我想计算前一组中的sog平均值。

library(dplyr)
# Example data: 25 rows in 8 groups; the rows of each group are contiguous.
#   grp - group id
#   sog - value whose per-group mean is wanted
#   res - additional column that comes with the data
df <- tibble(
  grp = rep(c(1, 2, 3, 4, 5, 6, 7, 8), times = c(6, 1, 3, 4, 3, 4, 3, 1)),
  sog = c(
    12.00, 12.00, 12.00, 11.00, 10.00, 9.00, # grp 1
    2.00,                                    # grp 2
    8.80, 8.70, 7.80,                        # grp 3
    11.00, 11.00, 12.00, 11.00,              # grp 4
    9.54, 9.80, 10.40,                       # grp 5
    4.00, 4.00, 4.50, 3.60,                  # grp 6
    7.00, 8.00, 9.00,                        # grp 7
    0.00                                     # grp 8
  ),
  # NA for groups 1-2, 0 for groups 3-5, then groups 6, 7 and 8.
  res = c(rep(NA, 7), rep(0, 10), rep(66.66667, 4), rep(275.0, 3), 0)
)

我想要一些类似的东西:

# Pseudocode only (not runnable): the `n` argument of dplyr::lag() has to be a
# number, so the string below merely describes the intent -- every group should
# receive the mean of `sog` from the group before it.
df %>%
group_by(grp) %>%
mutate(sog_avg = dplyr::lag(mean(sog), n = 'by every previous group', default = NA))

现在的数据是:

grp   sog   res
1     12     NA  
1     12     NA  
1     12     NA  
1     11     NA  
1     10     NA  
1      9     NA  
2      2     NA  
3     8.8    0  
3     8.7    0  
3     7.8    0  
4     11      0  
4     11      0  
4     12      0  
4     11      0  
5     9.54   0  
5     9.8    0  
5     10.4    0  
6     4     66.7
6     4     66.7
6     4.5   66.7
6     3.6   66.7
7     7    275  
7     8    275  
7     9    275  
8     0      0  

我想要的结果是:

grp   sog   res  sog_avg
1     12     NA  NA
1     12     NA  NA
1     12     NA  NA
1     11     NA  NA
1     10     NA  NA
1      9     NA  NA
2      2     NA  11
3     8.8    0    2
3     8.7    0    2
3     7.8    0    2
4     11     0   8.43
4     11     0   8.43 
4     12     0   8.43
4     11     0   8.43
5     9.54   0   11.25
5     9.8    0   11.25
5     10.4   0   11.25
6     4     66.7 9.91
6     4     66.7 9.91
6     4.5   66.7 9.91 
6     3.6   66.7 9.91
7     7    275   4.025 
7     8    275   4.025
7     9    275   4.025
8     0      0   8

我们可以先计算每个组的平均值,再用lag将这些平均值向后移动一位(这样每个组对应的就是前一组的平均值),最后用left_join与原数据连接:

library(dplyr)
# Collapse to one row per group holding that group's mean of `sog`, shift the
# means down by one row so each group carries its predecessor's mean (the
# first group gets NA), then attach the original rows again by `grp`.
df %>%
  group_by(grp) %>%
  summarise(sog_avg = mean(sog, na.rm = TRUE)) %>%
  mutate(sog_avg = dplyr::lag(sog_avg, n = 1L)) %>%
  left_join(df, by = "grp")
#   grp sog_avg   sog   res
#1    1      NA 12.00    NA
#2    1      NA 12.00    NA
#3    1      NA 12.00    NA
#4    1      NA 11.00    NA
#5    1      NA 10.00    NA
#6    1      NA  9.00    NA
#7    2   11.00  2.00    NA
#8    3    2.00  8.80   0.0
#9    3    2.00  8.70   0.0
#10   3    2.00  7.80   0.0
#11   4    8.43 11.00   0.0
#12   4    8.43 11.00   0.0
#13   4    8.43 12.00   0.0
#14   4    8.43 11.00   0.0
#15   5   11.25  9.54   0.0
#16   5   11.25  9.80   0.0
#17   5   11.25 10.40   0.0
#18   6    9.91  4.00  66.7
#19   6    9.91  4.00  66.7
#20   6    9.91  4.50  66.7
#21   6    9.91  3.60  66.7
#22   7    4.03  7.00 275.0
#23   7    4.03  8.00 275.0
#24   7    4.03  9.00 275.0
#25   8    8.00  0.00   0.0

借助purrr包的一种做法是:

# For every row, average `sog` over the rows whose `grp` equals this row's
# `grp - 1`. This assumes the group ids are consecutive integers. The first
# group has no predecessor, so its mean is taken over an empty vector and
# comes out as NaN (not NA).
# map_dbl() is namespace-qualified because the library() calls shown on this
# page attach dplyr only, not purrr.
df %>%
  mutate(
    sog_avg = purrr::map_dbl(
      grp - 1,
      function(prev_grp) mean(sog[grp == prev_grp])
    )
  )
grp   sog   res sog_avg
<dbl> <dbl> <dbl>   <dbl>
1     1  12      NA     NaN
2     1  12      NA     NaN
3     1  12      NA     NaN
4     1  11      NA     NaN
5     1  10      NA     NaN
6     1   9      NA     NaN
7     2   2      NA      11
8     3   8.8     0       2
9     3   8.7     0       2
10     3   7.8     0       2

或者只使用dplyr:

# dplyr-only variant. It relies on the rows of each group being contiguous,
# because the shift in the middle is done row by row.
df %>%
  group_by(grp) %>%
  # Every row gets the mean of its own group.
  mutate(sog_avg = mean(sog)) %>%
  ungroup() %>%
  # Ungrouped one-row shift: the first row of each group now holds the mean
  # of the group before it (NA for the very first row).
  mutate(sog_avg = lag(sog_avg)) %>%
  group_by(grp) %>%
  # Spread that first-row value over the whole group.
  mutate(sog_avg = first(sog_avg)) %>%
  # Drop the grouping so later steps are not silently evaluated per group.
  ungroup()
grp   sog   res sog_avg
<dbl> <dbl> <dbl>   <dbl>
1     1  12      NA      NA
2     1  12      NA      NA
3     1  12      NA      NA
4     1  11      NA      NA
5     1  10      NA      NA
6     1   9      NA      NA
7     2   2      NA      11
8     3   8.8     0       2
9     3   8.7     0       2
10     3   7.8     0       2

最新更新