我想为每一组的每一行计算其前一组(`grp` 的上一个取值)的 `sog` 平均值。
library(dplyr)
# Sample data: `grp` is the group id, `sog` the measured value, and `res` an
# extra column carried along unchanged. 25 rows in 8 groups.
df <- tibble(
  # Group sizes: 6, 1, 3, 4, 3, 4, 3, 1 (ids stay doubles, not integers).
  grp = rep(c(1, 2, 3, 4, 5, 6, 7, 8), times = c(6, 1, 3, 4, 3, 4, 3, 1)),
  # One line per group, in the same order as `grp`.
  sog = c(
    12.00, 12.00, 12.00, 11.00, 10.00, 9.00,
    2.00,
    8.80, 8.70, 7.80,
    11.00, 11.00, 12.00, 11.00,
    9.54, 9.80, 10.40,
    4.00, 4.00, 4.50, 3.60,
    7.00, 8.00, 9.00,
    0.00
  ),
  # Runs of equal values: 7 x NA, 10 x 0, 4 x 66.66667, 3 x 275, 1 x 0.
  res = rep(c(NA, 0, 66.66667, 275.0, 0), times = c(7, 10, 4, 3, 1))
)
我想要一些类似的东西:
# Pseudo-code sketching the intent, not a working call: the string given to
# `n` is a placeholder for "shift by one whole group", i.e. every row of a
# group should receive the mean of `sog` from the group before it.
df %>%
group_by(grp) %>%
mutate(sog_avg = dplyr::lag(mean(sog), n = 'by every previous group', default = NA))
现在的数据是:
grp sog res
1 12 NA
1 12 NA
1 12 NA
1 11 NA
1 10 NA
1 9 NA
2 2 NA
3 8.8 0
3 8.7 0
3 7.8 0
4 11 0
4 11 0
4 12 0
4 11 0
5 9.54 0
5 9.8 0
5 10.4 0
6 4 66.7
6 4 66.7
6 4.5 66.7
6 3.6 66.7
7 7 275
7 8 275
7 9 275
8 0 0
我想要的结果是:
grp sog res sog_avg
1 12 NA NA
1 12 NA NA
1 12 NA NA
1 11 NA NA
1 10 NA NA
1 9 NA NA
2 2 NA 11
3 8.8 0 2
3 8.7 0 2
3 7.8 0 2
4 11 0 8.43
4 11 0 8.43
4 12 0 8.43
4 11 0 8.43
5 9.54 0 11.25
5 9.8 0 11.25
5 10.4 0 11.25
6 4 66.7 9.91
6 4 66.7 9.91
6 4.5 66.7 9.91
6 3.6 66.7 9.91
7 7 275 4.025
7 8 275 4.025
7 9 275 4.025
8 0 0 8
我们可以先计算每个组的平均值,用 `lag` 将这些平均值向后移动一位(使每个组对应到前一组的平均值),然后再按 `grp` 与原始数据连接(join):
library(dplyr)
# Step 1: collapse to one row per group holding that group's mean `sog`
# (missing values are ignored).
group_means <- df %>%
  group_by(grp) %>%
  summarise(sog_avg = mean(sog, na.rm = TRUE))

# Step 2: shift the means down one row so each group carries the mean of the
# group listed just before it; the first group has no predecessor and gets NA.
previous_means <- mutate(group_means, sog_avg = lag(sog_avg))

# Step 3: attach the shifted means to every original row of the same group.
left_join(previous_means, df, by = "grp")
# grp sog_avg sog res
#1 1 NA 12.00 NA
#2 1 NA 12.00 NA
#3 1 NA 12.00 NA
#4 1 NA 11.00 NA
#5 1 NA 10.00 NA
#6 1 NA 9.00 NA
#7 2 11.00 2.00 NA
#8 3 2.00 8.80 0.0
#9 3 2.00 8.70 0.0
#10 3 2.00 7.80 0.0
#11 4 8.43 11.00 0.0
#12 4 8.43 11.00 0.0
#13 4 8.43 12.00 0.0
#14 4 8.43 11.00 0.0
#15 5 11.25 9.54 0.0
#16 5 11.25 9.80 0.0
#17 5 11.25 10.40 0.0
#18 6 9.91 4.00 66.7
#19 6 9.91 4.00 66.7
#20 6 9.91 4.50 66.7
#21 6 9.91 3.60 66.7
#22 7 4.03 7.00 275.0
#23 7 4.03 8.00 275.0
#24 7 4.03 9.00 275.0
#25 8 8.00 0.00 0.0
借助 `purrr`(需要先执行 `library(purrr)`)的一种做法是:
# For every row, pick the rows whose `grp` equals this row's `grp` minus one
# and average their `sog`. Where no such rows exist, the mean of an empty
# vector is NaN. `map_dbl()` comes from purrr, which must be attached.
df %>%
  mutate(
    sog_avg = map_dbl(grp - 1, function(prev_grp) mean(sog[grp == prev_grp]))
  )
grp sog res sog_avg
<dbl> <dbl> <dbl> <dbl>
1 1 12 NA NaN
2 1 12 NA NaN
3 1 12 NA NaN
4 1 11 NA NaN
5 1 10 NA NaN
6 1 9 NA NaN
7 2 2 NA 11
8 3 8.8 0 2
9 3 8.7 0 2
10 3 7.8 0 2
或者只使用 `dplyr`:
# dplyr-only variant. The shift below works on neighbouring rows, so this
# assumes the rows of each group are contiguous and the groups appear in the
# intended order -- TODO confirm for data other than the sample `df`.
df %>%
  group_by(grp) %>%
  # Every row gets the mean `sog` of its own group.
  mutate(sog_avg = mean(sog)) %>%
  ungroup() %>%
  # Shift by one row over the whole table: the first row of each group now
  # holds the previous group's mean, later rows still hold their own.
  mutate(sog_avg = lag(sog_avg)) %>%
  group_by(grp) %>%
  # Spread that first-row value over the whole group.
  mutate(sog_avg = first(sog_avg)) %>%
  # Drop the grouping so later verbs do not silently operate per group.
  ungroup()
grp sog res sog_avg
<dbl> <dbl> <dbl> <dbl>
1 1 12 NA NA
2 1 12 NA NA
3 1 12 NA NA
4 1 11 NA NA
5 1 10 NA NA
6 1 9 NA NA
7 2 2 NA 11
8 3 8.8 0 2
9 3 8.7 0 2
10 3 7.8 0 2