r-如何根据第一次观察得到行之间的差异

  • 本文关键字:之间 何根 第一次 观察 r dplyr
  • 更新时间 :
  • 英文 :


我有两个国家的起始年份和结束年份不同。我有不同社会阶层的平均收入。我想把第一年的收入=100作为每个社会阶层的指数。然后我想看看在接下来的几年里,他们是如何从100人进步的。总的来说,这应该很容易做到,但似乎由于我国有不同数量的观察结果,这是行不通的。

这是我尝试过的代码,但我只得到了缺失的值:

df=df %>%
group_by(cntry, year,class_m) %>%
mutate(base_year = (mean[first(year)]/mean)*100)

数据如下:

df= structure(list(cntry = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L), .Label = c("at", "be"), class = "factor"), 
year = structure(c(4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 
6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 
10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 
13L, 13L, 13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L, 
16L, 16L, 16L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 19L, 
19L, 19L, 19L, 20L, 20L, 20L, 20L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 
6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 
10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 
13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 
16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L
), .Label = c("1995", "1997", "2000", "2003", "2004", "2005", 
"2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013", 
"2014", "2015", "2016", "2017", "2018", "2019"), class = "factor"), 
class_m = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label = c("Low-skilled working class", 
"Skilled working class", "Middle class", "Upper class"), class = "factor"), 
mean = c(21667.3165297756, 31141.2479100646, 38694.5317839067, 
48897.5586114381, 21893.6782367936, 29866.0796003899, 36846.1057208349, 
46115.8225807015, 19914.101136956, 30201.1848571751, 36688.5006276306, 
44334.4349912073, 20505.9102212244, 30071.1070352498, 37093.4347815202, 
44630.7476325564, 20265.9465807599, 29827.9369893851, 40549.4855257344, 
48107.2865241041, 22378.7756708627, 31334.7756747725, 39981.9785570756, 
50347.8600052063, 23101.010596959, 31412.9240693068, 40458.6454333296, 
51740.898756006, 19805.2965921531, 30817.6682795387, 41165.6041754244, 
52782.5026014194, 19078.5626059941, 30499.5262897878, 41177.4423103889, 
51240.6014436097, 20393.1169949116, 29796.8273849528, 39234.5103600113, 
50494.5284121857, 20786.560760249, 31306.6058474771, 40854.36428628, 
50339.5860855376, 20033.5844477617, 30424.7651611075, 39659.447696875, 
49191.4195426966, 18851.261369003, 30412.4669765863, 41857.2930659497, 
51097.4975692186, 22333.7894908968, 30863.010648668, 41852.0093099513, 
54112.6228115753, 21709.19921875, 30039.5068801246, 41097.4541047158, 
49862.44140625, 20113.5586718618, 30733.8952367545, 41658.1716627373, 
51754.4018503782, 21818.4311173551, 33225.8409123812, 43882.2512500977, 
51037.5228976151, 15858.5028150308, 18782.8272745439, 22871.4020551682, 
26288.6154497508, 26599.1650213236, 31720.3186300543, 41940.5016413888, 
51187.0060567118, 18564.6510736198, 21526.72898147, 24807.2116933588, 
29207.4658820585, 23058.5603825146, 31862.7097532934, 37588.62928007, 
45160.9518839946, 25495.8949453907, 31851.1999874662, 38276.6899334939, 
46318.331560595, 23165.6350767837, 32586.7829065825, 37256.5740814167, 
45285.0662561028, 23975.7581116063, 30787.3910726117, 37346.8507982085, 
45180.6091420909, 23786.1529599028, 32413.707905246, 38596.3467614532, 
47026.6344280445, 24272.92088131, 31167.7104944988, 37745.6268718255, 
46128.4799968946, 24583.9968164343, 29819.2298432657, 40053.8477213667, 
48223.1556254353, 23227.04705051, 29611.9190298389, 39086.0012315702, 
46742.9511396314, 20980.1647228858, 29627.2417955117, 38648.6829503705, 
45677.0658477392, 21397.8125304146, 30675.2233482807, 40735.634479222, 
46355.3748374436, 22836.5595055445, 29859.0336509053, 40335.3885497182, 
47934.8837121327, 21465.185981748, 30436.1330929852, 40091.5582937488, 
48743.3268548605, 21375.6534656544, 31060.2359133816, 40006.7183770635, 
47618.8685730448, 20901.803025412, 29971.1886677767, 39526.0725185188, 
46793.098588355, 21710.1246251194, 30894.12481284, 39699.3077814615, 
47179.3071888513)), class = c("grouped_df", "tbl_df", "tbl", 
"data.frame"), row.names = c(NA, -140L), groups = structure(list(
cntry = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("at", 
"be"), class = "factor"), year = structure(c(4L, 5L, 6L, 
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 
19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 
13L, 14L, 15L, 16L, 17L, 18L), .Label = c("1995", "1997", 
"2000", "2003", "2004", "2005", "2006", "2007", "2008", "2009", 
"2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017", 
"2018", "2019"), class = "factor"), .rows = structure(list(
1:4, 5:8, 9:12, 13:16, 17:20, 21:24, 25:28, 29:32, 33:36, 
37:40, 41:44, 45:48, 49:52, 53:56, 57:60, 61:64, 65:68, 
69:72, 73:76, 77:80, 81:84, 85:88, 89:92, 93:96, 97:100, 
101:104, 105:108, 109:112, 113:116, 117:120, 121:124, 
125:128, 129:132, 133:136, 137:140), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -35L), .drop = TRUE))

这就是您想要的吗?

df %>%
group_by(cntry, class_m) %>%
mutate(base_year = 100 * (1 - (first(mean) - mean) / mean))

# # A tibble: 140 x 5
# # Groups:   cntry, class_m [8]
#    cntry year  class_m                     mean base_year
#    <fct> <fct> <fct>                      <dbl>     <dbl>
#  1 at    2003  Low-skilled working class 21667.     100  
#  2 at    2003  Skilled working class     31141.     100  
#  3 at    2003  Middle class              38695.     100  
#  4 at    2003  Upper class               48898.     100  
#  5 at    2004  Low-skilled working class 21894.     101. 
#  6 at    2004  Skilled working class     29866.      95.7
#  7 at    2004  Middle class              36846.      95.0
#  8 at    2004  Upper class               46116.      94.0
#  9 at    2005  Low-skilled working class 19914.      91.2
# 10 at    2005  Skilled working class     30201.      96.9
# # ... with 130 more rows

相关内容

  • 没有找到相关文章

最新更新