我有两个国家的数据,它们的起始年份和结束年份不同。数据中包含不同社会阶层的平均收入。我想为每个社会阶层构建一个指数,把第一年的收入设为100,然后观察在之后的年份里各阶层的收入相对于100是如何变化的。总的来说,这应该很容易做到,但似乎由于各个国家的观测数量不同,我的做法行不通。
这是我尝试过的代码,但我只得到了缺失的值:
# Attempted index calculation (the failing example from the question).
# NOTE(review): the grouping includes `year`, and the example data hold one
# row per cntry/year/class_m combination, so each group appears to contain a
# single row. `mean[first(year)]` then subscripts that length-1 column with
# the factor `year`, i.e. with its integer level code; any code above 1 is
# out of range and gives NA, which would explain the missing values.
df=df %>%
group_by(cntry, year,class_m) %>%
mutate(base_year = (mean[first(year)]/mean)*100)
数据如下:
# Example data (a structure() literal, presumably produced by dput()):
# 140 rows with columns cntry (factor: "at", "be"), year (factor with levels
# "1995".."2019"), class_m (factor, 4 classes) and mean (numeric).
# The object is a grouped_df grouped by cntry and year (35 groups).
# "at" rows use year levels 4-20 (2003-2019) and "be" rows use levels 1-18
# (1995-2017), so the two countries start and end in different years.
df= structure(list(cntry = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), .Label = c("at", "be"), class = "factor"),
year = structure(c(4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L,
13L, 13L, 13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L,
16L, 16L, 16L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 19L,
19L, 19L, 19L, 20L, 20L, 20L, 20L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L,
6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L,
10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L,
13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L,
16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L
), .Label = c("1995", "1997", "2000", "2003", "2004", "2005",
"2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013",
"2014", "2015", "2016", "2017", "2018", "2019"), class = "factor"),
class_m = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label = c("Low-skilled working class",
"Skilled working class", "Middle class", "Upper class"), class = "factor"),
mean = c(21667.3165297756, 31141.2479100646, 38694.5317839067,
48897.5586114381, 21893.6782367936, 29866.0796003899, 36846.1057208349,
46115.8225807015, 19914.101136956, 30201.1848571751, 36688.5006276306,
44334.4349912073, 20505.9102212244, 30071.1070352498, 37093.4347815202,
44630.7476325564, 20265.9465807599, 29827.9369893851, 40549.4855257344,
48107.2865241041, 22378.7756708627, 31334.7756747725, 39981.9785570756,
50347.8600052063, 23101.010596959, 31412.9240693068, 40458.6454333296,
51740.898756006, 19805.2965921531, 30817.6682795387, 41165.6041754244,
52782.5026014194, 19078.5626059941, 30499.5262897878, 41177.4423103889,
51240.6014436097, 20393.1169949116, 29796.8273849528, 39234.5103600113,
50494.5284121857, 20786.560760249, 31306.6058474771, 40854.36428628,
50339.5860855376, 20033.5844477617, 30424.7651611075, 39659.447696875,
49191.4195426966, 18851.261369003, 30412.4669765863, 41857.2930659497,
51097.4975692186, 22333.7894908968, 30863.010648668, 41852.0093099513,
54112.6228115753, 21709.19921875, 30039.5068801246, 41097.4541047158,
49862.44140625, 20113.5586718618, 30733.8952367545, 41658.1716627373,
51754.4018503782, 21818.4311173551, 33225.8409123812, 43882.2512500977,
51037.5228976151, 15858.5028150308, 18782.8272745439, 22871.4020551682,
26288.6154497508, 26599.1650213236, 31720.3186300543, 41940.5016413888,
51187.0060567118, 18564.6510736198, 21526.72898147, 24807.2116933588,
29207.4658820585, 23058.5603825146, 31862.7097532934, 37588.62928007,
45160.9518839946, 25495.8949453907, 31851.1999874662, 38276.6899334939,
46318.331560595, 23165.6350767837, 32586.7829065825, 37256.5740814167,
45285.0662561028, 23975.7581116063, 30787.3910726117, 37346.8507982085,
45180.6091420909, 23786.1529599028, 32413.707905246, 38596.3467614532,
47026.6344280445, 24272.92088131, 31167.7104944988, 37745.6268718255,
46128.4799968946, 24583.9968164343, 29819.2298432657, 40053.8477213667,
48223.1556254353, 23227.04705051, 29611.9190298389, 39086.0012315702,
46742.9511396314, 20980.1647228858, 29627.2417955117, 38648.6829503705,
45677.0658477392, 21397.8125304146, 30675.2233482807, 40735.634479222,
46355.3748374436, 22836.5595055445, 29859.0336509053, 40335.3885497182,
47934.8837121327, 21465.185981748, 30436.1330929852, 40091.5582937488,
48743.3268548605, 21375.6534656544, 31060.2359133816, 40006.7183770635,
47618.8685730448, 20901.803025412, 29971.1886677767, 39526.0725185188,
46793.098588355, 21710.1246251194, 30894.12481284, 39699.3077814615,
47179.3071888513)), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -140L), groups = structure(list(
cntry = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("at",
"be"), class = "factor"), year = structure(c(4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L), .Label = c("1995", "1997",
"2000", "2003", "2004", "2005", "2006", "2007", "2008", "2009",
"2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017",
"2018", "2019"), class = "factor"), .rows = structure(list(
1:4, 5:8, 9:12, 13:16, 17:20, 21:24, 25:28, 29:32, 33:36,
37:40, 41:44, 45:48, 49:52, 53:56, 57:60, 61:64, 65:68,
69:72, 73:76, 77:80, 81:84, 85:88, 89:92, 93:96, 97:100,
101:104, 105:108, 109:112, 113:116, 117:120, 121:124,
125:128, 129:132, 133:136, 137:140), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -35L), .drop = TRUE))
这就是您想要的吗?
# Index each class's mean income to its first observed year (= 100).
# Group by country and class only -- not by year -- so every group spans the
# whole time series and the base value can be looked up inside the group.
# The index is current / base * 100; the relative change must be divided by
# the BASE value, not by the current value.
# `order_by = year` selects the earliest year even if the rows are not
# sorted (the levels of the `year` factor are in chronological order), so
# countries whose series start in different years are handled per group.
df %>%
  group_by(cntry, class_m) %>%
  mutate(base_year = 100 * mean / first(mean, order_by = year))
# # A tibble: 140 x 5
# # Groups: cntry, class_m [8]
# cntry year class_m mean base_year
# <fct> <fct> <fct> <dbl> <dbl>
# 1 at 2003 Low-skilled working class 21667. 100
# 2 at 2003 Skilled working class 31141. 100
# 3 at 2003 Middle class 38695. 100
# 4 at 2003 Upper class 48898. 100
# 5 at 2004 Low-skilled working class 21894. 101.
# 6 at 2004 Skilled working class 29866. 95.9
# 7 at 2004 Middle class 36846. 95.2
# 8 at 2004 Upper class 46116. 94.3
# 9 at 2005 Low-skilled working class 19914. 91.9
# 10 at 2005 Skilled working class 30201. 97.0
# # ... with 130 more rows