我有一张修改过的数据表。我只想对年份(Year)为 2010:2020 的人口(population)进行插值。该表仅包括1个国家,但有16107行。
我知道插值的写法是:mutate_at(vars(population, gdp.x), na.approx, method="linear")
我尝试过使用 apply:apply(ssp[5,6], ssp[2]==2010:2020, mutate_at(vars(population, gdp.x), na.approx, method="linear"))
Groups: Country [177]
Country Year Model Scenario population gdp.x SSP Version Date Series.Name Series.Code Country.Code gdp.y
<chr> <dbl> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
1 Afghanistan 2010 OECD Env-Growth SSP2_v9_130325 28.0 0.0159 SSP2 v9 130325 NA NA NA NA
2 Afghanistan 2011 OECD Env-Growth SSP2_v9_130325 NA NA SSP2 v9 130325 NA NA NA NA
3 Afghanistan 2012 OECD Env-Growth SSP2_v9_130325 NA NA SSP2 v9 130325 NA NA NA NA
4 Afghanistan 2013 OECD Env-Growth SSP2_v9_130325 NA NA SSP2 v9 130325 NA NA NA NA
5 Afghanistan 2014 OECD Env-Growth SSP2_v9_130325 NA NA SSP2 v9 130325 NA NA NA NA
6 Afghanistan 2015 OECD Env-Growth SSP2_v9_130325 31.8 0.0206 SSP2 v9 130325 NA NA NA NA
7 Afghanistan 2016 OECD Env-Growth SSP2_v9_130325 NA NA SSP2 v9 130325 NA NA NA NA
8 Afghanistan 2017 OECD Env-Growth SSP2_v9_130325 NA NA SSP2 v9 130325 GDP growth, constant 2010 USD NYGDPMKTPKDZ AFG 2.7
9 Afghanistan 2018 OECD Env-Growth SSP2_v9_130325 NA NA SSP2 v9 130325 GDP growth, constant 2010 USD NYGDPMKTPKDZ AFG 1.8
10 Afghanistan 2019 OECD Env-Growth SSP2_v9_130325 NA NA SSP2 v9 130325 GDP growth, constant 2010 USD NYGDPMKTPKDZ AFG 2.9
# ... with 16,097 more rows
前20行的 dput 输出:
structure(list(Country = c("Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan"), Year = c(2010, 2011, 2012, 2013,
2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024,
2025, 2026, 2027, 2028, 2029), Model = c("OECD Env-Growth", "OECD Env-Growth",
"OECD Env-Growth", "OECD Env-Growth", "OECD Env-Growth", "OECD Env-Growth",
"OECD Env-Growth", "OECD Env-Growth", "OECD Env-Growth", "OECD Env-Growth",
"OECD Env-Growth", "OECD Env-Growth", "OECD Env-Growth", "OECD Env-Growth",
"OECD Env-Growth", "OECD Env-Growth", "OECD Env-Growth", "OECD Env-Growth",
"OECD Env-Growth", "OECD Env-Growth"), Scenario = c("SSP2_v9_130325",
"SSP2_v9_130325", "SSP2_v9_130325", "SSP2_v9_130325", "SSP2_v9_130325",
"SSP2_v9_130325", "SSP2_v9_130325", "SSP2_v9_130325", "SSP2_v9_130325",
"SSP2_v9_130325", "SSP2_v9_130325", "SSP2_v9_130325", "SSP2_v9_130325",
"SSP2_v9_130325", "SSP2_v9_130325", "SSP2_v9_130325", "SSP2_v9_130325",
"SSP2_v9_130325", "SSP2_v9_130325", "SSP2_v9_130325"), population = c(27.962207,
NA, NA, NA, NA, 31.82735098, NA, NA, NA, NA, 36.142924, NA, NA,
NA, NA, 40.89646358, NA, NA, NA, NA), gdp.x = c(0.01593680064,
NA, NA, NA, NA, 0.02062235783, NA, NA, NA, NA, 0.02575387325,
NA, NA, NA, NA, 0.03327524539, NA, NA, NA, NA), SSP = c("SSP2",
"SSP2", "SSP2", "SSP2", "SSP2", "SSP2", "SSP2", "SSP2", "SSP2",
"SSP2", "SSP2", "SSP2", "SSP2", "SSP2", "SSP2", "SSP2", "SSP2",
"SSP2", "SSP2", "SSP2"), Version = c("v9", "v9", "v9", "v9",
"v9", "v9", "v9", "v9", "v9", "v9", "v9", "v9", "v9", "v9", "v9",
"v9", "v9", "v9", "v9", "v9"), Date = c("130325", "130325", "130325",
"130325", "130325", "130325", "130325", "130325", "130325", "130325",
"130325", "130325", "130325", "130325", "130325", "130325", "130325",
"130325", "130325", "130325"), Series.Name = c(NA, NA, NA, NA,
NA, NA, NA, "GDP growth, constant 2010 USD", "GDP growth, constant 2010 USD",
"GDP growth, constant 2010 USD", "GDP growth, constant 2010 USD",
"GDP growth, constant 2010 USD", NA, NA, NA, NA, NA, NA, NA,
NA), Series.Code = c(NA, NA, NA, NA, NA, NA, NA, "NYGDPMKTPKDZ",
"NYGDPMKTPKDZ", "NYGDPMKTPKDZ", "NYGDPMKTPKDZ", "NYGDPMKTPKDZ",
NA, NA, NA, NA, NA, NA, NA, NA), Country.Code = c(NA, NA, NA,
NA, NA, NA, NA, "AFG", "AFG", "AFG", "AFG", "AFG", NA, NA, NA,
NA, NA, NA, NA, NA), gdp.y = c(NA, NA, NA, NA, NA, NA, NA, 2.7,
1.8, 2.9, -5.5, 1, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA,
-20L), groups = structure(list(Country = "Afghanistan", .rows = structure(list(
1:20), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr",
"list"))), row.names = 1L, class = c("tbl_df", "tbl", "data.frame"
), .drop = TRUE), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
))
- 不要混合使用 dplyr 和基础 R 语法。
- mutate_at 已被取代,现在可以使用 across。
library(dplyr)
library(zoo)  # provides na.approx()

# Linearly interpolate population and gdp.x within each country, but only
# over the rows whose Year lies in 2010:2020. Rows outside that window keep
# their original values (including any NA). The result stays grouped by
# Country.
# NOTE(review): assumes one row per Country/Year; duplicated years within a
# country would need to be resolved before interpolating -- confirm.
result <- ssp %>%
  group_by(Country) %>%
  mutate(across(
    c(population, gdp.x),
    ~ {
      in_window <- Year %in% 2010:2020
      filled <- .x
      # Interpolation needs at least two known points inside the window;
      # otherwise the column is returned unchanged for this country.
      if (sum(in_window & !is.na(.x)) >= 2) {
        # x = Year interpolates on the actual year values; na.rm = FALSE
        # keeps the output the same length as the input slice.
        filled[in_window] <- na.approx(
          .x[in_window],
          x = Year[in_window],
          na.rm = FALSE
        )
      }
      filled
    }
  ))
result
我设法解决了这个问题:
我使用以下代码对整个数据框的 population 进行了整体插值:
# Pipe step: linearly interpolate the NA values of population over all rows.
# Written with across() because mutate_at() is superseded.
mutate(across(population, ~ na.approx(.x, method = "linear")))
对于gdp,我只想对特定年份进行插值,我使用了case_when:
# Interpolate gdp only for the years 2010-2020. Rows outside that window
# keep their original gdp value instead of being overwritten with NA.
# na.rm = FALSE keeps the interpolated vector the same length as the column,
# so it lines up with Year row by row even when gdp starts or ends with NA.
ssp <- ssp %>%
  mutate(gdp = case_when(
    Year >= 2010 & Year <= 2020 ~ na.approx(gdp, na.rm = FALSE),
    TRUE ~ gdp
  ))