在 R 中用一个函数对不同的变量和名称自动执行重复的处理过程



我有一个类似下面 original 的数据集:

# Example data: nine observations with an id, a period and three variables
# (iso, via, region). Rows 1 and 2 are exact duplicates of each other.
id <- c(1, 1, 1, 2, 3, 3, 3, 4, 4)
period <- c(1, 1, 2, 2, 1, 2, 3, 1, 3)
iso <- c("USA", "USA", "CHN", "ESP", "UK", "FRA", "KOR", "KOR", "ITA")
via <- c(1, 1, 2, 7, 5, 4, 4, 3, 2)
region <- c(4, 4, 4, 1, 27, 35, 9, 35, 35)

# Column names are spelled out; they match the vector names above.
original <- data.frame(
  id = id,
  period = period,
  iso = iso,
  via = via,
  region = region
)

我想为每个主变量(iso、via、region;在我的真实数据集中还有更多主变量)各生成一个数据集,其中每个 id 的每个周期占一行。因此,目的地(iso)的虚拟变量数据集看起来像下面的 dest.dummies:

# Build 0/1 indicator columns for `iso`: one row per (id, period) pair and one
# column per distinct iso value.
dest.dummies <- original %>%
  select(id, period, iso) %>%
  filter(iso != "") %>%
  distinct() %>%
  mutate(iso_ = 1)

# Spread the marker column `iso_` to wide format. reshape() names the new
# columns "<value column>.<timevar value>", i.e. "iso_.USA", "iso_.CHN", ...
dest.dummies <- reshape(
  dest.dummies,
  idvar = c("id", "period"),
  timevar = "iso",
  direction = "wide"
)

# (id, period) pairs that lack a given iso value come out as NA; make them 0.
dest.dummies[is.na(dest.dummies)] <- 0

# Remove the "." separator inserted by reshape(): "iso_.USA" -> "iso_USA".
# fixed() makes the dot a literal character instead of a regex wildcard.
dest.dummies <- dest.dummies %>%
  dplyr::rename_with(
    ~ stringr::str_replace_all(.x, stringr::fixed("iso_."), "iso_")
  )

我的意图是创建一个函数,这样就可以把上面代码(dest.dummies)中的名称 "iso" 依次替换为每一个已定义的核心变量(via、region 等)。

我尝试创建一个函数,在每次调用时传入变量的名称:

# First attempt at a generic helper: same steps as the dest.dummies code, with
# the column supplied through `var`. The calls below pass the column name as a
# string ("iso", "via", "region").
# NOTE(review): the author reports that this does not work. Likely causes, to
# be confirmed by running it:
#   - with a string argument, `{{var}} != ""` presumably compares the string
#     itself with "" instead of the values of the column;
#   - the name built by '{{var}}_' may not come out as plain "iso_" when `var`
#     is a string -- verify the resulting column name;
#   - the pattern "var_." in rename_all() is a literal string and never refers
#     to the value of `var`.
dummyfier <- function(data, var){
df <- data %>%
select(id, period, {{var}}) %>%
filter(., {{var}} != "") %>%
distinct() %>%
mutate('{{var}}_' :=1)

# Spread to wide format, then replace the NA cells with 0.
df <- reshape(df, idvar=c("id","period"), timevar={{var}}, direction="wide")
df[is.na(df)==T] <- 0
# The last expression is an assignment, so its value is returned invisibly.
df <- df %>%
dplyr::rename_all(
funs(stringr::str_replace_all(., "var_.", "var_"))     # replace the pattern "var_." in the names with "var_"
)
}
# One call per core variable.
dest.dummies <-  dummyfier(original, "iso")
via.dummies <- dummyfier(original, "via")
region.dummies <- dummyfier(original, "region") 

但这并不奏效。

你知道如何避免这种重复编码吗?非常感谢。

您可以简化最初的尝试:

library(dplyr)
library(tidyr)
# One row per (id, period); one "iso_<value>" column per distinct iso value.
# After distinct() each (id, period, iso) combination occurs once, so `length`
# yields 1 where the value is present; absent combinations are filled with 0.
original %>%
  filter(iso != "") %>%
  distinct(id, period, iso) %>%
  pivot_wider(
    names_from = iso,
    names_prefix = 'iso_',
    values_from = iso,
    values_fn = length,
    values_fill = 0
  )
#     id period iso_USA iso_CHN iso_ESP iso_UK iso_FRA iso_KOR iso_ITA
#  <dbl>  <dbl>   <int>   <int>   <int>  <int>   <int>   <int>   <int>
#1     1      1       1       0       0      0       0       0       0
#2     1      2       0       1       0      0       0       0       0
#3     2      2       0       0       1      0       0       0       0
#4     3      1       0       0       0      1       0       0       0
#5     3      2       0       0       0      0       1       0       0
#6     3      3       0       0       0      0       0       1       0
#7     4      1       0       0       0      0       0       1       0
#8     4      3       0       0       0      0       0       0       1

现在把它放在一个函数中:

#' Build a wide table of 0/1 indicator columns for one variable.
#'
#' Requires dplyr and tidyr to be attached (`%>%`, `select()`, `filter()`,
#' `distinct()`, `pivot_wider()`).
#'
#' @param data A data frame containing the columns `id`, `period` and the
#'   column given as `var`.
#' @param var Unquoted name of the column to expand, e.g. `iso`.
#' @return A table with one row per distinct (`id`, `period`) pair and one
#'   column per distinct value of `var`, named "<var>_<value>". A cell is 1
#'   when that value occurs for the pair and 0 otherwise. Rows where `var`
#'   equals "" are dropped first.
dummyfier <- function(data, var) {
  # Capture the column name as text; it becomes the prefix of the new columns.
  col <- deparse(substitute(var))
  # Start from the `data` argument, not from a global object, so the function
  # works on whatever data frame the caller passes in.
  data %>%
    select(id, period, {{ var }}) %>%
    filter({{ var }} != "") %>%
    distinct() %>%
    # After distinct() each (id, period, value) combination occurs once, so
    # `length` gives 1 for present values; missing ones are filled with 0.
    pivot_wider(
      names_from = {{ var }},
      values_from = {{ var }},
      values_fn = length,
      values_fill = 0,
      names_prefix = paste0(col, "_")
    )
}
# One indicator table per core variable; the column is passed unquoted.
dest.dummies <- dummyfier(data = original, var = iso)
via.dummies <- dummyfier(data = original, var = via)
region.dummies <- dummyfier(data = original, var = region)

最新更新