我有一个类似下面 original 的数据集:
# Example data: repeated observations per id and period, with three
# "main" variables (iso, via, region) to be turned into indicators later.
id     <- c(1, 1, 1, 2, 3, 3, 3, 4, 4)
period <- c(1, 1, 2, 2, 1, 2, 3, 1, 3)
iso    <- c("USA", "USA", "CHN", "ESP", "UK", "FRA", "KOR", "KOR", "ITA")
via    <- c(1, 1, 2, 7, 5, 4, 4, 3, 2)
region <- c(4, 4, 4, 1, 27, 35, 9, 35, 35)

# Column names are spelled out; they match the vector names used above.
original <- data.frame(
  id = id,
  period = period,
  iso = iso,
  via = via,
  region = region
)
我想为每个主变量(iso、via、region;在我的实际数据集中还有更多主变量)分别生成一个数据集,其中每个 id 在每个周期(period)只占一行。例如,目的地(iso)的虚拟变量数据集就是下面的 dest.dummies:
# Build the destination (iso) indicator table: one row per id/period and one
# 0/1 column per distinct iso value.
dest.dummies <- original %>%
  select(id, period, iso) %>%
  filter(iso != "") %>%
  distinct() %>%
  mutate(iso_ = 1)

# reshape() names each wide column "<value column>.<timevar value>",
# e.g. "iso_.USA"; combinations that never occur are left as NA.
dest.dummies <- reshape(
  dest.dummies,
  idvar = c("id", "period"),
  timevar = "iso",
  direction = "wide"
)
dest.dummies[is.na(dest.dummies)] <- 0

# Drop the "." separator so the columns read "iso_USA". fixed() makes the dot
# literal instead of a regex wildcard. rename_with() replaces the deprecated
# rename_all(funs(...)) form.
dest.dummies <- dest.dummies %>%
  dplyr::rename_with(
    ~ stringr::str_replace_all(.x, stringr::fixed("iso_."), "iso_")
  )
我的意图是创建一个函数,这样就可以把上面代码(dest.dummies)中的名称 "iso" 依次替换为其他各个主变量(via、region 等)。
我尝试创建了一个函数,每次调用时传入变量的名称:
# First attempt at a generic version of the iso pipeline.
#   data: data frame containing `id`, `period` and the column named by `var`.
#   var:  the column to expand into indicator columns.
# NOTE(review): this version does not produce the intended result; the notes
# on individual lines below point at the likely causes.
dummyfier <- function(data, var){
df <- data %>%
select(id, period, {{var}}) %>%
# NOTE(review): if `var` is supplied as a string, {{var}} here is presumably
# that string rather than the column's values, so nothing is filtered -- confirm.
filter(., {{var}} != "") %>%
distinct() %>%
# NOTE(review): intended to create a column called "<var>_"; verify the name
# this actually yields when `var` is a string.
mutate('{{var}}_' :=1)
# reshape() is plain base R, so the double braces are ordinary blocks and
# timevar simply receives the value of `var`.
df <- reshape(df, idvar=c("id","period"), timevar={{var}}, direction="wide")
df[is.na(df)==T] <- 0
df <- df %>%
dplyr::rename_all(
# The pattern below is the literal text "var_."; it does not use the value of
# the `var` argument.
funs(stringr::str_replace_all(., "var_.", "var_")) #change pattern in names "var_." by "var_"
)
# The last expression is an assignment, so the result is returned invisibly.
}
# One indicator table per main variable; the column name is passed as text.
dest.dummies   <- dummyfier(data = original, var = "iso")
via.dummies    <- dummyfier(data = original, var = "via")
region.dummies <- dummyfier(data = original, var = "region")
但这并不奏效。
你知道如何避免这种重复编码吗?非常感谢。
您可以简化最初的尝试:
library(dplyr)
library(tidyr)
# One row per id/period, one 0/1 column per iso value.
original %>%
  filter(iso != "") %>%
  # Keep only the key columns and drop repeated id/period/iso combinations.
  distinct(id, period, iso) %>%
  pivot_wider(
    names_from = iso,
    names_prefix = "iso_",
    values_from = iso,
    values_fn = length, # each remaining combination occurs once, so this is 1
    values_fill = 0     # combinations that never occur become 0
  )
# id period iso_USA iso_CHN iso_ESP iso_UK iso_FRA iso_KOR iso_ITA
# <dbl> <dbl> <int> <int> <int> <int> <int> <int> <int>
#1 1 1 1 0 0 0 0 0 0
#2 1 2 0 1 0 0 0 0 0
#3 2 2 0 0 1 0 0 0 0
#4 3 1 0 0 0 1 0 0 0
#5 3 2 0 0 0 0 1 0 0
#6 3 3 0 0 0 0 0 1 0
#7 4 1 0 0 0 0 0 1 0
#8 4 3 0 0 0 0 0 0 1
现在把它放在一个函数中:
#' Expand one column into 0/1 indicator columns, one row per id/period
#'
#' @param data A data frame containing the columns `id`, `period` and the
#'   column given in `var`.
#' @param var Unquoted name of the column to expand (e.g. `iso`).
#' @return A tibble with `id`, `period` and one column per distinct value of
#'   `var`, named `<var>_<value>`: 1 where that id/period has the value,
#'   0 otherwise.
dummyfier <- function(data, var) {
  # Capture the bare column name as text; it becomes the new columns' prefix.
  col <- deparse(substitute(var))
  # Use the `data` argument, not a global object, so the function works on
  # whatever data frame the caller passes in.
  data %>%
    select(id, period, {{ var }}) %>%
    filter({{ var }} != "") %>%
    distinct() %>%
    pivot_wider(
      names_from = {{ var }},
      values_from = {{ var }},
      # After distinct() each id/period/value occurs once, so length() is 1.
      values_fn = length,
      values_fill = 0,
      names_prefix = paste0(col, "_")
    )
}
# One indicator table per main variable; columns are passed as bare names.
dest.dummies   <- dummyfier(data = original, var = iso)
via.dummies    <- dummyfier(data = original, var = via)
region.dummies <- dummyfier(data = original, var = region)