创建具有大型数据帧的新变量 Multiple ifelse 语句



示例数据 (仅一行(

CNTRCT_NBR   year CF_ANNUAL_1   CF_ANNUAL_2 CF_ANNUAL_3 CF_ANNUAL_4 CF_ANNUAL_5 CF_ANNUAL_6 CF_ANNUAL_7 CF_ANNUAL_8 CF_ANNUAL_9 CF_ANNUAL_10    CF_ANNUAL_11    CF_ANNUAL_12    CF_ANNUAL_13    CF_ANNUAL_14    CF_ANNUAL_15    CF_ANNUAL_16    CF_ANNUAL_17    CF_ANNUAL_18    CF_ANNUAL_19    CF_ANNUAL_20    CF_ANNUAL_21    CF_ANNUAL_22    CF_ANNUAL_23    CF_ANNUAL_24    CF_ANNUAL_25    CF_ANNUAL_26    CF_ANNUAL_27    CF_ANNUAL_28    CF_ANNUAL_29    CF_ANNUAL_30
00222L 2 351.1 175.55   175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55  175.55


我正在使用一个非常大的数据帧(400,000+ 行(,并正在尝试创建一个基于条件的新变量。此新变量将等于已存在的列值,具体取决于年份值。

aux304$CF_FINAL <- NA
within(aux304, aux304$CF_FINAL <- ifelse(aux304$year == 1, aux304     $CF_ANNUAL_1, 
ifelse(aux304$year == 2, aux304$CF_ANNUAL_2,
ifelse(aux304$year == 3, aux304$CF_ANNUAL_3,
ifelse(aux304$year == 4, aux304$CF_ANNUAL_4,
ifelse(aux304$year == 5, aux304$CF_ANNUAL_5, 
ifelse(aux304$year == 6, aux304$CF_ANNUAL_6, 
ifelse(aux304$year == 7, aux304$CF_ANNUAL_7, 
ifelse(aux304$year == 8, aux304$CF_ANNUAL_8, 
ifelse(aux304$year == 9, aux304$CF_ANNUAL_9, 
ifelse(aux304$year == 10, aux304$CF_ANNUAL_10, 
ifelse(aux304$year == 11, aux304$CF_ANNUAL_11, 
ifelse(aux304$year == 12, aux304$CF_ANNUAL_12, 
ifelse(aux304$year == 13, aux304$CF_ANNUAL_13, 
ifelse(aux304$year == 14, aux304$CF_ANNUAL_14, 
ifelse(aux304$year == 15, aux304$CF_ANNUAL_15, 
ifelse(aux304$year == 16, aux304$CF_ANNUAL_16, 
ifelse(aux304$year == 17, aux304$CF_ANNUAL_17, 
ifelse(aux304$year == 18, aux304$CF_ANNUAL_18, 
ifelse(aux304$year == 19, aux304$CF_ANNUAL_19, 
ifelse(aux304$year == 20, aux304$CF_ANNUAL_20, 
ifelse(aux304$year == 21, aux304$CF_ANNUAL_21, 
ifelse(aux304$year == 22, aux304$CF_ANNUAL_22, 
ifelse(aux304$year == 23, aux304$CF_ANNUAL_23, 
ifelse(aux304$year == 24, aux304$CF_ANNUAL_24, 
ifelse(aux304$year == 25, aux304$CF_ANNUAL_25, 
ifelse(aux304$year == 26, aux304$CF_ANNUAL_26, 
ifelse(aux304$year == 27, aux304$CF_ANNUAL_27, 
ifelse(aux304$year == 28, aux304$CF_ANNUAL_28, 
ifelse(aux304$year == 29, aux304$CF_ANNUAL_29, 
ifelse(aux304$year == 30, aux304$CF_ANNUAL_30, 0)))))))))))))))))))))))))))))))
aux30 <- subset(aux304, select=c("CNTRCT_NBR", "year", "CF_FINAL" ))

我遇到错误:

Error in `$<-.data.frame`(`*tmp*`, CF_FINAL, value = logical(0)) : 
replacement has 0 rows, data has 441300

然后,当我尝试对该数据库进行子集化时,它告诉我我创建的变量(CF_FINAL(不存在。我假设我的嵌套ifelse语句遇到了问题,因为有很多,所以它没有正确创建变量。

# generate example data
year <- 1:30
mat  <- diag(101:130)
df   <- as.data.frame(cbind(year, mat))
colnames(df) <- c("year", paste0("annual",1:30))
# solution
df$final <- NA
for(i in 1:30) {
df[df$year==i, "final"] <- df[df$year==i, paste0("annual",i)]
}

就像@AndS建议的那样,您可以使用mutatecase_when

library(tidyverse)
aux304 <- aux304 %>% # a pipe moves to next command
mutate( #to create a new variable
CF_FINAL = 
case_when( #runs a series of ifelse statements
year == 1 ~ CF_ANNUAL_1, #framework: if ~ then
year == 2 ~ CF_ANNUAL_2,
...,
TRUE ~ 0
)
)

由于这里有一个明确的模式,你可以研究map,但如果这只发生一次,或者你想暴力破解它,case_when提供了一个更干净的选项,有助于减少代码错误。

祝你好运!

最新更新