示例数据 (仅一行(
CNTRCT_NBR year CF_ANNUAL_1 CF_ANNUAL_2 CF_ANNUAL_3 CF_ANNUAL_4 CF_ANNUAL_5 CF_ANNUAL_6 CF_ANNUAL_7 CF_ANNUAL_8 CF_ANNUAL_9 CF_ANNUAL_10 CF_ANNUAL_11 CF_ANNUAL_12 CF_ANNUAL_13 CF_ANNUAL_14 CF_ANNUAL_15 CF_ANNUAL_16 CF_ANNUAL_17 CF_ANNUAL_18 CF_ANNUAL_19 CF_ANNUAL_20 CF_ANNUAL_21 CF_ANNUAL_22 CF_ANNUAL_23 CF_ANNUAL_24 CF_ANNUAL_25 CF_ANNUAL_26 CF_ANNUAL_27 CF_ANNUAL_28 CF_ANNUAL_29 CF_ANNUAL_30
00222L 2 351.1 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55 175.55
我正在使用一个非常大的数据帧(400,000+ 行(,并正在尝试创建一个基于条件的新变量。此新变量将等于已存在的列值,具体取决于年份值。
aux304$CF_FINAL <- NA
within(aux304, aux304$CF_FINAL <- ifelse(aux304$year == 1, aux304 $CF_ANNUAL_1,
ifelse(aux304$year == 2, aux304$CF_ANNUAL_2,
ifelse(aux304$year == 3, aux304$CF_ANNUAL_3,
ifelse(aux304$year == 4, aux304$CF_ANNUAL_4,
ifelse(aux304$year == 5, aux304$CF_ANNUAL_5,
ifelse(aux304$year == 6, aux304$CF_ANNUAL_6,
ifelse(aux304$year == 7, aux304$CF_ANNUAL_7,
ifelse(aux304$year == 8, aux304$CF_ANNUAL_8,
ifelse(aux304$year == 9, aux304$CF_ANNUAL_9,
ifelse(aux304$year == 10, aux304$CF_ANNUAL_10,
ifelse(aux304$year == 11, aux304$CF_ANNUAL_11,
ifelse(aux304$year == 12, aux304$CF_ANNUAL_12,
ifelse(aux304$year == 13, aux304$CF_ANNUAL_13,
ifelse(aux304$year == 14, aux304$CF_ANNUAL_14,
ifelse(aux304$year == 15, aux304$CF_ANNUAL_15,
ifelse(aux304$year == 16, aux304$CF_ANNUAL_16,
ifelse(aux304$year == 17, aux304$CF_ANNUAL_17,
ifelse(aux304$year == 18, aux304$CF_ANNUAL_18,
ifelse(aux304$year == 19, aux304$CF_ANNUAL_19,
ifelse(aux304$year == 20, aux304$CF_ANNUAL_20,
ifelse(aux304$year == 21, aux304$CF_ANNUAL_21,
ifelse(aux304$year == 22, aux304$CF_ANNUAL_22,
ifelse(aux304$year == 23, aux304$CF_ANNUAL_23,
ifelse(aux304$year == 24, aux304$CF_ANNUAL_24,
ifelse(aux304$year == 25, aux304$CF_ANNUAL_25,
ifelse(aux304$year == 26, aux304$CF_ANNUAL_26,
ifelse(aux304$year == 27, aux304$CF_ANNUAL_27,
ifelse(aux304$year == 28, aux304$CF_ANNUAL_28,
ifelse(aux304$year == 29, aux304$CF_ANNUAL_29,
ifelse(aux304$year == 30, aux304$CF_ANNUAL_30, 0)))))))))))))))))))))))))))))))
aux30 <- subset(aux304, select=c("CNTRCT_NBR", "year", "CF_FINAL" ))
我遇到错误:
Error in `$<-.data.frame`(`*tmp*`, CF_FINAL, value = logical(0)) :
replacement has 0 rows, data has 441300
然后,当我尝试对该数据库进行子集化时,它告诉我我创建的变量(CF_FINAL
(不存在。我假设我的嵌套ifelse
语句遇到了问题,因为有很多,所以它没有正确创建变量。
# generate example data
year <- 1:30
mat <- diag(101:130)
df <- as.data.frame(cbind(year, mat))
colnames(df) <- c("year", paste0("annual",1:30))
# solution
df$final <- NA
for(i in 1:30) {
df[df$year==i, "final"] <- df[df$year==i, paste0("annual",i)]
}
就像@AndS建议的那样,您可以使用mutate
和case_when
。
library(tidyverse)
aux304 <- aux304 %>% # a pipe moves to next command
mutate( #to create a new variable
CF_FINAL =
case_when( #runs a series of ifelse statements
year == 1 ~ CF_ANNUAL_1, #framework: if ~ then
year == 2 ~ CF_ANNUAL_2,
...,
TRUE ~ 0
)
)
由于这里有一个明确的模式,你可以研究map
,但如果这只发生一次,或者你想暴力破解它,case_when
提供了一个更干净的选项,有助于减少代码错误。
祝你好运!