数据
# Example data: one row per student with four raw scores (score1-score4) and
# the desired indicator columns (score1x-score4x). An indicator is 1 when the
# matching score is at least 80, 0 when it is below 80, and NA when the score
# is missing.
data <- data.frame(
  student = c(1, 2, 3, 4, 5),
  score1 = c(77, NA, 52, 99, 89),
  score2 = c(95, 89, 79, 89, 73),
  score3 = c(92, 52, 73, 64, 90),
  score4 = c(84, 57, 78, 81, 66),
  score1x = c(0, NA, 0, 1, 1),
  score2x = c(1, 1, 0, 1, 0),
  score3x = c(1, 0, 0, 0, 1),
  score4x = c(1, 0, 0, 1, 0)
)
我有学生id和score1-score4的数据，希望以简单快速的方式创建score1x-score4x。规则是：如果score1小于80，则score1x为0，否则为1。我可以通过以下方式逐列做到这一点：data$score1x=ifelse(data$score1<80,0,1)，但我想知道有没有一种方法可以同时对所有列执行此操作，以更快地创建score1x-score4x？
尝试:
# Build the 0/1 flags for every score column at once: 1 when the score is at
# least 80, 0 when it is below 80 (a missing score stays NA).
score_cols <- paste0("score", 1:4)
# Select the score columns by name; positions 1:4 would pick up `student`
# and leave out score4.
flags <- (data[, score_cols] >= 80) * 1
colnames(flags) <- paste0(score_cols, "x")
# Bind onto the id and raw scores only, so the flag columns already stored in
# `data` are not duplicated in the result.
cbind(data[, c("student", score_cols)], flags)
data.table 解决方案：
library(data.table)

# Convert `data` to a data.table by reference so `:=` can write columns in
# place.
setDT(data)
cols <- paste0("score", 1:4)
# For each score column, write a matching "<name>x" flag: 1L when the score is
# at least 80, 0L when it is below 80; an NA score yields NA. `>=` (not `>`)
# keeps a score of exactly 80 on the "1" side, as the stated rule requires.
data[,
  paste0(cols, "x") := lapply(.SD, function(x) as.integer(x >= 80)),
  .SDcols = cols
]
data
student score1 score2 score3 score4 score1x score2x score3x score4x
1: 1 77 95 92 84 0 1 1 1
2: 2 NA 89 52 57 NA 1 0 0
3: 3 52 79 73 78 0 0 0 0
4: 4 99 89 64 81 1 1 0 1
5: 5 89 73 90 66 1 0 1 0
您可以使用此dplyr解决方案：该解决方案使用mutate_at()为名称中含有"score"的列创建新变量，然后使用rename_at()将列名末尾的"_x"更改为"x"：
library(dplyr)

data %>%
  # Keep only the id and the raw scores, selected by name rather than by
  # position so the result does not depend on column order.
  select(student, score1:score4) %>%
  # Add a "<name>_x" flag per score column: 1L when the score is at least 80,
  # 0L when it is below 80; an NA score yields NA. `>=` (not `>`) keeps a
  # score of exactly 80 on the "1" side, as the stated rule requires.
  mutate_at(vars(contains("score")), list(x = ~ as.integer(. >= 80))) %>%
  # Drop the underscore so the new columns are named score1x ... score4x.
  rename_at(vars(contains("_x")), ~ gsub("_", "", ., fixed = TRUE))
student score1 score2 score3 score4 score1x score2x score3x score4x
1 1 77 95 92 84 0 1 1 1
2 2 NA 89 52 57 NA 1 0 0
3 3 52 79 73 78 0 0 0 0
4 4 99 89 64 81 1 1 0 1
5 5 89 73 90 66 1 0 1 0