在R中循环遍历一个变量列表,并创建一个以变量值为条件的新变量



我想创建一个新的变量(category,类别):只要其他12个变量(var1到var12)中的任何一个满足以下条件之一,就赋予相应的取值:

# Asker's attempt: for each of var1..var12, derive `category` from that column.
# NOTE(review): every iteration rebuilds new_df from the original df, so the
# previous iteration's result is discarded; after the loop, `category`
# reflects only the last column visited (var12).
for (i in 1:12) {
new_df <- df %>% mutate(
# NOTE(review): single-bracket indexing by name yields a one-column data
# frame (assuming df is a data frame), not a plain vector.
category = case_when(df[paste0("var",i)] == "09F" ~ "arts",
df[paste0("var",i)] == "09O" ~ "arts_o",
# Fallback when neither code matches: a single space.
TRUE ~ " "))
}

但是类别变量仍然是空的。。。有人知道为什么这个代码不起作用吗?我应该用其他方法吗?

谢谢!

我们可以使用mutate_at将函数应用于多列,而不是for循环。尝试:

library(dplyr)
# Recode each of var1..var12 into a companion column.
# The formula must sit *inside* the named list: mutate_at() then keeps the
# original columns and adds var1_category, ..., var12_category. With the `~`
# outside the list, the lambda returns a list wrapping the recoded vector and
# the original columns are overwritten instead.
new_df <- df %>%
  mutate_at(
    vars(var1:var12),
    list(category = ~ case_when(
      . == "09F" ~ "arts",
      . == "09O" ~ "arts_o",
      TRUE ~ " " # neither code present in this cell
    ))
  )

下面使用只有两个变量(var1、var2)的随机示例数据(可以推广到您的情况),根据var1、var2的值为每个观测分配一个类别,可以这样解决:

library(tidyr)
library(dplyr)

# Reproducible toy data: ten observations with two code columns.
set.seed(42)
df <- data.frame(
  id = 1:10,
  var1 = sample(c("09F", "09O", "10F"), 10, replace = TRUE),
  var2 = sample(c("09F", "09O", "10F"), 10, replace = TRUE),
  stringsAsFactors = FALSE
)

df %>%
  # Stack var1/var2 so every id contributes one row per variable.
  pivot_longer(cols = var1:var2, names_to = "name", values_to = "value") %>%
  # Decide the category once per id; "09F" takes precedence over "09O".
  group_by(id) %>%
  mutate(
    category = case_when(
      any(value == "09F") ~ "arts",
      any(value == "09O") ~ "arts_o",
      TRUE ~ " "
    )
  ) %>%
  ungroup() %>%
  # Spread the variables back into columns, keeping id and category as keys.
  pivot_wider(
    id_cols = c(id, category),
    names_from = name,
    values_from = value
  )
#> # A tibble: 10 x 4
#>       id category var1  var2 
#>    <int> <chr>    <chr> <chr>
#>  1     1 "arts"   09F   09F  
#>  2     2 "arts"   09F   09F  
#>  3     3 "arts"   09F   09O  
#>  4     4 "arts"   09F   09O  
#>  5     5 "arts_o" 09O   09O  
#>  6     6 "arts_o" 09O   10F  
#>  7     7 "arts_o" 09O   10F  
#>  8     8 "arts"   09F   09F  
#>  9     9 "arts"   10F   09F  
#> 10    10 " "      10F   10F

由reprex包(v0.3.0)于2020-04-15创建

tidyverse的方法:

# Packages this snippet relies on (pipe/mutate/select, map_chr/pmap, as_tibble).
library(dplyr)
library(purrr)
library(tibble)

set.seed(234)
# Example data: a 10 x 12 character tibble with columns var1..var12, filled
# mostly with "xyz" and occasionally with one of the two codes of interest.
d <- sample(
  c("09F", "09O", "xyz"),
  12 * 10,
  replace = TRUE,
  prob = c(0.05, 0.05, 0.9)
) %>%
  matrix(ncol = 12, dimnames = list(NULL, paste0("var", 1:12))) %>%
  as_tibble()

d %>%
  mutate(category = map_chr(
    # pmap() with c() collapses each row of the var* columns into one vector.
    pmap(select(d, starts_with("var")), c),
    # "09F" anywhere in the row wins over "09O"; otherwise a single space.
    # The second label is spelled "arts_O" (capital O), as in the printed
    # output that follows this snippet.
    ~ case_when(
      any(.x == "09F") ~ "arts",
      any(.x == "09O") ~ "arts_O",
      TRUE ~ " "
    )
  ))
## A tibble: 10 x 13
#   var1  var2  var3  var4  var5  var6  var7  var8  var9  var10 var11 var12 category
#   <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>   
# 1 xyz   xyz   xyz   xyz   xyz   xyz   xyz   09O   xyz   xyz   xyz   xyz   "arts_O"
# 2 xyz   xyz   xyz   xyz   09O   xyz   xyz   xyz   xyz   xyz   xyz   xyz   "arts_O"
# 3 xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   " "     
# 4 xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   " "     
# 5 xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   09O   xyz   xyz   xyz   "arts_O"
# 6 xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   09F   xyz   xyz   "arts"  
# 7 09O   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   "arts_O"
# 8 xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   09F   xyz   xyz   xyz   "arts"  
# 9 09O   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   "arts_O"
#10 xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   xyz   " "    

或者使用base R:

# Base R version: label each row by whether any var<N> column holds a code.
# - The anchored pattern selects only columns named var<digits>, not every
#   column whose name merely contains "var".
# - The columns are converted to a matrix once and scanned with rowSums(),
#   instead of running apply() over the data frame twice.
# - na.rm = TRUE makes a missing cell count as "no match" rather than turning
#   the whole row's category into NA.
# local() keeps the helper objects out of the calling environment.
d$category2 <- local({
  codes <- as.matrix(d[grepl("^var[0-9]+$", names(d))])
  has_code <- function(code) rowSums(codes == code, na.rm = TRUE) > 0
  # "09F" takes precedence over "09O"; rows with neither get a single space.
  ifelse(has_code("09F"), "arts", ifelse(has_code("09O"), "arts_o", " "))
})

最新更新