每当12个其他变量(var1到var12)中的任何一个满足以下条件之一时,我都想创建一个新的变量(类别):
# Attempt to derive `category` by looping over the column names var1..var12.
# NOTE(review): every pass starts again from the unmodified `df` and reassigns
# `new_df`, so the result of earlier passes is discarded; once the loop ends,
# `new_df` only reflects the comparison made for i = 12.
for (i in 1:12) {
new_df <- df %>% mutate(
# `df[paste0("var", i)]` is single-bracket indexing by name, which yields a
# one-column data frame rather than a plain vector.
# NOTE(review): `df[[paste0("var", i)]]` was presumably intended -- confirm.
category = case_when(df[paste0("var",i)] == "09F" ~ "arts",
df[paste0("var",i)] == "09O" ~ "arts_o",
# Fallback label (a single space) when neither code matches.
TRUE ~ " "))
}
但是类别变量仍然是空的……有人知道为什么这段代码不起作用吗?我应该用其他方法吗?
谢谢!
我们可以使用mutate_at将函数应用于多列,而不是使用for循环。尝试:
library(dplyr)
# Recode each of var1..var12 into a companion column. Passing a *named list of
# lambdas* (rather than a lambda that returns a list) makes mutate_at() keep the
# original columns and append one recoded column per input, named
# var1_category, ..., var12_category.
new_df <- df %>%
  mutate_at(
    vars(var1:var12),
    list(category = ~ case_when(
      . == "09F" ~ "arts",
      . == "09O" ~ "arts_o",
      # Fallback label (a single space) when neither code matches.
      TRUE ~ " "
    ))
  )
使用一些只有两个变量(var1和var2)的随机示例数据(但可以推广到您的情况),根据var1和var2的值为每个观测分配一个类别的问题可以这样解决:
# Fix the RNG seed so the sampled codes below are reproducible.
set.seed(42)
# Toy data: ten observations, each carrying two code columns drawn with
# replacement from the same three codes (var1 is sampled before var2).
df <- data.frame(
  id = seq_len(10),
  var1 = sample(c("09F", "09O", "10F"), size = 10, replace = TRUE),
  var2 = sample(c("09F", "09O", "10F"), size = 10, replace = TRUE),
  stringsAsFactors = FALSE
)
library(tidyr)
library(dplyr)
df %>%
  # Stack var1 and var2 so that every id contributes one row per variable
  pivot_longer(cols = var1:var2, names_to = "variable", values_to = "code") %>%
  # Decide the label once per id, looking at all of that id's codes together;
  # "09F" takes precedence over "09O"
  group_by(id) %>%
  mutate(
    category = case_when(
      "09F" %in% code ~ "arts",
      "09O" %in% code ~ "arts_o",
      TRUE ~ " "
    )
  ) %>%
  ungroup() %>%
  # Spread the codes back out into one column per original variable
  pivot_wider(
    id_cols = c(id, category),
    names_from = "variable",
    values_from = "code"
  )
#> # A tibble: 10 x 4
#> id category var1 var2
#> <int> <chr> <chr> <chr>
#> 1 1 "arts" 09F 09F
#> 2 2 "arts" 09F 09F
#> 3 3 "arts" 09F 09O
#> 4 4 "arts" 09F 09O
#> 5 5 "arts_o" 09O 09O
#> 6 6 "arts_o" 09O 10F
#> 7 7 "arts_o" 09O 10F
#> 8 8 "arts" 09F 09F
#> 9 9 "arts" 10F 09F
#> 10 10 " " 10F 10F
由reprex包(v0.3.0)于2020-04-15创建
整洁的方法:
# Fix the RNG seed so the sampled codes below are reproducible.
set.seed(234)
# Build a 10 x 12 tibble of codes in which "09F" and "09O" are rare (5% each).
# The 120 draws fill the matrix column by column; the columns are then named
# var1..var12 with base paste0(), so no string package has to be attached.
d <- sample(
  c("09F", "09O", "xyz"),
  12 * 10,
  replace = TRUE,
  prob = c(0.05, 0.05, 0.9)
) %>%
  matrix(ncol = 12) %>%
  `colnames<-`(paste0("var", 1:12)) %>%
  as_tibble()
# Label every row of `d` from the values in its var* columns: the row's values
# are gathered into one character vector, and "09F" is checked before "09O".
d %>%
  mutate(
    category = pmap_chr(
      select(d, starts_with("var")),
      function(...) {
        row_codes <- c(...)
        case_when(
          any(row_codes == "09F") ~ "arts",
          any(row_codes == "09O") ~ "arts_O",
          TRUE ~ " "
        )
      }
    )
  )
## A tibble: 10 x 13
# var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 category
# <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
# 1 xyz xyz xyz xyz xyz xyz xyz 09O xyz xyz xyz xyz "arts_O"
# 2 xyz xyz xyz xyz 09O xyz xyz xyz xyz xyz xyz xyz "arts_O"
# 3 xyz xyz xyz xyz xyz xyz xyz xyz xyz xyz xyz xyz " "
# 4 xyz xyz xyz xyz xyz xyz xyz xyz xyz xyz xyz xyz " "
# 5 xyz xyz xyz xyz xyz xyz xyz xyz 09O xyz xyz xyz "arts_O"
# 6 xyz xyz xyz xyz xyz xyz xyz xyz xyz 09F xyz xyz "arts"
# 7 09O xyz xyz xyz xyz xyz xyz xyz xyz xyz xyz xyz "arts_O"
# 8 xyz xyz xyz xyz xyz xyz xyz xyz 09F xyz xyz xyz "arts"
# 9 09O xyz xyz xyz xyz xyz xyz xyz xyz xyz xyz xyz "arts_O"
#10 xyz xyz xyz xyz xyz xyz xyz xyz xyz xyz xyz xyz " "
或者使用基础R(base R):
# Base R equivalent: label each row by whether any var<N> column holds the code.
# local() keeps the helper objects out of the calling environment.
d$category2 <- local({
  # Anchor the pattern so that only var1, var2, ... are scanned, not every
  # column whose name merely contains "var".
  codes <- as.matrix(d[grepl("^var[0-9]+$", names(d))])
  # na.rm = TRUE makes a missing value count as "no match" rather than turning
  # the whole row's label into NA.
  row_has <- function(code) rowSums(codes == code, na.rm = TRUE) > 0
  # "09F" takes precedence over "09O"; a single space marks rows with neither.
  ifelse(row_has("09F"), "arts", ifelse(row_has("09O"), "arts_o", " "))
})