给定一个数据框df
text <- "
model,var,value
M1,a,12211
M1,b1,10.21
M1,b2,5.07
M1,c1,41.8
M1,c2,58.2
M1,d,1.6
M2,a,11922
M2,b1,15.6
M2,b2,8.9
M2,c1,38.1
M2,c2,61.9
M2,d,1.8
M2,a,13101
M2,b1,9.21
M2,b2,6.56
M2,c1,36.07
M2,c2,63.93
M2,d,1.75
"
df <- read.table(textConnection(text), sep=",", header = T)
我想通过 dplyr mutate
根据var
的值添加一个 col var2
。
基于以下逻辑。
如果是var == 'b1'
或var == 'b2'
则All B
如果是var == 'c1'
或var == 'c2'
则All C
否则var
我想按如下方式存储映射并使用它来构建上述逻辑
mapping <- c("All B"= list(c('b1', 'b2')), "All C" = list(c('c1', 'c2')))
> mapping
$`All B`
[1] "b1" "b2"
$`All C`
[1] "c1" "c2"
预期输出为
model var value var2
1 M1 a 12211.00 a
2 M1 b1 10.21 All B
3 M1 b2 5.07 All B
4 M1 c1 41.80 All C
5 M1 c2 58.20 All C
6 M1 d 1.60 d
7 M2 a 11922.00 a
8 M2 b1 15.60 All B
9 M2 b2 8.90 All B
10 M2 c1 38.10 All C
11 M2 c2 61.90 All C
12 M2 d 1.80 d
13 M2 a 13101.00 a
14 M2 b1 9.21 All B
15 M2 b2 6.56 All B
16 M2 c1 36.07 All C
17 M2 c2 63.93 All C
18 M2 d 1.75 d
我计划将 dplyr 与ifelse
df %>%
mutate(var2 = ifelse(# what should go here )
我们可以创建一个函数,如果var
存在于mapping
则返回列表名称,否则它将返回var
。我们可以使用 rowwise()
为每一行执行此函数。
get_right_mapping <- function(var) {
names(which(sapply(mapping, function(x) var %in% x)))
}
library(dplyr)
df %>%
rowwise() %>%
mutate(var2 = ifelse(var %in% unlist(mapping), get_right_mapping(var), var))
# model var value var2
# <fct> <chr> <dbl> <chr>
# 1 M1 a 12211 a
# 2 M1 b1 10.2 All B
# 3 M1 b2 5.07 All B
# 4 M1 c1 41.8 All C
# 5 M1 c2 58.2 All C
# 6 M1 d 1.60 d
# 7 M2 a 11922 a
# 8 M2 b1 15.6 All B
# 9 M2 b2 8.90 All B
#10 M2 c1 38.1 All C
#11 M2 c2 61.9 All C
#12 M2 d 1.80 d
#13 M2 a 13101 a
#14 M2 b1 9.21 All B
#15 M2 b2 6.56 All B
#16 M2 c1 36.1 All C
#17 M2 c2 63.9 All C
#18 M2 d 1.75 d
数据
mapping <- c( "All A"= list(c('a1', 'a2')), "All B" = list(c('b1', 'b2')),
"All C" = list(c('c1','c2')))
df$var <- as.character(df$var)
下面是一个使用 case_when
的示例解决方案(如注释中建议的那样(:
df %>%
mutate(
var = as.character(var),
var2 = case_when(
var == "b1" | var == "b2" ~ "All B",
var == "c1" | var == "c2" ~ "All C",
TRUE ~ var))
# model var value var2
#1 M1 a 12211.00 a
#2 M1 b1 10.21 All B
#3 M1 b2 5.07 All B
#4 M1 c1 41.80 All C
#5 M1 c2 58.20 All C
#6 M1 d 1.60 d
#7 M2 a 11922.00 a
#8 M2 b1 15.60 All B
#9 M2 b2 8.90 All B
#10 M2 c1 38.10 All C
#11 M2 c2 61.90 All C
#12 M2 d 1.80 d
#13 M2 a 13101.00 a
#14 M2 b1 9.21 All B
#15 M2 b2 6.56 All B
#16 M2 c1 36.07 All C
#17 M2 c2 63.93 All C
#18 M2 d 1.75 d
顺便说一句,在 base R 中有一个很少使用的 levels<-
函数,它执行的操作与所需结果几乎相同。如果在右侧传递命名列表,则列表中的每个值都将替换为名称。因此,如果保留mapping
作为源很重要,请尝试:
mapping <- c("B"= list(c('b1', 'b2')), "C" = list(c('c1', 'c2')))
df$var2 <- df$var
othval <- setdiff(df$var, unlist(mapping))
levels(df$var2) <- c(mapping, setNames(othval,othval))
# [1] a B B C C d a B B C C d a B B C C d
#Levels: B C a d
(这里的大部分内容都是考虑到mapping
未涵盖的情况(
我已经简化了你的逻辑 - 如果我错了,请纠正我。
如果 var
= b
或 c
后跟数字,则分别替换 All B
或 All C
。
df %>%
mutate(var2 = gsub("^([bc])\d+",
paste("All", "\U\1"),
var,
perl = TRUE))
model var value var2
1 M1 a 12211.00 a
2 M1 b1 10.21 All B
3 M1 b2 5.07 All B
4 M1 c1 41.80 All C
5 M1 c2 58.20 All C
6 M1 d 1.60 d
7 M2 a 11922.00 a
8 M2 b1 15.60 All B
9 M2 b2 8.90 All B
10 M2 c1 38.10 All C
11 M2 c2 61.90 All C
12 M2 d 1.80 d
13 M2 a 13101.00 a
14 M2 b1 9.21 All B
15 M2 b2 6.56 All B
16 M2 c1 36.07 All C
17 M2 c2 63.93 All C
18 M2 d 1.75 d
<</div>
div class="one_answers">如果您将mapping
转换为data.frame
并gather
值,那么您就远离了目标。
library(tidyverse)
as_tibble(mapping) %>% rename_all(~paste0("All_",.x)) %>%
gather(var2,var) %>% right_join(df) %>% # main chunk
mutate(var2=ifelse(is.na(var2),var,var2)) # replace nas
# # A tibble: 18 x 4
# var2 var model value
# <chr> <chr> <fctr> <dbl>
# 1 a a M1 12211.00
# 2 All_B b1 M1 10.21
# 3 All_B b2 M1 5.07
# 4 All_C c1 M1 41.80
# 5 All_C c2 M1 58.20
# 6 d d M1 1.60
# 7 a a M2 11922.00
# 8 All_B b1 M2 15.60
# 9 All_B b2 M2 8.90
# 10 All_C c1 M2 38.10
# 11 All_C c2 M2 61.90
# 12 d d M2 1.80
# 13 a a M2 13101.00
# 14 All_B b1 M2 9.21
# 15 All_B b2 M2 6.56
# 16 All_C c1 M2 36.07
# 17 All_C c2 M2 63.93
# 18 d d M2 1.75