我想在 R 中添加一个新列,它将我的子组汇总为组。
这是我的例子:
# Example data: 13 rows, each pairing an id with a colour subgroup.
id <- c(1, 2, 2, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6)
subgroup <- c(
  "lightred", "marine", "cyan", "rose", "bordeaux", "darkred", "sky",
  "gras", "bottle", "lightgreen", "darkred", "marine", "lightgreen"
)
# Build the data frame straight from the vectors. Wrapping them in cbind()
# first creates a character matrix, which turns the numeric id into text.
data <- data.frame(id, subgroup)
> data
id subgroup
1 1 lightred
2 2 marine
3 2 cyan
4 3 rose
5 4 bordeaux
6 4 darkred
7 4 sky
8 5 gras
9 5 bottle
10 5 lightgreen
11 6 darkred
12 6 marine
13 6 lightgreen
现在我想添加一个新列"颜色"(colour),它将子组归为 3 个组:"红色"、"绿色"和"蓝色"。是否可以先将子组分配给变量,然后再将它们分配给组?
red = "lightred", "darkred" , "rose" , "bordeaux"
blue = "marine", "cyan", "sky"
green = "gras", "bottle" , "lightgreen"
最后它应该看起来像这样:
> data
id subgroup colour
1 1 lightred red
2 2 marine blue
3 2 cyan blue
4 3 rose red
5 4 bordeaux red
6 4 darkred red
7 4 sky blue
8 5 gras green
9 5 bottle green
10 5 lightgreen green
11 6 darkred red
12 6 marine blue
13 6 lightgreen green
谢谢!
使用dplyr
中的case_when
:
# Subgroup labels belonging to each colour group.
red <- c("lightred", "darkred", "rose", "bordeaux")
blue <- c("marine", "cyan", "sky")
green <- c("gras", "bottle", "lightgreen")
# Classify every subgroup. case_when is namespace-qualified so the snippet runs
# without a prior library(dplyr) call. Labels found in none of the groups are
# kept unchanged by the final catch-all branch.
data$colour <- dplyr::case_when(
  data$subgroup %in% red ~ "red",
  data$subgroup %in% blue ~ "blue",
  data$subgroup %in% green ~ "green",
  TRUE ~ data$subgroup
)
一种使用 cut 的稍微非常规的方法。我们创建一个键值对的 list,然后用 match 找出 data$subgroup 中每个值在这些值里的位置。我们将 breaks 设为列表中各元素 length 的累积和(前面再加一个 0),并将 labels 设为列表的 names。
# Named list: each element name is a colour group, its values are the subgroups.
new_list <- list(
  red = c("lightred", "darkred", "rose", "bordeaux"),
  blue = c("marine", "cyan", "sky"),
  green = c("gras", "bottle", "lightgreen")
)
# match() gives each subgroup's position in the flattened list; cut() then bins
# those positions at the cumulative group sizes and labels each bin with the
# corresponding group name.
data[["colour"]] <- cut(
  match(data[["subgroup"]], unlist(new_list)),
  breaks = c(0, cumsum(lengths(new_list))),
  labels = names(new_list)
)
data
# id subgroup colour
#1 1 lightred red
#2 2 marine blue
#3 2 cyan blue
#4 3 rose red
#5 4 bordeaux red
#6 4 darkred red
#7 4 sky blue
#8 5 gras green
#9 5 bottle green
#10 5 lightgreen green
#11 6 darkred red
#12 6 marine blue
#13 6 lightgreen green
其中
cumsum(lengths(new_list))
# red blue green
# 4 7 10
另一种选择(由 @Jaap 建议)是使用 stack 从 new_list 创建一个数据框,其中 values 为各个颜色(子组),ind 为相应的组。然后我们只需用 subgroup 去 match values,并取出相应的组(ind)。
# Long-format lookup table: `values` holds each subgroup label, `ind` its group.
ref <- stack(new_list)
# Find each subgroup's row in the lookup table and take the group stored there.
data[["colour"]] <- ref[["ind"]][match(data[["subgroup"]], ref[["values"]])]
# Example data: 13 rows, each pairing an id with a colour subgroup.
id <- c(1, 2, 2, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6)
subgroup <- c(
  "lightred", "marine", "cyan", "rose", "bordeaux", "darkred", "sky",
  "gras", "bottle", "lightgreen", "darkred", "marine", "lightgreen"
)
# Build the data frame straight from the vectors. Wrapping them in cbind()
# first creates a character matrix, which turns the numeric id into text.
data <- data.frame(id, subgroup)
library(dplyr)
# Label each row by regex-matching its subgroup against the alternatives of
# every colour group; rows matching none of the patterns get "else".
data <- mutate(
  data,
  colour = case_when(
    grepl("(lightred)|(darkred)|(rose)|(bordeaux)", subgroup, perl = TRUE) ~ "red",
    grepl("(marine)|(cyan)|(sky)", subgroup, perl = TRUE) ~ "blue",
    grepl("(gras)|(bottle)|(lightgreen)", subgroup, perl = TRUE) ~ "green",
    TRUE ~ "else"
  )
)
data
# Assign each subgroup to its colour group. Every group is tested explicitly,
# so a value outside all three vectors yields NA instead of silently falling
# through to "green".
data$colour <- ifelse(
  data$subgroup %in% red, "red",
  ifelse(
    data$subgroup %in% blue, "blue",
    ifelse(data$subgroup %in% green, "green", NA_character_)
  )
)
id subgroup colour
1 1 lightred red
2 2 marine blue
3 2 cyan blue
4 3 rose red
5 4 bordeaux red
6 4 darkred red
7 4 sky blue
8 5 gras green
9 5 bottle green
10 5 lightgreen green
11 6 darkred red
12 6 marine blue
13 6 lightgreen green
# Join the data to a stacked lookup table (columns `values` and `ind`) on the
# subgroup label; the outer parentheses print the result after assigning it.
(a <- merge(
  data,
  stack(list(red = red, blue = blue, green = green)),
  by.x = "subgroup",
  by.y = "values"
))
subgroup id ind
1 bordeaux 4 red
2 bottle 5 green
3 cyan 2 blue
4 darkred 4 red
5 darkred 6 red
6 gras 5 green
7 lightgreen 5 green
8 lightgreen 6 green
9 lightred 1 red
10 marine 2 blue
11 marine 6 blue
12 rose 3 red
13 sky 4 blue
# Show the merged rows sorted by id.
a[order(a[["id"]]), ]
subgroup id ind
9 lightred 1 red
3 cyan 2 blue
10 marine 2 blue
12 rose 3 red
1 bordeaux 4 red
4 darkred 4 red
13 sky 4 blue
2 bottle 5 green
6 gras 5 green
7 lightgreen 5 green
5 darkred 6 red
8 lightgreen 6 green
11 marine 6 blue
你可以做:
# Flatten the groups into one named vector; unlist() names the elements
# red1, red2, ..., blue1, ... by appending an index to each list name.
colors <- unlist(list(red = red, blue = blue, green = green))
# Strip the numeric suffix so every name is just its group. The backslash is
# doubled because "\d" is not a valid escape inside an R string literal.
names(colors) <- sub("\\d+", "", names(colors))
# Look up each subgroup in the vector and take the name (group) at that
# position. The column is read from `data` rather than a free-standing vector.
data$color <- names(colors)[match(data$subgroup, colors)]
data
id subgroup color
1 1 lightred red
2 2 marine blue
3 2 cyan blue
4 3 rose red
5 4 bordeaux red
6 4 darkred red
7 4 sky blue
8 5 gras green
9 5 bottle green
10 5 lightgreen green
11 6 darkred red
12 6 marine blue
13 6 lightgreen green
使用 dplyr 和 plyr:mapvalues 将值从 x 映射到 y。此处 x 表示各个唯一的 subgroup,y 表示要映射到的相应 color 值:
# Every distinct subgroup label, listed group by group.
x <- c(
  "lightred", "darkred", "rose", "bordeaux",
  "marine", "cyan", "sky",
  "gras", "bottle", "lightgreen"
)
# Target colour for each entry of x, in the same order (4 red, 3 blue, 3 green).
y <- rep(c("red", "blue", "green"), times = c(4, 3, 3))
# Translate each subgroup into its colour and return the data with the new column.
dplyr::mutate(data, color = plyr::mapvalues(subgroup, from = x, to = y))