r-在一列中为另一列中某个因子的每个级别添加级别

  • 本文关键字:一列 添加 r for-loop levels
  • 更新时间 :
  • 英文 :


我有一个这样的数据集:

# Example input: three treatment groups (A, B, C) of four rows each, every
# group followed by a single row labelled "control" in both var2 and var3.
dat <- data.frame(
  var1 = 1:15,
  var2 = rep(
    c("A", "control", "B", "control", "C", "control"),
    times = rep(c(4L, 1L), 3L)
  ),
  var3 = rep(
    rep(c("full", "half", "control"), times = c(2L, 2L, 1L)),
    times = 3L
  ),
  value = c(
    12, 62, 60, 61, 83,
    97, 1, 22, 99, 47,
    63, 49, 25, 81, 26
  )
)
dat

我想把所有 var3 == "control" 的行复制并添加到 dat$var2 的每个非 control 级别(A、B、C)中。期望得到的数据框如下:

# Expected result: each var2 group keeps its four original rows and is
# followed by the three control rows (values 83, 47, 26); var1 is renumbered.
res <- data.frame(
  var1 = 1:21,
  var2 = rep(c("A", "B", "C"), each = 7L),
  var3 = rep(
    rep(c("full", "half", "control"), times = c(2L, 2L, 3L)),
    times = 3L
  ),
  value = c(
    12, 62, 60, 61, 83, 47, 26,
    97, 1, 22, 99, 83, 47, 26,
    63, 49, 25, 81, 83, 47, 26
  )
)
res

下面是一种使用 dplyr 的做法:

library(dplyr)
# Control rows reduced to the two columns that are copied into every group;
# var1 is dropped here because it is renumbered at the end.
datsub <- select(filter(dat, var3 == "control"), var3, value)

# For each non-control var2 group, append the control rows, then renumber
# var1 over the whole result and restore the column order of `dat`.
out <- dat %>%
  filter(var3 != "control") %>%
  group_by(var2) %>%
  group_modify(function(rows, key) bind_rows(rows, datsub)) %>%
  ungroup() %>%
  mutate(var1 = row_number()) %>%
  select(names(dat))

与预期输出进行比对检查:

> all.equal(out, res, check.attributes = FALSE)
[1] TRUE

类似的方法,但略有不同(注意:每组内的行顺序与 res 不同):

# Non-control rows: the var2 groups that will each receive the control rows.
dat_to_augment <- dat %>%
  filter(var3 != "control")

# Pair every control row with every distinct var2 value of the non-control
# rows, stack those pairs with the non-control rows, sort, and renumber var1.
# cross_join() replaces full_join(by = character()), which is deprecated for
# cross joins since dplyr 1.1.0.
dat %>%
  filter(var3 == "control") %>%
  select(var3, value) %>%
  cross_join(distinct(dat_to_augment, var2)) %>%
  bind_rows(dat_to_augment) %>%
  select(var1, var2, var3, value) %>%
  # desc(var3) puts "half" before "full" before "control" within each var2.
  arrange(var2, desc(var3)) %>%
  mutate(var1 = row_number())
#    var1 var2    var3 value
# 1     1    A    half    60
# 2     2    A    half    61
# 3     3    A    full    12
# 4     4    A    full    62
# 5     5    A control    83
# 6     6    A control    47
# 7     7    A control    26
# 8     8    B    half    22
# 9     9    B    half    99
# 10   10    B    full    97
# ...

使用 data.table:

library(data.table)
setDT(dat) # convert dat to a data.table by reference

# Non-control rows and their distinct var2 values, computed once instead of
# being re-evaluated for every group in the expansion below.
treated <- dat[var2 != "control"]
treated_levels <- unique(treated$var2)

result <- rbind(
  treated,
  # One copy of each control row (grouped by var3 and value) per treated var2
  # level. var1 is absent from this piece, so fill = TRUE pads it with NA
  # until it is renumbered below.
  dat[var2 == "control", .(var2 = treated_levels), by = .(var3, value)],
  fill = TRUE
)[order(var2)]

# seq_len() rather than seq(): seq(0) is c(1, 0), which would not fit a
# zero-row table.
result[, var1 := seq_len(.N)]

# Compare against the expected output.
setDT(res)
all.equal(res, result)

最新更新