我有两个数据帧:
df1:
a
s1
s2
s3
s4
df2:
a b
s1 w
s1 x
s4 y
s2 z
s4 x
我想将df2$b(w,x,y,z(中唯一值的多少列附加到df1,如果df2$a有一行将它们关联,则为每个df1$a和新列添加一个1。这很难解释,也许显示所需的输出更好:
a w x y z
s1 1 1 0 0
s2 0 0 0 1
s3 0 0 0 0
s4 0 1 1 0
我试过
for (col_name in unique(df2$b)){
df1 %<>%
mutate(!!as.character(col_name) := ifelse(col_name %in% filter(df2,
a == df1$a)$b,
yes = 1,
no = 0))
}
但这不起作用,我想问题出在上
a == df1$a
位,但我不知道哪种语法是正确的。感谢您的帮助!
我们可以在base R
中使用table
(假设我们想要"df1"中"a"列中所有值的行,则将"df2"的"a"栏转换为factor
,并将levels
指定为"df1’中"a’的unique
值
table(transform(df2, a = factor(a, levels = unique(df1$a))))
-输出
# b
#a w x y z
# s1 1 1 0 0
# s2 0 0 0 1
# s3 0 0 0 0
# s4 0 1 1 0
数据
df1 <- structure(list(a = c("s1", "s2", "s3", "s4")), class = "data.frame",
row.names = c(NA,
-4L))
df2 <- structure(list(a = c("s1", "s1", "s4", "s2", "s4"), b = c("w",
"x", "y", "z", "x")), class = "data.frame", row.names = c(NA,
-5L))
另一个dplyr
和tidyr
解决方案。
library(dplyr)
library(tidyr)
df3 <- df2 %>%
mutate(value = 1) %>%
complete(a = df1$a, b) %>%
spread(b, value, fill = 0)
df4 <- df1 %>%
left_join(df3, by = "a")
数据
df1 <- tibble(
a = paste0("s", 1:4)
)
df2 <- read.table(text = "
a b
s1 w
s1 x
s4 y
s2 z
s4 x",
header = TRUE)
试试这个:
library(dplyr)
library(tidyr)
#Code
newdf <- df1 %>% full_join(df2) %>%
mutate(val=ifelse(!is.na(b),1,NA),
b=ifelse(is.na(b),'Emp',b)) %>%
pivot_wider(names_from = b,values_from=val,
values_fill=0,names_sort = T) %>%
select(-Emp)
输出:
# A tibble: 4 x 5
a w x y z
<chr> <dbl> <dbl> <dbl> <dbl>
1 s1 1 1 0 0
2 s2 0 0 0 1
3 s3 0 0 0 0
4 s4 0 1 1 0