我有这个数据帧:
# Example input: seven rows of logical flags. The numeric suffixes have gaps
# (there is no lg1, lg3 or ld3 column).
df <- data.frame(
  lg0 = c(FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE),
  lg2 = c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE),
  lg4 = c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE),
  ld0 = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, FALSE),
  ld1 = c(FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE),
  ld2 = c(FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE),
  ld4 = c(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE)
)
lg0 lg2 lg4 ld0 ld1 ld2 ld4
1 FALSE FALSE TRUE TRUE FALSE FALSE FALSE
2 FALSE TRUE FALSE TRUE FALSE FALSE FALSE
3 TRUE FALSE FALSE TRUE FALSE FALSE FALSE
4 TRUE FALSE FALSE FALSE TRUE FALSE FALSE
5 TRUE FALSE FALSE FALSE FALSE TRUE FALSE
6 TRUE FALSE FALSE TRUE FALSE FALSE FALSE
7 TRUE FALSE FALSE FALSE TRUE FALSE FALSE
如何添加缺失的列 lg1、lg3、ld3,并用 FALSE 填充?
我想以类似"补全/填充/扩展"的方式补上缺失的列,得到下面的结果。
该方法应当不依赖于具体缺少哪些列,因为缺失的列可能会变化。
所需输出:
lg0 lg1 lg2 lg3 lg4 ld0 ld1 ld2 ld3 ld4
FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE
FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
到目前为止,我已经尝试过:
library(tidyr)
library(dplyr)
library(readr)
# Asker's attempt: reshape to long format, parse the numeric suffix out of
# each column name, then list every id/name combination.
# Fix: start from `df` (the frame defined in the question); `df1` is never
# created anywhere, so the original pipeline fails with "object not found".
df2 <- df %>%
  pivot_longer(
    everything()
  ) %>%
  mutate(id = parse_number(name))
expand(df2, id, name)
在 base R 中,我们可以先构建一个全为 FALSE 的新数据集,再按匹配的列名把 df 中的值赋回去:
# Full set of target column names: each prefix crossed with suffixes 0..4.
# t() makes as.vector() flatten prefix by prefix (all "lg" names, then "ld").
nm1 <- as.vector(t(outer(c("lg", "ld"), 0:4, FUN = paste0)))
# All-FALSE frame with one column per target name and as many rows as `df`.
df2 <- as.data.frame(
  matrix(FALSE, nrow(df), length(nm1), dimnames = list(NULL, nm1))
)
# Overwrite the columns that already exist in `df`; the rest stay FALSE.
df2[names(df)] <- df
输出:
> df2
lg0 lg1 lg2 lg3 lg4 ld0 ld1 ld2 ld3 ld4
1 FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE
2 FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
3 TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
4 TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
5 TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
6 TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
7 TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
如果想根据现有列名自动构建 nm1:
# Derive the complete column set from the names already present in `df`.
# Fix: backslashes must be doubled inside R string literals; "\D" and "\d"
# are rejected by the parser as unrecognized escapes.
# Numeric suffix of every existing column name.
d1 <- as.numeric(sub("\\D+", "", names(df)))
# Cross each distinct prefix with the full suffix range; t() makes c()
# flatten prefix by prefix (e.g. lg0..lg4, then ld0..ld4).
nm1 <- c(t(outer(unique(sub("\\d+", "", names(df))),
                 min(d1):max(d1), FUN = paste0)))
# Add the absent columns filled with FALSE, then show the columns in order.
df[setdiff(nm1, names(df))] <- FALSE
df[nm1]
我认为您可以使用以下解决方案:
library(dplyr)
library(tidyr)
# Tidyverse approach: go long, split each column name into prefix and number,
# fill the missing numbers per row and prefix with FALSE, then go wide again.
df %>%
  pivot_longer(everything()) %>%
  # Fix: doubled backslashes; "\D" / "\d" are not valid escapes in an R string.
  extract(name, c("base", "pre"), regex = "(\\D+)(\\d+)") %>%
  # Fix: one id per original ROW. Each row contributes ncol(df) long rows, so
  # the ids run over seq_len(nrow(df)); the original 1:ncol(df) only worked
  # because the example frame happens to be 7 x 7.
  mutate(id = rep(seq_len(nrow(df)), each = ncol(df))) %>%
  group_by(id, base) %>%
  mutate(pre = as.integer(pre)) %>%
  # Within each row/prefix group, add the absent suffixes with value FALSE.
  complete(pre = seq(min(pre), max(pre), 1), fill = list(value = FALSE)) %>%
  # Rebuild the column names (prefix + number) and return to wide format.
  unite("New_Name", c(base, pre), sep = "") %>%
  pivot_wider(names_from = New_Name, values_from = value) %>%
  ungroup() %>%
  select(-id) %>%
  # Put the "lg" columns before the "ld" columns, as in the desired output.
  relocate(starts_with("lg"))
# A tibble: 7 x 10
lg0 lg1 lg2 lg3 lg4 ld0 ld1 ld2 ld3 ld4
<lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl>
1 FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE
2 FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
3 TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
4 TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
5 TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
6 TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
7 TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE