r-按列填充/完成/展开



我有这个数据帧:

df <- structure(list(lg0 = c(FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, 
TRUE), lg2 = c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE
), lg4 = c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE), ld0 = c(TRUE, 
TRUE, TRUE, FALSE, FALSE, TRUE, FALSE), ld1 = c(FALSE, FALSE, 
FALSE, TRUE, FALSE, FALSE, TRUE), ld2 = c(FALSE, FALSE, FALSE, 
FALSE, TRUE, FALSE, FALSE), ld4 = c(FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE)), class = "data.frame", row.names = c(NA, 
-7L))
lg0   lg2   lg4   ld0   ld1   ld2   ld4
1 FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE
2 FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE
3  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE
4  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE
5  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE
6  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE
7  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE

如何添加用FALSE填充的缺失列lg1lg3ld3

我希望"准"完成/填充/扩展缺失的列,以获得以下内容:

这应该独立于哪些列不存在,这可能会改变。

所需输出:

lg0   lg1   lg2   lg3   lg4   ld0   ld1   ld2   ld3   ld4  
FALSE FALSE FALSE FALSE TRUE  TRUE  FALSE FALSE FALSE FALSE
FALSE FALSE TRUE  FALSE FALSE TRUE  FALSE FALSE FALSE FALSE
TRUE  FALSE FALSE FALSE FALSE TRUE  FALSE FALSE FALSE FALSE
TRUE  FALSE FALSE FALSE FALSE FALSE TRUE  FALSE FALSE FALSE
TRUE  FALSE FALSE FALSE FALSE FALSE FALSE TRUE  FALSE FALSE
TRUE  FALSE FALSE FALSE FALSE TRUE  FALSE FALSE FALSE FALSE
TRUE  FALSE FALSE FALSE FALSE FALSE TRUE  FALSE FALSE FALSE

到目前为止,我已经尝试过:

library(tidyr)
library(dplyr)
library(readr)
df2 <- df1 %>% 
pivot_longer(
everything()
) %>% 
mutate(id = parse_number(name)) 
expand(df2, id, name)

base R中,我们可以构建FALSE的新数据集,并根据匹配的列名将"df"分配回它

nm1 <- paste0(rep(c("lg", "ld"), each = 5), 0:4)
df2 <- as.data.frame(matrix(FALSE, nrow = nrow(df), 
ncol = length(nm1), dimnames = list(NULL, nm1)))
df2[names(df)] <- df

-输出

> df2
lg0   lg1   lg2   lg3   lg4   ld0   ld1   ld2   ld3   ld4
1 FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE
2 FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE
3  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE
4  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE
5  TRUE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE
6  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE
7  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE

如果我们想自动构建nm1

d1 <- as.numeric(sub("\D+", "", names(df)))
nm1 <- c(t(outer(unique(sub("\d+", "", names(df))),  
min(d1):max(d1), FUN = paste0)))
df[setdiff(nm1, names(df))] <- FALSE
df[nm1]

我认为您可以使用以下解决方案:

library(dplyr)
library(tidyr)
df %>%
pivot_longer(everything()) %>% 
extract(name, c("base", "pre"), regex = "(\D+)(\d+)") %>% 
mutate(id = rep(1:ncol(df), each = ncol(df))) %>%
group_by(id, base) %>%
mutate(pre = as.integer(pre)) %>%
complete(pre = seq(min(pre), max(pre), 1), fill = list(value = FALSE)) %>%
unite("New_Name", c(base, pre), sep = "") %>%
pivot_wider(names_from = New_Name, values_from = value) %>%
ungroup() %>%
select(-id) %>%
relocate(starts_with("lg"))

# A tibble: 7 x 10
lg0   lg1   lg2   lg3   lg4   ld0   ld1   ld2   ld3   ld4  
<lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl>
1 FALSE FALSE FALSE FALSE TRUE  TRUE  FALSE FALSE FALSE FALSE
2 FALSE FALSE TRUE  FALSE FALSE TRUE  FALSE FALSE FALSE FALSE
3 TRUE  FALSE FALSE FALSE FALSE TRUE  FALSE FALSE FALSE FALSE
4 TRUE  FALSE FALSE FALSE FALSE FALSE TRUE  FALSE FALSE FALSE
5 TRUE  FALSE FALSE FALSE FALSE FALSE FALSE TRUE  FALSE FALSE
6 TRUE  FALSE FALSE FALSE FALSE TRUE  FALSE FALSE FALSE FALSE
7 TRUE  FALSE FALSE FALSE FALSE FALSE TRUE  FALSE FALSE FALSE

最新更新