在r中按矩阵中的每列拆分数字字符串



我有下面的矩阵,其中的字符串由 0 和 1 组成。同一列中各字符串的字符数始终相同(值为 NA 的单元格除外)。我想把每个数字拆分到单独的列中,即每行每列只出现一个数字。NA 也应该出现在该字符串拆分出的每一列中(如下例所示)。

# Example input: a 5 x 5 character matrix of 0/1 strings. Missing cells are
# written as explicit character NAs.
r1 <- c("01", "001", "0001", "01", "100")
r2 <- c("10", "001", NA_character_, "10", "100")
r3 <- c("01", "100", "1000", "10", "010")
r4 <- c("01", "010", "0100", NA_character_, "001")
r5 <- c("01", "010", "0010", "10", "001")

# rbind() stacks the vectors as rows and uses their names (r1..r5) as row names.
n.mat <- rbind(r1, r2, r3, r4, r5)

输出:

# Expected result: one character per cell (5 x 14). An NA input cell is
# repeated across every column that its string would have occupied.
r1 <- c(
  "0", "1", "0", "0", "1", "0", "0", "0", "1", "0", "1", "1", "0", "0"
)
r2 <- c(
  "1", "0", "0", "0", "1",
  NA_character_, NA_character_, NA_character_, NA_character_,
  "1", "0", "1", "0", "0"
)
r3 <- c(
  "0", "1", "1", "0", "0", "1", "0", "0", "0", "1", "0", "0", "1", "0"
)
r4 <- c(
  "0", "1", "0", "1", "0", "0", "1", "0", "0",
  NA_character_, NA_character_,
  "0", "0", "1"
)
r5 <- c(
  "0", "1", "0", "1", "0", "0", "0", "1", "0", "1", "0", "0", "0", "1"
)

# Row names r1..r5 come from the variable names passed to rbind().
n.mat_new <- rbind(r1, r2, r3, r4, r5)

我有以下代码:

# Asker's attempt, kept exactly as posted; it does not yield the desired matrix.
# Step 1: flatten the matrix into a plain character vector.
n.mat_new <- as.character(n.mat)
# NOTE(review): this splits `n.mat`, not the `n.mat_new` created above, so the
# result of step 1 is simply overwritten.
n.mat_new <- strsplit(n.mat, "")
# NOTE(review): `unlist(n.mat)` again reads the original matrix, so the split
# characters from the previous line are never used. With
# `nrow = length(n.mat)` the unsplit strings end up in a single column.
# `T` is a reassignable alias; `TRUE` would be the safer spelling.
n.mat_new <- data.frame(matrix(unlist(n.mat), nrow=length(n.mat), byrow=T))

但是我没有得到我想要的正确的n.mat_new矩阵。有人能帮我吗?

您可以把每一列中的字符串逐个字符拆开,把同一列内各行的结果补齐到相同长度(NA 单元格会被补成多个 NA),然后把各列得到的结果按列合并。

# Split every cell of `n.mat` into single characters, one source column at a
# time, and bind the resulting blocks side by side into one character matrix.
#
# lapply() over the column indices always returns a list. apply() would
# simplify to a matrix when every column yields the same number of values,
# and do.call(cbind, <matrix>) then fails because its second argument must be
# a list.
do.call(cbind, lapply(seq_len(ncol(n.mat)), function(j) {
  # One character vector per row; strsplit() yields a single NA for an NA cell.
  pieces <- strsplit(n.mat[, j], "")
  width <- max(lengths(pieces))
  # Indexing past the end pads with NA, so an NA cell fills all `width` slots.
  padded <- lapply(pieces, `[`, seq_len(width))
  # Build the block explicitly (one row per input row). t(sapply(...)) would
  # return a 1 x nrow matrix when `width` is 1, because sapply() then
  # simplifies to a plain vector.
  matrix(
    unlist(padded, use.names = FALSE),
    ncol = width,
    byrow = TRUE,
    dimnames = list(rownames(n.mat), NULL)
  )
}))
#  [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
#r1 "0"  "1"  "0"  "0"  "1"  "0"  "0"  "0"  "1"  "0"   "1"   "1"   "0"   "0"  
#r2 "1"  "0"  "0"  "0"  "1"  NA   NA   NA   NA   "1"   "0"   "1"   "0"   "0"  
#r3 "0"  "1"  "1"  "0"  "0"  "1"  "0"  "0"  "0"  "1"   "0"   "0"   "1"   "0"  
#r4 "0"  "1"  "0"  "1"  "0"  "0"  "1"  "0"  "0"  NA    NA    "0"   "0"   "1"  
#r5 "0"  "1"  "0"  "1"  "0"  "0"  "0"  "1"  "0"  "1"   "0"   "0"   "0"   "1"  

这里有一个tidyverse选项

library(dplyr)
library(tidyr)
library(data.table) # provides rowid()

# Long format: one row per matrix cell (Var1 = row name, Var2 = column index).
reshape2::melt(n.mat) %>%
  group_by(Var2) %>%
  # Within each source column, swap every NA cell for a vector of NAs whose
  # length is the character width of that column's non-missing strings; this
  # turns `value` into a list column.
  mutate(value = replace(value, is.na(value),
    list(rep(NA, max(nchar(value[!is.na(value)])))))) %>%
  ungroup() %>%
  # Flatten the list column so each padded NA gets its own row.
  unnest(c(value)) %>%
  # Split each remaining string between every pair of adjacent characters.
  separate_rows(value, sep = "(?<=.)(?=.)") %>%
  # Running position within each original row; becomes the output column name.
  mutate(rn = rowid(Var1)) %>%
  select(-Var2) %>%
  pivot_wider(names_from = rn, values_from = value) %>%
  # column_to_rownames() lives in tibble, which is not attached above, so it
  # is namespace-qualified to keep the pipeline runnable as written.
  tibble::column_to_rownames("Var1")
#   1 2 3 4 5    6    7    8    9   10   11 12 13 14
#r1 0 1 0 0 1    0    0    0    1    0    1  1  0  0
#r2 1 0 0 0 1 <NA> <NA> <NA> <NA>    1    0  1  0  0
#r3 0 1 1 0 0    1    0    0    0    1    0  0  1  0
#r4 0 1 0 1 0    0    1    0    0 <NA> <NA>  0  0  1
#r5 0 1 0 1 0    0    0    1    0    1    0  0  0  1

最新更新