我有下面的矩阵,其元素是由0和1组成的字符串。除NA以外,同一列中各字符串的长度始终相同。我想把每个数字拆分到单独的列中,即每行每列只出现一个数字。NA也应该出现在该字符串拆分后所对应的每一列中(如下例所示)。
# Sample input: a 5 x 5 character matrix of 0/1 strings. Within a column every
# non-missing string has the same width; NA marks a missing entry.
r1 <- c("01", "001", "0001", "01", "100")
r2 <- c("10", "001", NA_character_, "10", "100")
r3 <- c("01", "100", "1000", "10", "010")
r4 <- c("01", "010", "0100", NA_character_, "001")
r5 <- c("01", "010", "0010", "10", "001")
# Stack the vectors as rows; the list names become the row names r1..r5.
n.mat <- do.call(rbind, list(r1 = r1, r2 = r2, r3 = r3, r4 = r4, r5 = r5))
期望的输出:
# Desired result: one character per cell (5 x 14). A missing input string
# shows up as NA in every cell that its column's characters occupy.
r1 <- c("0", "1", "0", "0", "1", "0", "0", "0", "1", "0", "1", "1", "0", "0")
r2 <- c("1", "0", "0", "0", "1", NA_character_, NA_character_, NA_character_,
        NA_character_, "1", "0", "1", "0", "0")
r3 <- c("0", "1", "1", "0", "0", "1", "0", "0", "0", "1", "0", "0", "1", "0")
r4 <- c("0", "1", "0", "1", "0", "0", "1", "0", "0", NA_character_,
        NA_character_, "0", "0", "1")
r5 <- c("0", "1", "0", "1", "0", "0", "0", "1", "0", "1", "0", "0", "0", "1")
# Fill row by row so that each vector becomes one row, labelled r1..r5.
n.mat_new <- matrix(
  c(r1, r2, r3, r4, r5),
  nrow = 5L,
  byrow = TRUE,
  dimnames = list(c("r1", "r2", "r3", "r4", "r5"), NULL)
)
我有以下代码:
# Attempt from the question; it does not yield the desired matrix.
# The flattened character vector stored here is overwritten on the next line
# without ever being read.
n.mat_new <- as.character(n.mat)
# strsplit() is given the original n.mat, not the n.mat_new assigned above.
n.mat_new <- strsplit(n.mat, "")
# unlist() is again applied to n.mat rather than to the split list held in
# n.mat_new, so the split characters never reach matrix(); nrow is set to the
# total number of elements of n.mat.
n.mat_new <- data.frame(matrix(unlist(n.mat), nrow=length(n.mat), byrow=T))
但是我没有得到我想要的n.mat_new矩阵。有人能帮我吗?
您可以逐列把每个字符串拆分成单个字符,将同一列内的拆分结果补齐到相同的长度,然后把各列的结果按列合并起来。
# Split every string of n.mat into single characters, column by column, and
# bind the per-column pieces side by side. The result is a character matrix
# with the row names of n.mat and one character (or NA) per cell.
#
# lapply() over the column indices is used instead of apply(): apply()
# simplifies its result to an array whenever every column yields a piece of
# the same size (all columns having the same string width), and do.call()
# then fails because its second argument is not a list.
do.call(cbind, lapply(seq_len(ncol(n.mat)), function(j) {
  chars <- strsplit(n.mat[, j], "")
  # Widest split in this column; an NA string splits to a single NA.
  width <- max(lengths(chars))
  # Indexing past the end pads with NA, so an NA entry fills `width` cells.
  padded <- lapply(chars, `[`, seq_len(width))
  # Build the piece explicitly as nrow x width; sapply() + t() would return a
  # 1 x nrow matrix when width is 1 and break the cbind().
  matrix(
    unlist(padded),
    nrow = nrow(n.mat),
    byrow = TRUE,
    dimnames = list(rownames(n.mat), NULL)
  )
}))
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
#r1 "0" "1" "0" "0" "1" "0" "0" "0" "1" "0" "1" "1" "0" "0"
#r2 "1" "0" "0" "0" "1" NA NA NA NA "1" "0" "1" "0" "0"
#r3 "0" "1" "1" "0" "0" "1" "0" "0" "0" "1" "0" "0" "1" "0"
#r4 "0" "1" "0" "1" "0" "0" "1" "0" "0" NA NA "0" "0" "1"
#r5 "0" "1" "0" "1" "0" "0" "0" "1" "0" "1" "0" "0" "0" "1"
这里有一个使用tidyverse的方案:
library(dplyr)
library(tidyr)
library(data.table)
# Tidyverse variant: reshape n.mat to long form, split every string into
# single characters, then spread the characters back out into one column each.
# The result is a data frame whose row names are the row names of n.mat.
reshape2::melt(n.mat) %>%
  # Work per original column (Var2) so the NA padding uses that column's width.
  group_by(Var2) %>%
  # Replace a missing string by a vector of NAs as long as the column's
  # strings, so that it later occupies the same number of output cells.
  mutate(value = replace(value, is.na(value),
                         list(rep(NA, max(nchar(value[!is.na(value)])))))) %>%
  ungroup() %>%
  # Expand the list column: each padded NA vector becomes several NA rows.
  unnest(c(value)) %>%
  # Zero-width split between any two characters: one character per row.
  separate_rows(value, sep = "(?<=.)(?=.)") %>%
  # Running index within each original row = position of the output column.
  mutate(rn = rowid(Var1)) %>%
  select(-Var2) %>%
  pivot_wider(names_from = rn, values_from = value) %>%
  # column_to_rownames() is exported by tibble, which the library() calls in
  # this snippet do not attach, so it is called through its namespace.
  tibble::column_to_rownames("Var1")
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14
#r1 0 1 0 0 1 0 0 0 1 0 1 1 0 0
#r2 1 0 0 0 1 <NA> <NA> <NA> <NA> 1 0 1 0 0
#r3 0 1 1 0 0 1 0 0 0 1 0 0 1 0
#r4 0 1 0 1 0 0 1 0 0 <NA> <NA> 0 0 1
#r5 0 1 0 1 0 0 0 1 0 1 0 0 0 1