r-根据数据帧创建计数矩阵



我需要帮助,以便根据数据帧构建一个矩阵。

这里有一个例子:

我有一个数据帧,例如:

Groups Number Species_name 
G1     5      Homo_sapiens
G1     5      Canis_lupus
G1     6      Cattus_domesticus
G1     6      Pan_troglodys   
G1     6      Danio_rerio
G2     5      Homo_sapiens
G2     5      Canis_lupus 
G3     1      Mus_musculus 
G3     3      Canis_lupus
G3     3      Cattus_domesticus
G3     3      Pan_troglodys 

我需要将其转换为:

                  G1-5 G1-6 G2-5 G3-1 G3-3
Homo_sapiens      1    0    1    0    0
Canis_lupus       1    0    1    0    1
Cattus_domesticus 0    1    0    0    1
Pan_troglodys     0    1    0    0    1
Apis_mellifera    0    0    0    0    0
Danio_rerio       0    1    0    0    0
Mus_musculus      0    0    0    1    0

正如你所看到的,Apis_mellifera没有任何值,但我仍然根据行名列表将其添加到最终矩阵中:

# Species that must appear as rows of the final matrix, in display order.
list_rownames <- c(
  "Homo_sapiens",
  "Canis_lupus",
  "Cattus_domesticus",
  "Pan_troglodys",
  "Apis_mellifera",
  "Danio_rerio",
  "Mus_musculus"
)

有人有主意吗?

如果有帮助的话,以下是数据:

# Example data frame literal (looks like dput() output): 11 rows with the
# columns Groups (factor: G1, G2, G3), Number (integer) and Species_name
# (factor).
# Several Species_name labels differ only by trailing spaces
# (e.g. "Canis_lupus" vs "Canis_lupus "), so they are distinct factor levels.
# NOTE: the value is not assigned to a name here; assign it (e.g. to `x` or
# `df`) before running code that refers to it.
structure(list(Groups = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 
3L, 3L, 3L, 3L), .Label = c("G1", "G2", "G3"), class = "factor"), 
Number = c(5L, 5L, 6L, 6L, 6L, 5L, 5L, 1L, 3L, 3L, 3L), Species_name = structure(c(5L, 
1L, 3L, 8L, 4L, 5L, 2L, 6L, 1L, 3L, 7L), .Label = c("Canis_lupus", 
"Canis_lupus ", "Cattus_domesticus", "Danio_rerio", "Homo_sapiens", 
"Mus_musculus ", "Pan_troglodys ", "Pan_troglodys   "), class = "factor")), class = "data.frame", row.names = c(NA, 
-11L))

您可以对 Species_name 以及用 paste 拼接的 Groups 和 Number 使用 table,并将 Apis_mellifera 添加到 Species_name 的 levels 中。

# Build the species-by-group count matrix with base R.
#
# Some Species_name labels differ only by surrounding whitespace
# (e.g. "Canis_lupus" vs "Canis_lupus "), which would otherwise give the same
# species several rows. Trim the names first, then rebuild the factor with
# "Apis_mellifera" appended as an extra level so that table() still emits an
# all-zero row for it. union() keeps the level set free of duplicates in case
# that species is already present in the data.
species <- trimws(as.character(x$Species_name))
x$Species_name <- factor(
  species,
  levels = union(sort(unique(species)), "Apis_mellifera")
)
# Columns are the "Groups-Number" pairs; each cell counts the matching rows.
table(x$Species_name, paste(x$Groups, x$Number, sep = "-"))
#                    G1-5 G1-6 G2-5 G3-1 G3-3
#  Canis_lupus          1    0    1    0    1
#  Cattus_domesticus    0    1    0    0    1
#  Danio_rerio          0    1    0    0    0
#  Homo_sapiens         1    0    1    0    0
#  Mus_musculus         0    0    0    1    0
#  Pan_troglodys        0    1    0    0    1
#  Apis_mellifera       0    0    0    0    0

或者,如果只保留 list_rownames 中的名称(需要使用 trimws,因为部分名称带有多余的空格,会影响匹配):

# Keep only the rows whose whitespace-trimmed species name is listed in
# list_rownames.
trimmed_species <- trimws(x$Species_name)
keep <- trimmed_species %in% list_rownames
y <- x[keep, ]
# Re-level on list_rownames so the rows follow that order and species that
# never occur in the data still get an all-zero row.
y$Species_name <- factor(trimmed_species[keep], levels = list_rownames)
# Cross-tabulate species against the "Groups-Number" pairs.
table(y$Species_name, paste(y$Groups, y$Number, sep = "-"))
#                    G1-5 G1-6 G2-5 G3-1 G3-3
#  Homo_sapiens         1    0    1    0    0
#  Canis_lupus          1    0    1    0    1
#  Cattus_domesticus    0    1    0    0    1
#  Pan_troglodys        0    1    0    0    1
#  Apis_mellifera       0    0    0    0    0
#  Danio_rerio          0    1    0    0    0
#  Mus_musculus         0    0    0    1    0

pivot_wider 和 complete 可以帮助您获得所需的结果。

library(dplyr)
library(tidyr)

# Strip stray whitespace so spelling variants of one species collapse together.
trimmed <- mutate(df, Species_name = trimws(Species_name))

# One column per Groups/Number pair; each cell holds the number of matching
# rows, with 0 for pairs a species does not occur in.
wide_counts <- pivot_wider(
  trimmed,
  names_from = c(Groups, Number),
  values_from = Number,
  values_fn = length,
  values_fill = 0
)

# Add a row for every name in list_rownames, then turn the NA cells of the
# newly added rows into 0.
result <- wide_counts %>%
  complete(Species_name = list_rownames) %>%
  replace(is.na(.), 0)
#  Species_name       G1_5  G1_6  G2_5  G3_1  G3_3
#  <chr>             <int> <int> <int> <int> <int>
#1 Apis_mellifera        0     0     0     0     0
#2 Canis_lupus           1     0     1     0     1
#3 Cattus_domesticus     0     1     0     0     1
#4 Danio_rerio           0     1     0     0     0
#5 Homo_sapiens          1     0     1     0     0
#6 Mus_musculus          0     0     0     1     0
#7 Pan_troglodys         0     1     0     0     1

最新更新