对于这些数据,我们正在研究绵羊乳房中存在的感染。每只动物都有一个唯一的ID(列名:EweID),此外还有采样日期(DateSampled)、乳房半侧(UdderHalf,本例中为R或L)、样本类型(Sample,BC1或BC2)、Bacteria1_ID(每个样本中已识别细菌的名称)、Bacteria1_Level(动物体内该细菌的数量等级,取值1-4),以及Bacteria2_ID和Bacteria2_Level。
数据帧有20行和以下列:
EweID DateSampled Sample UdderHalf Bacteria1_ID Bacteria1_Level
numeric date format char str char char. str. numeric
1 . BC2 L No Growth NA
1 . BC1 L Staph Aureus 3
2 . BC2 L Staph Equorum 4
2 . BC1 L Staph Oralis 2
3 . BC2 L NA NA
3 . BC1 L NA NA
4 . BC2 R NA NA
4 . BC1 R NA NA
5 . BC2 R NA NA
5 . BC1 R NA NA
文本框截断了Bacteria2_ID和Bacteria2_Level,但这两列是右侧的最后两列。
我使用以下代码将长格式转换为宽格式。这样做的目标是得到下列新列,以取代目前的细菌列:
代码:
# Pivot the long-format samples to wide format: rows are identified by
# ewe / sampling date / udder half, and every measurement column is spread
# across the values found in `Sample`.
MDFSO <- reshape(
  Data,
  direction = "wide",
  timevar = "Sample",
  idvar = c("EweID", "DateSampled", "UdderHalf"),
  v.names = c(
    "Bacteria1_ID", "Bacteria1_Level",
    "Bacteria2_ID", "Bacteria2_Level"
  )
)
所需结果列:
EweID | DateSampled | UdderHalf | Bacteria1_ID.BC1 | Bacteria1_Level.BC1| Bacteria1_ID.BC2 | Bacteria1_Level.BC2 | Bacteria2_ID.BC1 | Bacteria2_Level.BC1| Bacteria2_ID.BC2 | Bacteria2_Level.BC2 |
但实际上,Bacteria2_Level列没有被转换为宽格式,我得到的列如下:
EweID | DateSampled | UdderHalf | Bacteria2_Level | Bacteria1_ID.BC1 | Bacteria1_Level.BC1| Bacteria1_ID.BC2 | Bacteria1_Level.BC2 | Bacteria2_ID.BC1 | Bacteria2_ID.BC2 |
以下是重新创建数据帧的代码:
# Rebuild the example data set in long format: 20 rows, two samples
# (BC2 then BC1) per ewe, with up to two identified bacteria per sample.
# The frame is built in one data.frame() call so the column names are attached
# to `Data` itself (the earlier version assigned them to an undefined object).
# stringsAsFactors = FALSE keeps the text columns as character on R < 4.0 too.
Data <- data.frame(
  EweID = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10),
  DateSampled = as.Date(c(
    "2021-10-13", "2021-10-13", "2021-10-20", "2021-10-20",
    "2021-10-27", "2021-10-27", "2021-11-03", "2021-11-03",
    "2021-11-10", "2021-11-10", "2021-11-17", "2021-11-17",
    "2021-11-24", "2021-11-24", "2021-12-01", "2021-12-01",
    "2021-10-13", "2021-10-13", "2021-10-20", "2021-10-20"
  )),
  Sample = c(
    "BC2", "BC1", "BC2", "BC1", "BC2", "BC1", "BC2", "BC1", "BC2", "BC1",
    "BC2", "BC1", "BC2", "BC1", "BC2", "BC1", "BC2", "BC1", "BC2", "BC1"
  ),
  UdderHalf = c(
    "L", "L", "L", "L", "L", "L", "L", "L", "L", "L",
    "L", "L", "L", "L", "L", "L", "R", "R", "R", "R"
  ),
  Bacteria1_ID = c(
    "No Growth", "Staph Auerus", "Staph Equorum", "Staph Oralis",
    "No Growth", "No Growth", "No Growth", "No Growth",
    "No Growth", NA, NA, NA,
    "Staph Sp", "Staph Auerus", "Staph Oralis", NA,
    NA, NA, "No Growth", "No Growth"
  ),
  Bacteria1_Level = c(
    NA, 3, 4, 2, NA, NA, NA, NA, NA, NA,
    NA, NA, 2, 1, 4, NA, NA, NA, NA, NA
  ),
  Bacteria2_ID = c(
    "No Growth", "Staph Auerus", "Staph Sp", NA,
    NA, NA, "Staph Aureus", "No Growth",
    NA, "No Growth", "No Growth", "No Growth",
    "No Growth", "No Growth", NA, "Staph Sp",
    "Staph Aureus", NA, NA, NA
  ),
  Bacteria2_Level = c(
    NA, 4, 1, NA, NA, NA, 2, NA, NA, NA,
    NA, NA, NA, NA, NA, 2, 1, NA, NA, NA
  ),
  stringsAsFactors = FALSE
)
我感谢你的帮助。非常感谢。
看起来你只是在v.names参数中漏掉了"Bacteria2_Level"。
# Long -> wide: all four bacteria columns are listed in `v.names`, so each one
# is spread across the `Sample` values instead of being carried over as-is.
MDFSO <- reshape(
  Data,
  direction = "wide",
  idvar = c("EweID", "DateSampled", "UdderHalf"),
  timevar = "Sample",
  v.names = c(
    "Bacteria1_ID",
    "Bacteria1_Level",
    "Bacteria2_ID",
    "Bacteria2_Level"
  )
)
使用tidyr包中的pivot_wider()函数更容易实现。
library(tidyr)

# Spread every bacteria column across the values of `Sample`, keeping one row
# per ewe / sampling date / udder half. The result is stored as `answer`
# because the later column-reordering step indexes an object of that name;
# the bare `answer` line afterwards prints it, as the unassigned call did.
answer <- pivot_wider(
  Data,
  id_cols = c(EweID, DateSampled, UdderHalf),
  names_from = Sample,
  values_from = c(
    Bacteria1_ID, Bacteria1_Level,
    Bacteria2_ID, Bacteria2_Level
  )
)
answer
EweID DateSampled UdderHalf Bacteria1_ID_BC2 Bacteria1_ID_BC1 Bacteria1_Level_BC2 Bacteria1_Level_BC1 Bacteria2_ID_BC2 Bacteria2_ID_BC1 Bacteria2_Level_BC2 Bacteria2_Level_BC1
<dbl> <date> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <dbl>
1 1 2021-10-13 L No Growth Staph Auerus NA 3 No Growth Staph Auerus NA 4
2 2 2021-10-20 L Staph Equorum Staph Oralis 4 2 Staph Sp NA 1 NA
3 3 2021-10-27 L No Growth No Growth NA NA NA NA NA NA
4 4 2021-11-03 L No Growth No Growth NA NA Staph Aureus No Growth 2 NA
5 5 2021-11-10 L No Growth NA NA NA NA No Growth NA NA
...
要重新排列列:
# Pick the columns in the desired order: the three identifier columns first,
# then, for each bacteria slot, the ID/level pair for BC1 followed by BC2.
answer[, c(
  "EweID", "DateSampled", "UdderHalf",
  "Bacteria1_ID_BC1", "Bacteria1_Level_BC1",
  "Bacteria1_ID_BC2", "Bacteria1_Level_BC2",
  "Bacteria2_ID_BC1", "Bacteria2_Level_BC1",
  "Bacteria2_ID_BC2", "Bacteria2_Level_BC2"
)]