r-将宽格式正/负数据重塑为长格式二进制结果

  • 本文关键字:格式 二进制 结果 数据 r reshape
  • 更新时间 :
  • 英文 :


我有数据可以查看特定农场小母牛的妊娠测试结果。

例如。

Farm   Breed    Pregnant  Empty  Total  ADG
1      J        5         3      8      12
2      F        2         1      3      10
3      J        3         0      3      11

我希望把 Pregnant(怀孕)和 Empty(空怀)两列的计数展开成二进制结果(1 表示怀孕,0 表示空怀),而其他列的值只需按行重复。

例如。

Farm   Breed  Pregnant  ADG
1      J      1         12
1      J      1         12
1      J      1         12
1      J      1         12
1      J      1         12
1      J      0         12
1      J      0         12
1      J      0         12
2      F      1         10
2      F      1         10
2      F      0         10

我一直在尝试使用 reshape 相关的代码,但很困惑,想知道是否有人能帮我。

我建议将"data.table"包与rep结合使用。您希望按 Pregnant 和 Empty 两列中给出的数量分别重复值 1 和 0,因此可以轻松地为此创建rep语句。默认情况下,行会按分组所用的key"展开",以容纳新生成的数据。

假设您从一个名为"mydf"的data.frame开始,请尝试:

library(data.table)
# One output row per heifer: within each Farm/Breed/ADG group, emit
# `Pregnant` ones followed by `Empty` zeros.
as.data.table(mydf)[
  ,
  .(Pregnant = rep(c(1, 0), times = c(Pregnant, Empty))),
  by = .(Farm, Breed, ADG)
]
#     Farm Breed ADG Pregnant
#  1:    1     J  12        1
#  2:    1     J  12        1
#  3:    1     J  12        1
#  4:    1     J  12        1
#  5:    1     J  12        1
#  6:    1     J  12        0
#  7:    1     J  12        0
#  8:    1     J  12        0
#  9:    2     F  10        1
# 10:    2     F  10        1
# 11:    2     F  10        0
# 12:    3     J  11        1
# 13:    3     J  11        1
# 14:    3     J  11        1

我能想到的最接近基本R的方法是首先"扩展"源数据集的相关列(很容易,使用rep),然后使用apply来获得"怀孕"的二进制结果,如下所示:

# Base R: repeat the identifying columns once per heifer, then attach the
# 0/1 outcome.
#
# The outcome is built with a single vectorised rep(): the values
# 1, 0, 1, 0, ... (one pair per farm) are repeated by the interleaved counts
# Pregnant[1], Empty[1], Pregnant[2], Empty[2], ...
# This avoids unlist(apply(...)), which only yields a flat vector when the
# farms have different totals; when every farm has the same total, apply()
# simplifies its result to a matrix and the cbind() output is malformed.
#
# Rows are expanded by Pregnant + Empty (rather than Total) so the two parts
# being bound always have the same number of rows.
cbind(mydf[rep(rownames(mydf), mydf$Pregnant + mydf$Empty),
           c("Farm", "Breed", "ADG")],
      Pregnant = rep(rep(c(1, 0), times = nrow(mydf)),
                     times = c(rbind(mydf$Pregnant, mydf$Empty))))
#     Farm Breed ADG Pregnant
# 1      1     J  12        1
# 1.1    1     J  12        1
# 1.2    1     J  12        1
# 1.3    1     J  12        1
# 1.4    1     J  12        1
# 1.5    1     J  12        0
# 1.6    1     J  12        0
# 1.7    1     J  12        0
# 2      2     F  10        1
# 2.1    2     F  10        1
# 2.2    2     F  10        0
# 3      3     J  11        1
# 3.1    3     J  11        1
# 3.2    3     J  11        1

以下内容不言自明:

# Repeat the column values one row at a time, then convert the resulting list to matrices.
# Columns 1, 2, 3, 6 are presumably Farm, Breed, Pregnant, ADG and column 4 is Empty --
# this assumes ddf has the same column layout as the question's table; TODO confirm.
# For each row, c(Farm, Breed, ADG, flag) is repeated x[3] times (the count column), with
# flag = 1 for m1 and 0 for m2; nrow=4 then lays out one animal per matrix column.
# The printed matrices below hold quoted strings, i.e. every value ends up as character.
> m1 = matrix(unlist(apply(ddf[,c(1,2,3,6)], 1, function(x) rep(c(x[c(1,2,4)],1), x[3]))), nrow=4)
> m2 = matrix(unlist(apply(ddf[,c(1,2,4,6)], 1, function(x) rep(c(x[c(1,2,4)],0), x[3]))), nrow=4)
> 
> 
> m1
     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] "1"  "1"  "1"  "1"  "1"  "2"  "2"  "3"  "3"  "3"  
[2,] "J"  "J"  "J"  "J"  "J"  "F"  "F"  "J"  "J"  "J"  
[3,] "12" "12" "12" "12" "12" "10" "10" "11" "11" "11" 
[4,] "1"  "1"  "1"  "1"  "1"  "1"  "1"  "1"  "1"  "1"  
> m2
     [,1] [,2] [,3] [,4]
[1,] "1"  "1"  "1"  "2" 
[2,] "J"  "J"  "J"  "F" 
[3,] "12" "12" "12" "10"
[4,] "0"  "0"  "0"  "0" 
# Combine the two matrices side by side, transpose so that each animal becomes a row,
# and name the four columns:
> mm = t(cbind(m1, m2))
> colnames(mm) = c('Farm','Breed','ADG','Preg_empty')
> mm
      Farm Breed ADG  Preg_empty
 [1,] "1"  "J"   "12" "1"       
 [2,] "1"  "J"   "12" "1"       
 [3,] "1"  "J"   "12" "1"       
 [4,] "1"  "J"   "12" "1"       
 [5,] "1"  "J"   "12" "1"       
 [6,] "2"  "F"   "10" "1"       
 [7,] "2"  "F"   "10" "1"       
 [8,] "3"  "J"   "11" "1"       
 [9,] "3"  "J"   "11" "1"       
[10,] "3"  "J"   "11" "1"       
[11,] "1"  "J"   "12" "0"       
[12,] "1"  "J"   "12" "0"       
[13,] "1"  "J"   "12" "0"       
[14,] "2"  "F"   "10" "0"       

使用dplyr

library(dplyr)
library(tidyr)
# Reshape the two count columns to long form (one row per farm and outcome),
# repeat each row `Val` times (rows with a zero count disappear), then recode
# the outcome label as 1/0.
mydf %>%
  pivot_longer(c(Pregnant, Empty), names_to = "Var", values_to = "Val") %>%
  uncount(Val) %>%
  # Compare against the label explicitly. Indexing c(1, 0) by
  # as.numeric(factor(Var)) depends on factor level order, and for a character
  # key the levels sort alphabetically ("Empty" before "Pregnant"), which
  # would invert the coding.
  mutate(Pregnant = as.numeric(Var == "Pregnant")) %>%
  select(Farm, Breed, ADG, Pregnant) %>%
  arrange(Farm, Breed) %>%
  # Return a plain data.frame, as the original pipeline did.
  as.data.frame()

给出结果

  #    Farm Breed ADG Pregnant
  #1     1     J  12        1
  #2     1     J  12        1
  #3     1     J  12        1
  #4     1     J  12        1
  #5     1     J  12        1
  #6     1     J  12        0
  #7     1     J  12        0
  #8     1     J  12        0
  #9     2     F  10        1
  #10    2     F  10        1
  #11    2     F  10        0
  #12    3     J  11        1
  #13    3     J  11        1
  #14    3     J  11        1

数据

 # Example data: one row per farm with counts of pregnant and empty heifers.
 mydf <- data.frame(
   Farm = 1:3,
   Breed = c("J", "F", "J"),
   Pregnant = c(5L, 2L, 3L),
   Empty = c(3L, 1L, 0L),
   Total = c(8L, 3L, 3L),
   ADG = c(12L, 10L, 11L),
   stringsAsFactors = FALSE
 )

尝试melt

# library() stops with an error if reshape2 is missing; require() would only
# return FALSE and let the next line fail with a less helpful message.
library(reshape2)
# Melt the Farm, Breed, Pregnant and ADG columns of the example data frame
# (`mydf`), keeping Farm, Breed and Pregnant as identifier columns.
# NOTE(review): with these id columns only ADG is melted into
# variable/value form; this call alone does not expand the counts into
# one 0/1 row per animal.
newdata <- melt(mydf[c("Farm", "Breed", "Pregnant", "ADG")],
                id.vars = c("Farm", "Breed", "Pregnant"))

最新更新