获取变量随时间的累积比例

  • 本文关键字:变量 时间 获取 r
  • 更新时间 :
  • 英文 :


我试图获得一个变量(性别;Sf_sex)跨越时间(startday;天).

我的代码如下

with(df,
cbind( Freq=table(startday, sf_sex),
Cumul =cumsum(table(startday, sf_sex)),
relative = prop.table(table(startday, sf_sex),1)))

我得到的结果在某种程度上是正确的,但不完全是我想看到的每天的累积比例,我得到的是每天情况的比例。所以,换一种方式来说——每天和所有之前的总和的情况,并且有它的比例,而不是只考虑一天的比例,而不考虑其他的。

数据

df <- structure(list(sf_sex = c("Female", "Male", "Male", "Female", 
"Male", "Female", "Female", "Female", "Male", "Male", "Female", 
"Male", "Male", "Male", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Male", "Male", "Male", "Male", 
"Female", "Female", "Male", "Male", "Female", "Male", "Male", 
"Male", "Male", "Male", "Male", "Female", "Male", "Female", "Female", 
"Male", "Male", "Male", "Female", "Male", "Male", "Male", "Female", 
"Male", "Male", "Female", "Female", "Female", "Female", "Male", 
"Female", "Female", "Female", "Female", "Male", "Female", "Female", 
"Male", "Female", "Female", "Male", "Female", "Female", "Male", 
"Female", "Male", "Female", "Male", "Female", "Male", "Male", 
"Male", "Male", "Female", "Female", "Male", "Female", "Male", 
"Female", "Female", "Female", "Male", "Female", "Male", "Male", 
"Female", "Female", "Male", "Female", "Female", "Female", "Male", 
"Male", "Female", "Male", "Female", "Female", "Female", "Female", 
"Male", "Female", "Male", "Female", "Female", "Female", "Female", 
"Female", "Female", "Male", "Male", "Female", "Male", "Male", 
"Female", "Male", "Male", "Male", "Female", "Male", "Male", "Male", 
"Female", "Male", "Male", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Male", "Female", 
"Male", "Male", "Female", "Female", "Female", "Female", "Male", 
"Male", "Male", "Female", "Female", "Female", "Male", "Female", 
"Male", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Male", "Male", "Male", "Female", "Male", "Female", 
"Female", "Male", "Female", "Male", "Male", "Male", "Male", "Female", 
"Male", "Female", "Female", "Male", "Male", "Female", "Female", 
"Female", "Female", "Male", "Female", "Female", "Male", "Male", 
"Female", "Male", "Female", "Female", "Male", "Male", "Female", 
"Female", "Male", "Female"), startday = c("05/27/2019", "06/12/2019", 
"05/20/2019", "06/16/2019", "05/25/2019", "05/20/2019", "06/16/2019", 
"05/20/2019", "05/20/2019", "05/20/2019", "05/25/2019", "05/20/2019", 
"05/21/2019", "06/28/2019", "05/27/2019", "05/20/2019", "05/21/2019", 
"05/20/2019", "05/24/2019", "05/20/2019", "06/19/2019", "05/23/2019", 
"05/20/2019", "05/20/2019", "07/04/2019", "06/14/2019", "06/08/2019", 
"06/03/2019", "06/13/2019", "06/04/2019", "06/15/2019", "05/20/2019", 
"06/17/2019", "06/03/2019", "06/03/2019", "06/11/2019", "05/20/2019", 
"06/04/2019", "05/22/2019", "05/20/2019", "05/20/2019", "06/08/2019", 
"05/20/2019", "05/21/2019", "05/20/2019", "05/27/2019", "05/27/2019", 
"05/22/2019", "05/21/2019", "05/21/2019", "06/18/2019", "06/05/2019", 
"05/20/2019", "05/20/2019", "06/09/2019", "05/28/2019", "05/28/2019", 
"06/24/2019", "05/28/2019", "05/27/2019", "06/05/2019", "05/20/2019", 
"06/10/2019", "05/20/2019", "05/30/2019", "05/21/2019", "05/20/2019", 
"05/20/2019", "05/31/2019", "06/16/2019", "05/28/2019", "05/25/2019", 
"06/04/2019", "06/02/2019", "06/04/2019", "06/05/2019", "06/03/2019", 
"05/25/2019", "05/26/2019", "05/28/2019", "06/30/2019", "05/21/2019", 
"06/03/2019", "05/21/2019", "06/16/2019", "06/04/2019", "05/24/2019", 
"06/04/2019", "05/31/2019", "06/06/2019", "05/27/2019", "05/30/2019", 
"06/01/2019", "06/06/2019", "05/20/2019", "05/22/2019", "06/15/2019", 
"06/03/2019", "05/21/2019", "06/18/2019", "06/27/2019", "05/21/2019", 
"05/30/2019", "05/22/2019", "05/25/2019", "06/06/2019", "06/05/2019", 
"06/09/2019", "05/23/2019", "05/21/2019", "06/15/2019", "06/14/2019", 
"05/20/2019", "06/15/2019", "05/24/2019", "05/22/2019", "05/20/2019", 
"05/23/2019", "05/21/2019", "05/24/2019", "05/22/2019", "06/22/2019", 
"06/06/2019", "05/20/2019", "05/20/2019", "05/28/2019", "05/23/2019", 
"05/20/2019", "06/04/2019", "06/21/2019", "06/26/2019", "05/24/2019", 
"05/22/2019", "06/05/2019", "06/06/2019", "05/23/2019", "05/26/2019", 
"05/26/2019", "06/04/2019", "07/02/2019", "05/20/2019", "05/20/2019", 
"05/20/2019", "06/04/2019", "05/20/2019", "05/23/2019", "06/05/2019", 
"05/20/2019", "05/27/2019", "05/24/2019", "06/13/2019", "06/21/2019", 
"06/10/2019", "05/20/2019", "06/13/2019", "06/05/2019", "05/23/2019", 
"06/04/2019", "05/20/2019", "05/20/2019", "05/28/2019", "06/29/2019", 
"06/17/2019", "06/03/2019", "06/03/2019", "06/02/2019", "06/04/2019", 
"05/26/2019", "06/03/2019", "06/22/2019", "05/20/2019", "06/03/2019", 
"05/22/2019", "05/23/2019", "06/03/2019", "05/29/2019", "05/28/2019", 
"07/02/2019", "06/07/2019", "06/21/2019", "06/01/2019", "06/03/2019", 
"05/21/2019", "05/30/2019", "06/07/2019", "06/04/2019", "05/20/2019", 
"06/17/2019", "06/06/2019", "05/20/2019", "05/26/2019", "05/21/2019", 
"06/03/2019", "06/03/2019", "06/04/2019", "06/16/2019", "05/22/2019", 
"07/03/2019", "05/20/2019", "05/21/2019")), row.names = c("16573", 
"2114", "17632", "14249", "5169", "20505", "22268", "11519", 
"8500", "14597", "15830", "5964", "24088", "6259", "1345", "24484", 
"10225", "24985", "12584", "19627", "24278", "10814", "10559", 
"18905", "23512", "17167", "11760", "23947", "1003", "16229", 
"15348", "14627", "24806", "12419", "18681", "13595", "9228", 
"16354", "18363", "20331", "21097", "23457", "22440", "9176", 
"9862", "3682", "13423", "25536", "20187", "8217", "6137", "8648", 
"6552", "4859", "18014", "9015", "2624", "12437", "23884", "20404", 
"3365", "3291", "23731", "15372", "24447", "11009", "16533", 
"22990", "22145", "25122", "8335", "7527", "16011", "16865", 
"12429", "4709", "3269", "19721", "5001", "11731", "7933", "18174", 
"16398", "3432", "2890", "5792", "4057", "15877", "20939", "15928", 
"15896", "23313", "9982", "20427", "4510", "14587", "17223", 
"7665", "14281", "594", "7076", "23310", "609", "20217", "8099", 
"18773", "4546", "10367", "18237", "22110", "9658", "6909", "12047", 
"24545", "5082", "13545", "8783", "10961", "20754", "7086", "15179", 
"4822", "20599", "23359", "23749", "22878", "11783", "11278", 
"3232", "19277", "18045", "1862", "1503", "19801", "18922", "22789", 
"3673", "12472", "14335", "17846", "1269", "9050", "15449", "20114", 
"9692", "5020", "24011", "10208", "411", "2741", "10972", "23409", 
"17518", "12153", "16689", "22623", "13604", "16199", "12978", 
"24524", "20858", "21581", "20166", "18741", "7929", "22840", 
"14782", "16208", "11057", "5126", "9278", "10843", "19346", 
"8898", "15046", "12816", "1714", "375", "23216", "8672", "11015", 
"2847", "5564", "23642", "5673", "20655", "4787", "9709", "4399", 
"11853", "6448", "7210", "2195", "1176", "4342", "5421", "12508", 
"23105", "1505", "17312"), class = "data.frame")

Thanks in advance

如果我理解正确的话

library(reshape2)
df2=dcast(df,startday~sf_sex,length)
df2[,c("FemaleC","MaleC")]=cumsum(df2[,c("Female","Male")])
df2[,c("FemaleC","MaleC")]/rowSums(df2[,c("FemaleC","MaleC")])

看起来像这样(前10行)

startday Female Male FemaleC MaleC   FemaleP     MaleP
1  05/20/2019     22   19      22    19 0.5365854 0.4634146
2  05/21/2019     10    5      32    24 0.5714286 0.4285714
3  05/22/2019      7    2      39    26 0.6000000 0.4000000
4  05/23/2019      5    3      44    29 0.6027397 0.3972603
5  05/24/2019      3    3      47    32 0.5949367 0.4050633
6  05/25/2019      2    3      49    35 0.5833333 0.4166667
7  05/26/2019      5    0      54    35 0.6067416 0.3932584
8  05/27/2019      3    4      57    39 0.5937500 0.4062500
9  05/28/2019      5    3      62    42 0.5961538 0.4038462
10 05/29/2019      1    0      63    42 0.6000000 0.4000000

相关内容

  • 没有找到相关文章