r语言 - 时间序列数据的百分比变化差异



我想计算变量 'wt' 和 'wc' 在时间 1、时间 2 和时间 3 相对于时间 1(基线)的百分比变化。在时间 1 时,该值为 0。在时间 2 时,'wt' 的百分比变化为 (t2 - t1) / t1 * 100;在时间 3 时,'wt' 的百分比变化为 (t3 - t1) / t1 * 100。然后,我想将结果作为新变量添加到现有的 Excel 数据表中。我尝试寻找其他示例,但没有一个与我的数据格式匹配。谢谢!

# Example data (looks like dput() output): an object with classes
# "tbl_df", "tbl" and "data.frame", 9 rows, and the columns
# code, treatment, time, wt and wc. Each code value appears three times,
# once for each of time = 1, 2, 3.
# NOTE: the value is not assigned to a name here; the answers below refer
# to it as `df`.
structure(list(code = c(100, 100, 100, 101, 101, 101, 102, 102, 
102), treatment = c(1, 1, 1, 2, 2, 2, 1, 1, 1), time = c(1, 2, 
3, 1, 2, 3, 1, 2, 3), wt = c(80, 78, 76, 75, 74, 74, 78, 74, 
72), wc = c(90, 89, 87, 92, 91, 90, 89, 86, 84)), .Names = c("code", 
"treatment", "time", "wt", "wc"), row.names = c(NA, -9L), 
class =c("tbl_df", 
"tbl", "data.frame"))

我尝试按照下面的建议操作,但收到了以下错误:

> data <- read.csv("All Data with BMI and other tweaks.csv", header = TRUE, na.strings = ".", stringsAsFactors = FALSE)
> names(data)
 [1] "code"           "treatment"      "age"            "sex"           
 [5] "time"           "bicep"          "tricep"         "subscapular"   
 [9] "suprailiac"     "weight"         "pwc"            "wc"            
[13] "bia"            "height"         "bmi"            "wthr"          
[17] "density"        "X.fat"          "fm"             "ffm"           
[21] "dietary.recall" "reportingdate"  "NumFoods"       "NumCodes"      
[25] "kcal"           "prot"           "tfat"           "carb"          
[29] "mois"           "alc"            "caff"           "theo"          
[33] "sugr"           "fibe"           "calc"           "iron"          
[37] "magn"           "phos"           "pota"           "sodi"          
[41] "zinc"           "copp"           "sele"           "vc"            
[45] "vb1"            "vb2"            "niac"           "vb6"           
[49] "fola"           "fa"             "ff"             "fdfe"          
[53] "vb12"           "vara"           "ret"            "bcar"          
[57] "acar"           "cryp"           "lyco"           "lz"            
[61] "atoc"           "vk"             "vitd"           "choln"         
[65] "chole"          "sfat"           "s040"           "s060"          
[69] "s080"           "s100"           "s120"           "s140"          
[73] "s160"           "s180"           "mfat"           "m161"          
[77] "m181"           "m201"           "m221"           "pfat"          
[81] "p182"           "p183"           "p184"           "p204"          
[85] "p205"           "p225"           "p226"           "vite_add"      
[89] "b12_add"        "datacomp"      
> library(dplyr)
> data <- data %>%
+ group_by(code) %>%
+ mutate(wt.pch = (data$weight - data$weight[1]) / data$weight * 100, wc.pch = (data$wc - data$wc[1]) / data$wc[1] * 100)
Error in mutate_impl(.data, dots) : 
  Column `wt.pch` must be length 3 (the group size) or one, not 114

这里有一种方法:

library(dplyr)
# Percent change of wt and wc relative to the time == 1 row of each code group.
# The baseline row is looked up by its time value instead of by position
# (wt[1]), so the result stays correct when rows are not sorted by time within
# a code. match(1, time) yields NA when a code has no time == 1 row, which
# makes that group's changes NA rather than silently using the wrong baseline.
df %>%
  group_by(code) %>%
  mutate(
    wt.pch = (wt - wt[match(1, time)]) / wt[match(1, time)] * 100,
    wc.pch = (wc - wc[match(1, time)]) / wc[match(1, time)] * 100
  )
# A tibble: 9 x 7
# Groups:   code [3]
#    code treatment  time    wt    wc wt.pch wc.pch
#   <dbl>     <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl>
# 1   100      1.00  1.00  80.0  90.0   0      0   
# 2   100      1.00  2.00  78.0  89.0  -2.50  -1.11
# 3   100      1.00  3.00  76.0  87.0  -5.00  -3.33
# 4   101      2.00  1.00  75.0  92.0   0      0   
# 5   101      2.00  2.00  74.0  91.0  -1.33  -1.09
# 6   101      2.00  3.00  74.0  90.0  -1.33  -2.17
# 7   102      1.00  1.00  78.0  89.0   0      0   
# 8   102      1.00  2.00  74.0  86.0  -5.13  -3.37
# 9   102      1.00  3.00  72.0  84.0  -7.69  -5.62

你也可以尝试下面的方法(注意:它计算的是相对于前一个时间点的百分比变化,而不是相对于时间 1 的变化):

# Percent change of wt and wc relative to the previous row within each code.
# Note: lag() compares against the preceding observation, not against time 1,
# so the first row of every group is NA.
# The result is assigned back to df; without the assignment print(df) would
# show the original data without the new columns.
df <- df %>%
  group_by(code) %>%
  mutate(
    pct_change_wt = (wt - lag(wt)) / lag(wt) * 100,
    pct_change_wc = (wc - lag(wc)) / lag(wc) * 100
  ) %>%
  ungroup()
print(df)
   code treatment  time    wt    wc pct_change_wt pct_change_wc
  <dbl>     <dbl> <dbl> <dbl> <dbl>         <dbl>         <dbl>
1   100      1.00  1.00  80.0  90.0         NA            NA   
2   100      1.00  2.00  78.0  89.0        - 2.50        - 1.11
3   100      1.00  3.00  76.0  87.0        - 2.56        - 2.25
4   101      2.00  1.00  75.0  92.0         NA            NA   
5   101      2.00  2.00  74.0  91.0        - 1.33        - 1.09
6   101      2.00  3.00  74.0  90.0          0           - 1.10
7   102      1.00  1.00  78.0  89.0         NA            NA   
8   102      1.00  2.00  74.0  86.0        - 5.13        - 3.37
9   102      1.00  3.00  72.0  84.0        - 2.70        - 2.33

解释:

1. `group_by(code)` 确保按每个组(每个 code)分别计算百分比变化。
2. `lag()` 函数在每个组内取前一行的值,因此每组第一行没有前一个值,结果为 NA。

最新更新