我想计算变量 'wt' 和 'wc' 在时间 1、时间 2 和时间 3 从时间 = 1 开始的百分比变化。在时间 1 时,它将为 0。在时间 2 时,它看起来像 t2 = t2-t1/t1*100 时 'wt' 的百分比变化,而 t3 它应该看起来像 'wt' = t3-t1/t1*100。然后,我想将其作为新变量添加到现有的 excel 数据表中。我尝试寻找其他示例,但没有一个与我的数据格式匹配。谢谢!
structure(list(code = c(100, 100, 100, 101, 101, 101, 102, 102,
102), treatment = c(1, 1, 1, 2, 2, 2, 1, 1, 1), time = c(1, 2,
3, 1, 2, 3, 1, 2, 3), wt = c(80, 78, 76, 75, 74, 74, 78, 74,
72), wc = c(90, 89, 87, 92, 91, 90, 89, 86, 84)), .Names = c("code",
"treatment", "time", "wt", "wc"), row.names = c(NA, -9L),
class =c("tbl_df",
"tbl", "data.frame"))
我试图遵循下面的建议。但是我收到错误
> data <- read.csv("All Data with BMI and other tweaks.csv", header = TRUE, na.strings = ".", stringsAsFactors = FALSE)
> names(data)
[1] "code" "treatment" "age" "sex"
[5] "time" "bicep" "tricep" "subscapular"
[9] "suprailiac" "weight" "pwc" "wc"
[13] "bia" "height" "bmi" "wthr"
[17] "density" "X.fat" "fm" "ffm"
[21] "dietary.recall" "reportingdate" "NumFoods" "NumCodes"
[25] "kcal" "prot" "tfat" "carb"
[29] "mois" "alc" "caff" "theo"
[33] "sugr" "fibe" "calc" "iron"
[37] "magn" "phos" "pota" "sodi"
[41] "zinc" "copp" "sele" "vc"
[45] "vb1" "vb2" "niac" "vb6"
[49] "fola" "fa" "ff" "fdfe"
[53] "vb12" "vara" "ret" "bcar"
[57] "acar" "cryp" "lyco" "lz"
[61] "atoc" "vk" "vitd" "choln"
[65] "chole" "sfat" "s040" "s060"
[69] "s080" "s100" "s120" "s140"
[73] "s160" "s180" "mfat" "m161"
[77] "m181" "m201" "m221" "pfat"
[81] "p182" "p183" "p184" "p204"
[85] "p205" "p225" "p226" "vite_add"
[89] "b12_add" "datacomp"
> library(dplyr)
> data <- data %>%
+ group_by(code) %>%
+ mutate(wt.pch = (data$weight - data$weight[1]) / data$weight * 100, wc.pch = (data$wc - data$wc[1]) / data$wc[1] * 100)
Error in mutate_impl(.data, dots) :
Column `wt.pch` must be length 3 (the group size) or one, not 114
这里有一种方法:
library(dplyr)
df %>% group_by(code) %>% mutate(wt.pch = (wt - wt[1]) / wt[1] * 100,
wc.pch = (wc - wc[1]) / wc[1] * 100)
# A tibble: 9 x 7
# Groups: code [3]
# code treatment time wt wc wt.pch wc.pch
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 100 1.00 1.00 80.0 90.0 0 0
# 2 100 1.00 2.00 78.0 89.0 -2.50 -1.11
# 3 100 1.00 3.00 76.0 87.0 -5.00 -3.33
# 4 101 2.00 1.00 75.0 92.0 0 0
# 5 101 2.00 2.00 74.0 91.0 -1.33 -1.09
# 6 101 2.00 3.00 74.0 90.0 -1.33 -2.17
# 7 102 1.00 1.00 78.0 89.0 0 0
# 8 102 1.00 2.00 74.0 86.0 -5.13 -3.37
# 9 102 1.00 3.00 72.0 84.0 -7.69 -5.62
你可以尝试做:
df %>%
group_by(code) %>%
mutate(pct_change_wt = ((wt - lag(wt))/ lag(wt)) * 100,
pct_change_wc = ((wc - lag(wc))/ lag(wc)) * 100)
print(df)
code treatment time wt wc pct_change_wt pct_change_wc
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 100 1.00 1.00 80.0 90.0 NA NA
2 100 1.00 2.00 78.0 89.0 - 2.50 - 1.11
3 100 1.00 3.00 76.0 87.0 - 2.56 - 2.25
4 101 2.00 1.00 75.0 92.0 NA NA
5 101 2.00 2.00 74.0 91.0 - 1.33 - 1.09
6 101 2.00 3.00 74.0 90.0 0 - 1.10
7 102 1.00 1.00 78.0 89.0 NA NA
8 102 1.00 2.00 74.0 86.0 - 5.13 - 3.37
9 102 1.00 3.00 72.0 84.0 - 2.70 - 2.33
解释:
1. 代码group_by
确保我们计算每个组的百分比变化。
2. lag
函数在每个组中取前一个值.