如何在 R 的纵向数据中添加带有条件的新变量



在下面的数据中,我想添加另一个变量,比如 z。

mydata
y  x  sl
1   199.92989  1   1
2    27.73883  2   1
3   144.00000  3   1
4    72.00000  4   1
5     0.00000  5   1
6   392.60636  1   2
7   749.52499  2   2
8  3120.00000  3   2
9  1600.00000  4   2
10 1000.00000  5   2
11 5840.00000  6   2
12 3960.00000  7   2
13 4700.00000  8   2
14 1660.00000  9   2
15 5620.00000 10   2
16    0.00000  1 585
17    0.00000  2 585
18    0.00000  3 585
19 3062.32962  1 587
20 2048.97458  2 587
21 1280.00000  3 587
22 1440.00000  4 587
23 2960.00000  5 587
24  460.00000  6 587
25  530.00000  7 587
26 5190.00000  8 587
27 3200.00000  9 587
28 4620.00000 10 587
29    0.00000  1 651
30    0.00000  2 651
31    0.00000  3 651
32    0.00000  4 651

z=c(5,7,8):值 5 应重复 5 次,对应 sl=1;值 7 应重复 10 次,对应 sl=2;值 8 应重复 10 次,对应 sl=587。如果某个 sl(这里是 585 和 651)的所有 y 观测值都为 0,那么该 sl 对应的 z 必须取值 0。z 列必须如下所示:z=c(rep(5,5), rep(7,10), rep(0,3), rep(8,10), rep(0,4))=c(5, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0)

在上述条件下我该怎么做?

我们可以使用dplyr中的case_when并指定条件。

library(dplyr)

# Derive z from the `sl` column: 5 for sl 1, 7 for sl 2, 8 for sl 587.
# sl 585 and sl 651 get 0 only when every y recorded for that sl is 0.
# Each all-zero check is combined with its own `sl == ...` test so the 0 is
# applied to that sl's rows only; a bare scalar `all(...)` condition would be
# recycled to every row not matched by an earlier condition.
# Rows that match none of the conditions are left as NA (case_when default).
df %>%
  mutate(
    z = case_when(
      sl == 1 ~ 5,
      sl == 2 ~ 7,
      sl == 587 ~ 8,
      sl == 585 & all(y[sl == 585] == 0) ~ 0,
      sl == 651 & all(y[sl == 651] == 0) ~ 0
    )
  )

返回:

#            y  x  sl z
#1   199.92989  1   1 5
#2    27.73883  2   1 5
#3   144.00000  3   1 5
#4    72.00000  4   1 5
#5     0.00000  5   1 5
#6   392.60636  1   2 7
#7   749.52499  2   2 7
#8  3120.00000  3   2 7
#9  1600.00000  4   2 7
#10 1000.00000  5   2 7
#11 5840.00000  6   2 7
#12 3960.00000  7   2 7
#13 4700.00000  8   2 7
#14 1660.00000  9   2 7
#15 5620.00000 10   2 7
#16    0.00000  1 585 0
#17    0.00000  2 585 0
#18    0.00000  3 585 0
#19 3062.32962  1 587 8
#20 2048.97458  2 587 8
#21 1280.00000  3 587 8
#22 1440.00000  4 587 8
#23 2960.00000  5 587 8
#24  460.00000  6 587 8
#25  530.00000  7 587 8
#26 5190.00000  8 587 8
#27 3200.00000  9 587 8
#28 4620.00000 10 587 8
#29    0.00000  1 651 0
#30    0.00000  2 651 0
#31    0.00000  3 651 0
#32    0.00000  4 651 0

如果我们事先不知道哪个 sl 的 y 会全部为 0,或者有多个这样的 sl,我们可以使用:

# Step 1: map the known `sl` values to z; every other row is NA for now.
# Step 2: within each `sl` group, overwrite z with 0 when all y in that group
# are 0. This needs no hard-coded list of all-zero `sl` values.
df %>%
  mutate(
    z = case_when(
      sl == 1 ~ 5,
      sl == 2 ~ 7,
      sl == 587 ~ 8
    )
  ) %>%
  group_by(sl) %>%
  # all(y == 0) is a single logical per group; as the index of replace() it
  # selects either every row of the group or none of them.
  mutate(z = replace(z, all(y == 0), 0)) %>%
  # Drop the grouping so later verbs do not silently operate per `sl`.
  ungroup()

数据

# Example data: 32 rows in five consecutive `sl` groups (1, 2, 585, 587, 651)
# of sizes 5, 10, 3, 10 and 4. `x` counts 1..n within each group.
# Row names are the character strings "1".."32".
df <- structure(
  list(
    y = c(
      199.92989, 27.73883, 144, 72, 0,
      392.60636, 749.52499, 3120, 1600, 1000, 5840, 3960, 4700, 1660, 5620,
      0, 0, 0,
      3062.32962, 2048.97458, 1280, 1440, 2960, 460, 530, 5190, 3200, 4620,
      0, 0, 0, 0
    ),
    x = sequence(c(5L, 10L, 3L, 10L, 4L)),
    sl = rep(
      c(1L, 2L, 585L, 587L, 651L),
      times = c(5L, 10L, 3L, 10L, 4L)
    )
  ),
  class = "data.frame",
  row.names = as.character(1:32)
)

相关内容

  • 没有找到相关文章

最新更新