R 语言 - 如何按 3 个分组变量更改某一列的值



我有这个数据

SAMPN MODE1 HHVEH PERNO PLANO loop
30    23     2     3     1    25    2
31    23     1     3     2     2    2
32    23     2     3     2     5    2
33    24     1     1     1     2    2
34    24     1     1     1     3    2
35    24     1     1     1     4    3
36    24     1     1     1     5    3
37    24     2     1     2     2    2
38    24     3     1     2     4    2
39    25     2     2     1     2    2
40    25     2     2     1     4    2
41    25     2     2     2     2    2
42    25     2     2     2     3    2
43    27     4     1     1     2    2
44    29     1     0     1     2    2
45    29     1     0     1     3    2

我想做两件事:

1- SAMPN 是家庭编号,PERNO 是每个家庭中每个人的编号。PLANO 是每个人的出行(trip)编号,loop 是每个人的行程链(tour)编号(每个行程链包含若干次出行)。MODE1 是每次出行的方式。

如果某一行的 MODE1==2,我希望具有相同 SAMPN、PERNO 和 loop 的所有行的 MODE1 也变为 2。

# Reproducible sample of the data shown above: the dput() call on rows 30:45,
# columns 1:6 of `r`, followed by the structure() expression it printed.
# The result is a data.frame with row names 30:45. SAMPN is a character
# column whose values carry leading spaces; MODE1, HHVEH, PERNO, PLANO and
# loop are all factors.
# NOTE(review): the snippets further down refer to this data as `df1`;
# presumably the structure() value should be assigned to that name -- confirm.
dput(r[30:45,1:6])
structure(list(SAMPN = c("   23", "   23", "   23", "   24", 
"   24", "   24", "   24", "   24", "   24", "   25", "   25", 
"   25", "   25", "   27", "   29", "   29"), MODE1 = structure(c(2L, 
1L, 2L, 1L, 1L, 1L, 1L, 2L, 3L, 2L, 2L, 2L, 2L, 4L, 1L, 1L), .Label = c("1", 
"2", "3", "4"), class = "factor"), HHVEH = structure(c(4L, 4L, 
4L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 1L, 1L), .Label = c("0", 
"1", "2", "3", "4", "5", "6", "7", "8"), class = "factor"), PERNO = structure(c(1L, 
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L), .Label = c("1", 
"2", "3", "4", "5", "6", "7"), class = "factor"), PLANO = structure(c(20L, 
1L, 4L, 1L, 2L, 3L, 4L, 1L, 3L, 1L, 3L, 1L, 2L, 1L, 1L, 2L), .Label = c(" 2", 
" 3", " 4", " 5", " 6", " 7", " 8", " 9", "10", "11", "12", "13", 
"14", "15", "16", "17", "18", "20", "23", "25", "29"), class = "factor"), 
loop = structure(c(2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1", "2", "3", "4", "5", 
"6", "7", "8"), class = "factor")), row.names = 30:45, class = "data.frame")

输出:

SAMPN MODE1 HHVEH PERNO PLANO loop
30    23     2     3     1    25    2
31    23     2     3     2     2    2
32    23     2     3     2     5    2
33    24     1     1     1     2    2
34    24     1     1     1     3    2
35    24     1     1     1     4    3
36    24     1     1     1     5    3
37    24     2     1     2     2    2
38    24     2     1     2     4    2
39    25     2     2     1     2    2
40    25     2     2     1     4    2
41    25     2     2     2     2    2
42    25     2     2     2     3    2
43    27     4     1     1     2    2
44    29     1     0     1     2    2
45    29     1     0     1     3    2

当 SAMPN 为 23、PERNO=2 且 loop=2(第二行)时,由于同组的第三行 MODE1 为 2,第二行的 1 应改为 2。第 38 行同理。

我们可以使用 case_when。按 "SAMPN"、"PERNO"、"loop" 分组,用 any 检查组内的 "MODE1" 是否存在 2:若存在则返回 2,否则返回原来的 "MODE1"。

library(dplyr)

# Within each (SAMPN, PERNO, loop) group, set every MODE1 to 2 when at least
# one row of the group has MODE1 == 2; otherwise keep the row's own value.
# MODE1 comes back as an integer column. The result is still grouped; append
# ungroup() if later steps should not operate per group.
df1 %>%
  group_by(SAMPN, PERNO, loop) %>%
  mutate(
    MODE1 = case_when(
      any(MODE1 == 2) ~ 2L,
      # MODE1 is a factor: as.integer() alone would return the internal level
      # codes, which only equal the labels when the levels are "1", "2", ...
      # in order. Converting through as.character() uses the labels.
      TRUE ~ as.integer(as.character(MODE1))
    )
  )
# A tibble: 16 x 6
# Groups:   SAMPN, PERNO, loop [9]
#   SAMPN   MODE1 HHVEH PERNO PLANO loop 
#   <chr>   <int> <fct> <fct> <fct> <fct>
# 1 "   23"     2 3     1     25    2    
# 2 "   23"     2 3     2     " 2"  2    
# 3 "   23"     2 3     2     " 5"  2    
# 4 "   24"     1 1     1     " 2"  2    
# 5 "   24"     1 1     1     " 3"  2    
# 6 "   24"     1 1     1     " 4"  3    
# 7 "   24"     1 1     1     " 5"  3    
# 8 "   24"     2 1     2     " 2"  2    
# 9 "   24"     2 1     2     " 4"  2    
#10 "   25"     2 2     1     " 2"  2    
#11 "   25"     2 2     1     " 4"  2    
#12 "   25"     2 2     2     " 2"  2    
#13 "   25"     2 2     2     " 3"  2    
#14 "   27"     4 1     1     " 2"  2    
#15 "   29"     1 0     1     " 2"  2    
#16 "   29"     1 0     1     " 3"  2    

或使用data.table

library(data.table)

# setDT() converts df1 to a data.table by reference. For each
# (SAMPN, PERNO, loop) group, .I[<TRUE>] yields all of the group's row numbers
# and .I[<FALSE>] yields none, so i1 collects the rows of every group that
# contains at least one MODE1 == 2.
# na.rm = TRUE keeps any() from returning NA (which would turn .I[NA] into NA
# row numbers) for groups that have missing MODE1 values and no 2.
i1 <- setDT(df1)[, .I[any(MODE1 == 2, na.rm = TRUE)],
                 by = .(SAMPN, PERNO, loop)]$V1

# MODE1 is a factor: assigning the integer 2L would be taken as "second
# level", which is the label "2" only when the levels happen to be ordered
# "1", "2", ... Assigning the label itself is independent of level order.
# The trailing [] prints the updated table.
df1[i1, MODE1 := "2"][]

最新更新