在 R 中,能否根据少数几个特定波次中的条件,为满足条件的唯一 ID 在所有波次中创建处理(treatment)标识符?



我正在慢慢提高自己的 R 技能,目前在分析一份最多有 10 个波次的调查数据。某项福利后来被取消,只存在于 10 个波次中的第 1–3 波。我希望把在前 3 个波次中都领取了该福利的人作为处理组(treatment),并且在第 1–3 波之后的波次中也给这些人加上同样的标识符。

我已经可以创建一个按波次的虚拟变量,表示一个人在该波次(第 1–3 波)中是否领取了福利,但我还需要找到一种方法,据此创建处理组变量。

实际上,我希望对满足该虚拟变量条件的每个唯一 ID,在所有波次中都带有同一个虚拟标识符,即使是在福利被取消之后的波次。

简化的例子:

# Toy panel: two respondents (101 and 201), each observed in four waves.
personal_ID <- rep(c(101, 201), each = 4)
wave <- rep(paste0("wave", 1:4), times = 2)
# 1 = received the benefit in that wave, 0 = did not.
benefit_dummy_by_wave <- c(1, 1, 1, 0, 1, 1, 0, 0)
df <- data.frame(
  personal_ID = personal_ID,
  benefit_dummy_by_wave = benefit_dummy_by_wave,
  wave = wave
)

personal_ID benefit_dummy_by_wave  wave
1         101                     1 wave1
2         101                     1 wave2
3         101                     1 wave3
4         101                     0 wave4
5         201                     1 wave1
6         201                     1 wave2
7         201                     0 wave3
8         201                     0 wave4

我想要这样的东西

personal_ID benefit_dummy_by_wave  wave benefit_1_through_3
1         101                     1 wave1                   1
2         101                     1 wave2                   1
3         101                     1 wave3                   1
4         101                     0 wave4                   1
5         201                     1 wave1                   0
6         201                     1 wave2                   0
7         201                     0 wave3                   0
8         201                     0 wave4                   0
如果有人有任何想法,我真的很感谢你的帮助。真的很高兴这个社区的存在!

不确定我是否理解对了你的问题。您可以使用dplyr:

library(dplyr)
# Sort rows by person and wave label so the running total below follows
# wave order within each person, then compute a per-person flag.
df %>%
  arrange(personal_ID, wave) %>%
  group_by(personal_ID) %>%
  mutate(
    # Counts the person's rows where the running benefit total equals 3 on
    # the "wave3" row; the count is repeated on every row of that person.
    benefit_1_through_3 = sum(
      cumsum(benefit_dummy_by_wave) == 3 & wave == "wave3"
    )
  ) %>%
  ungroup()

这返回

# A tibble: 8 x 4
personal_ID benefit_dummy_by_wave wave  benefit_1_through_3
<dbl>                 <dbl> <chr>               <int>
1         101                     1 wave1                   1
2         101                     1 wave2                   1
3         101                     1 wave3                   1
4         101                     0 wave4                   1
5         201                     1 wave1                   0
6         201                     1 wave2                   0
7         201                     0 wave3                   0
8         201                     0 wave4                   0

library(tidyverse)
# Same toy panel as a tibble, with waves coded as numbers (doubles).
personal_ID <- rep(c(101, 201), each = 4)
wave <- rep(c(1, 2, 3, 4), times = 2)
# 1 = received the benefit in that wave, 0 = did not.
benefit_dummy_by_wave <- c(1, 1, 1, 0, 1, 1, 0, 0)
data <- tibble(wave, personal_ID, benefit_dummy_by_wave)
data
#> # A tibble: 8 x 3
#>    wave personal_ID benefit_dummy_by_wave
#>   <dbl>       <dbl>                 <dbl>
#> 1     1         101                     1
#> 2     2         101                     1
#> 3     3         101                     1
#> 4     4         101                     0
#> 5     1         201                     1
#> 6     2         201                     1
#> 7     3         201                     0
#> 8     4         201                     0
# Build a per-person lookup table: spread the per-wave dummy into one column
# per wave (wave1, wave2, ...), flag people whose dummy is 1 in each of
# waves 1-3, and keep only the ID and the flag.
benefit_1_through_3_data <- data %>%
  group_by(personal_ID) %>%
  pivot_wider(
    names_from = wave,
    values_from = benefit_dummy_by_wave,
    names_prefix = "wave"
  ) %>%
  mutate(
    benefit_1_through_3 = as.integer(wave1 == 1 & wave2 == 1 & wave3 == 1)
  ) %>%
  select(personal_ID, benefit_1_through_3)
benefit_1_through_3_data
#> # A tibble: 2 x 2
#> # Groups:   personal_ID [2]
#>   personal_ID benefit_1_through_3
#>         <dbl>               <int>
#> 1         101                   1
#> 2         201                   0
# Attach the per-person flag to every wave row. No `by` is given, so the
# join uses the column the two tables share (personal_ID).
left_join(data, benefit_1_through_3_data)
#> Joining, by = "personal_ID"
#> # A tibble: 8 x 4
#>    wave personal_ID benefit_dummy_by_wave benefit_1_through_3
#>   <dbl>       <dbl>                 <dbl>               <int>
#> 1     1         101                     1                   1
#> 2     2         101                     1                   1
#> 3     3         101                     1                   1
#> 4     4         101                     0                   1
#> 5     1         201                     1                   0
#> 6     2         201                     1                   0
#> 7     3         201                     0                   0
#> 8     4         201                     0                   0

由reprex包(v2.0.1)在2018-10-18上创建

这是另一种方法

library(dplyr)
# Labels of the waves in which the benefit must have been received.
check_waves <- paste0("wave", 1:3)
df %>%
  group_by(personal_ID) %>%
  mutate(
    # 1 only when the person has a row for each required wave and the dummy
    # equals 1 in every one of them; the value is repeated on all rows.
    benefit_1_through_3 = as.integer(all(
      check_waves %in% wave &
        benefit_dummy_by_wave[match(check_waves, wave)] == 1
    ))
  ) %>%
  ungroup()
#  personal_ID benefit_dummy_by_wave wave  benefit_1_through_3
#        <dbl>                 <dbl> <chr>               <int>
#1         101                     1 wave1                   1
#2         101                     1 wave2                   1
#3         101                     1 wave3                   1
#4         101                     0 wave4                   1
#5         201                     1 wave1                   0
#6         201                     1 wave2                   0
#7         201                     0 wave3                   0
#8         201                     0 wave4                   0

我们也可以这样做

library(dplyr)
library(stringr)
df %>%
  group_by(personal_ID) %>%
  mutate(
    # Keep the wave labels of rows where the dummy is non-zero, then check
    # that "wave1", "wave2" and "wave3" are all among them (1 if so, else 0).
    benefit_1_through_3 = as.integer(
      all(str_c('wave', 1:3) %in% wave[as.logical(benefit_dummy_by_wave)])
    )
  ) %>%
  ungroup()
# A tibble: 8 × 4
personal_ID benefit_dummy_by_wave wave  benefit_1_through_3
<dbl>                 <dbl> <chr>               <int>
1         101                     1 wave1                   1
2         101                     1 wave2                   1
3         101                     1 wave3                   1
4         101                     0 wave4                   1
5         201                     1 wave1                   0
6         201                     1 wave2                   0
7         201                     0 wave3                   0
8         201                     0 wave4                   0

相关内容

最新更新