我正在慢慢提高自己的 R 技能。我正在分析一份最多包含 10 波(wave)的调查数据。某项福利后来被取消,只存在于 10 波中的第 1-3 波。我希望把在前 3 波中都领取了该福利的人作为处理组(treatment group),并且在第 1-3 波之后的各波中,这些人仍然带有同样的标识符。
我可以创建一个虚拟变量,表示某人在某一波中是否领取了该福利,但我还需要找到一种方法来构造处理组变量。
也就是说,我想按个人 ID 生成一个虚拟标识符:它在该人的所有波中取值相同,即使在福利被取消之后的各波中也保持不变。
简化的例子:
# Toy panel: two respondents (101 and 201), each observed in waves 1-4.
# The three vectors are kept as globals because data.frame() takes its
# column names from them.
wave <- rep(paste0("wave", 1:4), times = 2)
personal_ID <- rep(c(101, 201), each = 4)
# 1 = received the benefit in that wave, 0 = did not.
benefit_dummy_by_wave <- c(1, 1, 1, 0, 1, 1, 0, 0)
df <- data.frame(personal_ID, benefit_dummy_by_wave, wave)
personal_ID benefit_dummy_by_wave wave
1 101 1 wave1
2 101 1 wave2
3 101 1 wave3
4 101 0 wave4
5 201 1 wave1
6 201 1 wave2
7 201 0 wave3
8 201 0 wave4
我想要这样的东西
personal_ID benefit_dummy_by_wave wave benefit_1_through_3
1 101 1 wave1 1
2 101 1 wave2 1
3 101 1 wave3 1
4 101 0 wave4 1
5 201 1 wave1 0
6 201 1 wave2 0
7 201 0 wave3 0
8 201 0 wave4 0
如果有人有任何想法,我将非常感谢你的帮助。真的很高兴有这个社区!
我不确定是否正确理解了你的问题。你可以使用 dplyr:
library(dplyr)
# Flag every row of a person who received the benefit in each of waves 1-3.
#
# The flag is computed per personal_ID and repeated on all of that person's
# rows, including waves after the benefit was cut.
#
# The test is a membership check rather than a running count, so it does not
# depend on row order. Sorting character wave labels places "wave10" before
# "wave2", which would distort a cumulative sum once more than nine waves
# are present. Duplicate rows or rows with an NA dummy cannot produce a
# false positive either (`%in%` never returns NA).
df %>%
  group_by(personal_ID) %>%
  # Kept so the returned rows are ordered as before; the flag itself no
  # longer relies on this ordering.
  arrange(personal_ID, wave) %>%
  mutate(
    benefit_1_through_3 = as.integer(
      all(
        c("wave1", "wave2", "wave3") %in% wave[benefit_dummy_by_wave %in% 1]
      )
    )
  ) %>%
  ungroup()
这返回
# A tibble: 8 x 4
personal_ID benefit_dummy_by_wave wave benefit_1_through_3
<dbl> <dbl> <chr> <int>
1 101 1 wave1 1
2 101 1 wave2 1
3 101 1 wave3 1
4 101 0 wave4 1
5 201 1 wave1 0
6 201 1 wave2 0
7 201 0 wave3 0
8 201 0 wave4 0
library(tidyverse)
# Same toy panel as a tibble, with waves stored as numbers instead of labels.
wave <- rep(c(1, 2, 3, 4), times = 2)
personal_ID <- rep(c(101, 201), each = 4)
# 1 = received the benefit in that wave, 0 = did not.
benefit_dummy_by_wave <- c(1, 1, 1, 0, 1, 1, 0, 0)
data <- tibble(wave, personal_ID, benefit_dummy_by_wave)
# Print the tibble for the reprex output.
data
#> # A tibble: 8 x 3
#> wave personal_ID benefit_dummy_by_wave
#> <dbl> <dbl> <dbl>
#> 1 1 101 1
#> 2 2 101 1
#> 3 3 101 1
#> 4 4 101 0
#> 5 1 201 1
#> 6 2 201 1
#> 7 3 201 0
#> 8 4 201 0
# Build a one-row-per-person lookup table holding the treatment flag.
#
# pivot_wider() spreads the per-wave dummy into columns wave1, wave2, ...
# personal_ID is the only remaining column, so it identifies the rows without
# any group_by(); the result is therefore a plain, ungrouped tibble.
#
# `%in% 1` is used instead of `== 1` so that a person who is missing one of
# waves 1-3 (NA after pivoting) gets 0 rather than NA, matching the other
# approaches shown for this problem.
benefit_1_through_3_data <-
  data %>%
  pivot_wider(
    names_from = wave,
    values_from = benefit_dummy_by_wave,
    names_prefix = "wave"
  ) %>%
  mutate(benefit_1_through_3 = as.integer(
    wave1 %in% 1 & wave2 %in% 1 & wave3 %in% 1
  )) %>%
  select(personal_ID, benefit_1_through_3)
benefit_1_through_3_data
#> # A tibble: 2 x 2
#> personal_ID benefit_1_through_3
#> <dbl> <int>
#> 1 101 1
#> 2 201 0
# Attach the per-person flag to every wave row (natural join on the shared
# column, which is why dplyr prints a "Joining, by" message).
left_join(data, benefit_1_through_3_data)
#> Joining, by = "personal_ID"
#> # A tibble: 8 x 4
#> wave personal_ID benefit_dummy_by_wave benefit_1_through_3
#> <dbl> <dbl> <dbl> <int>
#> 1 1 101 1 1
#> 2 2 101 1 1
#> 3 3 101 1 1
#> 4 4 101 0 1
#> 5 1 201 1 0
#> 6 2 201 1 0
#> 7 3 201 0 0
#> 8 4 201 0 0
由reprex包(v2.0.1)在2018-10-18上创建
这是另一种方法
library(dplyr)
# Waves in which the benefit must have been received.
check_waves <- paste0("wave", 1:3)
df %>%
  group_by(personal_ID) %>%
  mutate(
    # For each required wave: the person has a row for it (match() found a
    # position) and the dummy on that row equals 1. all() collapses the
    # result to a single per-person value, recycled over the person's rows.
    benefit_1_through_3 = as.integer(all(
      !is.na(match(check_waves, wave)) &
        benefit_dummy_by_wave[match(check_waves, wave)] == 1
    ))
  ) %>%
  ungroup()
# personal_ID benefit_dummy_by_wave wave benefit_1_through_3
# <dbl> <dbl> <chr> <int>
#1 101 1 wave1 1
#2 101 1 wave2 1
#3 101 1 wave3 1
#4 101 0 wave4 1
#5 201 1 wave1 0
#6 201 1 wave2 0
#7 201 0 wave3 0
#8 201 0 wave4 0
我们也可以这样做
library(dplyr)
library(stringr)
# Per person: keep only the waves in which the dummy is truthy, then check
# that wave1, wave2 and wave3 are all among them.
df %>%
  group_by(personal_ID) %>%
  mutate(
    benefit_1_through_3 = as.integer(
      all(
        str_c("wave", 1:3) %in% wave[as.logical(benefit_dummy_by_wave)]
      )
    )
  ) %>%
  ungroup()
# A tibble: 8 × 4
personal_ID benefit_dummy_by_wave wave benefit_1_through_3
<dbl> <dbl> <chr> <int>
1 101 1 wave1 1
2 101 1 wave2 1
3 101 1 wave3 1
4 101 0 wave4 1
5 201 1 wave1 0
6 201 1 wave2 0
7 201 0 wave3 0
8 201 0 wave4 0