我想知道如何把下面的代码改写成循环,使其更函数化,并能推广到其他数据(当前数据只是一个玩具示例):
FIRST
,我使用sample()
从data
中选择study
,然后用filter()
删除该study中满足outcome == outcome_to_remove
的行。这样就得到了datat
这一输出。
SECOND
,我使用sample()
从datat
中选择study
,然后用filter()
删除该study中满足outcome == outcome_to_remove2
的行。这样就得到了最终输出。
我们可以循环这个过程吗?
编辑:我想添加到我的代码的唯一条件是,循环之前和之后的length(unique(data$study))
应该始终保持不变。也就是说,不允许同一个study
既在FIRST
步骤中丢失了它的outcome == "A"
,又在SECOND
步骤中丢失了outcome == "B"
,从而导致整个study被删除。
# Toy data: every combination of 5 studies, 2 groups and 2 outcomes.
# The outer parentheses make the assignment visible, so the data is printed.
(data <- expand_grid(study = 1:5, group = 1:2, outcome = c("A", "B")))
n <- 1

# FIRST pass ----
# Draw `n` studies at random and drop their rows for outcome "A".
outcome_to_remove <- c("A")
studies_to_remove <- sample(unique(data$study), size = n)
# Keep a row unless it belongs to a drawn study AND has the targeted outcome.
datat <- filter(
  data,
  !(study %in% studies_to_remove) | !(outcome %in% outcome_to_remove)
)

# SECOND pass ----
# Same step on `datat` for outcome "B". The draw is made from every study
# still present in `datat`, so it can pick the study from the first pass
# again. The result is not assigned, only shown.
outcome_to_remove2 <- c("B")
studies_to_remove2 <- sample(unique(datat$study), size = n)
filter(
  datat,
  !(study %in% studies_to_remove2) | !(outcome %in% outcome_to_remove2)
)
可以使用for
循环来实现:
# For every outcome level, randomly pick `n` studies and drop that outcome
# from them. A study is taken out of the candidate pool once it has been
# picked, so each study loses at most one outcome level; as long as every
# study has more than one outcome level, no study disappears entirely.
data <- tidyr::expand_grid(study = 1:5, group = 1:2, outcome = c("A", "B"))
n <- 1
set.seed(9873)  # fixed seed so the random draws are reproducible

outcomes <- unique(data$outcome)
# Pool of studies that have not lost an outcome yet.
unique_study <- unique(data$study)

# Every outcome level needs `n` studies that were not used before; fail early
# with a clear message instead of an obscure sampling error mid-loop.
if (n * length(outcomes) > length(unique_study)) {
  stop(
    "Need at least n * (number of outcome levels) distinct studies.",
    call. = FALSE
  )
}

for (outcome_to_remove in outcomes) {
  # Draw positions and index the pool, rather than sample(pool, n): when the
  # pool shrinks to a single number k, sample(k, n) would draw from 1:k.
  picked <- sample.int(length(unique_study), size = n)
  studies_to_remove <- unique_study[picked]
  unique_study <- setdiff(unique_study, studies_to_remove)
  cat(
    "\nDropping study ", studies_to_remove,
    "and outcome ", outcome_to_remove
  )
  # Namespaced call: an unqualified `filter` resolves to stats::filter()
  # unless dplyr has been attached.
  data <- dplyr::filter(
    data,
    !(study %in% studies_to_remove & outcome %in% outcome_to_remove)
  )
}
#Dropping study 3 and outcome A
#Dropping study 1 and outcome B
data
# study group outcome
# <int> <int> <chr>
# 1 1 1 A
# 2 1 2 A
# 3 2 1 A
# 4 2 1 B
# 5 2 2 A
# 6 2 2 B
# 7 3 1 B
# 8 3 2 B
# 9 4 1 A
#10 4 1 B
#11 4 2 A
#12 4 2 B
#13 5 1 A
#14 5 1 B
#15 5 2 A
#16 5 2 B