标记R中先前出现的X



我有一个包含许多不同社交媒体创建者(creator_id)的数据集。他们发布了很多帖子(posting_count),如果 ad = 1,该帖子就被归类为广告。现在我想把每个 ad = 1 之前的 3 条帖子标记为 1。基本上,"goal_variable" 这一列就是我想要得到的结果。如果有不用循环的解决方案就太好了!

# Example data: two creators, their running post counter, an ad flag, and the
# expected result (1 for each of the three posts that precede an ad).
creator_id <- c(
  "aaa", "aaa", "aaa", "aaa", "aaa", "aaa", "aaa", "aaa",
  "bbb", "bbb", "bbb", "bbb", "bbb", "bbb", "bbb", "bbb", "bbb"
)
posting_count <- c(
  143, 144, 145, 146, 147, 148, 149, 150,
  90, 91, 92, 93, 94, 95, 96, 97, 98
)
ad <- c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1)
goal_variable <- c(0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0)

# data.frame() keeps each column's own type. cbind() would coerce everything
# into a single character matrix, which group_by()/mutate() cannot work with.
df <- data.frame(
  creator_id, posting_count, ad, goal_variable,
  stringsAsFactors = FALSE
)

以下是使用 map 的编程方法。基本上,对于每一行,检查当前行是否位于同一创建者的某个 ad == 1 行之前的 1 到 3 个位置之内。

library(purrr)
library(dplyr)

# Within each creator, flag a row when some ad row sits 1 to 3 positions
# after it. The result stays grouped by creator_id.
df %>%
  group_by(creator_id) %>%
  mutate(
    goal_variable = map_int(
      seq_along(ad),
      function(i) any((which(ad == 1) - i) %in% 1:3)
    )
  )

输出

# A tibble: 17 × 4
# Groups:   creator_id [2]
creator_id posting_count    ad goal_variable
<chr>              <dbl> <dbl>         <int>
1 aaa                  143     0             0
2 aaa                  144     0             0
3 aaa                  145     0             0
4 aaa                  146     0             1
5 aaa                  147     0             1
6 aaa                  148     0             1
7 aaa                  149     1             0
8 aaa                  150     0             0
9 bbb                   90     0             0
10 bbb                   91     0             0
11 bbb                   92     0             0
12 bbb                   93     0             1
13 bbb                   94     0             1
14 bbb                   95     0             1
15 bbb                   96     1             1
16 bbb                   97     0             1
17 bbb                   98     1             0

首先,一种更干净的生成df的方法,不需要中间变量。

我们可以将 ifelse 与多个 |(或)条件一起使用。在这里,每个 lead 的参数含义如下:lead([variable], [n to look ahead], [default value (0)])

# Build the example data in a single call, without intermediate vectors.
df <- data.frame(
  creator_id = rep(c("aaa", "bbb"), times = c(8, 9)),
  posting_count = as.numeric(c(143:150, 90:98)),
  ad = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1),
  goal_variable = c(0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0)
)

library(dplyr)

# A post is flagged when any of the next three posts by the same creator is
# an ad. default = 0 pads the look-ahead past the end of each group.
df %>%
  group_by(creator_id) %>%
  mutate(
    new_goal = as.numeric(
      lead(ad, n = 1, default = 0) == 1 |
        lead(ad, n = 2, default = 0) == 1 |
        lead(ad, n = 3, default = 0) == 1
    )
  )

带有slider的选项

library(slider)
library(dplyr)

# For each post, slide_int() reports whether the window made of the current
# post and the two after it contains an ad. Shifting that result one row
# earlier with lead() turns it into "one of the next three posts is an ad".
# default = 0 fills the last row of each creator, which has no following post.
df %>%
  group_by(creator_id) %>%
  mutate(
    goal_variable2 = lead(
      # The unary + converts the window test result to a number.
      +(slide_int(ad, function(x) 1 %in% x, .after = 2)),
      default = 0
    )
  ) %>%
  ungroup()

输出

# A tibble: 17 × 5
creator_id posting_count    ad goal_variable goal_variable2
<chr>              <dbl> <dbl>         <dbl>          <dbl>
1 aaa                  143     0             0              0
2 aaa                  144     0             0              0
3 aaa                  145     0             0              0
4 aaa                  146     0             1              1
5 aaa                  147     0             1              1
6 aaa                  148     0             1              1
7 aaa                  149     1             0              0
8 aaa                  150     0             0              0
9 bbb                   90     0             0              0
10 bbb                   91     0             0              0
11 bbb                   92     0             0              0
12 bbb                   93     0             1              1
13 bbb                   94     0             1              1
14 bbb                   95     0             1              1
15 bbb                   96     1             1              1
16 bbb                   97     0             1              1
17 bbb                   98     1             0              0

相关内容

  • 没有找到相关文章

最新更新