查询,可在一周内识别回访的活跃用户



编写一个查询来识别返回的活动用户。返回的活动用户是指在首次购买后7天内进行第二次购买的用户

id  u_id item   created_at  revenue
1   109 milk    3/3/2020 0:00   123
2   139 biscuit 3/18/2020 0:00  421
3   120 milk    3/18/2020 0:00  176
4   108 banana  3/18/2020 0:00  862
5   130 milk    3/28/2020 0:00  333
6   103 bread   3/29/2020 0:00  862
7   122 banana  3/7/2020 0:00   952
8   125 bread   3/13/2020 0:00  317
9   139 bread   3/23/2020 0:00  929
10  141 banana  3/17/2020 0:00  812
11  116 bread   3/31/2020 0:00  226
12  128 bread   3/4/2020 0:00   112
13  146 biscuit 3/4/2020 0:00   362
14  119 banana  3/28/2020 0:00  127

您可以使用窗口函数来获取最早的创建日期,然后在一周内查找其他记录:

select distinct u_id
from (select t.*,
min(created_at) over (partition by u_id) as min_created_at
from t
) t
where created_at > min_created_at and
created_at < min_created_at + interval 7 day;

如果您只检查客户第一次购买,并在接下来的7天内进行第二次访问,则在第二次拜访后将放弃第三次购买。只需在7天内全局检查两次购买,如下所示:

create table t(id integer, u_id integer, item varchar(100),created_at date,revenue float);

insert into t 
values (1,   109, "milk" ,  STR_TO_DATE("3/3/2020", '%m/%d/%Y') ,   123)
, (2,139,"biscuit",STR_TO_DATE("3/18/2020", '%m/%d/%Y'),421)
, (3,120,"milk",STR_TO_DATE("3/18/2020", '%m/%d/%Y'),176)
, (4,108,"banana",STR_TO_DATE("3/18/2020", '%m/%d/%Y'),862)
, (5,130,"milk",STR_TO_DATE("3/28/2020", '%m/%d/%Y'),333)
, (6,103,"bread",STR_TO_DATE("3/29/2020", '%m/%d/%Y'),862)
, (7,122,"banana",STR_TO_DATE("3/7/2020", '%m/%d/%Y'),952)
, (8,125,"bread",STR_TO_DATE("3/13/2020", '%m/%d/%Y'),317)
, (9,139,"bread",STR_TO_DATE("3/23/2020", '%m/%d/%Y'),929)
, (10,141,"banana",STR_TO_DATE("3/17/2020", '%m/%d/%Y'),812)
, (11,116,"bread",STR_TO_DATE("3/31/2020", '%m/%d/%Y'),226)
, (12,128,"bread",STR_TO_DATE("3/4/2020", '%m/%d/%Y'),112)
, (13,146,"biscuit",STR_TO_DATE("3/4/2020", '%m/%d/%Y'),362)
, (14,119,"banana",STR_TO_DATE("3/28/2020", '%m/%d/%Y'),127);
select * from t as t1 where exists (select * from t as t2 where t1.u_id = t2.u_id and t1.created_at - t2.created_at > 0 and  t1.created_at - t2.created_at <= 7  );

上面的例子使用窗口函数解决了问题。由于给定的表为日期时间格式,因此需要在where条件下使用日期转换。这是一个棘手的部分。

create table public.purchase_history(
id int,
userid int,
item varchar,
created_at datetime,
revenue int);
Insert into public.purchase_history values
(1,   109, 'milk' ,'03/03/2020' ,   123)
, (2,139,'biscuit','03/18/2020',421)
, (3,120,'milk','03/18/2020',176)
, (4,108,'banana','03/18/2020',862)
, (5,130,'milk','03/28/2020',333)
, (6,103,'bread','03/29/2020',862)
, (7,122,'banana','03/07/2020',952)
, (8,125,'bread','03/13/2020',317)
, (9,139,'bread','03/23/2020',929)
, (10,141,'banana','03/17/2020',812)
, (11,116,'bread','03/31/2020',226)
, (12,128,'bread','03/04/2020',112)
, (13,146,'biscuit','03/04/2020',362)
, (14,119,'banana','03/28/2020',127)
, (15,120,'milk','03/28/2020',186);
select distinct userid
FROM
(select 
id,
userid,
created_at,
coalesce(lead(created_at)over(partition by userid order by created_at),'9999-12-31') as next_purchase
from 
public.purchase_history order by userid)repeated
where (repeated.next_purchase::date-repeated.created_at::date)<=7;

假设表名为amazon_transactions

SELECT distinct a.user_id FROM amazon_transactions a 
JOIN amazon_transactions b
ON a.user_id = b.user_id 
WHERE a.id <>b.id
AND a.created_at <= b.created_at
AND b.created_at <= a.created_at+7
ORDER BY a.user_id

最新更新