编写一个查询来识别返回的活动用户。返回的活动用户是指在首次购买后7天内进行第二次购买的用户
id u_id item created_at revenue
1 109 milk 3/3/2020 0:00 123
2 139 biscuit 3/18/2020 0:00 421
3 120 milk 3/18/2020 0:00 176
4 108 banana 3/18/2020 0:00 862
5 130 milk 3/28/2020 0:00 333
6 103 bread 3/29/2020 0:00 862
7 122 banana 3/7/2020 0:00 952
8 125 bread 3/13/2020 0:00 317
9 139 bread 3/23/2020 0:00 929
10 141 banana 3/17/2020 0:00 812
11 116 bread 3/31/2020 0:00 226
12 128 bread 3/4/2020 0:00 112
13 146 biscuit 3/4/2020 0:00 362
14 119 banana 3/28/2020 0:00 127
您可以使用窗口函数来获取最早的创建日期,然后在一周内查找其他记录:
select distinct u_id
from (select t.*,
min(created_at) over (partition by u_id) as min_created_at
from t
) t
where created_at > min_created_at and
created_at < min_created_at + interval 7 day;
如果您只检查客户第一次购买,并在接下来的7天内进行第二次访问,则在第二次拜访后将放弃第三次购买。只需在7天内全局检查两次购买,如下所示:
create table t(id integer, u_id integer, item varchar(100),created_at date,revenue float);
insert into t
values (1, 109, "milk" , STR_TO_DATE("3/3/2020", '%m/%d/%Y') , 123)
, (2,139,"biscuit",STR_TO_DATE("3/18/2020", '%m/%d/%Y'),421)
, (3,120,"milk",STR_TO_DATE("3/18/2020", '%m/%d/%Y'),176)
, (4,108,"banana",STR_TO_DATE("3/18/2020", '%m/%d/%Y'),862)
, (5,130,"milk",STR_TO_DATE("3/28/2020", '%m/%d/%Y'),333)
, (6,103,"bread",STR_TO_DATE("3/29/2020", '%m/%d/%Y'),862)
, (7,122,"banana",STR_TO_DATE("3/7/2020", '%m/%d/%Y'),952)
, (8,125,"bread",STR_TO_DATE("3/13/2020", '%m/%d/%Y'),317)
, (9,139,"bread",STR_TO_DATE("3/23/2020", '%m/%d/%Y'),929)
, (10,141,"banana",STR_TO_DATE("3/17/2020", '%m/%d/%Y'),812)
, (11,116,"bread",STR_TO_DATE("3/31/2020", '%m/%d/%Y'),226)
, (12,128,"bread",STR_TO_DATE("3/4/2020", '%m/%d/%Y'),112)
, (13,146,"biscuit",STR_TO_DATE("3/4/2020", '%m/%d/%Y'),362)
, (14,119,"banana",STR_TO_DATE("3/28/2020", '%m/%d/%Y'),127);
select * from t as t1 where exists (select * from t as t2 where t1.u_id = t2.u_id and t1.created_at - t2.created_at > 0 and t1.created_at - t2.created_at <= 7 );
上面的例子使用窗口函数解决了问题。由于给定的表为日期时间格式,因此需要在where条件下使用日期转换。这是一个棘手的部分。
create table public.purchase_history(
id int,
userid int,
item varchar,
created_at datetime,
revenue int);
Insert into public.purchase_history values
(1, 109, 'milk' ,'03/03/2020' , 123)
, (2,139,'biscuit','03/18/2020',421)
, (3,120,'milk','03/18/2020',176)
, (4,108,'banana','03/18/2020',862)
, (5,130,'milk','03/28/2020',333)
, (6,103,'bread','03/29/2020',862)
, (7,122,'banana','03/07/2020',952)
, (8,125,'bread','03/13/2020',317)
, (9,139,'bread','03/23/2020',929)
, (10,141,'banana','03/17/2020',812)
, (11,116,'bread','03/31/2020',226)
, (12,128,'bread','03/04/2020',112)
, (13,146,'biscuit','03/04/2020',362)
, (14,119,'banana','03/28/2020',127)
, (15,120,'milk','03/28/2020',186);
select distinct userid
FROM
(select
id,
userid,
created_at,
coalesce(lead(created_at)over(partition by userid order by created_at),'9999-12-31') as next_purchase
from
public.purchase_history order by userid)repeated
where (repeated.next_purchase::date-repeated.created_at::date)<=7;
假设表名为amazon_transactions
SELECT distinct a.user_id FROM amazon_transactions a
JOIN amazon_transactions b
ON a.user_id = b.user_id
WHERE a.id <>b.id
AND a.created_at <= b.created_at
AND b.created_at <= a.created_at+7
ORDER BY a.user_id