我有log_event
表,它有如下表:
user_id|event_date_time| event
------------------------------
7494212|1535308430 | Opened app
7494212|1535308433 | Closed app
1475185|1535308444 | Registered
6946725|1535308475 | Opened app
6946725|1535308476 | Made a purchase
6946725|1535308477 | closed app
Create table log_event (
user_id int,
event_date_time bigint,
event varchar(70)
);
INSERT INTO log_event Values( 7494212,1535308430,'Opened app');
INSERT INTO log_event Values( 7494212,1535308433,'Closed app');
INSERT INTO log_event Values( 1475185,1535308444,'Registered');
INSERT INTO log_event Values( 6946725,1535308475,'Opened app');
INSERT INTO log_event Values( 6946725,1535308476,'Made a purchase');
INSERT INTO log_event Values( 6946725,1535308477,'closed app');
db<gt;小提琴
我想根据Event's
列打开和关闭来查找user
花费的average time
。在一天或一个月内,单个用户可以有多个打开和关闭条目。既然我不确定how to find next close for every opened app event and then group by for single user
,有没有办法?提前感谢
我已经在stackoverflow中寻找解决方案,但找不到最接近它的东西。
您可以使用LEAD()
窗口函数为每个具有event = 'Opened app'
的行获取具有event = 'Closed app'
的行,并使用它来查找2个时间戳的差异
然后您可以进行聚合以获得平均值:
SELECT user_id, AVG(time_dif) average_time_spent
FROM (
SELECT user_id, event,
LEAD(event_date_time) OVER (PARTITION BY user_id ORDER BY event_date_time) - event_date_time time_dif
FROM log_event
WHERE event IN ('Opened app', 'Closed app')
) t
WHERE event = 'Opened app'
GROUP BY user_id;
请参阅演示
您可以使用self-join
:
with sessions(id, c, d) as (
select l.user_id, l.event_date_time, l.event_date_time - max(l1.event_date_time)
from log_event l join log_event l1 on l1.user_id = l.user_id and l1.event_date_time < l.event_date_time and lower(l1.event) = 'opened app' where lower(l.event) = "closed app"
group by l.user_id, l.event_date_time
)
select id, avg(d) from sessions group by id;