PostgreSQL将几个周期合并为一个



我正在尝试组合范围。

WITH a AS (
select '2017-09-16 07:12:57' as begat,'2017-09-16 11:30:22' as endat
union 
select '2017-09-18 17:05:21' ,'2017-09-19 13:18:01'
union 
select '2017-09-19 15:34:40' ,'2017-09-22 13:29:37'
union 
select '2017-09-22 12:24:16' ,'2017-09-22 13:18:29' 
union 
select '2017-09-28 09:48:54' ,'2017-09-28 13:39:13'
union 
select '2017-09-20 13:52:43' ,'2017-09-20 14:14:43' 
), b AS (
SELECT *, lag(endat) OVER (ORDER BY begat) < begat OR NULL AS step
FROM   a
)
, c AS (
SELECT *, count(step) OVER (ORDER BY begat) AS grp
FROM   b
)
SELECT min(begat), coalesce( max(endat), 'infinity' ) AS range
FROM   c
GROUP  BY grp
ORDER  BY 1 

结果

1 "2017-09-16 07:12:57";"2017-09-16 11:30:22"
2 "2017-09-18 17:05:21";"2017-09-19 13:18:01"
3 "2017-09-19 15:34:40";"2017-09-22 13:29:37"
4 "2017-09-22 12:24:16";"2017-09-22 13:18:29"
5 "2017-09-28 09:48:54";"2017-09-28 13:39:13"

位置 3,4 相交(数据>下一个 begat( 如何将所有交集合并为一个大区间

我需要结果

1 "2017-09-16 07:12:57";"2017-09-16 11:30:22"
2 "2017-09-18 17:05:21";"2017-09-19 13:18:01"
3 "2017-09-19 15:34:40";"2017-09-22 13:29:37"
4 "2017-09-28 09:48:54";"2017-09-28 13:39:13"

嘿,我建议使用以下过程:

1-确定何时是新行,以便为不重叠的值指定值1 (CTE b(

2-将与其他行重叠的行排序在一起。通过这种方式,您可以看到有一个通用标识符,该标识符将允许您 MAX 和 MIN begat and endat (CTE c(

3-对于每个序列,给出begat的最小值和endat的最大值,这样您将获得最终值

WITH a AS (
select '2017-09-16 07:12:57' as begat,'2017-09-16 11:30:22' as endat
union 
select '2017-09-18 17:05:21' ,'2017-09-19 13:18:01'
union 
select '2017-09-19 15:34:40' ,'2017-09-22 13:29:37'
union 
select '2017-09-22 12:24:16' ,'2017-09-22 13:18:29' 
union 
select '2017-09-28 09:48:54' ,'2017-09-28 13:39:13'
union 
select '2017-09-20 13:52:43' ,'2017-09-20 14:14:43' 
)
, b AS (
SELECT
begat
, endat
, (begat > MAX(endat) OVER w IS TRUE)::INT is_new
FROM a
WINDOW w AS (ORDER BY begat ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING)
) 
, c AS (
SELECT
begat
, endat
, SUM((is_new)) OVER (ORDER BY begat) seq
FROM b
)
SELECT
MIN(begat) beg_at
, MAX(endat) end_at
FROM c
GROUP BY seq

已编辑

如果你需要速度,你可以使用 psql 函数:

create or replace function append_ranges_in_a() returns setof a
language plpgsql
as
$BODY$
declare
v_current a%rowtype;
v_new a%rowtype;
v_first boolean:=true;
begin
for v_current in select begat, endat from a order by begat, endat
loop
if v_first then
v_first := false;
v_new.begat := v_current.begat;
elsif v_new.endat < v_current.begat then
return next v_new;
v_new.begat := v_current.begat;
end if;
v_new.endat := greatest(v_current.endat,v_new.endat);
end loop;
return next v_new;
return;
end;
$BODY$;
select * from append_ranges_in_a()

我用~400000行测试它:

delete from a;
insert into a (begat,  endat)
select time::text, (time+'1 day'::interval)::text
from (select t+(round(random()*23.0)||' hours')::interval as time
from generate_series('1401-01-01'::timestamp,'2018-08-21'::timestamp,'1 day'::interval) t
) t;
select count(*) from a;
select * from append_ranges_in_a() offset 100000 limit 10      

它的速度是 O(n^2( 纯 SQL 版本的两倍。

旧的慢速解决方案:

可以使用递归 WITH 查询 https://www.postgresql.org/docs/current/static/queries-with.html 逐行构造结果。

  1. 我创建表
  2. 第一行是候选的第一行(在结束处结束(,但该行不是"就绪">
  3. 然后我查看下一行(步骤(,如果它不相交,我添加一个准备好的行,
  4. 我还添加了一个具有当前(最后一个(观察到范围的未就绪行
  5. 当我没有更多行时,我计算最后一行
  6. 我保留准备好的行和最后一行

这是代码

CREATE TABLE a as 
select '2017-09-16 07:12:57' as begat,'2017-09-16 11:30:22' as endat
union 
select '2017-09-18 17:05:21' ,'2017-09-19 13:18:01'
union 
select '2017-09-19 15:34:40' ,'2017-09-22 13:29:37'
union 
select '2017-09-22 12:24:16' ,'2017-09-22 13:18:29' 
union 
select '2017-09-28 09:48:54' ,'2017-09-28 13:39:13'
union 
select '2017-09-20 13:52:43' ,'2017-09-20 14:14:43';
WITH RECURSIVE t(begat, endat, ready, step) AS (
select * from (
select *,false,1 from a order by begat, endat limit 1) a
UNION ALL
SELECT new_rows.*
FROM (SELECT * FROM t ORDER BY begat DESC limit 1) t,
lateral (SELECT * FROM a ORDER BY begat, endat OFFSET step LIMIT 1) a,
lateral (
SELECT t.begat, t.endat, true as ready, step WHERE t.endat < a.begat
UNION SELECT CASE WHEN t.endat < a.begat THEN a.begat ELSE t.begat END, greatest(a.endat, t.endat), false, step+1
) new_rows
)
select begat, endat
from (
select begat, endat, ready, row_number() over (order by begat desc, endat desc)=1 is_last
from t
order by begat, endat) t
where ready or is_last;

i 使用范围类型 https://www.postgresql.org/docs/9.3/static/rangetypes.html

WITH tmp AS (
-- preparation range type
select  begat, coalesce( endat, 'infinity' ) as endAt, tsrange( begat, coalesce( endat, 'infinity' ) ) as rg
from (
select '2017-09-11 17:13:03'::timestamp as begat ,'2017-09-12 12:24:09'::timestamp as endat  union
select '2017-09-19 15:34:40','2017-09-20 11:04:45' union
select '2017-09-20 08:32:00','2017-09-22 13:28:37' union
select '2017-09-20 13:52:43','2017-09-20 14:14:43' union
select '2017-09-21 12:24:16','2017-09-21 13:28:29' union
select '2017-09-22 12:24:16','2017-09-22 13:28:29' union
select '2017-09-22 12:34:16','2017-09-23 13:28:29' union
select '2017-09-22 12:25:16','2017-09-24 13:28:29' union
select '2017-09-28 09:48:54','2017-09-28 13:39:13' union
select '2017-09-28 14:22:16','2017-09-28 15:52:15' union
select '2017-10-05 12:17:45','2017-10-06 12:35:38' union
select '2017-10-06 16:20:44','2017-10-07 10:11:09' union
select '2017-10-07 20:38:32','2017-10-09 14:42:29' union
select '2017-10-12 18:22:14','2017-10-12 20:52:45'
) a
),a as (
-- group intersecting range
select l.*
from tmp l left join tmp r on  l.begAt > r.begAt and r.rg @>  l.rg
where r.begAt is null  
),
b AS (
SELECT *, lag(endat) OVER (ORDER BY begat) < begat OR NULL AS step
FROM   a
)
, c AS (
SELECT *, count(step) OVER (ORDER BY begat) AS grp
FROM   b
)
SELECT min(begat), coalesce( max(endat), 'infinity' ) AS range
FROM   c
GROUP  BY grp
ORDER  BY 1 

相关内容

最新更新