我使用的是PostgreSQL,我有一个table
与以下列:id
,distance
和length
。我想通过distance
排序表,并使用窗口函数创建一个名为cum_length
的新列。我还想过滤行,以便只有在cum_length
值超过某个阈值之前的行才包含在最终结果中。
输入table
示例:
尝试如下:
WITH ordered_table AS
(
SELECT id, distance, length,
SUM(length) OVER (ORDER BY distance) AS cum_length
FROM table_name
)
SELECT id, distance, length, cum_length
FROM ordered_table
WHERE cum_length <= COALESCE((SELECT MIN(cum_length) FROM ordered_table WHERE cum_length > 6), 6)
对于示例数据,这相当于WHERE cum_length <= 7
。
看到演示
这是基于您的ordered_table
查询和一个额外的lag
窗口函数来计算previous_is_less
而不是阈值。previous_is_less
为true或null的记录符合选择条件。
with t as
(
select *,
lag(cum_length) over (order by cum_length) < 6 as previous_is_less
from
(
SELECT id, distance, length,
SUM(length) OVER (order BY distance) AS cum_length
from the_table
) as ordered_table
)
select id, distance, length, cum_length
from t
where coalesce(previous_is_less, true)
order by cum_length;
DB-Fiddle演示在大数据集上与这个比较可能会很有趣,只有一次按距离排序,一旦找到阈值就应该停止扫描数据
with data(id, distance, length) as (
select 1, 10, 1 FROM DUAL UNION ALL
select 2, 5, 2 FROM DUAL UNION ALL
select 3, 8, 1 FROM DUAL UNION ALL
select 4, 1, 3 FROM DUAL UNION ALL
select 5, 3, 2 FROM DUAL UNION ALL
select 6, 9, 2 FROM DUAL -- UNION ALL
),
rdata(id, distance, length, rn) as (
select id, distance, length, row_number() over(order by distance) as rn
from data
),
recdata(id, distance, length, rn, cumlength) as (
select id, distance, length, rn, length
from rdata d
where rn = 1
union all
select d.id, d.distance, d.length, d.rn, d.length + r.cumlength
from recdata r
join rdata d on d.rn = r.rn + 1 and r.cumlength <= 6
)
select id, distance, length, cumlength from recdata
;