下表是我可以访问的数据库中的表的子集:
CREATE TABLE trips(trip_id INT, foot BOOLEAN, bike BOOLEAN, bus BOOLEAN,
car BOOLEAN, metro BOOLEAN, motorcycle BOOLEAN, train BOOLEAN,
other BOOLEAN)
-- sample values
INSERT INTO trips (trip_id, foot, bike, bus, car, metro,
motorcycle, train, other)
VALUES(19,true,false,false,false,false,false,false,false),
(20,false,false,false,false,false,false,false,false),
(56,true,false,true,false,false,false,false,false),
(65,true,false,false,true,false,false,false,false),
(77,false,false,false,true,false,false,false,false)
然后,例如,我想在表中生成以下关于旅行模式的统计数据。
- 仅限步行出行次数
- 仅乘公共汽车出行次数
- 仅开车出行的次数等
- 步行和汽车出行次数
- 步行和公共汽车旅行
- 所有模式均为FALSE的总行程
我产生这个db<gt;fiddle,但不确定如何过滤这些统计数据。
您可以按照下面的模板构建查询
仅脚
SELECT count(*) nf
FROM trips
WHERE foot
AND NOT (bike OR bus OR car OR metro OR motorcycle OR train OR other)
仅脚+车
SELECT count(*) nfc
FROM trips
WHERE foot AND car
AND NOT (bike OR bus OR metro OR motorcycle OR train OR other)
在使用条件聚合的单个查询中也是如此
SELECT
count(*) filter(where foot
AND NOT (bike OR bus OR car OR metro OR motorcycle OR train OR other)) nf,
count(*) filter(where foot AND car
AND NOT (bike OR bus OR metro OR motorcycle OR train OR other)) nfc
FROM trips
执行逻辑运算
SELECT * FROM trips WHERE foot = true
AND (bike AND bus AND car AND metro AND
motorcycle AND train AND other) = False
trip_id | foot | 自行车 | us>训练 | 其他 |
---|---|---|---|---|
19 | t | >td style="text align:left">ff | ||
56 | t | f | >td style="ext-align:left;">f||
65 | t | f |
通过将数据翻转到jsonb
来规范化数据,一个查询可以获得所有模式的混合:
with normed as (
select trip_id, tmode, used::boolean
from trips
cross join lateral jsonb_each(to_jsonb(trips) - 'trip_id') as e(tmode, used)
), modenames as (
select distinct tmode,
(2 ^ (dense_rank() over (order by tmode) - 1))::bigint as mode_val
from normed
), possible_n as (
select generate_series(0, max(mode_val::bigint) * 2 - 1) as tripval
from modenames
), perms as (
select n.tripval, string_agg(m.tmode, ', ' order by m.mode_val) as tmodes,
count(1) as n_modes
from possible_n n
join modenames m on m.mode_val & n.tripval > 0
group by n.tripval
), tvals as (
select n.trip_id, sum(m.mode_val) as tripval
from normed n
join modenames m on m.tmode = n.tmode
where n.used = true
group by n.trip_id
)
select p.tmodes, count(v.trip_id) n_trips,
nullif(array_agg(v.trip_id), '{NULL}') trips
from perms p
left join tvals v on v.tripval = p.tripval
group by p.n_modes, p.tmodes
order by p.n_modes;
小提琴