我有如下的原始数据
INSP_EQP SENDTIME SENDTIME_NOW
Item A 00h 10
Item A 01h 10
Item A 02h 10
Item A 03h 10
Item A 04h 10
Item A 05h 10
Item A 06h 10
Item A 07h 10
Item A 09h 10
Item A 10h 10
Item B 00h 10
Item B 01h 10
Item B 02h 10
Item B 03h 10
Item B 04h 10
Item B 05h 10
Item B 06h 10
Item B 07h 10
Item B 08h 10
Item B 09h 10
Item B 10h 10
我想选择哪个条目只有到当前时间(SENDTIME_NOW=10(的连续时间帧(至少5次(而没有中断。在这种情况下,只需保留项目B并删除项目A。我想要的结果是:
INSP_EQP SENDTIME SENDTIME_NOW
Item B 00h 10
Item B 01h 10
Item B 02h 10
Item B 03h 10
Item B 04h 10
Item B 05h 10
Item B 06h 10
Item B 07h 10
Item B 08h 10
Item B 09h 10
Item B 10h 10
我试图找到许多函数,但似乎没有一个SQL oracle函数支持这一点。有人能告诉我应该使用SQL还是使用其他语言来完成这项工作吗。谢谢
您可以使用match_recognize
:
- 您可以检查哪些行适合您的条件,例如:
DBFiddle:https://dbfiddle.uk/RsuH2bG2
with t(INSP_EQP, SENDTIME, SENDTIME_NOW) as (
select 'Item A', '00h', 10 from dual union all
select 'Item A', '01h', 10 from dual union all
select 'Item A', '02h', 10 from dual union all
select 'Item A', '03h', 10 from dual union all
select 'Item A', '04h', 10 from dual union all
select 'Item A', '05h', 10 from dual union all
select 'Item A', '06h', 10 from dual union all
select 'Item A', '07h', 10 from dual union all
select 'Item A', '09h', 10 from dual union all
select 'Item A', '10h', 10 from dual union all
select 'Item B', '00h', 10 from dual union all
select 'Item B', '01h', 10 from dual union all
select 'Item B', '02h', 10 from dual union all
select 'Item B', '03h', 10 from dual union all
select 'Item B', '04h', 10 from dual union all
select 'Item B', '05h', 10 from dual union all
select 'Item B', '06h', 10 from dual union all
select 'Item B', '07h', 10 from dual union all
select 'Item B', '08h', 10 from dual union all
select 'Item B', '09h', 10 from dual union all
select 'Item B', '10h', 10 from dual
)
select *
from (-- inline view to make columns more readable
select
INSP_EQP
,to_number(rtrim(sendtime,'h')) SENDTIME_num
,SENDTIME
,SENDTIME_NOW
from t
)
match_recognize (
partition by INSP_EQP
order by SENDTIME_num
measures
classifier() as cls,
count(*) as cnt,
first(SENDTIME_num) as n_first,
last (SENDTIME_num) as n_last
ALL ROWS PER MATCH
PATTERN (ok* err*)
DEFINE
ok as (ok.SENDTIME_num = 0 or ok.SENDTIME_num = prev(ok.SENDTIME_num)+1)
)
退货:
INSP_EQP SENDTIME_NUM CLS CNT N_FIRST N_LAST SENDTIME SENDTIME_NOW
---------- ------------ ----- ---------- ---------- ---------- -------- ------------
Item A 0 OK 1 0 0 00h 10
Item A 1 OK 2 0 1 01h 10
Item A 2 OK 3 0 2 02h 10
Item A 3 OK 4 0 3 03h 10
Item A 4 OK 5 0 4 04h 10
Item A 5 OK 6 0 5 05h 10
Item A 6 OK 7 0 6 06h 10
Item A 7 OK 8 0 7 07h 10
Item A 9 ERR 9 0 9 09h 10
Item A 10 ERR 10 0 10 10h 10
Item B 0 OK 1 0 0 00h 10
Item B 1 OK 2 0 1 01h 10
Item B 2 OK 3 0 2 02h 10
Item B 3 OK 4 0 3 03h 10
Item B 4 OK 5 0 4 04h 10
Item B 5 OK 6 0 5 05h 10
Item B 6 OK 7 0 6 06h 10
Item B 7 OK 8 0 7 07h 10
Item B 8 OK 9 0 8 08h 10
Item B 9 OK 10 0 9 09h 10
Item B 10 OK 11 0 10 10h 10
正如您所看到的,"项目A"有2行带有"Err",因此您现在可以过滤掉它们。例如使用分析函数:
DB Fiddle 2:https://dbfiddle.uk/W5b-cE7L
select *
from (
select
v.*
,count(case when cls='ERR' then 1 end)over(partition by insp_eqp) cnt_errs
from (-- inline view to make columns more readable
select
INSP_EQP
,to_number(rtrim(sendtime,'h')) SENDTIME_num
,SENDTIME
,SENDTIME_NOW
from t
)
match_recognize (
partition by INSP_EQP
order by SENDTIME_num
measures
classifier() as cls,
count(*) as cnt,
first(SENDTIME_num) as n_first,
last (SENDTIME_num) as n_last
ALL ROWS PER MATCH
PATTERN (ok* err*)
DEFINE
ok as (ok.SENDTIME_num = 0 or ok.SENDTIME_num = prev(ok.SENDTIME_num)+1)
) v
)
where cnt_errs=0
它返回:
INSP_EQP SENDTIME_NUM CLS CNT N_FIRST N_LAST SENDTIME SENDTIME_NOW CNT_ERRS
---------- ------------ ----- ---------- ---------- ---------- -------- ------------ ----------
Item B 0 OK 1 0 0 00h 10 0
Item B 1 OK 2 0 1 01h 10 0
Item B 2 OK 3 0 2 02h 10 0
Item B 3 OK 4 0 3 03h 10 0
Item B 4 OK 5 0 4 04h 10 0
Item B 5 OK 6 0 5 05h 10 0
Item B 6 OK 7 0 6 06h 10 0
Item B 7 OK 8 0 7 07h 10 0
Item B 8 OK 9 0 8 08h 10 0
Item B 9 OK 10 0 9 09h 10 0
Item B 10 OK 11 0 10 10h 10 0
11 rows selected.
最终基于本文中的1个答案,我得到了我想要的结果。但似乎有什么不对劲,让我再也看不到那个人的回复了。无论如何,非常感谢
select INSP_EQP
,SENDTIME
,CHAMBER
,AREA
,SENDTIME_NOW
from (
select INSP_EQP
,CHAMBER
,AREA
,SENDTIME
,SENDTIME_NOW
,SENDTIME_INT
,rn
,pre_2
,SENDTIME_INT-2
,case when RN = 1 and SENDTIME_INT-2 = pre_2 then 1 ELSE null END match
,count(case when RN = 1 and SENDTIME_INT-2 = pre_2 then 1 ELSE null end) over(partition by INSP_EQP) as chk
from (
select INSP_EQP
,SENDTIME
, SENDTIME_INT
,CHAMBER
,AREA
,SENDTIME_NOW
,row_number() over (partition by INSP_EQP order by SENDTIME_INT desc) as rn
,lag(SENDTIME_INT, 2) over(partition by INSP_EQP order by SENDTIME_INT) as pre_2
from t
) t
) t
where chk = 1
order by SENDTIME_INT
)
GROUP BY INSP_EQP
,SENDTIME_NOW
,CHAMBER
,AREA
ORDER BY COUNT(SENDTIME)