使用间歇数据按日期和状态计数



我想按日期计算处于特定状态的对象(灯泡、计划作业、客户账单状态)的数量。但是,数据是断断续续的。对象可以有也可以没有特定日期的行。

不是一个简单的GROUP BY解决方案。对处于特定状态的对象数量的要求;

在下面要解决的最小问题示例中,在03-01上,GROUP BY将导致"1开1关"。在03-02中,只表示一个对象。显示03-02结果的GROUP BY将是"1开0关"。03-02日按状态合计物品的正确答案是"2开1关";因为它必须包含行数为03-01的对象。

要解决的最小问题在这里

SET NOCOUNT ON
-->>-- minimum problem to solve  -- count by status for a specific day
IF OBJECT_ID('tempdb..#d') IS NOT NULL DROP TABLE #d
CREATE TABLE #d (ndx SMALLINT IDENTITY(1,1), id TINYINT, dt DATE, status CHAR(10) )
INSERT INTO #d (id, dt, status)
VALUES
( 1, '20230301' , 'on'        )
, ( 3, '20230301' , 'off'       )
, ( 2, '20230302' , 'on'        )
, ( 3, '20230303' , 'off'       )
, ( 3, '20230305' , 'on'        )
, ( 1, '20230308' , 'off'       )
, ( 2, '20230308' , 'off'       )
, ( 1, '20230310' , 'off'       )
, ( 2, '20230311' , 'off'       )
, ( 1, '20230312' , 'off'       )
, ( 3, '20230312' , 'off'       )
, ( 2, '20230313' , 'on'        )
, ( 1, '20230314' , 'on'        )
, ( 3, '20230314' , 'off'       )
, ( 3, '20230316' , 'off'       )
, ( 2, '20230320' , 'on'        )
, ( 1, '20230321' , 'off'       )
SELECT * FROM #d d ORDER BY id, dt
IF OBJECT_ID('tempdb..#c') IS NOT NULL DROP TABLE #c
CREATE TABLE #c ( calendardt DATE )
INSERT INTO #c(calendardt)
VALUES
('2023-03-01 '), ('2023-03-02 '), ('2023-03-03 '), ('2023-03-04 '), ('2023-03-05 ')
, ('2023-03-06 '), ('2023-03-07 '), ('2023-03-08 '), ('2023-03-09 '), ('2023-03-10 ')
, ('2023-03-11 '), ('2023-03-12 '), ('2023-03-13 '), ('2023-03-14 '), ('2023-03-15 ')
, ('2023-03-16 '), ('2023-03-17 '), ('2023-03-18 '), ('2023-03-19 '), ('2023-03-20 ')
, ('2023-03-21 '), ('2023-03-22 '), ('2023-03-23 '), ('2023-03-24 '), ('2023-03-25 ')
SELECT * FROM #c UNION ALL SELECT * FROM #c ORDER BY calendardt
SELECT *
FROM #c c
LEFT JOIN #d d ON d.dt = c.calendardt
ORDER BY c.calendardt, d.id

预期的结果应该如下所示

-->>-- expected result
calendardt  [status]    [count]
2023-03-01  on          1
2023-03-01  off         1
2023-03-02  on          2
2023-03-02  off         1
2023-03-03  on          2
2023-03-03  off         1
2023-03-04  on          2
2023-03-04  off         1
2023-03-05  on          3
2023-03-05  off         0
2023-03-06  on          3
2023-03-06  off         0
2023-03-07  on          3
2023-03-07  off         0
2023-03-08  on          1
2023-03-08  off         2
2023-03-09  on          1
2023-03-09  off         2
2023-03-10  on          1
2023-03-10  off         2
2023-03-11  on          1
2023-03-11  off         2
2023-03-12  on          1
2023-03-12  off         2
2023-03-13  on          0
2023-03-13  off         3
2023-03-14  on          0
2023-03-14  off         3
2023-03-15  on          0
2023-03-15  off         3
2023-03-16  on          0
2023-03-16  off         3

我没有尝试。我能够每天跟踪数据的变化。我可以捕获对象进入新状态的第一个日期,并计算它处于该状态的时间。但是我不能按日期和状态计算总数。

请帮助按日期和状态计数。

with changes as (
select id, dt, status 
from (select id, dt, status, 
case when lag(status) over (partition by id order by dt)  = status 
then 0 else 1 end chg 
from #d) t
where chg = 1),
cal as (
select calendardt dt, id from #c cross join (select distinct id from #d) t),
joined as (
select cal.dt, cal.id,
last_value(status) ignore nulls 
over (partition by cal.id order by cal.dt) status 
from cal left join changes d on cal.dt = d.dt and d.id = cal.id)
select * --dt, stat_on, stat_off 
from joined pivot (count(id) for status in ([on], [off])) piv
order by dt

SQL Server dbfiddle

最初我在Oracle中解决方案,dbfiddle在这里。我对SQL Server的了解非常有限,我只能在dbfiddle中测试它,但它返回预期值(直到4月13日,但我认为它应该是1 ON,因为id=2是ON)。

这个想法是首先消除不改变任何东西的行(例如状态关闭),然后与日历连接,并与所有id交叉连接。这样我们就有了每一天和id的行。然后last_value填充缺失的数据。我们可以聚合。

On/off表缺失

..
CREATE TABLE #on_off (status varchar(3));
INSERT INTO #on_off (status)
VALUES
('on'), ('off');
SELECT c.calendardt, c.status, count(d.id) n
FROM (
select c.*, f.*
from #c c
cross join #on_off f
) c
LEFT JOIN #d d ON d.dt = c.calendardt and d.status = c.status
GROUP BY c.calendardt, c.status
ORDER BY c.calendardt, c.status

,db&lt的在小提琴

最新更新