T-SQL:查找每天具有空值的最大并发人数



我有一张表,可以在房间里存储人们的登录和注销信息。其中一人登录并告诉系统他们总共有多少人。只要他们没有注销,"注销"列中的值就会保持为NULL。这是我必须做的一份报告的查询,但我被卡住了。

我想找出在给定的时间跨度内,每个房间每天并发的最大人数。然而,我也想统计为NULL的人数,因为他们实际上仍然登录

我根据Stackoverflow上的几个答案创建了这个查询。不幸的是,我还没有弄清楚如何获得NULL值的人数​​在一个没有人登录那个房间的日子里。原因可能是累计总数,但我如何解决这个问题?

下面是一个示例表:

CREATE TABLE Registrations(
[RoomName] VARCHAR(32),
[LogIn] DATETIME,
[LogOut] DATETIME,
[NumberOfPeople] INT
)
INSERT INTO Registrations (RoomName, LogIn, LogOut, NumberOfPeople)
VALUES
('MainHall', '2022-01-31 08:00:00.000', NULL, 5),
('MainHall', '2022-01-31 08:00:00.000', '2022-01-31 10:00:00.000', 5),
('MainHall', '2022-01-31 09:00:00.000', '2022-01-31 11:00:00.000', 2),
('MainHall', '2022-01-31 12:00:00.000', '2022-01-31 14:00:00.000', 2),
('SmallRoom', '2022-01-31 08:00:00.000', NULL, 3),
('SmallRoom', '2022-01-31 10:00:00.000', '2022-01-31 12:00:00.000', 3),
('SmallRoom', '2022-01-31 11:00:00.000', '2022-01-31 14:00:00.000', 3),
('MainHall', '2022-02-01 08:00:00.000', NULL, 5),
('MainHall', '2022-02-01 08:00:00.000', '2022-02-01 10:00:00.000', 5),
('MainHall', '2022-02-01 09:00:00.000', '2022-02-01 11:00:00.000', 2),
('MainHall', '2022-02-01 12:00:00.000', '2022-02-01 14:00:00.000', 2)

这就是2022-01-31年主大厅的第一批作品应该是什么样子的:

-- A    |-------------------
-- B    |-----|
-- C       |-----|
-- D                |-----|

这就是我想要得到的结果:

Day         |    Room     | MaxNumberOfConcurrentLoggedInPeople 
------------+-------------+-------------------------------------
2022-01-31  |  MainHall   |                 12 
2022-01-31  |  SmallRoom  |                 9
2022-02-01  |  MainHall   |                 17
2022-02-01  |  SmallRoom  |                 3

这是我的问题:

SELECT  ct.RoomName,
ct.DatePeak,
ct.RegistrationPeak
FROM
(
SELECT  cte.RoomName,
CONVERT(DATE, cte.DatePeak) AS DatePeak,
MAX(cte.RegistrationPeak) AS RegistrationPeak
FROM
(
SELECT  MarkedDateTime.RoomName,
SUM(SUM(MarkedDateTime.NumberOfPeople)) OVER (PARTITION BY MarkedDateTime.RoomName ORDER BY MarkedDateTime.DatePeak) AS RegistrationPeak,
CONVERT(DATE, MarkedDateTime.DatePeak) AS DatePeak
FROM
(
SELECT      RoomName,
NumberOfPeople,
LogIn AS DatePeak
FROM        Registrations
WHERE       (LogOut >=  '2022-01-31 00:00:00.000' OR LogOut IS NULL)
AND         LogIn < DATEADD(DAY, 1, '2022-02-01 00:00:00.000')
UNION ALL
SELECT      RoomName,
-NumberOfPeople,
COALESCE(LogOut, DATEADD(MILLISECOND, - 3, DATEADD(DAY, 1, '2022-02-01 00:00:00.000'))) AS DatePeak
FROM        Registrations
WHERE       (LogOut >=  '2022-01-31 00:00:00.000' OR LogOut IS NULL)
AND         LogIn < DATEADD(DAY, 1, '2022-02-01 00:00:00.000')
) AS MarkedDateTime
GROUP BY RoomName, DatePeak
) AS cte
GROUP BY DatePeak, RoomName
) AS ct
WHERE   ct.DatePeak <= '2022-02-01 00:00:00.000'
AND     ct.DatePeak >= '2022-01-31 00:00:00.000'

编辑:我添加了解决方案和一个小的视觉帮助。2.编辑:修复了提供的测试表中的错误日期。

您的数据似乎与预期输出不太匹配,有些数据甚至没有意义(登录前注销(,但我认为您可以这样解决:

  • 将登录和注销取消预览为单独的事件,数量分别为正或负。排除null
  • 计算数量的运行总和,按房间和日期划分,按时间排序
  • 然后简单地按房间和日期分组,取运行总和的最大值
SELECT
evnt.Day,
evnt.RoomName,
MaxNumberOfConcurrentLoggedInPeople = MAX(evnt.RunningSum)
FROM (
SELECT
r.RoomName,
Day = CAST(v.EventDateTime AS date),
v.DiffQuantity,
RunningSum = SUM(v.DiffQuantity) OVER (PARTITION BY r.RoomName, CAST(v.EventDateTime AS date) ORDER BY v.EventDateTime ROWS UNBOUNDED PRECEDING)
FROM Registrations r
CROSS APPLY (VALUES
(r.LogIn, r.NumberOfPeople),
(r.LogOut, -r.NumberOfPeople)
) v(EventDateTime, DiffQuantity)
WHERE v.EventDateTime IS NOT NULL
) evnt
GROUP BY
evnt.RoomName,
evnt.Day;

db<gt;小提琴

下面有很多代码。我对它进行了评论,以解释它在做什么,但总的来说:我正在计算房间使用的每一秒。这意味着这份报告的准确性下降到了一秒钟(不亚于一秒钟(。PS我认为你提供的数据有问题,所以我修改了它。

INSERT INTO Registrations (RoomName, LogIn, LogOut, NumberOfPeople)
VALUES ('MainHall', '20220131 08:00:00.000',  NULL, 5),
('MainHall', '20220131 08:00:00.000',  '20220131 10:00:00.000', 5),
('MainHall', '20220131 09:00:00.000',  '20220131 11:00:00.000', 2),
('MainHall', '20220131 12:00:00.000',  '20220131 14:00:00.000', 2),
('SmallRoom', '20220131 08:00:00.000', NULL, 3),
('SmallRoom', '20220131 10:00:00.000', '20220131 12:00:00.000', 3),
('SmallRoom', '20220131 11:00:00.000', '20220131 14:00:00.000', 3),
('MainHall', '20220201 08:00:00.000',  NULL, 5),
('MainHall', '20220201 08:00:00.000',  '20220201 10:00:00.000', 5),
('MainHall', '20220201 09:00:00.000',  '20220201 11:00:00.000', 2),
('MainHall', '20220201 12:00:00.000',  '20220201 14:00:00.000', 2)
-- tally table so we can calculate all the dates we need
SELECT  TOP 86400 -- seconds in a day
IDENTITY(INT,1,1) AS N
INTO    #Tally
FROM    master.sys.all_columns ac1
CROSS JOIN master.sys.all_columns ac2
;
--  need to figure out how how many days we want to process
DECLARE @StartDate DATETIME
DECLARE @EndDate DATETIME
DECLARE @NumberOfDays INT
SELECT  @StartDate = min(dateadd(day, datediff(day,0,d),0)), 
@EndDate = max(dateadd(day, datediff(day,0,d),0))
FROM (  SELECT  MIN(LogIn) d
FROM    Registrations
UNION 
SELECT  MAX(LogIn)
FROM    Registrations
UNION 
SELECT  MIN(coalesce(LogOut, getdate()))
FROM    Registrations
UNION 
SELECT  MAX(coalesce(LogOut, getdate()))
FROM    Registrations ) x   
SET @NumberOfDays = DATEDIFF( DAY, @StartDate, @EndDate ) + 1
--SELECT @StartDate, @EndDate, @NumberOfDays
IF(SELECT OBJECT_ID('tempdb..#Days')) IS NOT NULL
DROP TABLE #Days
CREATE TABLE #Days(N  INT, D DATETIME);
-- The cross join is slow so, if there are only a few days dont produce a cartesean product
IF((SELECT COUNT(1) FROM #Tally) >= @NumberOfDays) BEGIN
INSERT INTO #Days(N,D)
SELECT  [N] = T1.N ,
[D] = DATEADD(DAY, T1.N-1, @StartDate)
FROM      #Tally T1
WHERE   T1.N <= @NumberOfDays
ORDER BY T1.N
END
ELSE BEGIN
INSERT INTO #Days(N,D)
SELECT  [N] = ROW_NUMBER() OVER(ORDER BY T1.N, T2.N) ,
[D] = DATEADD(DAY, ROW_NUMBER() OVER(ORDER BY T1.N, T2.N)-1, @StartDate)
FROM      #Tally T1
CROSS JOIN #Tally T2
END            
--SELECT * FROM #Days
-- Ensure there are no missing rooms or days
; WITH base as (
SELECT  DISTINCT r.RoomName, d.D 
FROM    Registrations r
CROSS JOIN #Days d
)
--SELECT * FROM base ORDER BY 1,2 /*
-- calculate all the times down to the second that a room can be in use
, RoomSlot as (
SELECT  b.RoomName,  DATEADD(SECOND, t.N -1, b.D) D
FROM    base b
CROSS JOIN #Tally t
)
--SELECT * FROM RoomSlot ORDER BY 1,2 /*
-- find each second a room is in use
, data as (
SELECT  rs.*, r.NumberOfPeople, r.id, r.[LogIn], r.[Logout]
FROM    RoomSlot rs 
INNER JOIN Registrations r ON  r.RoomName = rs.RoomName
AND r.LogIn <= rs.D  
AND COALESCE(r.LogOut, GETDATE()) > rs.D
)
--SELECT * FROM data ORDER BY 1,2  /*
-- calculate all the people using a room for each second it is in use
, results as (
SELECT  RoomName, D, SUM(NumberOfPeople) NumberOfPeople
FROM    data
GROUP BY RoomName, D
)
--SELECT * FROM results ORDER BY 1,2,3 /*
-- get the max usage
SELECT  RoomName, DATEADD(DAY, DATEDIFF(DAY,0,D),0), MAX(NumberOfPeople)
FROM    results
GROUP BY RoomName, DATEADD(DAY, DATEDIFF(DAY,0,D),0)
ORDER BY DATEADD(DAY, DATEDIFF(DAY,0,D),0), RoomName
-- */ -- */ -- */ -- */ -- */

/*
Actual output
Day         |    Room     | MaxNumberOfConcurrentLoggedInPeople 
------------+-------------+-------------------------------------
2022-01-31  |  MainHall   |                 12 
2022-01-31  |  SmallRoom  |                 9
2022-02-01  |  MainHall   |                 17
2022-02-01  |  SmallRoom  |                 3
*/

最新更新