我有一张表,可以在房间里存储人们的登录和注销信息。其中一人登录并告诉系统他们总共有多少人。只要他们没有注销,"注销"列中的值就会保持为NULL。这是我必须做的一份报告的查询,但我被卡住了。
我想找出在给定的时间跨度内,每个房间每天并发的最大人数。然而,我也想统计为NULL的人数,因为他们实际上仍然登录
我根据Stackoverflow上的几个答案创建了这个查询。不幸的是,我还没有弄清楚如何获得NULL值的人数在一个没有人登录那个房间的日子里。原因可能是累计总数,但我如何解决这个问题?
下面是一个示例表:
CREATE TABLE Registrations(
[RoomName] VARCHAR(32),
[LogIn] DATETIME,
[LogOut] DATETIME,
[NumberOfPeople] INT
)
INSERT INTO Registrations (RoomName, LogIn, LogOut, NumberOfPeople)
VALUES
('MainHall', '2022-01-31 08:00:00.000', NULL, 5),
('MainHall', '2022-01-31 08:00:00.000', '2022-01-31 10:00:00.000', 5),
('MainHall', '2022-01-31 09:00:00.000', '2022-01-31 11:00:00.000', 2),
('MainHall', '2022-01-31 12:00:00.000', '2022-01-31 14:00:00.000', 2),
('SmallRoom', '2022-01-31 08:00:00.000', NULL, 3),
('SmallRoom', '2022-01-31 10:00:00.000', '2022-01-31 12:00:00.000', 3),
('SmallRoom', '2022-01-31 11:00:00.000', '2022-01-31 14:00:00.000', 3),
('MainHall', '2022-02-01 08:00:00.000', NULL, 5),
('MainHall', '2022-02-01 08:00:00.000', '2022-02-01 10:00:00.000', 5),
('MainHall', '2022-02-01 09:00:00.000', '2022-02-01 11:00:00.000', 2),
('MainHall', '2022-02-01 12:00:00.000', '2022-02-01 14:00:00.000', 2)
这就是2022-01-31年主大厅的第一批作品应该是什么样子的:
-- A |-------------------
-- B |-----|
-- C |-----|
-- D |-----|
这就是我想要得到的结果:
Day | Room | MaxNumberOfConcurrentLoggedInPeople
------------+-------------+-------------------------------------
2022-01-31 | MainHall | 12
2022-01-31 | SmallRoom | 9
2022-02-01 | MainHall | 17
2022-02-01 | SmallRoom | 3
这是我的问题:
SELECT ct.RoomName,
ct.DatePeak,
ct.RegistrationPeak
FROM
(
SELECT cte.RoomName,
CONVERT(DATE, cte.DatePeak) AS DatePeak,
MAX(cte.RegistrationPeak) AS RegistrationPeak
FROM
(
SELECT MarkedDateTime.RoomName,
SUM(SUM(MarkedDateTime.NumberOfPeople)) OVER (PARTITION BY MarkedDateTime.RoomName ORDER BY MarkedDateTime.DatePeak) AS RegistrationPeak,
CONVERT(DATE, MarkedDateTime.DatePeak) AS DatePeak
FROM
(
SELECT RoomName,
NumberOfPeople,
LogIn AS DatePeak
FROM Registrations
WHERE (LogOut >= '2022-01-31 00:00:00.000' OR LogOut IS NULL)
AND LogIn < DATEADD(DAY, 1, '2022-02-01 00:00:00.000')
UNION ALL
SELECT RoomName,
-NumberOfPeople,
COALESCE(LogOut, DATEADD(MILLISECOND, - 3, DATEADD(DAY, 1, '2022-02-01 00:00:00.000'))) AS DatePeak
FROM Registrations
WHERE (LogOut >= '2022-01-31 00:00:00.000' OR LogOut IS NULL)
AND LogIn < DATEADD(DAY, 1, '2022-02-01 00:00:00.000')
) AS MarkedDateTime
GROUP BY RoomName, DatePeak
) AS cte
GROUP BY DatePeak, RoomName
) AS ct
WHERE ct.DatePeak <= '2022-02-01 00:00:00.000'
AND ct.DatePeak >= '2022-01-31 00:00:00.000'
编辑:我添加了解决方案和一个小的视觉帮助。2.编辑:修复了提供的测试表中的错误日期。
您的数据似乎与预期输出不太匹配,有些数据甚至没有意义(登录前注销(,但我认为您可以这样解决:
- 将登录和注销取消预览为单独的事件,数量分别为正或负。排除null
- 计算数量的运行总和,按房间和日期划分,按时间排序
- 然后简单地按房间和日期分组,取运行总和的最大值
SELECT
evnt.Day,
evnt.RoomName,
MaxNumberOfConcurrentLoggedInPeople = MAX(evnt.RunningSum)
FROM (
SELECT
r.RoomName,
Day = CAST(v.EventDateTime AS date),
v.DiffQuantity,
RunningSum = SUM(v.DiffQuantity) OVER (PARTITION BY r.RoomName, CAST(v.EventDateTime AS date) ORDER BY v.EventDateTime ROWS UNBOUNDED PRECEDING)
FROM Registrations r
CROSS APPLY (VALUES
(r.LogIn, r.NumberOfPeople),
(r.LogOut, -r.NumberOfPeople)
) v(EventDateTime, DiffQuantity)
WHERE v.EventDateTime IS NOT NULL
) evnt
GROUP BY
evnt.RoomName,
evnt.Day;
db<gt;小提琴
下面有很多代码。我对它进行了评论,以解释它在做什么,但总的来说:我正在计算房间使用的每一秒。这意味着这份报告的准确性下降到了一秒钟(不亚于一秒钟(。PS我认为你提供的数据有问题,所以我修改了它。
INSERT INTO Registrations (RoomName, LogIn, LogOut, NumberOfPeople)
VALUES ('MainHall', '20220131 08:00:00.000', NULL, 5),
('MainHall', '20220131 08:00:00.000', '20220131 10:00:00.000', 5),
('MainHall', '20220131 09:00:00.000', '20220131 11:00:00.000', 2),
('MainHall', '20220131 12:00:00.000', '20220131 14:00:00.000', 2),
('SmallRoom', '20220131 08:00:00.000', NULL, 3),
('SmallRoom', '20220131 10:00:00.000', '20220131 12:00:00.000', 3),
('SmallRoom', '20220131 11:00:00.000', '20220131 14:00:00.000', 3),
('MainHall', '20220201 08:00:00.000', NULL, 5),
('MainHall', '20220201 08:00:00.000', '20220201 10:00:00.000', 5),
('MainHall', '20220201 09:00:00.000', '20220201 11:00:00.000', 2),
('MainHall', '20220201 12:00:00.000', '20220201 14:00:00.000', 2)
-- tally table so we can calculate all the dates we need
SELECT TOP 86400 -- seconds in a day
IDENTITY(INT,1,1) AS N
INTO #Tally
FROM master.sys.all_columns ac1
CROSS JOIN master.sys.all_columns ac2
;
-- need to figure out how how many days we want to process
DECLARE @StartDate DATETIME
DECLARE @EndDate DATETIME
DECLARE @NumberOfDays INT
SELECT @StartDate = min(dateadd(day, datediff(day,0,d),0)),
@EndDate = max(dateadd(day, datediff(day,0,d),0))
FROM ( SELECT MIN(LogIn) d
FROM Registrations
UNION
SELECT MAX(LogIn)
FROM Registrations
UNION
SELECT MIN(coalesce(LogOut, getdate()))
FROM Registrations
UNION
SELECT MAX(coalesce(LogOut, getdate()))
FROM Registrations ) x
SET @NumberOfDays = DATEDIFF( DAY, @StartDate, @EndDate ) + 1
--SELECT @StartDate, @EndDate, @NumberOfDays
IF(SELECT OBJECT_ID('tempdb..#Days')) IS NOT NULL
DROP TABLE #Days
CREATE TABLE #Days(N INT, D DATETIME);
-- The cross join is slow so, if there are only a few days dont produce a cartesean product
IF((SELECT COUNT(1) FROM #Tally) >= @NumberOfDays) BEGIN
INSERT INTO #Days(N,D)
SELECT [N] = T1.N ,
[D] = DATEADD(DAY, T1.N-1, @StartDate)
FROM #Tally T1
WHERE T1.N <= @NumberOfDays
ORDER BY T1.N
END
ELSE BEGIN
INSERT INTO #Days(N,D)
SELECT [N] = ROW_NUMBER() OVER(ORDER BY T1.N, T2.N) ,
[D] = DATEADD(DAY, ROW_NUMBER() OVER(ORDER BY T1.N, T2.N)-1, @StartDate)
FROM #Tally T1
CROSS JOIN #Tally T2
END
--SELECT * FROM #Days
-- Ensure there are no missing rooms or days
; WITH base as (
SELECT DISTINCT r.RoomName, d.D
FROM Registrations r
CROSS JOIN #Days d
)
--SELECT * FROM base ORDER BY 1,2 /*
-- calculate all the times down to the second that a room can be in use
, RoomSlot as (
SELECT b.RoomName, DATEADD(SECOND, t.N -1, b.D) D
FROM base b
CROSS JOIN #Tally t
)
--SELECT * FROM RoomSlot ORDER BY 1,2 /*
-- find each second a room is in use
, data as (
SELECT rs.*, r.NumberOfPeople, r.id, r.[LogIn], r.[Logout]
FROM RoomSlot rs
INNER JOIN Registrations r ON r.RoomName = rs.RoomName
AND r.LogIn <= rs.D
AND COALESCE(r.LogOut, GETDATE()) > rs.D
)
--SELECT * FROM data ORDER BY 1,2 /*
-- calculate all the people using a room for each second it is in use
, results as (
SELECT RoomName, D, SUM(NumberOfPeople) NumberOfPeople
FROM data
GROUP BY RoomName, D
)
--SELECT * FROM results ORDER BY 1,2,3 /*
-- get the max usage
SELECT RoomName, DATEADD(DAY, DATEDIFF(DAY,0,D),0), MAX(NumberOfPeople)
FROM results
GROUP BY RoomName, DATEADD(DAY, DATEDIFF(DAY,0,D),0)
ORDER BY DATEADD(DAY, DATEDIFF(DAY,0,D),0), RoomName
-- */ -- */ -- */ -- */ -- */
/*
Actual output
Day | Room | MaxNumberOfConcurrentLoggedInPeople
------------+-------------+-------------------------------------
2022-01-31 | MainHall | 12
2022-01-31 | SmallRoom | 9
2022-02-01 | MainHall | 17
2022-02-01 | SmallRoom | 3
*/