我正在使用一些当前以 1 分钟间隔存储的数据,如下所示:
-- Sample fixture: ten consecutive 1-minute bars (17:00 through 17:09 on 2015-01-01),
-- each carrying an open, high, low and close price.
CREATE TABLE #MinuteData
(
    [Id]        INT,
    [MinuteBar] DATETIME,
    [Open]      NUMERIC(12, 6),
    [High]      NUMERIC(12, 6),
    [Low]       NUMERIC(12, 6),
    [Close]     NUMERIC(12, 6)
);

INSERT INTO #MinuteData
    ([Id], [MinuteBar], [Open], [High], [Low], [Close])
VALUES
    (1,  '2015-01-01 17:00:00', 1.557870, 1.557880, 1.557870, 1.557880),
    (2,  '2015-01-01 17:01:00', 1.557900, 1.557900, 1.557880, 1.557880),
    (3,  '2015-01-01 17:02:00', 1.557960, 1.558070, 1.557960, 1.558040),
    (4,  '2015-01-01 17:03:00', 1.558080, 1.558100, 1.558040, 1.558050),
    (5,  '2015-01-01 17:04:00', 1.558050, 1.558100, 1.558020, 1.558030),
    (6,  '2015-01-01 17:05:00', 1.558580, 1.558710, 1.557870, 1.557950),
    (7,  '2015-01-01 17:06:00', 1.557910, 1.558120, 1.557910, 1.557990),
    (8,  '2015-01-01 17:07:00', 1.557940, 1.558250, 1.557940, 1.558170),
    (9,  '2015-01-01 17:08:00', 1.558140, 1.558200, 1.558080, 1.558120),
    (10, '2015-01-01 17:09:00', 1.558110, 1.558140, 1.557970, 1.557970);

-- Show the raw 1-minute rows.
SELECT
    [Id],
    [MinuteBar],
    [Open],
    [High],
    [Low],
    [Close]
FROM #MinuteData;

-- Clean up the fixture.
DROP TABLE #MinuteData;
这些值记录的是货币汇率:对于每个 1 分钟间隔(即一根 K 线 / bar),Open 是该分钟开始时的价格,Close 是该分钟结束时的价格,High 和 Low 则分别是这一分钟内的最高和最低汇率。
期望的输出
我希望将此数据重新格式化为 5 分钟间隔以产生以下输出:
MinuteBar Open Close Low High
2015-01-01 17:00:00.000 1.557870 1.558030 1.557870 1.558100
2015-01-01 17:05:00.000 1.558580 1.557970 1.557870 1.558710
也就是说,每个 5 分钟区间取其中第一分钟的 Open 值和最后一分钟的 Close 值,High 和 Low 则分别取这 5 分钟内最高的 High 和最低的 Low。
当前解决方案
我有一个可以做到这一点的解决方案(如下),但它感觉不够优雅,因为它依赖于 Id 值和自连接(self join)。此外,我打算在大得多的数据集上运行它,因此如果可能的话,我希望用更高效的方式来实现:
-- Step 1: tag every 1-minute row with the number of its 5-minute bucket
-- (whole minutes since 2015-01-01 00:00, integer-divided by 5).
SELECT
    src.Id,
    src.MinuteBar,
    src.[Open],
    src.High,
    src.Low,
    src.[Close],
    DATEDIFF(MINUTE, '2015-01-01T00:00:00', src.MinuteBar) / 5 AS Interval
INTO #5MinuteData
FROM #MinuteData AS src
ORDER BY src.MinuteBar;

-- Step 2: aggregate per bucket before the self join. The smallest and largest Id
-- of each bucket are kept as pointers to its opening and closing rows
-- (this assumes Id increases together with MinuteBar).
SELECT
    bars.Interval,
    MIN(bars.MinuteBar) AS MinuteBar,
    MIN(bars.Id)        AS OpenId,
    MAX(bars.Id)        AS CloseId,
    MIN(bars.Low)       AS Low,
    MAX(bars.High)      AS High
INTO #DataMinMax
FROM #5MinuteData AS bars
GROUP BY bars.Interval;

-- Step 3: self join back to the tagged rows to fetch the Open of the first row
-- and the Close of the last row of every bucket.
SELECT
    agg.Interval,
    agg.MinuteBar,
    firstBar.[Open],
    lastBar.[Close],
    agg.Low,
    agg.High
FROM #DataMinMax AS agg
INNER JOIN #5MinuteData AS firstBar
    ON firstBar.Id = agg.OpenId
INNER JOIN #5MinuteData AS lastBar
    ON lastBar.Id = agg.CloseId;

DROP TABLE #DataMinMax;
DROP TABLE #5MinuteData;
返工尝试
而不是上述查询,我一直在考虑使用 FIRST_VALUE 和 LAST_VALUE,因为它似乎是我想要的,但我无法完全让它与我正在做的分组一起工作。可能有比我正在尝试做的更好的解决方案,所以我愿意接受建议。目前我正在尝试这样做:
-- Attempted rewrite using FIRST_VALUE / LAST_VALUE together with GROUP BY.
-- NOTE: this does not run as written. The OVER (ORDER BY MinuteBar) clauses use
-- MinuteBar outside of any aggregate or GROUP BY expression, which SQL Server
-- rejects; removing the two window-function lines lets the query run.
SELECT MIN(MinuteBar) MinuteBar5 ,
FIRST_VALUE([Open]) OVER (ORDER BY MinuteBar) AS Opening,
MAX(High) AS High ,
MIN(Low) AS Low ,
LAST_VALUE([Close]) OVER (ORDER BY MinuteBar) AS Closing ,
DATEDIFF(MINUTE, '2015-01-01 00:00:00', MinuteBar) / 5 AS Interval
FROM #MinuteData
GROUP BY DATEDIFF(MINUTE, '2015-01-01 00:00:00', MinuteBar) / 5
这给了我以下错误。该错误与 FIRST_VALUE 和 LAST_VALUE 有关,因为如果我删除这两行,查询就可以正常运行:
列 '#MinuteData.MinuteBar' 在选择列表中无效,因为它不包含在聚合函数或 GROUP BY 子句中。
-- Compute each row's 5-minute bucket number once, so the expression does not
-- have to be repeated in every window definition.
;WITH Bucketed AS
(
    SELECT
        m.MinuteBar,
        m.[Open],
        m.High,
        m.Low,
        m.[Close],
        DATEDIFF(MINUTE, '2015-01-01 00:00:00', m.MinuteBar) / 5 AS Interval
    FROM #MinuteData AS m
)
SELECT
    MIN(T.MinuteBar) AS MinuteBar5,
    T.Opening,
    MAX(T.High)      AS High,
    MIN(T.Low)       AS Low,
    T.Closing,
    T.Interval
FROM
(
    -- Window functions are evaluated per row, so every row of a bucket carries
    -- that bucket's opening price (first row by time) and closing price
    -- (first row when ordered by time descending).
    SELECT
        b.MinuteBar,
        b.High,
        b.Low,
        b.Interval,
        FIRST_VALUE(b.[Open])  OVER (PARTITION BY b.Interval ORDER BY b.MinuteBar)      AS Opening,
        FIRST_VALUE(b.[Close]) OVER (PARTITION BY b.Interval ORDER BY b.MinuteBar DESC) AS Closing
    FROM Bucketed AS b
) AS T
-- Opening and Closing are constant within a bucket, so grouping by them
-- alongside Interval still yields one row per bucket.
GROUP BY T.Interval, T.Opening, T.Closing;
上面是一个与您当前尝试很接近的解决方案。您的尝试中有两处错误:
1. FIRST_VALUE 和 LAST_VALUE 是分析(窗口)函数,它们作用于窗口或分区,而不是 GROUP BY 的分组。您可以单独运行上面的嵌套查询并查看其结果。
2. LAST_VALUE 返回的是当前窗口框架(frame)中的最后一个值。您的查询没有指定窗口框架,而默认框架是从当前分区的第一行到当前行,因此(在排序值不重复时)LAST_VALUE 得到的只是当前行自己的值。您可以改用按降序排序的 FIRST_VALUE,也可以像下面这样显式指定窗口框架:
-- Select-list fragment (not a complete statement): LAST_VALUE with an explicit frame spanning the whole partition.
LAST_VALUE([Close]) OVER (PARTITION BY DATEDIFF(MINUTE, '2015-01-01 00:00:00', MinuteBar) / 5 ORDER BY MinuteBar ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS Closing,
这里有一种方法可以在没有临时表的情况下做到这一点:
-- Aggregate 1-minute bars into 5-minute bars without temporary tables.
;WITH CTEInterval AS
( -- This replaces your first temporary table (#5MinuteData)
    SELECT [Id],
           [MinuteBar],
           [Open],
           [High],
           [Low],
           [Close],
           -- Bucket number = whole minutes since 1900-01-01 00:00, integer-divided by 5.
           -- DATEPART(MINUTE, MinuteBar) / 5 would only yield 0..11 and therefore
           -- merge the same 5-minute slot of different hours or days into one group.
           DATEDIFF(MINUTE, '19000101', [MinuteBar]) / 5 AS Interval
    FROM #MinuteData
), CTEOpenClose AS
( -- This is instead of your second temporary table (#DataMinMax)
    SELECT [Id],
           [MinuteBar],
           -- First row of the bucket by time carries the opening price.
           FIRST_VALUE([Open]) OVER (PARTITION BY Interval ORDER BY [MinuteBar]) AS [Open],
           [High],
           [Low],
           -- Ordering descending makes the bucket's last row the "first" one,
           -- which avoids LAST_VALUE and its default (partial) window frame.
           FIRST_VALUE([Close]) OVER (PARTITION BY Interval ORDER BY [MinuteBar] DESC) AS [Close],
           Interval
    FROM CTEInterval
)
-- This is the final select: one row per 5-minute bucket.
SELECT MIN([MinuteBar]) AS [MinuteBar],
       MAX([Open])      AS [Open],  -- All values of [Open] in the same interval are the same;
       MAX([Close])     AS [Close], -- MAX (unlike AVG) also keeps the NUMERIC(12, 6) type.
       MIN([Low])       AS [Low],
       MAX([High])      AS [High]
FROM CTEOpenClose
GROUP BY Interval
ORDER BY MIN([MinuteBar]); -- deterministic, chronological output
结果:
MinuteBar Open Close Low High
2015-01-01 17:00:00.000 1.557870 1.558030 1.557870 1.558100
2015-01-01 17:05:00.000 1.558580 1.557970 1.557870 1.558710
演示在这里
-- Aggregate 1-minute bars into 5-minute bars by joining to a list of time slots.
;WITH cte AS
( -- This can be your permanent table with intervals, rather than generating it on the fly.
  -- Each row describes one half-open 5-minute slot [interval, nxtinterval).
    SELECT CAST('2015-01-01 17:00:00.000' AS DATETIME)                 AS interval,
           DATEADD(mi, 5, CAST('2015-01-01 17:00:00.000' AS DATETIME)) AS nxtinterval
    UNION ALL
    SELECT DATEADD(mi, 5, interval),
           DATEADD(mi, 5, nxtinterval)
    FROM cte
    WHERE interval <= '2015-01-01 17:45:00.000'
),
finalcte AS
(
    SELECT m.MinuteBar,
           m.Low,
           m.High,
           c.interval,
           -- Order by the bar time inside each slot. Ordering by the partition
           -- columns themselves makes every row a peer, so the row picked as
           -- "first"/"last" would be arbitrary.
           FIRST_VALUE(m.[Open])  OVER (PARTITION BY c.interval ORDER BY m.MinuteBar)      AS [open],
           FIRST_VALUE(m.[Close]) OVER (PARTITION BY c.interval ORDER BY m.MinuteBar DESC) AS [close]
    FROM cte AS c
    INNER JOIN #MinuteData AS m
        -- Half-open range: BETWEEN is inclusive on both ends and would put a bar
        -- that sits exactly on a boundary (e.g. 17:05) into two slots.
        ON  m.MinuteBar >= c.interval
        AND m.MinuteBar <  c.nxtinterval
)
SELECT MIN(MinuteBar) AS minutebar,
       MIN(Low)       AS [low],
       MAX(High)      AS [High],
       MAX([open])    AS [open],  -- constant within a slot, MAX just collapses it
       MAX([close])   AS [close]
FROM finalcte
GROUP BY interval
ORDER BY MIN(MinuteBar); -- deterministic, chronological output