将 GROUP BY 与 FIRST_VALUE 和 LAST_VALUE 一起使用



我正在使用一些当前以 1 分钟间隔存储的数据,如下所示:

-- Sample fixture: ten consecutive 1-minute OHLC bars in a local temporary table.
CREATE TABLE #MinuteData (
    [Id]        INT,
    [MinuteBar] DATETIME,        -- timestamp identifying the 1-minute bar
    [Open]      NUMERIC(12, 6),
    [High]      NUMERIC(12, 6),
    [Low]       NUMERIC(12, 6),
    [Close]     NUMERIC(12, 6)
);

INSERT INTO #MinuteData ([Id], [MinuteBar], [Open], [High], [Low], [Close])
VALUES
    (1,  '2015-01-01 17:00:00', 1.557870, 1.557880, 1.557870, 1.557880),
    (2,  '2015-01-01 17:01:00', 1.557900, 1.557900, 1.557880, 1.557880),
    (3,  '2015-01-01 17:02:00', 1.557960, 1.558070, 1.557960, 1.558040),
    (4,  '2015-01-01 17:03:00', 1.558080, 1.558100, 1.558040, 1.558050),
    (5,  '2015-01-01 17:04:00', 1.558050, 1.558100, 1.558020, 1.558030),
    (6,  '2015-01-01 17:05:00', 1.558580, 1.558710, 1.557870, 1.557950),
    (7,  '2015-01-01 17:06:00', 1.557910, 1.558120, 1.557910, 1.557990),
    (8,  '2015-01-01 17:07:00', 1.557940, 1.558250, 1.557940, 1.558170),
    (9,  '2015-01-01 17:08:00', 1.558140, 1.558200, 1.558080, 1.558120),
    (10, '2015-01-01 17:09:00', 1.558110, 1.558140, 1.557970, 1.557970);

-- Show the raw 1-minute rows.
SELECT
    [Id],
    [MinuteBar],
    [Open],
    [High],
    [Low],
    [Close]
FROM #MinuteData;

-- Clean up the fixture.
DROP TABLE #MinuteData;

这些值跟踪货币汇率,因此对于每个 1 分钟间隔(即一个 bar),分钟开始时有 Open 价格,分钟结束时有 Close 价格。High 和 Low 值分别表示该分钟内的最高和最低汇率。

期望的输出

我希望将此数据重新格式化为 5 分钟间隔以产生以下输出:

MinuteBar                Open       Close       Low         High
2015-01-01 17:00:00.000  1.557870   1.558030    1.557870    1.558100
2015-01-01 17:05:00.000  1.558580   1.557970    1.557870    1.558710

这将取每个 5 分钟区间中第一分钟的 Open 值,并取该区间最后一分钟的 Close 值。High 和 Low 值分别表示这 5 分钟内最高的 High 和最低的 Low 汇率。

当前解决方案

我有一个解决方案可以做到这一点(如下),但它感觉不够优雅,因为它依赖于 Id 值和自连接(self join)。此外,我打算在更大的数据集上运行它,因此如果可能的话,我希望以更高效的方式实现:

-- Step 1: tag every 1-minute bar with the index of its 5-minute bucket
-- (whole minutes elapsed since the anchor date, integer-divided by 5).
-- No ORDER BY here: on SELECT ... INTO it does not guarantee insertion order,
-- and the later steps never rely on physical row order.
SELECT  Id,
        MinuteBar,
        [Open],
        High,
        Low,
        [Close],
        DATEDIFF(MINUTE, '2015-01-01T00:00:00', MinuteBar) / 5 AS Interval
INTO    #5MinuteData
FROM    #MinuteData;

-- Step 2: per bucket, compute the aggregates and remember which rows
-- (lowest / highest Id) supply the Open and Close values.
SELECT  Interval,
        MIN(MinuteBar) AS MinuteBar,
        MIN(Id)        AS OpenId,
        MAX(Id)        AS CloseId,
        MIN(Low)       AS Low,
        MAX(High)      AS High
INTO    #DataMinMax
FROM    #5MinuteData
GROUP BY Interval;

-- Step 3: self join back to the tagged rows to fetch the Open and Close values.
SELECT  t1.Interval,
        t1.MinuteBar,
        tOpen.[Open],
        tClose.[Close],
        t1.Low,
        t1.High
FROM    #DataMinMax AS t1
INNER JOIN #5MinuteData AS tOpen
        ON tOpen.Id = t1.OpenId
INNER JOIN #5MinuteData AS tClose
        ON tClose.Id = t1.CloseId
ORDER BY t1.Interval;   -- make the output order deterministic

DROP TABLE #DataMinMax;
DROP TABLE #5MinuteData;

返工尝试

而不是上述查询,我一直在考虑使用 FIRST_VALUE 和 LAST_VALUE,因为它似乎是我想要的,但我无法完全让它与我正在做的分组一起工作。可能有比我正在尝试做的更好的解决方案,所以我愿意接受建议。目前我正在尝试这样做:

-- Attempted single-pass rewrite: 5-minute buckets via GROUP BY, combined with
-- FIRST_VALUE / LAST_VALUE for the opening and closing prices.
-- NOTE: fails as written. The two window-function lines reference [Open], [Close]
-- and MinuteBar, which are neither aggregated nor listed in GROUP BY; without
-- those two lines the query runs.
SELECT  MIN(MinuteBar) MinuteBar5 ,
FIRST_VALUE([Open]) OVER (ORDER BY MinuteBar) AS Opening,
MAX(High) AS High ,
MIN(Low) AS Low ,
LAST_VALUE([Close]) OVER (ORDER BY MinuteBar) AS Closing ,
DATEDIFF(MINUTE, '2015-01-01 00:00:00', MinuteBar) / 5 AS Interval
FROM    #MinuteData
GROUP BY DATEDIFF(MINUTE, '2015-01-01 00:00:00', MinuteBar) / 5

这给了我以下错误;该错误与 FIRST_VALUE 和 LAST_VALUE 有关,因为如果我删除这两行,查询就可以正常运行:

列 '#MinuteData.MinuteBar' 在选择列表中无效,因为它不包含在聚合函数或 GROUP BY 子句中。

-- 5-minute OHLC bars: window functions pick the first Open / last Close of each
-- bucket, then GROUP BY collapses every bucket to a single row.
;WITH Bucketed AS
(   -- Attach the 5-minute bucket index to every 1-minute bar.
    SELECT  m.MinuteBar,
            m.[Open],
            m.High,
            m.Low,
            m.[Close],
            DATEDIFF(MINUTE, '2015-01-01 00:00:00', m.MinuteBar) / 5 AS Interval
    FROM    #MinuteData AS m
),
Edges AS
(   -- Repeat the bucket's opening and closing price on each of its rows.
    SELECT  b.Interval,
            b.MinuteBar,
            b.High,
            b.Low,
            FIRST_VALUE(b.[Open])  OVER (PARTITION BY b.Interval ORDER BY b.MinuteBar)      AS Opening,
            FIRST_VALUE(b.[Close]) OVER (PARTITION BY b.Interval ORDER BY b.MinuteBar DESC) AS Closing
    FROM    Bucketed AS b
)
SELECT  MIN(e.MinuteBar) AS MinuteBar5,
        e.Opening,
        MAX(e.High)      AS High,
        MIN(e.Low)       AS Low,
        e.Closing,
        e.Interval
FROM    Edges AS e
-- Opening and Closing are constant within a bucket, so grouping by them adds no rows.
GROUP BY e.Interval, e.Opening, e.Closing

上面是一个与您当前尝试很接近的解决方案。您的写法有两处错误:

  1. FIRST_VALUE 和 LAST_VALUE 是分析函数,它们作用于窗口或分区,而不是作用于 GROUP BY 的分组。您可以单独运行嵌套查询并查看其结果。

  2. LAST_VALUE 返回当前窗口中的最后一个值。您的查询没有指定窗口范围,而默认窗口是从当前分区的第一行到当前行,因此得到的只是当前行的值。您可以改用按降序排序的 FIRST_VALUE,也可以显式指定窗口:

    -- Select-list fragment: the explicit frame spans the whole partition, so LAST_VALUE
    -- returns the [Close] of the final row of the 5-minute bucket.
    LAST_VALUE([Close]) OVER (PARTITION BY DATEDIFF(MINUTE, '2015-01-01 00:00:00', MinuteBar) / 5 
    ORDER BY MinuteBar 
    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS Closing,
    

这里有一种方法可以在没有临时表的情况下做到这一点:

-- 5-minute OHLC bars built from the 1-minute rows in #MinuteData, without temp tables.
;WITH CTEInterval AS 
(   -- This replaces your first temporary table (#5MinuteData).
    -- The bucket index is the number of whole minutes since a fixed anchor, divided by 5.
    -- DATEPART(MINUTE, ...) / 5 must not be used here: it only sees the minute-of-hour
    -- (0-59), so bars from different hours or days would share the same bucket.
    -- The anchor is 1900-01-01 so the quotient stays positive for dates after it
    -- (integer division truncates toward zero and would merge buckets around the anchor).
    SELECT  [Id], 
            [MinuteBar], 
            [Open], 
            [High], 
            [Low], 
            [Close],
            DATEDIFF(MINUTE, '1900-01-01T00:00:00', [MinuteBar]) / 5 AS Interval
    FROM    #MinuteData
), CTEOpenClose AS 
(   -- This is instead of your second temporary table (#DataMinMax):
    -- every row carries the opening and closing price of its bucket.
    SELECT  [Id], 
            [MinuteBar], 
            FIRST_VALUE([Open]) OVER (PARTITION BY Interval ORDER BY [MinuteBar]) AS [Open],
            [High],
            [Low], 
            -- Descending order + FIRST_VALUE yields the last row's Close without
            -- having to override the default window frame.
            FIRST_VALUE([Close]) OVER (PARTITION BY Interval ORDER BY [MinuteBar] DESC) AS [Close],
            Interval
    FROM    CTEInterval
)
-- This is the final select: one row per 5-minute bucket.
SELECT  MIN([MinuteBar]) AS [MinuteBar], 
        MIN([Open])      AS [Open],   -- constant within a bucket; MIN keeps the NUMERIC(12, 6) type
        MIN([Close])     AS [Close],  -- constant within a bucket
        MIN([Low])       AS [Low], 
        MAX([High])      AS [High]
FROM    CTEOpenClose
GROUP BY Interval
ORDER BY Interval;   -- chronological, deterministic output

结果:

MinuteBar                Open       Close       Low         High
2015-01-01 17:00:00.000  1.557870   1.558030    1.557870    1.558100
2015-01-01 17:05:00.000  1.558580   1.557970    1.557870    1.558710

演示在这里

-- 5-minute OHLC bars obtained by joining the 1-minute rows to a calendar of buckets.
;with cte
as
(-- Calendar of consecutive 5-minute buckets; each bucket is the half-open range
 -- [interval, nxtinterval). This can be a permanent table with intervals rather
 -- than being generated on the fly.
select cast('2015-01-01 17:00:00.000' as datetime) as interval,dateadd(mi,5,'2015-01-01 17:00:00.000') as nxtinterval
union all
select dateadd(mi,5,interval),dateadd(mi,5,nxtinterval) from cte
where interval<='2015-01-01 17:45:00.000'
)
,finalcte
as
(-- Attach each bar to its bucket and repeat the bucket's open/close on every row.
select m.MinuteBar,
m.Low,m.High,
dense_rank() over (order by c.interval,c.nxtinterval) as grpd,
-- Order by MinuteBar inside the bucket: ordering by the partition columns makes every
-- row a tie, so which row is "first" or "last" would be arbitrary.
first_value(m.[Close]) over (partition by c.interval,c.nxtinterval order by m.MinuteBar desc) as [Close],
first_value(m.[Open]) over (partition by c.interval,c.nxtinterval order by m.MinuteBar) as [Open]
from cte c
inner join
#MinuteData m
-- Half-open range: with BETWEEN a bar stamped exactly on a boundary (e.g. 17:05:00)
-- would fall into two buckets and distort the earlier bucket's high/low.
on m.MinuteBar >= c.interval and m.MinuteBar < c.nxtinterval
)
select 
min(MinuteBar) as [minutebar],
min(Low) as [low],
max(High) as [High],
max([Open]) as [open],   -- constant within a bucket
max([Close]) as [close]  -- constant within a bucket
from finalcte
group by grpd
order by min(MinuteBar);

相关内容

  • 没有找到相关文章

最新更新