太数据与波斯特格雷斯之间的 AVG 窗口函数值的差异



在Teradata和Postgresql中作为窗口函数应用时,AVG值存在差异。请在下面找到代码

create table tab1
(pop integer,
act  varchar(100),
rate numeric(38,15),
year_month date,
a_obs  integer,
cat1 varchar(1000)
);
insert into tab1 values(19150551,'SAMPLE',0.002047878413524,'2017-09-01 00:00:00',39218,'CONSUMERDecrease');
insert into tab1 values(19092796,'SAMPLE',0.014806317524159,'2017-09-01 00:00:00',282694,'CONSUMERIncrease');
insert into tab1 values(1611642,'SAMPLE',0.001293091145552,'2017-09-01 00:00:00',2084,'OPENDecrease');
insert into tab1 values(1626776,'SAMPLE',0.010981843843283,'2017-09-01 00:00:00',17865,'OPENIncrease');
insert into tab1 values(19257622,'SAMPLE',0.002375007672287,'2017-10-01 00:00:00',45737,'CONSUMERDecrease');
insert into tab1 values(19263407,'SAMPLE',0.011992842179994,'2017-10-01 00:00:00',231023,'CONSUMERIncrease');
insert into tab1 values(1623765,'SAMPLE',0.001870344538773,'2017-10-01 00:00:00',3037,'OPENDecrease');
insert into tab1 values(1633775,'SAMPLE',0.008691527291090,'2017-10-01 00:00:00',14200,'OPENIncrease');
insert into tab1 values(19291826,'SAMPLE',0.002548384999948,'2017-11-01 00:00:00',49163,'CONSUMERDecrease');
insert into tab1 values(19364688,'SAMPLE',0.018412586869461,'2017-11-01 00:00:00',356554,'CONSUMERIncrease');
insert into tab1 values(1626310,'SAMPLE',0.001650976751050,'2017-11-01 00:00:00',2685,'OPENDecrease');
insert into tab1 values(1643574,'SAMPLE',0.010273343335925,'2017-11-01 00:00:00',16885,'OPENIncrease');
insert into tab1 values(19521183,'SAMPLE',0.002472135013539,'2017-12-01 00:00:00',48259,'CONSUMERDecrease');
insert into tab1 values(19607991,'SAMPLE',0.017408718720852,'2017-12-01 00:00:00',341350,'CONSUMERIncrease');
insert into tab1 values(1631794,'SAMPLE',0.001417458331137,'2017-12-01 00:00:00',2313,'OPENDecrease');
insert into tab1 values(1654019,'SAMPLE',0.009217548286930,'2017-12-01 00:00:00',15246,'OPENIncrease');
insert into tab1 values(19649539,'SAMPLE',0.002312878688910,'2018-01-01 00:00:00',45447,'CONSUMERDecrease');
insert into tab1 values(19735372,'SAMPLE',0.014637930311119,'2018-01-01 00:00:00',288885,'CONSUMERIncrease');
insert into tab1 values(1629580,'SAMPLE',0.001304016985972,'2018-01-01 00:00:00',2125,'OPENDecrease');
insert into tab1 values(1657584,'SAMPLE',0.008844197337812,'2018-01-01 00:00:00',14660,'OPENIncrease');
insert into tab1 values(19697940,'SAMPLE',0.002322831727582,'2018-02-01 00:00:00',45755,'CONSUMERDecrease');
insert into tab1 values(19758091,'SAMPLE',0.014947243638062,'2018-02-01 00:00:00',295329,'CONSUMERIncrease');
insert into tab1 values(1636323,'SAMPLE',0.001345088958598,'2018-02-01 00:00:00',2201,'OPENDecrease');
insert into tab1 values(1652098,'SAMPLE',0.008980702113313,'2018-02-01 00:00:00',14837,'OPENIncrease');
insert into tab1 values(20303042,'SAMPLE',0.002804899876580,'2018-03-01 00:00:00',56948,'CONSUMERDecrease');
insert into tab1 values(20285060,'SAMPLE',0.013430327541550,'2018-03-01 00:00:00',272435,'CONSUMERIncrease');
insert into tab1 values(1647063,'SAMPLE',0.001465031999383,'2018-03-01 00:00:00',2413,'OPENDecrease');
insert into tab1 values(1661797,'SAMPLE',0.008485392620158,'2018-03-01 00:00:00',14101,'OPENIncrease');
insert into tab1 values(20430539,'SAMPLE',0.002837663754246,'2018-04-01 00:00:00',57975,'CONSUMERDecrease');
insert into tab1 values(20437053,'SAMPLE',0.013785451356416,'2018-04-01 00:00:00',281734,'CONSUMERIncrease');
insert into tab1 values(1659544,'SAMPLE',0.001523310017692,'2018-04-01 00:00:00',2528,'OPENDecrease');
insert into tab1 values(1675019,'SAMPLE',0.007977819953087,'2018-04-01 00:00:00',13363,'OPENIncrease');
insert into tab1 values(20646020,'SAMPLE',0.002641380760069,'2018-05-01 00:00:00',54534,'CONSUMERDecrease');
insert into tab1 values(20601704,'SAMPLE',0.015858202797206,'2018-05-01 00:00:00',326706,'CONSUMERIncrease');
insert into tab1 values(1660905,'SAMPLE',0.001308322872169,'2018-05-01 00:00:00',2173,'OPENDecrease');
insert into tab1 values(1684229,'SAMPLE',0.010146482455771,'2018-05-01 00:00:00',17089,'OPENIncrease');
insert into tab1 values(20790580,'SAMPLE',0.002539130702462,'2018-06-01 00:00:00',52790,'CONSUMERDecrease');
insert into tab1 values(20733151,'SAMPLE',0.011445679433869,'2018-06-01 00:00:00',237305,'CONSUMERIncrease');
insert into tab1 values(1683987,'SAMPLE',0.001293359153010,'2018-06-01 00:00:00',2178,'OPENDecrease');
insert into tab1 values(1690818,'SAMPLE',0.007794452152745,'2018-06-01 00:00:00',13179,'OPENIncrease');
insert into tab1 values(20920348,'SAMPLE',0.002633034593880,'2018-07-01 00:00:00',55084,'CONSUMERDecrease');
insert into tab1 values(20821752,'SAMPLE',0.013583487114821,'2018-07-01 00:00:00',282832,'CONSUMERIncrease');
insert into tab1 values(1684491,'SAMPLE',0.001687156535713,'2018-07-01 00:00:00',2842,'OPENDecrease');
insert into tab1 values(1695645,'SAMPLE',0.013209722553954,'2018-07-01 00:00:00',22399,'OPENIncrease');
insert into tab1 values(20962139,'SAMPLE',0.002833489464028,'2018-08-01 00:00:00',59396,'CONSUMERDecrease');
insert into tab1 values(20974476,'SAMPLE',0.013324862084755,'2018-08-01 00:00:00',279482,'CONSUMERIncrease');
insert into tab1 values(1687619,'SAMPLE',0.001673363478368,'2018-08-01 00:00:00',2824,'OPENDecrease');
insert into tab1 values(1705427,'SAMPLE',0.023551872932702,'2018-08-01 00:00:00',40166,'OPENIncrease');
insert into tab1 values(21186671,'SAMPLE',0.003380616048647,'2018-09-01 00:00:00',71624,'CONSUMERDecrease');
insert into tab1 values(21118710,'SAMPLE',0.011539672640990,'2018-09-01 00:00:00',243703,'CONSUMERIncrease');
insert into tab1 values(1697221,'SAMPLE',0.001652701681160,'2018-09-01 00:00:00',2805,'OPENDecrease');
insert into tab1 values(1716502,'SAMPLE',0.010922212732639,'2018-09-01 00:00:00',18748,'OPENIncrease');
insert into tab1 values(21192289,'SAMPLE',0.003490703623379,'2018-10-01 00:00:00',73976,'CONSUMERDecrease');
insert into tab1 values(21247435,'SAMPLE',0.012780225001277,'2018-10-01 00:00:00',271547,'CONSUMERIncrease');
insert into tab1 values(1713316,'SAMPLE',0.001665775607068,'2018-10-01 00:00:00',2854,'OPENDecrease');
insert into tab1 values(1731032,'SAMPLE',0.008466625689184,'2018-10-01 00:00:00',14656,'OPENIncrease');
insert into tab1 values(21418469,'SAMPLE',0.003610855659198,'2018-11-01 00:00:00',77339,'CONSUMERDecrease');
insert into tab1 values(21341613,'SAMPLE',0.014239551621520,'2018-11-01 00:00:00',303895,'CONSUMERIncrease');
insert into tab1 values(1728070,'SAMPLE',0.001842517953555,'2018-11-01 00:00:00',3184,'OPENDecrease');
insert into tab1 values(1740742,'SAMPLE',0.008774419184463,'2018-11-01 00:00:00',15274,'OPENIncrease');
insert into tab1 values(21357655,'SAMPLE',0.003238230039768,'2018-12-01 00:00:00',69161,'CONSUMERDecrease');
insert into tab1 values(21405448,'SAMPLE',0.013063076278525,'2018-12-01 00:00:00',279621,'CONSUMERIncrease');
insert into tab1 values(1766407,'SAMPLE',0.003247835861158,'2018-12-01 00:00:00',5737,'OPENDecrease');
insert into tab1 values(1774356,'SAMPLE',0.009075968971277,'2018-12-01 00:00:00',16104,'OPENIncrease');
insert into tab1 values(21480474,'SAMPLE',0.003105937047758,'2019-01-01 00:00:00',66717,'CONSUMERDecrease');
insert into tab1 values(21503589,'SAMPLE',0.011378472681932,'2019-01-01 00:00:00',244678,'CONSUMERIncrease');
insert into tab1 values(1785520,'SAMPLE',0.003119539405887,'2019-01-01 00:00:00',5570,'OPENDecrease');
insert into tab1 values(1791290,'SAMPLE',0.008353756231543,'2019-01-01 00:00:00',14964,'OPENIncrease');
insert into tab1 values(21540829,'SAMPLE',0.003581477760211,'2019-02-01 00:00:00',77148,'CONSUMERDecrease');
insert into tab1 values(21591541,'SAMPLE',0.010447934216460,'2019-02-01 00:00:00',225587,'CONSUMERIncrease');
insert into tab1 values(1797190,'SAMPLE',0.002124427578609,'2019-02-01 00:00:00',3818,'OPENDecrease');
insert into tab1 values(1807594,'SAMPLE',0.007535431075784,'2019-02-01 00:00:00',13621,'OPENIncrease');
select rate,AVG(rate) OVER (PARTITION BY category ORDER BY year_month) avg,category 
from tab1;

当相同的数据在 postgres 中可用时,avg 值与 teradata 不匹配。我需要在 Postgres 中匹配的值

有两个潜在的问题。

在这种情况下,Teradata 不遵循标准 SQL,这要求ORDER BY默认为 RANGE UNBOUNDED PRECEDING(TD 不支持(。您需要添加一个显式ROWS UNBOUNDED PECEDING(只要year_month中没有重复项,否则您将遇到麻烦(。

Teradata(与多个 DBMS 一样(以 FLOAT 的形式返回 AVG 的结果,其精度为 15 位。因此,不匹配可能意味着 PG 返回更高的精度。在这种情况下,您需要手动计算平均值,即总和/计数:

Sum(Cast(rate AS number))
Over (PARTITION BY category 
      ORDER BY year_month
      ROWS Unbounded Preceding)
/
Row_Number()
Over (PARTITION BY category 
      ORDER BY year_month)

numberdecimal/numeric 都有 38 位精度,但对于一个数字,您不必指定确切的精度,因此这是与 PG 结果最接近的可能匹配。当然,您可以将结果转换为固定精度,例如 cast(... as decimal(38,20)) .

最新更新