我有一个如下所示的表:
word nb_by_date.date nb_by_date.nb
---------------------------------------
abc 2020-01-01 17
2020-01-06 43
abc 2020-01-01 33
2020-01-05 12
2020-01-06 5
def 2020-01-02 11
2020-01-05 8
def 2020-01-02 1
您可以使用以下方法获得此示例:
WITH t AS (
SELECT "abc" AS word, [STRUCT('2020-01-01' AS date, 17 AS nb), STRUCT('2020-01-06' AS date, 43 AS nb)]
UNION ALL SELECT "abc" AS word, [STRUCT('2020-01-01' AS date, 33 AS nb), STRUCT('2020-01-05' AS date, 12 AS nb), STRUCT('2020-01-06' AS date, 5 AS nb)]
UNION ALL SELECT "def" AS word, [STRUCT('2020-01-02' AS date, 11 AS nb), STRUCT('2020-01-05' AS date, 8 AS nb)]
UNION ALL SELECT "def" AS word, [STRUCT('2020-01-02' AS date, 1 AS nb)]
)
我的目标是获得:
word nb_by_date.date nb_by_date.nb
---------------------------------------
abc 2020-01-01 50
2020-01-05 12
2020-01-06 55
def 2020-01-02 22
2020-01-05 8
这是我的尝试:
SELECT
word,
ARRAY(
SELECT STRUCT(date, SUM(nb))
FROM UNNEST(nb_by_date)
GROUP BY date
ORDER BY date) nb_by_date
FROM (
SELECT word, ARRAY_CONCAT_AGG(nb_by_date) nb_by_date
FROM t
GROUP BY word
)
它适用于这个玩具示例。但是,我有大量的数据,使用ARRAY_CONCAT_AGG(nb_by_date)
会创建一个超过100MB限制的行(无法查询超过100MB的行。(。我如何调整查询以使其即使使用大量数据也能正常工作?
您可以使用两个级别的聚合:
WITH t AS (
SELECT 'abc' AS word, [STRUCT('2020-01-01' AS date, 17 AS nb), STRUCT('2020-01-06' AS date, 43 AS nb)] as ar UNION ALL
SELECT 'abc' AS word, [STRUCT('2020-01-01' AS date, 33 AS nb), STRUCT('2020-01-05' AS date, 12 AS nb), STRUCT('2020-01-06' AS date, 5 AS nb)] UNION ALL
SELECT 'def' AS word, [STRUCT('2020-01-02' AS date, 11 AS nb), STRUCT('2020-01-05' AS date, 8 AS nb)] UNION ALL
SELECT 'def' AS word, [STRUCT('2020-01-02' AS date, 1 AS nb)]
)
select t.word, array_agg(struct( date, nb) order by date) as ar
from (select t.word, el.date, sum(el.nb) as nb
from t cross join
unnest(t.ar) el
group by t.word, el.date
) t
group by word