我设置了一个代码,将Google Analytics中的数据从原始GA表加载到经过调整的表中,从而提供更多见解。不知何故,任务没有在定义的3600秒时限内运行。因此,任务被取消,并且没有加载任何数据。
"Statement reached its statement or warehouse timeout of 3,600 second(s) and was canceled."
然后,我使用一个固定的子句以手动方式加载它:WHERE gae."DAY"='2020-03-31'
而不是CCD_ 2。这仍然需要很多时间,但最终会奏效。
如何加快查询速度?或者无论如何,解决我的问题。
CREATE OR REPLACE TASK dm."Website"."x009_002_all_GA_events"
WAREHOUSE = marketing_wh
SCHEDULE = 'USING CRON 26 5 * * * Europe/Berlin'
AS
merge into DM."Website".ALL_GA_EVENTS_CLEAN target --DM."Website".ALL_GA_EVENTS target
using (
SELECT
GAEVENTACTION AS GAEVENTACTION,
GAEVENTCATEGORY AS GAEVENTCATEGORY,
"DAY" AS Datum,
DEVICE_TYPE AS Device,
EVENT_COUNT AS EventCount,
GAUNIQUEEVENTS AS Uniqueevents,
EVENT_VALUE AS eventvalue,
LABELS AS labelz,
URL AS urlz,
--split_part(LABELS,'/_',2) AS "HITSTAMP",
CASE WHEN split_part(LABELS,'/_',2) IS NOT NULL THEN TRY_CAST(split_part(LABELS,'/_',2) AS timestamp) ELSE NULL END AS "HITSTAMP",
split_part(LABELS,'/_',3) AS EVENT_INFO,
split_part(LABELS,'/_',1) AS "SESSIONID",
CASE
WHEN CONTAINS (URL, '/checkout/')=TRUE THEN split_part(URL,'/',3)
WHEN CONTAINS (URL, '/auto/')=TRUE THEN split_part(split_part(URL,'/',3),'?',1)
WHEN CONTAINS (URL, '/angebote/')=TRUE THEN split_part(URL,'/',3)
ELSE 'no vehicle'
END AS vehicleID,
rank() over (partition BY "SESSIONID" order by "HITSTAMP") as "RANK",
CASE
WHEN (GAEVENTACTION= ('pdp_flash_offer_request' )) THEN
CASE WHEN split_part(LABELS,'/_',2)=(SELECT min(split_part(t2.LABELS,'/_',2))
FROM "DL_Datatap"."PUBLIC"."GA_all_events" t2
WHERE split_part(t2.LABELS,'/_',1)=split_part(gae.LABELS,'/_',1) AND gae.GAEVENTACTION=t2.GAEVENTACTION)
THEN TRUE
ELSE FALSE
END
WHEN (GAEVENTACTION= ('chat_started')) THEN
CASE WHEN split_part(LABELS,'/_',2)=(SELECT min(split_part(t2.LABELS,'/_',2))
FROM "DL_Datatap"."PUBLIC"."GA_all_events" t2
WHERE split_part(t2.LABELS,'/_',1)=split_part(gae.LABELS,'/_',1) AND gae.GAEVENTACTION=t2.GAEVENTACTION)
THEN TRUE
ELSE FALSE
END
WHEN (GAEVENTACTION= ('Direct_checkout_send')) THEN
CASE WHEN split_part(LABELS,'/_',2)=(SELECT min(split_part(t2.LABELS,'/_',2))
FROM "DL_Datatap"."PUBLIC"."GA_all_events" t2
WHERE split_part(t2.LABELS,'/_',1)=split_part(gae.LABELS,'/_',1) AND gae.GAEVENTACTION=t2.GAEVENTACTION)
THEN TRUE
ELSE FALSE
END
WHEN (GAEVENTACTION= ('pdp_offer_request')) THEN
CASE WHEN split_part(LABELS,'/_',2)=(SELECT min(split_part(t2.LABELS,'/_',2))
FROM "DL_Datatap"."PUBLIC"."GA_all_events" t2
WHERE split_part(t2.LABELS,'/_',1)=split_part(gae.LABELS,'/_',1) AND gae.GAEVENTACTION=t2.GAEVENTACTION)
THEN TRUE
ELSE FALSE
END
WHEN (GAEVENTACTION= ('agent-requested')) THEN
CASE WHEN split_part(LABELS,'/_',2)=(SELECT min(split_part(t2.LABELS,'/_',2))
FROM "DL_Datatap"."PUBLIC"."GA_all_events" t2
WHERE split_part(t2.LABELS,'/_',1)=split_part(gae.LABELS,'/_',1) AND gae.GAEVENTACTION=t2.GAEVENTACTION)
THEN TRUE
ELSE FALSE
END
WHEN (GAEVENTACTION= ('SERP_softlead_send')) THEN
CASE WHEN split_part(LABELS,'/_',2)=(SELECT min(split_part(t2.LABELS,'/_',2))
FROM "DL_Datatap"."PUBLIC"."GA_all_events" t2
WHERE split_part(t2.LABELS,'/_',1)=split_part(gae.LABELS,'/_',1) AND gae.GAEVENTACTION=t2.GAEVENTACTION)
THEN TRUE
ELSE FALSE
END
ELSE False
END AS "GOAL_EVENT"
FROM "DL_Datatap"."PUBLIC"."GA_all_events" gae
---WHERE gae."DAY"='2020-03-31'
WHERE gae."DAY">=CuRRENT_DATE-1
) SOURCE
ON target.SESSIONID=SOURCE."SESSIONID" AND target.HITSTAMP=SOURCE."HITSTAMP" AND target.EVENT_ACTION=SOURCE.GAEVENTACTION AND target."Date"=SOURCE.Datum
when NOT matched then INSERT (EVENT_ACTION, EVENT_CATEGORY, "Date", DEVICE, TOTAL_EVENTS, UNIQUE_EVENTS, EVENT_VALUE, EVENT_LABEL, URL, HITSTAMP, EVENT_INFO, SESSIONID, VEHICLEID, EVENT_SEQUENCE, GOAL_EVENT)
VALUES (SOURCE.GAEVENTACTION, SOURCE.GAEVENTCATEGORY, SOURCE.Datum, SOURCE.Device, SOURCE.eventcount, SOURCE.Uniqueevents, SOURCE. eventvalue, SOURCE.labelz, SOURCE.urlz, SOURCE."HITSTAMP", SOURCE.EVENT_INFO, SOURCE."SESSIONID", SOURCE.vehicleid, SOURCE."RANK", SOURCE."GOAL_EVENT"
)
您可以增加任务超时限制作为解决方法:
CREATE OR REPLACE TASK dm."Website"."x009_002_all_GA_events"
WAREHOUSE = marketing_wh
SCHEDULE = 'USING CRON 26 5 * * * Europe/Berlin'
USER_TASK_TIMEOUT_MS = 86400000
AS
...
https://docs.snowflake.com/en/sql-reference/sql/create-task.html#optional-参数
如果您需要帮助来调整SQL,请提交一个案例以获得支持。因为他们可以看到你的表的元数据,以及你以前运行的执行计划,他们可以指导你调整查询,集群你的目标表等等