雪花云数据平台Merge语句在DML上获取重复错误


create or replace table catalog_page(
cp_catalog_page_sk number,
cp_catalog_page_id text,
cp_start_date_sk number,
cp_end_date_sk number,
cp_department text,
cp_catalog_number number,
cp_catalog_page_number number,
cp_description text,
cp_type text);

create or replace table dem0_json (
o_type text,
cp_catalog_page_sk number,
cp_catalog_page_id text, 
cp_start_date_sk number,
cp_end_date_sk number,
cp_department text, 
cp_catalog_number number,
cp_catalog_page_number number,
cp_description text,
cp_type text);
insert into catalog_page (cp_catalog_page_sk,   cp_catalog_page_id, cp_start_date_sk,   cp_end_date_sk, cp_department,  cp_catalog_number,  cp_catalog_page_number  ,cp_description, cp_type) values
(1, 'AAAAAAAABAAAAAAA', 2450815, 2450996, 'DEPARTMENT', 1, 1, 'In general basic characters welcome. Clearly lively friends conv', 'bi-annual'),
(2, 'AAAAAAAACAAAAAAA', 2450815, 2450996, 'DEPARTMENT', 1, 2, 'English areas will leave prisoners. Too public countries ought to become beneath the years.', 'bi-annual'),
(3, 'AAAAAAAADAAAAAAA', 2450815, 2450996, 'DEPARTMENT', 1, 3, 'Times could not address disabled Indians. Effectively public ports c' , 'bi-annual'),
(6, 'AAAAAAAAGAAAAAAA', 2450815, 2450996, 'DEPARTMENT', 1, 6, 'Exciting principles wish greatly only excellent women. Appropriate fortunes shall not', 'bi-annual'),
(7, 'AAAAAAAAHAAAAAAA', 2450815, 2450996, 'DEPARTMENT', 1, 7, 'National services must not come at least into a girls', 'bi-annual'),
(8, 'AAAAAAAAIAAAAAAA', 2450815, 2450996, 'DEPARTMENT', 1, 8, 'Areas see early for a pounds. New goods study too serious women. Unwittingly sorry incentives shall', 'bi-annual'),
(10, 'AAAAAAAAKAAAAAAA',2450815, 2450996, 'DEPARTMENT', 1, 10, 'Careful, intense funds balance perhaps boys. Romantic chips remove legs. Direct birds get ', 'bi-annual'),
(12, 'AAAAAAAAMAAAAAAA',2450815, 2450996, 'DEPARTMENT', 1, 12, 'Girls indicate so in a countries. Natural, emotional weeks try a', 'bi-annual');

源为json数据

insert into dem0_json (o_type ,CP_CATALOG_PAGE_SK, CP_CATALOG_PAGE_ID, CP_START_DATE_SK, CP_END_DATE_SK, CP_DEPARTMENT, CP_CATALOG_NUMBER, CP_CATALOG_PAGE_NUMBER, CP_DESCRIPTION, CP_TYPE)
values 
('U', 1, 'AAAAAAAABAAAAAAA', 2450815, 2450996, 'DEPARTMENT', 1, 1, 'In general basic characters welcome. Clearly lively friends conv','bi-annual'),
('I', 1, 'AAAAAAAABAAAAAAA', 2450815, 2450996, 'DEPARTMENT', 1, 1, 'In general basic characters welcome. Clearly lively friends conv','bi-annual'),
('D', 1, 'AAAAAAAABAAAAAAA', 2450815, 2450996, 'DEPARTMENT', 1, 1, 'In general basic characters welcome. Clearly lively friends conv','bi-annual'),
('I', 1, 'AAAAAAAABAAAAAAA', 2450815, 2450996, 'DEPARTMENT', 1, 1, 'In general basic characters welcome. Clearly lively friends conv','bi-annual');

MERGE INTO catalog_page AS t 
USING dem0_json AS s 
ON t.dem0_json = t.dem0_json 
WHEN MATCHED AND s.o_type='U' THEN 
UPDATE 
WHEN MATCHED AND s.o_type='D' THEN 
DELETE 
WHEN MATCHED AND s.o_type='I' THEN 
INSERT

好的,所以问题是你一行一行地思考,而不是Snowflake如何进行合并,因此文档中关于非确定性结果的部分

当合并将目标表中的一行与源中的多行连接时,以下连接条件会产生不确定的结果(即系统无法确定用于更新或删除目标行的源值(:

在这种情况下,合并的结果取决于为ERROR_on_NONDERISTIC_merge会话参数指定的值:

如果为TRUE(默认值(,则合并返回一个错误。如果为FALSE,则从重复项中选择一行执行更新或删除;未定义所选行。

最后一位是提示,snowflake正在"中执行这些操作;一次通过";也就是所有的删除,然后所有的更新,然后所有插入。并且一行仅处于这些步骤中的一个步骤中。

您的数据有4个操作发生在同一行。最糟糕的是,你没有操作顺序键。不存在";行顺序";

你需要的是一个订单,因此喜欢:

insert into dem0_json (o_order, o_type ,CP_CATALOG_PAGE_SK, CP_CATALOG_PAGE_ID, CP_START_DATE_SK, CP_END_DATE_SK, CP_DEPARTMENT, CP_CATALOG_NUMBER, CP_CATALOG_PAGE_NUMBER, CP_DESCRIPTION, CP_TYPE)
values 
(1, 'U', 1, 'AAAAAAAABAAAAAAA', 2450815, 2450996, 'DEPARTMENT', 1, 1, 'In general basic characters welcome. Clearly lively friends conv','bi-annual'),
(2, 'I', 1, 'AAAAAAAABAAAAAAA', 2450815, 2450996, 'DEPARTMENT', 1, 1, 'In general basic characters welcome. Clearly lively friends conv','bi-annual'),
(3, 'D', 1, 'AAAAAAAABAAAAAAA', 2450815, 2450996, 'DEPARTMENT', 1, 1, 'In general basic characters welcome. Clearly lively friends conv','bi-annual'),
(4, 'I', 1, 'AAAAAAAABAAAAAAA', 2450815, 2450996, 'DEPARTMENT', 1, 1, 'In general basic characters welcome. Clearly lively friends conv','bi-annual');

然后你需要把结果折叠起来。

如果最后一个操作是删除,那么它就是删除,而之前的所有操作都无关紧要。

如果更新链应该堆叠(如果仅传输部分值,但它看起来具有"整行"(

如果最后一行是插入,但该行已经存在,那么它应该成为更新。

折叠过程可能需要使用递归CTE。但一旦数据被折叠起来。然后你用它做一次合并。

以下是一些示例折叠测试数据,注释中包含折叠结果(假设只有有效输入(

with dem0_json(o_order, o_type ,CP_CATALOG_PAGE_SK, value) as (
SELECT * FROM values 
(1, 'U', 1, 'A'), -- U

(2, 'I', 2, 'B'), -- I

(3, 'D', 3, 'C'), -- D

(4, 'I', 4, 'D'),   
(5, 'U', 4, 'A'), -- I 5

(6, 'I', 5, 'D'),   
(7, 'D', 5, 'A'), -- no op
(8, 'D', 6, 'D'),   
(9, 'I', 6, 'A'), -- U 5

(10, 'U', 7, 'D'),   
(11, 'D', 7, 'A'), -- D
(12, 'U', 8, 'D'),   
(13, 'U', 8, 'A'), -- U 13
)

相关内容

最新更新