我在表中存储了JSON对象,我正在尝试编写一个查询以从该JSON中获取第一个元素。
复现脚本:
-- Scratch table for the repro: one row per record, with the raw JSON document kept as text in val.
create table staging.par.test_json (
    id  int,
    val varchar(2000)
);
-- Record 1: compact single-line JSON whose "list" array holds nine {"element": ...} objects.
-- The target columns are named explicitly so the insert does not depend on the table's column order.
insert into staging.par.test_json (id, val)
values (1, '{"list":[{"element":"Plumber"},{"element":"Craft"},{"element":"Plumbing"},{"element":"Electrics"},{"element":"Electrical"},{"element":"Tradesperson"},{"element":"Home services"},{"element":"Housekeepings"},{"element":"Electrical Goods"}]}');
-- Record 2: pretty-printed multi-line JSON whose "list" array holds six {"element": ...} objects.
-- The target columns are named explicitly so the insert does not depend on the table's column order.
-- The string literal (including its leading newline) is kept exactly as in the original script.
insert into staging.par.test_json (id, val)
values (2,'
{
"list": [
{
"element": "Wholesale jeweler"
},
{
"element": "Fashion"
},
{
"element": "Industry"
},
{
"element": "Jewelry store"
},
{
"element": "Business service"
},
{
"element": "Corporate office"
}
]
}');
-- Original query from the question: for each id, collect the distinct upper-cased
-- "element" values of the JSON "list" into an array and return the array's first entry.
-- NOTE(review): the result is not stable, because array_agg(DISTINCT c) below has no
-- WITHIN GROUP (ORDER BY ...) clause, so the order of the array elements is not guaranteed.
with cte_get_cats AS
(
-- Expose the raw JSON text under the name category_list.
select id,
val as category_list
from staging.par.test_json
),
cats_parse AS
(
-- Parse the JSON text so it can be traversed with path syntax (c:"list").
select id,
parse_json(category_list) as c
from cte_get_cats
),
distinct_cats as
(
-- One row per element of "list": the element's position (INDEX) and its upper-cased text.
select id,
INDEX,
UPPER(cast(value:element AS varchar)) As c
from
cats_parse,
LATERAL flatten(INPUT => c:"list")
-- NOTE(review): this sorts the CTE's rows by id and INDEX, but the aggregation in
-- cat_array is not required to honor the order of its input rows.
order by 1,2
) ,
cat_array AS
(
-- Collapse the rows back to one array per id, dropping duplicate values.
SELECT
id,
array_agg(DISTINCT c) AS sds_categories
FROM
distinct_cats
GROUP BY 1
),
sds_cats AS
(
-- Take element 0 of the array as the primary category.
select id,
cast(sds_categories[0] AS varchar) as sds_primary_category
from cat_array
)
select * from sds_cats;
val 列的示例值(类别列表):
{"list":[{"element":"Plumber"},{"element":"Craft"},{"element":"Plumbing"},{"element":"Electrics"},{"element":"Electrical"},{"element":"Tradesperson"},{"element":"Home services"},{"element":"Housekeepings"},{"element":"Electrical Goods"}]}
将其展平为列表后,我得到:
["Plumber","Craft","Plumbing","Electrics","Electrical","Tradesperson","Home services","Housekeepings","Electrical Goods"]
问题:数组元素的顺序并不总是相同。Snowflake 似乎会改变顺序,有时还会按字母顺序重新排列。我怎样才能让顺序保持固定?我不希望顺序被更改。
问题出在你使用 ARRAY_AGG 的方式上:
array_agg(DISTINCT c) AS sds_categories
这样写并没有告诉 Snowflake 数组内容应当如何排序。不应假设数组元素的顺序与输入记录的顺序相同——可能相同,但没有保证。因此你可能会想这样写:
array_agg(DISTINCT c) within group (order by index) AS sds_categories
但这行不通:使用 DISTINCT c 时,同一个 c 值可能出现在多个 index 上,因此每个不同的 c 对应哪个 index 是不确定的(Snowflake 也要求 DISTINCT 与 WITHIN GROUP 引用同一列)。如果你其实不需要 DISTINCT,那么下面的写法就可以:
array_agg(c) within group (order by index) AS sds_categories
如果确实需要 DISTINCT,就需要以某种方式为每个不同的 c 值确定一个 index。一种方法是对输入中的 index 使用 MIN 函数,即取每个值首次出现的位置。下面是完整的查询:
-- For each id, returns the de-duplicated, upper-cased categories of the JSON "list" in the
-- order of their first occurrence (sds_categories), together with the first of them
-- (sds_primary_category).
with cte_get_cats as
(
    -- Expose the raw JSON text under the name category_list.
    select
        id,
        val as category_list
    from staging.par.test_json
),
cats_parse as
(
    -- Parse the JSON text. The column is named category_json rather than c so that it
    -- cannot shadow the alias c that distinct_cats groups by.
    select
        id,
        parse_json(category_list) as category_json
    from cte_get_cats
),
distinct_cats as
(
    -- One row per distinct category per id. min(index) records the position of the
    -- category's first occurrence, which gives every distinct value a single sort key.
    select
        id,
        min(index) as index,
        upper(cast(value:element as varchar)) as c
    from
        cats_parse,
        lateral flatten(input => category_json:"list")
    group by id, c
),
cat_array as
(
    -- Rebuild one array per id. WITHIN GROUP makes the element order explicit instead of
    -- leaving it to the engine.
    select
        id,
        array_agg(c) within group (order by index) as sds_categories
    from distinct_cats
    group by id
),
sds_cats as
(
    -- Element 0 of the ordered array is the primary category. The array itself is passed
    -- through so the ordering can be inspected alongside it.
    select
        id,
        sds_categories,
        cast(sds_categories[0] as varchar) as sds_primary_category
    from cat_array
)
select
    id,
    sds_categories,
    sds_primary_category
from sds_cats;