如何提取元素 account_code 的所有值?下面的 SELECT 语句允许我提取与索引 [x] 关联的任何单个值,但我希望提取所有值(每个值各占一行),以便输出为:
account_codes
------------
1
2
3
-- Reads the account_code of ONE element of the "accounting" array from an
-- inline JSON document. The index in the JSON path ([0] here) selects which
-- element is read, so this form can only ever return a single value.
-- Note: the JSON text must not have a trailing comma after the last array
-- element; strict JSON parsers reject it.
SELECT
    JSON_EXTRACT_SCALAR(v, '$.accounting[0].account_code') AS account_codes
FROM (VALUES JSON '
{"accounting":
[
{"account_code": "1", "account_name": "Travel"},
{"account_code": "2", "account_name": "Salary"},
{"account_code": "3", "account_name": "Equipment"}
]
}'
) AS t(v)
您需要使用的运算符是 UNNEST,它会展开数组并获取所有列值。下面是我在 Hive 目录中用来创建表并获取所有帐户代码的 DDL 和查询。
DDL:
-- External table over text files under the S3 prefix below, parsed with the
-- OpenX JSON SerDe. Each record exposes one column, `accounting`: an array of
-- structs carrying account_code and account_name (both strings).
CREATE EXTERNAL TABLE `sf_73515497` (
  `accounting` array<struct<account_code:string,account_name:string>> COMMENT 'from deserializer'
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('paths'='accounting')
STORED AS
  INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
  OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 's3://path-to-json-prefix/'
带有 UNNEST 的 SQL:
-- Returns one row per element of the `accounting` array, projecting that
-- element's account_code. CROSS JOIN UNNEST pairs each source row with every
-- element of its array.
WITH dataset AS (
    SELECT accounting
    FROM "sf_73515497"
)
SELECT acct.item.account_code
FROM dataset
CROSS JOIN UNNEST(accounting) AS acct (item)