如何在varchar类型的json列中进行解析



你能告诉我如何用varchar类型进行解析吗?(在Presto和Hive中(

[{u'pageId': u'102', u'title': u'ud648', u'isBrandHomePage': 1, u'active': True, u'pageType': 1, u'type': u'page'}, {u'pageId': u'103', u'title': u'uc804uccb4 uc0c1ud488', u'isBrandHomePage': 0, u'active': True, u'pageType': 2, u'type': u'page'}, {u'pageId': u'104', u'title': u'uae30ud68duc804', u'isBrandHomePage': 0, u'active': True, u'pageType': 0, u'type': u'page'}]

在Hive中解析:将JSON转换为有效JSON,删除[],对记录进行拆分,使用jsontuple:

with mytable as (
select "[{u'pageId': u'102', u'title': u'ud648', u'isBrandHomePage': 1, u'active': True, u'pageType': 1, u'type': u'page'}, {u'pageId': u'103', u'title': u'uc804uccb4 uc0c1ud488', u'isBrandHomePage': 0, u'active': True, u'pageType': 2, u'type': u'page'}, {u'pageId': u'104', u'title': u'uae30ud68duc804', u'isBrandHomePage': 0, u'active': True, u'pageType': 0, u'type': u'page'}]" json
)
select pageId,title,isBrandHomePage,active,pageType,type
from
(
select t.json, 
regexp_replace(
regexp_replace(
regexp_replace(
regexp_replace(t.json,'\[|\]',''),
"u\x27|\x27",'"'),
'True','true'),
'False','false') json_fixed
from mytable t
)s
lateral view outer explode(split(json_fixed,'(?<=\}), *(?=\{)')) e as str_struct
lateral view json_tuple(e.str_struct,'pageId','title','isBrandHomePage','active','pageType','type') x as pageId,title,isBrandHomePage,active,pageType,type

结果:

pageid  title                 isbrandhomepage   active  pagetype    type
102     ud648                 1                 true    1           page
103     uc804uccb4 uc0c1ud488 0                 true    2           page
104     uae30ud68duc804       0                 true    0           page

在Presto:中解析

with mytable as (
select '[{u''pageId'': u''102'', u''title'': u''ud648'', u''isBrandHomePage'': 1, u''active'': True, u''pageType'': 1, u''type'': u''page''}, {u''pageId'': u''103'', u''title'': u''uc804uccb4 uc0c1ud488'', u''isBrandHomePage'': 0, u''active'': True, u''pageType'': 2, u''type'': u''page''}, {u''pageId'': u''104'', u''title'': u''uae30ud68duc804'', u''isBrandHomePage'': 0, u''active'': True, u''pageType'': 0, u''type'': u''page''}]' json
)
select json_extract(record,'$.pageId') pageId,
json_extract(record,'$.title') title,
json_extract(record,'$.isBrandHomePage') isBrandHomePage,
json_extract(record,'$.active') active,
json_extract(record,'$.pageType') pageType,
json_extract(record,'$.type') type
from
(
select t.json, 
regexp_replace(
regexp_replace(
regexp_replace(t.json,'ux27|x27','"'),
'True','true'),
'False','false') json_fixed
from mytable t
)s
CROSS JOIN
UNNEST( CAST(JSON_PARSE(json_fixed) AS ARRAY<JSON>) ) as x(record)

结果:

pageId  title                   isBrandHomePage active  pageType    type    
"102"   "ud648"                 1               TRUE    1           "page"
"103"   "uc804uccb4 uc0c1ud488" 0               TRUE    2           "page"
"104"   "uae30ud68duc804"       0               TRUE    0           "page"

最新更新