你能告诉我如何用varchar类型进行解析吗?(在Presto和Hive中(
[{u'pageId': u'102', u'title': u'ud648', u'isBrandHomePage': 1, u'active': True, u'pageType': 1, u'type': u'page'}, {u'pageId': u'103', u'title': u'uc804uccb4 uc0c1ud488', u'isBrandHomePage': 0, u'active': True, u'pageType': 2, u'type': u'page'}, {u'pageId': u'104', u'title': u'uae30ud68duc804', u'isBrandHomePage': 0, u'active': True, u'pageType': 0, u'type': u'page'}]
在Hive中解析:将JSON转换为有效JSON,删除[],对记录进行拆分,使用jsontuple:
with mytable as (
select "[{u'pageId': u'102', u'title': u'ud648', u'isBrandHomePage': 1, u'active': True, u'pageType': 1, u'type': u'page'}, {u'pageId': u'103', u'title': u'uc804uccb4 uc0c1ud488', u'isBrandHomePage': 0, u'active': True, u'pageType': 2, u'type': u'page'}, {u'pageId': u'104', u'title': u'uae30ud68duc804', u'isBrandHomePage': 0, u'active': True, u'pageType': 0, u'type': u'page'}]" json
)
select pageId,title,isBrandHomePage,active,pageType,type
from
(
select t.json,
regexp_replace(
regexp_replace(
regexp_replace(
regexp_replace(t.json,'\[|\]',''),
"u\x27|\x27",'"'),
'True','true'),
'False','false') json_fixed
from mytable t
)s
lateral view outer explode(split(json_fixed,'(?<=\}), *(?=\{)')) e as str_struct
lateral view json_tuple(e.str_struct,'pageId','title','isBrandHomePage','active','pageType','type') x as pageId,title,isBrandHomePage,active,pageType,type
结果:
pageid title isbrandhomepage active pagetype type
102 ud648 1 true 1 page
103 uc804uccb4 uc0c1ud488 0 true 2 page
104 uae30ud68duc804 0 true 0 page
在Presto:中解析
with mytable as (
select '[{u''pageId'': u''102'', u''title'': u''ud648'', u''isBrandHomePage'': 1, u''active'': True, u''pageType'': 1, u''type'': u''page''}, {u''pageId'': u''103'', u''title'': u''uc804uccb4 uc0c1ud488'', u''isBrandHomePage'': 0, u''active'': True, u''pageType'': 2, u''type'': u''page''}, {u''pageId'': u''104'', u''title'': u''uae30ud68duc804'', u''isBrandHomePage'': 0, u''active'': True, u''pageType'': 0, u''type'': u''page''}]' json
)
select json_extract(record,'$.pageId') pageId,
json_extract(record,'$.title') title,
json_extract(record,'$.isBrandHomePage') isBrandHomePage,
json_extract(record,'$.active') active,
json_extract(record,'$.pageType') pageType,
json_extract(record,'$.type') type
from
(
select t.json,
regexp_replace(
regexp_replace(
regexp_replace(t.json,'ux27|x27','"'),
'True','true'),
'False','false') json_fixed
from mytable t
)s
CROSS JOIN
UNNEST( CAST(JSON_PARSE(json_fixed) AS ARRAY<JSON>) ) as x(record)
结果:
pageId title isBrandHomePage active pageType type
"102" "ud648" 1 TRUE 1 "page"
"103" "uc804uccb4 uc0c1ud488" 0 TRUE 2 "page"
"104" "uae30ud68duc804" 0 TRUE 0 "page"