我的输入中有两个url格式,需要解析
URL的
abc.com/abcd?id=123
xyz.com/abcd/id123
解析后,我需要将id=123
和id123
存储在数据库中
这是我解析url和的蜂窝查询
insert into table table2
select
CASE
WHEN parse_url(url_domain,'HOST')="abc.com"
THEN
parse_url(url_domain,'HOST') as host,
split(url_domain,'\?id=')[1] as id,
count(*)
from table1
GROUP BY parse_url(url_domain, 'HOST'), split(url_domain,'\?id=')[1]
WHEN parse_url(url_domain,'HOST')="xyz.com"
THEN
parse_url(url_domain,'HOST') as host,
split(url_domain,'\/id')[1] as id,
count(*)
from table1
GROUP BY parse_url(url_domain, 'HOST'), split(url_domain,'\/id=')[1]
END
ORDER BY host, id DESC limit 100;
但当我执行查询时,它会给出以下错误
FAILED: ParseException line 6:33 missing KW_END at 'as' near ']'
我尝试删除id别名和其他组合,但没有工作
信息:我配置单元我不能在Groupby子句中使用别名作为其配置单元限制
split(url_domain,'\?id=')[1] as id
如果我使用GROUP BY id
,它会出现错误,但这可以很好地使用GROUP BY parse_url(url_domain, 'HOST')
因此,我无法将GROUP BY移动到CASE语句之外
更新
insert into table table2
select
CASE
WHEN parse_url(url_domain,'HOST')="abc.com"
THEN
parse_url(url_domain,'HOST') as host, split(url_domain,'\?id=')[1] as id,
WHEN parse_url(url_domain,'HOST')="xyz.com"
THEN
parse_url(url_domain,'HOST') as host, split(url_domain,'\/id')[1] as id,
END
count(*)
from table1
GROUP BY parse_url(url_domain, 'HOST')
ORDER BY host, id DESC limit 100;
相同错误:(
错误日志
NoViableAltException(262@[146:1: selectExpression : ( expression | tableAllColumns );])
at org.antlr.runtime.DFA.noViableAlt(DFA.java:158)
at org.antlr.runtime.DFA.predict(DFA.java:116)
at org.apache.hadoop.hive.ql.parse.HiveParser_SelectClauseParser.selectExpression(HiveParser_SelectClauseParser.java:2882)
at org.apache.hadoop.hive.ql.parse.HiveParser_SelectClauseParser.selectItem(HiveParser_SelectClauseParser.java:2266)
at org.apache.hadoop.hive.ql.parse.HiveParser_SelectClauseParser.selectList(HiveParser_SelectClauseParser.java:1052)
at org.apache.hadoop.hive.ql.parse.HiveParser_SelectClauseParser.selectClause(HiveParser_SelectClauseParser.java:789)
at org.apache.hadoop.hive.ql.parse.HiveParser.selectClause(HiveParser.java:31425)
at org.apache.hadoop.hive.ql.parse.HiveParser.regular_body(HiveParser.java:29083)
at org.apache.hadoop.hive.ql.parse.HiveParser.queryStatement(HiveParser.java:28968)
at org.apache.hadoop.hive.ql.parse.HiveParser.queryStatementExpression(HiveParser.java:28762)
at org.apache.hadoop.hive.ql.parse.HiveParser.execStatement(HiveParser.java:1238)
at org.apache.hadoop.hive.ql.parse.HiveParser.statement(HiveParser.java:938)
at org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:190)
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:424)
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:342)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1000)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:911)
at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:259)
at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:216)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:413)
at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:781)
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:675)
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:614)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:94)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:55)
at java.lang.reflect.Method.invoke(Method.java:619)
at org.apache.hadoop.util.RunJar.main(RunJar.java:212)
FAILED: ParseException line 6:33 missing KW_END at 'as' near ',' in select expression
line 7:0 cannot recognize input near 'WHEN' 'parse_url' '(' in select expression
我从未使用过hive,但您是否可以在THEN条件下使用看起来像是子查询的内容?我知道在transact-sql中必须有一个值。
最后通过创建两个单独的CASE
语句解决了这个问题,一个用于parse_url
,一个为split_url
。由于hive在GROUP BY
子句中使用别名有问题,因此这是的唯一方法
最终配置单元SQL语句
insert into table table2 select parse_url(url_domain,'HOST') as host,
CASE
WHEN parse_url(url_domain,'HOST')="abc.com"
THEN
parse_url(url_domain,'QUERY','id')
WHEN parse_url(url_domain,'HOST')="def.com"
THEN
parse_url(url_domain,'QUERY','packageName')
WHEN parse_url(url_domain,'HOST')="xyz.com"
THEN
split(split(url_domain,'\/id')[1],'\?')[0]
ELSE
"NULL"
END as appid,
count(*)
from table1
group by parse_url(url_domain,'HOST'),
CASE
WHEN parse_url(url_domain,'HOST')="abc.com"
THEN
parse_url(url_domain,'QUERY','id')
WHEN parse_url(url_domain,'HOST')="def.com"
THEN
parse_url(url_domain,'QUERY','packageName')
WHEN parse_url(url_domain,'HOST')="xyz.com"
THEN
split(split(url_domain,'\/id')[1],'\?')[0]
ELSE
"NULL"
END
order by host,appid DESC LIMIT 100;