I can print the SQL, and it runs fine through Hive, but when I submit the same statement through Spark SQL I get a parse error.
Exception in thread "main" org.apache.spark.sql.catalyst.parser.ParseException:
mismatched input 'from' expecting {<EOF>, 'WHERE', 'GROUP', 'ORDER', 'HAVING', 'LIMIT', 'LATERAL', 'WINDOW', 'UNION', 'EXCEPT', 'MINUS', 'INTERSECT', 'SORT', 'CLUSTER', 'DISTRIBUTE'}(line 3, pos 11)
== SQL ==
INSERT OVERWRITE table zytrain.archer_odm_cate_info partition(data_date='20190227')
select u.* from
-----------^^^
(SELECT
t.cat_level1_name,t.cat_level1_id,
t.cat_level2_name,t.goods_type,
COUNT(t.goods_id) goods_cnt,
COUNT(CASE WHEN t.sales_volume > 0 THEN t.goods_id END) saled_goods_cnt,
SUM(expo_pv) expo_pv,
SUM(goods_click) goods_click,
SUM(cart_click) cart_click,
SUM(bill_cnt) bill_cnt,
SUM(sales_volume) sales_volume,
SUM(sales_amount) sales_amount,
SUM(gross_profit) gross_profit
FROM (SELECT a.cat_level1_name,a.cat_level1_id,a.cat_level2_name,
case when t1.goods_id is not null then 'IS_ODM' else 'BU_NEWIN' end goods_type,
b.goods_id,
SUM(b.expo_pv) expo_pv,
SUM(b.goods_click) goods_click,
SUM(b.cart_click) cart_click,
SUM(b.cur_day_bill) bill_cnt,
SUM(b.sales_volume) sales_volume,
SUM(b.sales_amount) sales_amount,
SUM(b.gross_profit) gross_profit
FROM dim.dim_jc_goods a
LEFT OUTER JOIN rpt.rpt_sum_goods_daily b
ON a.goods_id = b.goods_id
AND b.site_id = 600
AND b.data_date BETWEEN '20190212'
AND '20190227'
LEFT OUTER JOIN
(
SELECT DISTINCT a3.goods_id
FROM JOLLY.WHO_product_pool a1,
jolly.who_esoloo_supplier a2,
dim.dim_jc_goods a3
WHERE a1.supp_code = a2.code
AND a2.supplier_genre = 8
AND from_unixtime(gmt_created,'yyyyMMdd') >= '20190212'
AND from_unixtime(gmt_created,'yyyyMMdd') <= '20190227'
AND substr(a3.first_on_sale_time,1,10) <= '2019-02-27'
AND a1.goods_id > 0
AND a1.goods_id = a3.goods_id
) t1 ON a.goods_id = t1.goods_id
WHERE substr(a.first_on_sale_time,1,10) BETWEEN '2019-02-13' AND '2019-02-27'
AND substr(a.add_time,1,10) BETWEEN '2019-02-12' AND '2019-02-27'
AND a.cat_level1_id IN (2,324,7574,7576)
GROUP BY a.cat_level1_name,a.cat_level1_id,
a.cat_level2_name,
b.goods_id,case when t1.goods_id is not null then 'IS_ODM' else 'BU_NEWIN' end) t
GROUP BY t.cat_level1_name,t.cat_level1_id,
t.cat_level2_name,t.goods_type order by t.cat_level1_id,t.goods_type
union all
SELECT
t.cat_level1_name,t.cat_level1_id,
t.cat_level2_name,t.goods_type,
COUNT(t.goods_id) goods_cnt,
COUNT(CASE WHEN t.sales_volume > 0 THEN t.goods_id END) saled_goods_cnt,
SUM(expo_pv) expo_pv,
SUM(goods_click) goods_click,
SUM(cart_click) cart_click,
SUM(bill_cnt) bill_cnt,
SUM(sales_volume) sales_volume,
SUM(sales_amount) sales_amount,
SUM(gross_profit) gross_profit
FROM (SELECT a.cat_level1_name,a.cat_level1_id,a.cat_level2_name,
'BU_ALL' goods_type,
b.goods_id,
SUM(b.expo_pv) expo_pv,
SUM(b.goods_click) goods_click,
SUM(b.cart_click) cart_click,
SUM(b.cur_day_bill) bill_cnt,
SUM(b.sales_volume) sales_volume,
SUM(b.sales_amount) sales_amount,
SUM(b.gross_profit) gross_profit
FROM dim.dim_jc_goods a
LEFT OUTER JOIN rpt.rpt_sum_goods_daily b
ON a.goods_id = b.goods_id
AND b.site_id = 600
AND b.data_date BETWEEN '20190212'
AND '20190227'
LEFT OUTER JOIN
(
SELECT DISTINCT a3.goods_id
FROM JOLLY.WHO_product_pool a1,
jolly.who_esoloo_supplier a2,
dim.dim_jc_goods a3
WHERE a1.supp_code = a2.code
AND a2.supplier_genre = 8
AND from_unixtime(gmt_created,'yyyyMMdd') >= '20190212'
AND from_unixtime(gmt_created,'yyyyMMdd') <= '20190227'
AND substr(a3.first_on_sale_time,1,10) <= '2019-02-27'
AND a1.goods_id > 0
AND a1.goods_id = a3.goods_id
) t1 ON a.goods_id = t1.goods_id
WHERE
t1.goods_id is null
AND a.cat_level1_id IN (2,324,7574,7576)
GROUP BY a.cat_level1_name,a.cat_level1_id,
a.cat_level2_name,
b.goods_id) t
GROUP BY t.cat_level1_name,t.cat_level1_id,
t.cat_level2_name,t.goods_type order by t.cat_level1_id,t.goods_type) u
Error output:
at org.apache.spark.sql.catalyst.parser.ParseException.withCommand(ParseDriver.scala:197)
at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:99)
at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:45)
at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:53)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:592)
at com.ODM.odm_info$.main(odm_info.scala:163)
at com.ODM.odm_info.main(odm_info.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:743)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:187)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:212)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:126)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
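I cannot pin down the root cause from the caret alone (it points at the outer from), but one construct in this statement that Spark's parser is known to reject, while Hive evidently tolerates it since the statement runs there, is the ORDER BY sitting directly before UNION ALL inside the derived table: Spark only accepts ORDER BY at the very end of a query, or inside a parenthesized branch. A minimal sketch of the combined statement with each branch parenthesized; subqueryA and subqueryB are hypothetical placeholders standing for the two big SELECTs above, not variables from my code:

val combinedSql =
  s"""
     |INSERT OVERWRITE TABLE zytrain.archer_odm_cate_info partition(data_date='20190227')
     |SELECT u.* FROM (
     |  ($subqueryA)   -- parenthesize each branch so its ORDER BY parses,
     |  UNION ALL      -- or drop the inner ORDER BY entirely: ordering inside
     |  ($subqueryB)   -- a derived table is not preserved by the outer query
     |) u
   """.stripMargin

The Scala code below takes the other route: it splits the statement into two queries, unions the resulting DataFrames, and inserts through a temp view.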
val data1 = spark.sql(
s"""
|SELECT
|t.cat_level1_name,t.cat_level1_id,
| t.cat_level2_name,t.goods_type,
| COUNT(t.goods_id) goods_cnt,
| COUNT(CASE WHEN t.sales_volume > 0 THEN t.goods_id END) saled_goods_cnt,
| SUM(expo_pv) expo_pv,
| SUM(goods_click) goods_click,
| SUM(cart_click) cart_click,
| SUM(bill_cnt) bill_cnt,
| SUM(sales_volume) sales_volume,
| SUM(sales_amount) sales_amount,
| SUM(gross_profit) gross_profit
|FROM (SELECT a.cat_level1_name,a.cat_level1_id,a.cat_level2_name,
| case when t1.goods_id is not null then 'IS_ODM' else 'BU_NEWIN' end goods_type,
| b.goods_id,
| SUM(b.expo_pv) expo_pv,
| SUM(b.goods_click) goods_click,
| SUM(b.cart_click) cart_click,
| SUM(b.cur_day_bill) bill_cnt,
| SUM(b.sales_volume) sales_volume,
| SUM(b.sales_amount) sales_amount,
| SUM(b.gross_profit) gross_profit
| FROM dim.dim_jc_goods a
| LEFT OUTER JOIN rpt.rpt_sum_goods_daily b
| ON a.goods_id = b.goods_id
| AND b.site_id = 600
| AND b.data_date BETWEEN '20190212'
| AND '$v_date1'
| LEFT OUTER JOIN
| (
| SELECT DISTINCT a3.goods_id
| FROM JOLLY.WHO_product_pool a1,
| jolly.who_esoloo_supplier a2,
| dim.dim_jc_goods a3
| WHERE a1.supp_code = a2.code
| AND a2.supplier_genre = 8
| AND from_unixtime(gmt_created,'yyyyMMdd') >= '20190212'
| AND from_unixtime(gmt_created,'yyyyMMdd') <= '$v_date1'
| AND substr(a3.first_on_sale_time,1,10) <= '$v_date2'
| AND a1.goods_id > 0
| AND a1.goods_id = a3.goods_id
| ) t1 ON a.goods_id = t1.goods_id
| WHERE substr(a.first_on_sale_time,1,10) BETWEEN '2019-02-13' AND '$v_date2'
| AND substr(a.add_time,1,10) BETWEEN '2019-02-12' AND '$v_date2'
| AND a.cat_level1_id IN (2,324,7574,7576)
| GROUP BY a.cat_level1_name,a.cat_level1_id,
| a.cat_level2_name,
| b.goods_id,case when t1.goods_id is not null then 'IS_ODM' else 'BU_NEWIN' end) t
|GROUP BY t.cat_level1_name,t.cat_level1_id,
| t.cat_level2_name,t.goods_type order by t.cat_level1_id,t.goods_type
""".stripMargin).cache()
val data_2 = spark.sql(
s"""
|SELECT
|t.cat_level1_name,t.cat_level1_id,
| t.cat_level2_name,t.goods_type,
| COUNT(t.goods_id) goods_cnt,
| COUNT(CASE WHEN t.sales_volume > 0 THEN t.goods_id END) saled_goods_cnt,
| SUM(expo_pv) expo_pv,
| SUM(goods_click) goods_click,
| SUM(cart_click) cart_click,
| SUM(bill_cnt) bill_cnt,
| SUM(sales_volume) sales_volume,
| SUM(sales_amount) sales_amount,
| SUM(gross_profit) gross_profit
|FROM (SELECT a.cat_level1_name,a.cat_level1_id,a.cat_level2_name,
| 'BU_ALL' goods_type,
| b.goods_id,
| SUM(b.expo_pv) expo_pv,
| SUM(b.goods_click) goods_click,
| SUM(b.cart_click) cart_click,
| SUM(b.cur_day_bill) bill_cnt,
| SUM(b.sales_volume) sales_volume,
| SUM(b.sales_amount) sales_amount,
| SUM(b.gross_profit) gross_profit
| FROM dim.dim_jc_goods a
| LEFT OUTER JOIN rpt.rpt_sum_goods_daily b
| ON a.goods_id = b.goods_id
| AND b.site_id = 600
| AND b.data_date BETWEEN '20190212'
| AND '$v_date1'
| LEFT OUTER JOIN
| (
| SELECT DISTINCT a3.goods_id
| FROM JOLLY.WHO_product_pool a1,
| jolly.who_esoloo_supplier a2,
| dim.dim_jc_goods a3
| WHERE a1.supp_code = a2.code
| AND a2.supplier_genre = 8
| AND from_unixtime(gmt_created,'yyyyMMdd') >= '20190212'
| AND from_unixtime(gmt_created,'yyyyMMdd') <= '$v_date1'
| AND substr(a3.first_on_sale_time,1,10) <= '$v_date2'
| AND a1.goods_id > 0
| AND a1.goods_id = a3.goods_id
| ) t1 ON a.goods_id = t1.goods_id
| WHERE
| t1.goods_id is null
| AND a.cat_level1_id IN (2,324,7574,7576)
| GROUP BY a.cat_level1_name,a.cat_level1_id,
| a.cat_level2_name,
| b.goods_id) t
|GROUP BY t.cat_level1_name,t.cat_level1_id,
| t.cat_level2_name,t.goods_type order by t.cat_level1_id,t.goods_type
""".stripMargin).cache()
val data_3 = data1.union(data_2)
data_3.createOrReplaceTempView("data_3")
spark.sql(
s"""
| INSERT OVERWRITE table zytrain.archer_odm_cate_info partition(data_date='$v_date1')
| select * from data_3
""".stripMargin)
println("end")