HWC execution error when writing to a Hive 3 managed table



I'm working on a Spark test application that reads an external Hive table, applies some transformations, and writes to a Hive managed table through the Hive Warehouse Connector (HWC), in order to test connectivity between Spark and Hive 3.
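For reference, the read side goes through HWC roughly like this (a minimal sketch: the table name is a placeholder, and hive is the HiveWarehouseSession built the same way as in the write code further down):

// Minimal read sketch; some_db.some_external_table is a placeholder,
// and "hive" is the HiveWarehouseSession built as in the write snippet below.
val sourceDF = hive.executeQuery("SELECT * FROM some_db.some_external_table")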

The application reads the table through HWC, but when it starts inserting it crashes with the following error:

llap.HiveWarehouseDataSourceWriter - HiveWarehouseDataSourceWriter: com.hortonworks.spark.sql.hive.llap.HiveWarehouseDataSourceWriter@6b6ddc37, msg:Committed File /tmp/20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464/20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464_39_0
2020-03-04 15:06:30.064 [main] INFO  llap.HiveWarehouseDataSourceWriter - Handling write: database:von_onogoro, table:mtm_reconciliation, savemode: Overwrite, tableExists:true, createTable:false, loadData:true
2020-03-04 15:06:30.072 [main] INFO  llap.HiveWarehouseDataSourceWriter - Load data query: LOAD DATA INPATH '/tmp/20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464' OVERWRITE  INTO TABLE von_onogoro.mtm_reconciliation
2020-03-04 15:06:30.472 [main] INFO  llap.HiveWarehouseDataSourceWriter - Commit job 20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464
2020-03-04 15:06:30.475 [main] ERROR llap.HiveWarehouseDataSourceWriter - Aborted DataWriter job 20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464
2020-03-04 15:06:30.481 [main] ERROR processing.SpringTaskProcessor - Erreur fatale
org.apache.spark.SparkException: Writing job aborted.
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec.doExecute(WriteToDataSourceV2.scala:112) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) ~[spark-core_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:664) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:664) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:664) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:256) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
at com.test.sparkHive.computation.reconciliation.mtm.dao.MtmReconciliationDao.saveIntoHive(MtmReconciliationDao.scala:85) ~[onogoro-computation_2.11-1.12.2-SNAPSHOT.jar:?]
at com.test.sparkHive.computation.reconciliation.mtm.MtmReconciliationFeeder.computeAndFeedMtmReconciliation(MtmReconciliationFeeder.scala:122) ~[onogoro-computation_2.11-1.12.2-SNAPSHOT.jar:?]
at com.test.sparkHive.reconciliation.mtm.MtmReconciliationProcessor.run(MtmReconciliationProcessor.java:46) ~[onogoro-1.12.3-SNAPSHOT.jar:?]
at com.sparkHive.common.processing.SpringTaskProcessor.start(SpringTaskProcessor.java:154) [common-econometrics-core-3.15.29.jar:?]
at com.sparkHive.common.processing.SpringTaskProcessor.start(SpringTaskProcessor.java:118) [common-econometrics-core-3.15.29.jar:?]
at com.test.sparkHive.reconciliation.mtm.MtmReconciliationProcessor.main(MtmReconciliationProcessor.java:40) [onogoro-1.12.3-SNAPSHOT.jar:?]
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_222]
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_222]
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_222]
at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_222]
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) [spark-core_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:900) [spark-core_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:192) [spark-core_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:217) [spark-core_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:137) [spark-core_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) [spark-core_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
Caused by: java.lang.RuntimeException: java.sql.SQLException: Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask. org.apache.hadoop.hive.ql.metadata.HiveException: Load Data failed for hdfs://hdfs-prince/tmp/20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464/20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464_0_0 as the file is not owned by hive and load data is also not ran as hive
at com.hortonworks.spark.sql.hive.llap.HiveWarehouseDataSourceWriter.commit(HiveWarehouseDataSourceWriter.java:172) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec.doExecute(WriteToDataSourceV2.scala:91) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
... 29 more
Caused by: java.sql.SQLException: Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask. org.apache.hadoop.hive.ql.metadata.HiveException: Load Data failed for hdfs://hdfs-prince/tmp/20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464/20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464_0_0 as the file is not owned by hive and load data is also not ran as hive
at shadehive.org.apache.hive.jdbc.HiveStatement.waitForOperationToComplete(HiveStatement.java:401) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
at shadehive.org.apache.hive.jdbc.HiveStatement.execute(HiveStatement.java:266) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
at shadehive.org.apache.hive.jdbc.HivePreparedStatement.execute(HivePreparedStatement.java:101) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
at org.apache.commons.dbcp2.DelegatingPreparedStatement.execute(DelegatingPreparedStatement.java:94) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
at org.apache.commons.dbcp2.DelegatingPreparedStatement.execute(DelegatingPreparedStatement.java:94) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
at com.hortonworks.spark.sql.hive.llap.JDBCWrapper.executeUpdate(HS2JDBCWrapper.scala:356) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
at com.hortonworks.spark.sql.hive.llap.DefaultJDBCWrapper.executeUpdate(HS2JDBCWrapper.scala) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
at com.hortonworks.spark.sql.hive.llap.HiveWarehouseDataSourceWriter.handleWriteWithSaveMode(HiveWarehouseDataSourceWriter.java:276) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
at com.hortonworks.spark.sql.hive.llap.HiveWarehouseDataSourceWriter.commit(HiveWarehouseDataSourceWriter.java:170) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec.doExecute(WriteToDataSourceV2.scala:91) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
... 29 more
org.apache.spark.SparkException: Writing job aborted.
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec.doExecute(WriteToDataSourceV2.scala:112)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:664)
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:664)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:664)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:256)
at com.test.sparkHive.computation.reconciliation.mtm.dao.MtmReconciliationDao.saveIntoHive(MtmReconciliationDao.scala:85)
at com.test.sparkHive.computation.reconciliation.mtm.MtmReconciliationFeeder.computeAndFeedMtmReconciliation(MtmReconciliationFeeder.scala:122)
at com.test.sparkHive.reconciliation.mtm.MtmReconciliationProcessor.run(MtmReconciliationProcessor.java:46)
at com.sparkHive.common.processing.SpringTaskProcessor.start(SpringTaskProcessor.java:154)
at com.sparkHive.common.processing.SpringTaskProcessor.start(SpringTaskProcessor.java:118)
at com.test.sparkHive.reconciliation.mtm.MtmReconciliationProcessor.main(MtmReconciliationProcessor.java:40)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:900)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:192)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:217)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:137)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.RuntimeException: java.sql.SQLException: Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask. org.apache.hadoop.hive.ql.metadata.HiveException: Load Data failed for hdfs://hdfs-prince/tmp/20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464/20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464_0_0 as the file is not owned by hive and load data is also not ran as hive
at com.hortonworks.spark.sql.hive.llap.HiveWarehouseDataSourceWriter.commit(HiveWarehouseDataSourceWriter.java:172)
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec.doExecute(WriteToDataSourceV2.scala:91)
... 29 more

The code that writes to Hive 3:

val hiveHwc =
  com.hortonworks.spark.sql.hive.llap.HiveWarehouseBuilder.session(spark).build()

reconciliatinDF.write
  .format("com.hortonworks.spark.sql.hive.llap.HiveWarehouseConnector")
  .option("database", "von_onogoro")
  .option("table", "mtm_reconciliation")
  .mode(SaveMode.Overwrite)
  .save()
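For completeness, a write through HWC also assumes the session carries the HiveServer2 and metastore endpoints; a minimal sketch of that configuration (every host, port, and path below is a placeholder, not a value from my cluster):

import org.apache.spark.sql.SparkSession

// Minimal HWC session-config sketch; all endpoints are placeholders.
val spark = SparkSession.builder()
  .appName("hwc-write-test")
  // HiveServer2 JDBC endpoint that HWC uses to run LOAD DATA / DDL
  .config("spark.sql.hive.hiveserver2.jdbc.url", "jdbc:hive2://hs2-host:10000/")
  // Hive metastore for table metadata
  .config("spark.datasource.hive.warehouse.metastoreUri", "thrift://metastore-host:9083")
  // HDFS directory where HWC stages files before LOAD DATA picks them up
  .config("spark.datasource.hive.warehouse.load.staging.dir", "/tmp")
  .getOrCreate()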

@Prince this error is a permission problem:

the file is not owned by hive and load data is also not ran as hive

If you resolve the permission issue, your query will complete. If you are on HDP, you should be able to use Ranger and the Hive plugin to create a policy that gives your Spark user the required permissions on Hive.
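The mechanics behind the message: HWC stages files under /tmp as your Spark user and then issues a LOAD DATA through HiveServer2; per the error, Hive 3 accepts that load only if the staged files are owned by hive or the statement runs as hive, and your log shows neither holds. Before touching Ranger you can confirm the ownership mismatch with a quick check (a minimal sketch; the staging path is a placeholder, copy it from the "Committed File" line of a fresh run, since the directory in your log was cleaned up on abort):

import org.apache.hadoop.fs.{FileSystem, Path}

// Diagnostic sketch: print the owner and permissions of each staged file.
// The path below is a placeholder; take it from the "Committed File" log line.
val stagingDir = new Path("/tmp/<your-staging-dir>")
val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration)
fs.listStatus(stagingDir).foreach { s =>
  println(s"${s.getPath.getName} owner=${s.getOwner} group=${s.getGroup} perms=${s.getPermission}")
}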
