Spark phoenix read 由于 hbase-spark 依赖项而中断 ClassNotFoundExcept



我正在编写一个简单的Spark程序,用于从Phoenix读取并使用Spark-Hbase-Connector写入HBase。我成功地从凤凰城阅读并单独使用SHC写信给Hbase。但是,当我将所有内容放在一起(特别是添加 hbase-spark 依赖项(时,管道在 Phoenix read 语句处中断。

法典:


import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.datasources.hbase.HBaseTableCatalog
object SparkHbasePheonix {
def main(args: Array[String]): Unit = {
def catalog =
s"""{
|"table":{"namespace":"default", "name":"employee"},
|"rowkey":"key",
|"columns":{
|"key":{"cf":"rowkey", "col":"key", "type":"string"},
|"fName":{"cf":"person", "col":"firstName", "type":"string"},
|"lName":{"cf":"person", "col":"lastName", "type":"string"},
|"mName":{"cf":"person", "col":"middleName", "type":"string"},
|"addressLine":{"cf":"address", "col":"addressLine", "type":"string"},
|"city":{"cf":"address", "col":"city", "type":"string"},
|"state":{"cf":"address", "col":"state", "type":"string"},
|"zipCode":{"cf":"address", "col":"zipCode", "type":"string"}
|}
|}""".stripMargin
val spark: SparkSession = SparkSession.builder()
.master("local[1]")
.appName("HbaseSparkWrite")
.getOrCreate()
val df = spark.read.format("org.apache.phoenix.spark")
.option("table", "ph_employee")
.option("zkUrl", "0.0.0.0:2181")
.load()
df.write.options(
Map(HBaseTableCatalog.tableCatalog -> catalog, HBaseTableCatalog.newTable -> "4"))
.format("org.apache.spark.sql.execution.datasources.hbase")
.save()
}
}

聚 甲醛:

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<scala.tools.version>2.11</scala.tools.version>
<scala.version>2.11.8</scala.version>
<spark.version>2.3.2.3.1.0.31-28</spark.version>
<hbase.version>2.0.2.3.1.0.31-28</hbase.version>
<phoenix.version>5.0.0.3.1.5.9-1</phoenix.version>
</properties>
<!-- Hbase dependencies-->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-spark -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-spark</artifactId>
<version>2.0.2.3.1.0.6-1</version>
</dependency>
<dependency>
<groupId>com.hortonworks</groupId>
<artifactId>shc-core</artifactId>
<version>1.1.1-2.1-s_2.11</version>
</dependency>

<!-- Phoenix dependencies-->
<dependency>
<groupId>org.apache.phoenix</groupId>
<artifactId>phoenix-client</artifactId>
<version>${phoenix.version}</version>
<exclusions>
<exclusion>
<groupId>org.glassfish</groupId>
<artifactId>javax.el</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.phoenix</groupId>
<artifactId>phoenix-spark</artifactId>
<version>${phoenix.version}</version>
<exclusions>
<exclusion>
<groupId>org.glassfish</groupId>
<artifactId>javax.el</artifactId>
</exclusion>
</exclusions>
</dependency>

例外:

Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/hadoop/hbase/client/HConnectionManager
at org.apache.phoenix.query.HConnectionFactory$HConnectionFactoryImpl.createConnection(HConnectionFactory.java:47)
at org.apache.phoenix.query.ConnectionQueryServicesImpl.openConnection(ConnectionQueryServicesImpl.java:396)
at org.apache.phoenix.query.ConnectionQueryServicesImpl.access$300(ConnectionQueryServicesImpl.java:228)
at org.apache.phoenix.query.ConnectionQueryServicesImpl$13.call(ConnectionQueryServicesImpl.java:2374)
at org.apache.phoenix.query.ConnectionQueryServicesImpl$13.call(ConnectionQueryServicesImpl.java:2352)
at org.apache.phoenix.util.PhoenixContextExecutor.call(PhoenixContextExecutor.java:76)
at org.apache.phoenix.query.ConnectionQueryServicesImpl.init(ConnectionQueryServicesImpl.java:2352)
at org.apache.phoenix.jdbc.PhoenixDriver.getConnectionQueryServices(PhoenixDriver.java:232)
at org.apache.phoenix.jdbc.PhoenixEmbeddedDriver.createConnection(PhoenixEmbeddedDriver.java:147)
at org.apache.phoenix.jdbc.PhoenixDriver.connect(PhoenixDriver.java:202)
at java.sql.DriverManager.getConnection(DriverManager.java:664)
at java.sql.DriverManager.getConnection(DriverManager.java:208)
at org.apache.phoenix.mapreduce.util.ConnectionUtil.getConnection(ConnectionUtil.java:98)
at org.apache.phoenix.mapreduce.util.ConnectionUtil.getInputConnection(ConnectionUtil.java:57)
at org.apache.phoenix.mapreduce.util.ConnectionUtil.getInputConnection(ConnectionUtil.java:45)
at org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil.getSelectColumnMetadataList(PhoenixConfigurationUtil.java:279)
at org.apache.phoenix.spark.PhoenixRDD.toDataFrame(PhoenixRDD.scala:118)
at org.apache.phoenix.spark.PhoenixRelation.schema(PhoenixRelation.scala:60)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:432)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:239)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:227)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:164)
at com.test.SparkPheonixToHbase$.main(SparkHbasePheonix.scala:33)
at com.test.SparkPheonixToHbase.main(SparkHbasePheonix.scala)
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.hbase.client.HConnectionManager
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:355)
at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
... 24 more
20/05/19 16:57:44 INFO SparkContext: Invoking stop() from shutdown hook

当我添加 hbase-spark 依赖项时,凤凰读取失败。

<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-spark</artifactId>
<version>2.0.2.3.1.0.6-1</version>
</dependency>

如何摆脱此错误?

只需使用其中一个连接器。

如果要读取凤凰表,并且输出表不是凤凰表,而是标准 HBase 表,请仅使用 SHC 或 HBase Spark 连接器。他们可以直接从HBase读取凤凰表,而无需凤凰层。请参阅此处的选项:https://sparkbyexamples.com/hbase/spark-hbase-connectors-which-one-to-use/#spark-sql

如果您也想保存到凤凰城,只需使用凤凰连接器进行阅读和写作。

通常,混合连接器可能会导致构建冲突,因为它们可能在其内部类中重叠,特别是如果您不关心在后台完全导入使用相同 HBase 客户端的版本。除非您有充分的理由使用不同的库进行阅读和写作,否则请坚持使用最适合您需求的库之一。

最新更新