我试图使用spark-cassandra-connector
从Cassandra读取数据帧(DataFrame),但出现了下面的异常。
注意:连接cassandra成功。
Spark版本:2.4.1
spark-cassandra-connector版本:2.5.1
Error starting ApplicationContext. To display the conditions report re-run your application with 'debug' enabled.
2021-10-01 11:32:01.649 ERROR 17404 --- [ main] o.s.boot.SpringApplication : Application run failed
java.lang.InstantiationError: com.datastax.oss.driver.internal.core.util.collection.QueryPlan
at com.datastax.spark.connector.cql.LocalNodeFirstLoadBalancingPolicy.newQueryPlan(LocalNodeFirstLoadBalancingPolicy.scala:122) ~[spark-cassandra-connector-driver_2.11-2.5.1.jar:2.5.1]
at com.datastax.oss.driver.internal.core.metadata.LoadBalancingPolicyWrapper.newQueryPlan(LoadBalancingPolicyWrapper.java:155) ~[java-driver-core-shaded-4.11.3.jar:na]
at com.datastax.oss.driver.internal.core.cql.CqlRequestHandler.onThrottleReady(CqlRequestHandler.java:193) ~[java-driver-core-shaded-4.11.3.jar:na]
at com.datastax.oss.driver.internal.core.session.throttling.PassThroughRequestThrottler.register(PassThroughRequestThrottler.java:52) ~[java-driver-core-shaded-4.11.3.jar:na]
at com.datastax.oss.driver.internal.core.cql.CqlRequestHandler.<init>(CqlRequestHandler.java:171) ~[java-driver-core-shaded-4.11.3.jar:na]
at com.datastax.oss.driver.internal.core.cql.CqlRequestAsyncProcessor.process(CqlRequestAsyncProcessor.java:44) ~[java-driver-core-shaded-4.11.3.jar:na]
at com.datastax.oss.driver.internal.core.cql.CqlRequestSyncProcessor.process(CqlRequestSyncProcessor.java:54) ~[java-driver-core-shaded-4.11.3.jar:na]
at com.datastax.oss.driver.internal.core.cql.CqlRequestSyncProcessor.process(CqlRequestSyncProcessor.java:30) ~[java-driver-core-shaded-4.11.3.jar:na]
at com.datastax.oss.driver.internal.core.session.DefaultSession.execute(DefaultSession.java:230) ~[java-driver-core-shaded-4.11.3.jar:na]
at com.datastax.oss.driver.api.core.cql.SyncCqlSession.execute(SyncCqlSession.java:54) ~[java-driver-core-shaded-4.11.3.jar:na]
at com.datastax.oss.driver.api.core.cql.SyncCqlSession.execute(SyncCqlSession.java:78) ~[java-driver-core-shaded-4.11.3.jar:na]
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[na:1.8.0_271]
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[na:1.8.0_271]
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[na:1.8.0_271]
at java.lang.reflect.Method.invoke(Method.java:498) ~[na:1.8.0_271]
at com.datastax.spark.connector.cql.SessionProxy.invoke(SessionProxy.scala:43) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at com.sun.proxy.$Proxy81.execute(Unknown Source) ~[na:na]
at com.datastax.spark.connector.rdd.partitioner.dht.TokenFactory$$anonfun$1.apply(TokenFactory.scala:99) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at com.datastax.spark.connector.rdd.partitioner.dht.TokenFactory$$anonfun$1.apply(TokenFactory.scala:98) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at com.datastax.spark.connector.cql.CassandraConnector$$anonfun$withSessionDo$1.apply(CassandraConnector.scala:112) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at com.datastax.spark.connector.cql.CassandraConnector$$anonfun$withSessionDo$1.apply(CassandraConnector.scala:111) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at com.datastax.spark.connector.cql.CassandraConnector.closeResourceAfterUse(CassandraConnector.scala:129) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at com.datastax.spark.connector.cql.CassandraConnector.withSessionDo(CassandraConnector.scala:111) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at com.datastax.spark.connector.rdd.partitioner.dht.TokenFactory$.forSystemLocalPartitioner(TokenFactory.scala:98) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at com.datastax.spark.connector.rdd.partitioner.SplitSizeEstimator$class.tokenFactory(SplitSizeEstimator.scala:9) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.tokenFactory$lzycompute(CassandraTableScanRDD.scala:64) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.tokenFactory(CassandraTableScanRDD.scala:64) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at com.datastax.spark.connector.rdd.partitioner.SplitSizeEstimator$class.estimateDataSize(SplitSizeEstimator.scala:12) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at com.datastax.spark.connector.rdd.partitioner.SplitSizeEstimator$class.estimateSplitCount(SplitSizeEstimator.scala:21) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.estimateSplitCount(CassandraTableScanRDD.scala:64) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at com.datastax.spark.connector.rdd.CassandraTableScanRDD$$anonfun$1.apply$mcI$sp(CassandraTableScanRDD.scala:228) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at com.datastax.spark.connector.rdd.CassandraTableScanRDD$$anonfun$1.apply(CassandraTableScanRDD.scala:228) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at com.datastax.spark.connector.rdd.CassandraTableScanRDD$$anonfun$1.apply(CassandraTableScanRDD.scala:228) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at scala.Option.getOrElse(Option.scala:121) ~[scala-library-2.11.12.jar:na]
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.partitionGenerator$lzycompute(CassandraTableScanRDD.scala:228) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.partitionGenerator(CassandraTableScanRDD.scala:224) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.getPartitions(CassandraTableScanRDD.scala:273) ~[spark-cassandra-connector_2.11-2.5.1.jar:2.5.1]
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253) ~[spark-core_2.11-2.4.1.jar:2.4.1]
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251) ~[spark-core_2.11-2.4.1.jar:2.4.1]
at scala.Option.getOrElse(Option.scala:121) ~[scala-library-2.11.12.jar:na]
at org.apache.spark.rdd.RDD.partitions(RDD.scala:251) ~[spark-core_2.11-2.4.1.jar:2.4.1]
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126) ~[spark-core_2.11-2.4.1.jar:2.4.1]
at org.apache.spark.rdd.RDD.count(RDD.scala:1168) ~[spark-core_2.11-2.4.1.jar:2.4.1]
at org.apache.spark.api.java.JavaRDDLike$class.count(JavaRDDLike.scala:455) ~[spark-core_2.11-2.4.1.jar:2.4.1]
at org.apache.spark.api.java.AbstractJavaRDDLike.count(JavaRDDLike.scala:45) ~[spark-core_2.11-2.4.1.jar:2.4.1]
您发布的错误表明,内嵌的Java驱动程序无法生成查询计划(query plan),即可作为协调器(coordinator)进行连接的Cassandra节点列表。问题可能出在接触点(contact points)的定义方式上。
通常需要使用spark.cassandra.connection.host
参数指定接触点。下面是使用该连接器启动Spark shell的示例:
$ spark-shell \
  --packages com.datastax.spark:spark-cassandra-connector_2.11:2.5.1 \
  --conf spark.cassandra.connection.host=cassandra_ip \
  --conf spark.sql.extensions=com.datastax.spark.connector.CassandraSparkExtensions
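在你的情况下,看起来你是在Spring Boot应用中创建连接的,因此很可能遇到了依赖冲突:堆栈跟踪显示spark-cassandra-connector 2.5.1与java-driver-core-shaded 4.11.3同时出现在类路径上,而InstantiationError表示代码试图实例化一个在运行时是抽象类或接口的类型,这通常说明编译时与运行时所用的库版本不兼容。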
你需要用你的配置细节更新你的原始问题,包括依赖关系的细节以及你正在运行的连接到Spark的命令,这样回答你问题的人就能更好地了解问题是什么。干杯!