Scala Spark Bigquery连接器问题-内部异常:grpc.StatusRuntimeException



当我在本地运行Spark Scala代码时,我得到了以下错误:

Exception in thread "main" com.google.cloud.spark.bigquery.repackaged.com.google.api.gax.rpc.InternalException: com.google.cloud.spark.bigquery.repackaged.io.grpc.StatusRuntimeException: INTERNAL: request failed: internal error
at com.google.cloud.spark.bigquery.repackaged.com.google.api.gax.rpc.ApiExceptionFactory.createException(ApiExceptionFactory.java:67)
at com.google.cloud.spark.bigquery.repackaged.com.google.api.gax.grpc.GrpcApiExceptionFactory.create(GrpcApiExceptionFactory.java:72)
at com.google.cloud.spark.bigquery.repackaged.com.google.api.gax.grpc.GrpcApiExceptionFactory.create(GrpcApiExceptionFactory.java:60)
at com.google.cloud.spark.bigquery.repackaged.com.google.api.gax.grpc.GrpcExceptionCallable$ExceptionTransformingFuture.onFailure(GrpcExceptionCallable.java:97)
at com.google.cloud.spark.bigquery.repackaged.com.google.api.core.ApiFutures$1.onFailure(ApiFutures.java:68)
at com.google.cloud.spark.bigquery.repackaged.com.google.common.util.concurrent.Futures$CallbackListener.run(Futures.java:1072)
at com.google.cloud.spark.bigquery.repackaged.com.google.common.util.concurrent.DirectExecutor.execute(DirectExecutor.java:30)
at com.google.cloud.spark.bigquery.repackaged.com.google.common.util.concurrent.AbstractFuture.executeListener(AbstractFuture.java:1164)
at com.google.cloud.spark.bigquery.repackaged.com.google.common.util.concurrent.AbstractFuture.complete(AbstractFuture.java:958)
at com.google.cloud.spark.bigquery.repackaged.com.google.common.util.concurrent.AbstractFuture.setException(AbstractFuture.java:749)
at com.google.cloud.spark.bigquery.repackaged.io.grpc.stub.ClientCalls$GrpcFuture.setException(ClientCalls.java:522)
at com.google.cloud.spark.bigquery.repackaged.io.grpc.stub.ClientCalls$UnaryStreamToFuture.onClose(ClientCalls.java:497)
at com.google.cloud.spark.bigquery.repackaged.io.grpc.internal.ClientCallImpl.closeObserver(ClientCallImpl.java:426)
at com.google.cloud.spark.bigquery.repackaged.io.grpc.internal.ClientCallImpl.access$500(ClientCallImpl.java:66)
at com.google.cloud.spark.bigquery.repackaged.io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl.close(ClientCallImpl.java:689)
at com.google.cloud.spark.bigquery.repackaged.io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl.access$900(ClientCallImpl.java:577)
at com.google.cloud.spark.bigquery.repackaged.io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl$1StreamClosed.runInternal(ClientCallImpl.java:751)
at com.google.cloud.spark.bigquery.repackaged.io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl$1StreamClosed.runInContext(ClientCallImpl.java:740)
at com.google.cloud.spark.bigquery.repackaged.io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37)
at com.google.cloud.spark.bigquery.repackaged.io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:123)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Suppressed: com.google.cloud.spark.bigquery.repackaged.com.google.api.gax.rpc.AsyncTaskException: Asynchronous task failed
at com.google.cloud.spark.bigquery.repackaged.com.google.api.gax.rpc.ApiExceptions.callAndTranslateApiException(ApiExceptions.java:57)
at com.google.cloud.spark.bigquery.repackaged.com.google.api.gax.rpc.UnaryCallable.call(UnaryCallable.java:112)
at com.google.cloud.spark.bigquery.repackaged.com.google.cloud.bigquery.storage.v1beta1.BigQueryStorageClient.createReadSession(BigQueryStorageClient.java:237)
at com.google.cloud.spark.bigquery.direct.DirectBigQueryRelation.buildScan(DirectBigQueryRelation.scala:139)
at org.apache.spark.sql.execution.datasources.DataSourceStrategy.$anonfun$apply$2(DataSourceStrategy.scala:293)
at org.apache.spark.sql.execution.datasources.DataSourceStrategy.$anonfun$pruneFilterProject$1(DataSourceStrategy.scala:326)
at org.apache.spark.sql.execution.datasources.DataSourceStrategy.pruneFilterProjectRaw(DataSourceStrategy.scala:381)
at org.apache.spark.sql.execution.datasources.DataSourceStrategy.pruneFilterProject(DataSourceStrategy.scala:325)
at org.apache.spark.sql.execution.datasources.DataSourceStrategy.apply(DataSourceStrategy.scala:293)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$1(QueryPlanner.scala:63)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:480)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:485)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$3(QueryPlanner.scala:78)
at scala.collection.TraversableOnce.$anonfun$foldLeft$1(TraversableOnce.scala:156)
at scala.collection.TraversableOnce.$anonfun$foldLeft$1$adapted(TraversableOnce.scala:156)
at scala.collection.Iterator.foreach(Iterator.scala:937)
at scala.collection.Iterator.foreach$(Iterator.scala:937)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1425)
at scala.collection.TraversableOnce.foldLeft(TraversableOnce.scala:156)
at scala.collection.TraversableOnce.foldLeft$(TraversableOnce.scala:154)
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1425)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$2(QueryPlanner.scala:75)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:480)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:486)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:72)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:68)
at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:77)
at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:77)
at org.apache.spark.sql.execution.CacheManager.$anonfun$cacheQuery$1(CacheManager.scala:100)
at org.apache.spark.sql.execution.CacheManager.writeLock(CacheManager.scala:67)
at org.apache.spark.sql.execution.CacheManager.cacheQuery(CacheManager.scala:91)
at org.apache.spark.sql.Dataset.persist(Dataset.scala:2963)
at org.apache.spark.sql.Dataset.cache(Dataset.scala:2973)
at Transform$.main(Transform.scala:22)
at Transform.main(Transform.scala)
Caused by: com.google.cloud.spark.bigquery.repackaged.io.grpc.StatusRuntimeException: INTERNAL: request failed: internal error
at com.google.cloud.spark.bigquery.repackaged.io.grpc.Status.asRuntimeException(Status.java:533)
... 16 more

下面是我的Maven依赖

<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.12</artifactId>
<version>2.4.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.12</artifactId>
<version>2.4.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.google.cloud.spark/spark-bigquery -->
<dependency>
<groupId>com.google.cloud.spark</groupId>
<artifactId>spark-bigquery-with-dependencies_2.12</artifactId>
<version>0.14.0-beta</version>
</dependency>

我正在[此处]运行Scala示例代码(https://cloud.google.com/dataproc/docs/tutorials/bigquery-connector-spark-example)。

请将表引用更新为bigquery-public-data.samples.shakespeare。BigQuery的公共数据集的旧引用publicdata似乎不再有效。

最新更新