SnappyData: missing dependencies for Spark class org/apache/spark/sql/types/DataType



I downloaded the SnappyData POC (snappy-poc) from GitHub.

I am trying to build the project, but there seems to be a dependency problem with the Spark version. When I run gradlew generateAdImpressions, I get this error:

Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/spark/sql/types/DataType
at io.snappydata.adanalytics.KafkaAdImpressionProducer$.<init>(KafkaAdImpressionProducer.scala:37)
at io.snappydata.adanalytics.KafkaAdImpressionProducer$.<clinit>(KafkaAdImpressionProducer.scala)
at io.snappydata.adanalytics.KafkaAdImpressionProducer.main(KafkaAdImpressionProducer.scala)
Caused by: java.lang.ClassNotFoundException: org.apache.spark.sql.types.DataType
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:335)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 3 more
:generateAdImpressions FAILED

I think this is because the project uses spark_2.11:2.1.1, and this class was removed after version 2.1.0.
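To check which jar on the run classpath (if any) actually contains the class, a small diagnostic task could be dropped into build.gradle. This is only a sketch; the task name findDataType is made up, and it scans the same classpath the JavaExec tasks below use:

// Hypothetical diagnostic task: scan the test runtime classpath and
// print every jar that bundles the class the error complains about.
task findDataType {
    doLast {
        def target = 'org/apache/spark/sql/types/DataType.class'
        sourceSets.test.runtimeClasspath
            .filter { it.name.endsWith('.jar') }
            .each { f ->
                // zipTree lets us look inside each jar without unpacking it
                if (!zipTree(f).matching { include target }.isEmpty()) {
                    println "Found ${target} in ${f}"
                }
            }
    }
}

If this prints nothing, the class is not on that classpath at all, which would point at the dependency versions rather than the code.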

Here is the Gradle file:

plugins {
  id 'java'
  id 'com.github.johnrengelman.shadow' version '1.2.3'
  id 'com.commercehub.gradle.plugin.avro' version "0.5.0"
}

archivesBaseName = 'snappy-poc'

allprojects {
  version = '1.0.0'

  repositories {
    mavenCentral()
    maven { url "https://oss.sonatype.org/content/groups/public" }
    maven { url "https://oss.sonatype.org/content/repositories/snapshots" }
    maven { url "http://repository.snappydata.io/repository/internal" }
    maven { url "http://repository.snappydata.io/repository/snapshots" }
    maven { url "http://mvnrepository.com/artifact" }
    maven { url 'https://clojars.org/repo' }
  }

  apply plugin: 'java'
  apply plugin: 'maven'
  apply plugin: 'scala'
  apply plugin: 'idea'
  apply plugin: 'eclipse'
  apply plugin: "com.commercehub.gradle.plugin.avro"

  ext {
    sparkVersion = '2.1.1'
    snappyVersion = '1.0.0'
  }

  configurations.all {
    resolutionStrategy.cacheChangingModulesFor 4, 'hours'
  }

  dependencies {
    compile 'org.scala-lang:scala-library:2.11.6'
    compile 'org.scala-lang:scala-reflect:2.11.6'
    compile 'org.scala-lang:scala-compiler:2.11.6'
  }
}

dependencies {
  compileOnly "io.snappydata:snappydata-core_2.11:${snappyVersion}"
  compileOnly "io.snappydata:snappydata-cluster_2.11:${snappyVersion}"
  compileOnly "io.snappydata:snappy-spark-core_2.11:${sparkVersion}"
  compileOnly "io.snappydata:snappy-spark-catalyst_2.11:${sparkVersion}"
  compileOnly "io.snappydata:snappy-spark-sql_2.11:${sparkVersion}"
  // compileOnly "io.snappydata:snappydata-aqp_2.11:${snappyVersion}"
  compile 'com.miguno:kafka-avro-codec_2.10:0.1.1-SNAPSHOT'
  compile 'org.apache.kafka:kafka_2.11:0.8.2.1'
  compile 'com.twitter:algebird-core_2.10:0.1.11'
  compile 'com.googlecode.javaewah:JavaEWAH:1.1.5'
  compile 'org.joda:joda-convert:1.2'
  compile 'com.opencsv:opencsv:3.3'
}

task generateAvro(type: com.commercehub.gradle.plugin.avro.GenerateAvroJavaTask) {
  source("src/avro")
  outputDir = file("src/main/java")
}

compileJava.source(generateAvro.outputs)
avro.stringType = "charSequence"

ext {
  assemblyJar = rootProject.tasks.getByPath(':assembly:shadowJar').outputs
}
def assemblyJar = tasks.getByPath(':assembly:shadowJar').outputs

task generateAdImpressions(type: JavaExec, dependsOn: classes) {
  main = 'io.snappydata.adanalytics.KafkaAdImpressionProducer'
  classpath sourceSets.test.runtimeClasspath
  environment 'PROJECT_ASSEMBLY_JAR', assemblyJar.files.asPath
}

task aggeregateAdImpressions_API(type: JavaExec, dependsOn: classes) {
  main = 'io.snappydata.adanalytics.SnappyAPILogAggregator'
  jvmArgs = ['-XX:MaxPermSize=512m']
  classpath sourceSets.test.runtimeClasspath
  environment 'PROJECT_ASSEMBLY_JAR', assemblyJar.files.asPath
}

task aggeregateAdImpressions_SQL(type: JavaExec, dependsOn: classes) {
  main = 'io.snappydata.adanalytics.SnappySQLLogAggregator'
  jvmArgs = ['-XX:MaxPermSize=512m']
  classpath sourceSets.test.runtimeClasspath
  environment 'PROJECT_ASSEMBLY_JAR', assemblyJar.files.asPath
}

task generateAdImpressions_Socket(type: JavaExec, dependsOn: classes) {
  main = 'io.snappydata.benchmark.SocketAdImpressionGenerator'
  classpath sourceSets.test.runtimeClasspath
  environment 'PROJECT_ASSEMBLY_JAR', assemblyJar.files.asPath
  maxHeapSize = "8196m"
}

task startSnappyIngestionPerf_Socket(type: JavaExec, dependsOn: classes) {
  main = 'io.snappydata.benchmark.SocketSnappyIngestionPerf'
  jvmArgs = ['-XX:MaxPermSize=512m']
  maxHeapSize = "8196m"
  classpath sourceSets.test.runtimeClasspath
  environment 'PROJECT_ASSEMBLY_JAR', assemblyJar.files.asPath
}

task startSnappyIngestionPerf_CustomReceiver(type: JavaExec, dependsOn: classes) {
  main = 'io.snappydata.benchmark.CustomReceiverSnappyIngestionPerf'
  jvmArgs = ['-XX:MaxPermSize=512m']
  maxHeapSize = "8196m"
  classpath sourceSets.test.runtimeClasspath
  environment 'PROJECT_ASSEMBLY_JAR', assemblyJar.files.asPath
}

task startSnappyIngestionPerf_CSV(type: JavaExec, dependsOn: classes) {
  main = 'io.snappydata.benchmark.CSVSnappyIngestionPerf'
  jvmArgs = ['-XX:MaxPermSize=512m']
  maxHeapSize = "8196m"
  classpath sourceSets.test.runtimeClasspath
  environment 'PROJECT_ASSEMBLY_JAR', assemblyJar.files.asPath
}

task startSnappyIngestionPerf_Kafka(type: JavaExec, dependsOn: classes) {
  main = 'io.snappydata.benchmark.KafkaSnappyIngestionPerf'
  jvmArgs = ['-XX:MaxPermSize=512m']
  maxHeapSize = "8196m"
  classpath sourceSets.test.runtimeClasspath
  environment 'PROJECT_ASSEMBLY_JAR', assemblyJar.files.asPath
}

task product(type: Exec) {
  dependsOn ':assembly:shadowJar'

  def productDir = "${rootProject.buildDir}/snappydata-poc"
  def snappyData = System.env.SNAPPYDATA
  if (snappyData == null || snappyData.length() == 0) {
    snappyData = "${projectDir}/../snappydata"
  }

  doFirst {
    delete productDir
    file("${productDir}/lib").mkdirs()
  }

  // first execute the snappydata "product" target based on env var SNAPPYDATA
  workingDir snappyData
  commandLine './gradlew', 'copyProduct', "-PcopyToDir=${productDir}"

  // lastly copy own assembly fat jar in product lib dir
  doLast {
    copy {
      from assemblyJar
      into "${productDir}/lib"
    }
  }
}

Spark 2.1.1 and Spark 2.1.0 have many conflicting dependencies and missing branches, and resolving them is a hassle. I looked it up, and it seems the DataType class is available in 2.1.0.

I am new to Gradle. Can someone help me figure out how to selectively add the jar that contains this older class while keeping 2.1.1 for everything else? Something like the sketch below is what I have in mind.
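For reference, Gradle can pin a single module to a different version with a resolution strategy. A minimal sketch, assuming the class comes from the catalyst artifact (org.apache.spark.sql.types lives in spark-catalyst) and that a 2.1.0 build of it is actually published in the repositories above, which has not been verified here:

// Sketch only: force just the catalyst module back to 2.1.0 while the
// other snappy-spark modules stay at sparkVersion (2.1.1).
// Assumes io.snappydata:snappy-spark-catalyst_2.11:2.1.0 exists.
configurations.all {
    resolutionStrategy {
        force 'io.snappydata:snappy-spark-catalyst_2.11:2.1.0'
    }
}

Note that mixing module versions like this within one Spark release line can itself cause binary incompatibilities, so aligning all modules on one version is usually safer.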

Many thanks in advance.

snappy-poc uses Spark 2.1.0 and works fine, so you want to upgrade to 2.1.1? I quickly tested it with 2.1.1.1 and it worked well for me. Maybe you should try changing the Spark version to 2.1.1.1 in the build.gradle file, as sketched below.
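That is, in the ext block of the build.gradle shown above (2.1.1.1 being the SnappyData Spark fork release mentioned in this answer):

ext {
  // use the SnappyData Spark fork release instead of plain 2.1.1
  sparkVersion = '2.1.1.1'
  snappyVersion = '1.0.0'
}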
