我正在使用 "spark-submit -master local -executor-memory 800m target/scala-2.10/finalProject_2.10-1.0.jar" 提交作业
和我的SBT文件的依赖项:
name := "Projectx"

version := "1.0"

scalaVersion := "2.10.6"

// Use %% everywhere so sbt appends the Scala binary suffix (_2.10) automatically,
// instead of mixing %% with hand-written "_2.10" artifact names.
// Keep all Spark modules on the SAME version: the original mixed spark-core 1.6.3
// with spark-sql 1.6.1, which can cause binary incompatibilities at runtime.
libraryDependencies += "org.apache.spark" %% "spark-core" % "1.6.3"
libraryDependencies += "org.apache.spark" %% "spark-sql"  % "1.6.3"
libraryDependencies += "com.databricks"   %% "spark-csv"  % "1.4.0"
我的代码:
// NOTE(review): this file needs the following imports at the top (not visible in
// the posted snippet — confirm they exist in the real source):
//   import org.apache.spark.{SparkConf, SparkContext}
//   import org.apache.spark.sql.SQLContext
//   import org.apache.spark.sql.functions.avg
object Projectx {

  /**
   * Entry point: loads the 2008 airline CSV from HDFS, casts the numeric
   * columns, computes per-flight average arrival/departure delays, and
   * writes the result back to HDFS as CSV.
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("AirlineAnalysis")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    sc.setLogLevel("ERROR")

    // Load the CSV once via spark-csv; every column arrives as a string.
    // (The original also did sc.textFile on the same path and a discarded
    // df.col("Year").cast("int") — both were dead work and are removed.)
    val df = sqlContext.read
      .format("com.databricks.spark.csv")
      .option("header", "true")
      .load("hdfs://localhost:9000/Project/2008.csv")
    df.printSchema()

    // Re-type one string column in place: rename to a temp name, add the
    // cast column under the original name, then drop the temp.
    def convertColumn(df: org.apache.spark.sql.DataFrame, name: String, newType: String) = {
      val renamed = df.withColumnRenamed(name, "swap")
      renamed.withColumn(name, renamed.col("swap").cast(newType)).drop("swap")
    }

    val df_2 = convertColumn(df, "Year", "int")
    val df_3 = convertColumn(df_2, "ArrDelay", "int")
    // BUG FIX: the original built df_4 from df_2, silently discarding the
    // ArrDelay cast above. Chain from df_3 so both casts survive.
    val df_4 = convertColumn(df_3, "DepDelay", "int")

    // Alias the aggregates explicitly: the auto-generated column name
    // ("avg(ArrDelay)" vs "AVG(ArrDelay)") varies across Spark versions,
    // so the original orderBy("AVG(ArrDelay)") was fragile.
    val averageDelays = df_4.groupBy(df_4.col("FlightNum"))
      .agg(
        avg(df_4.col("ArrDelay")).alias("AvgArrDelay"),
        avg(df_4.col("DepDelay")).alias("AvgDepDelay")
      )
    averageDelays.cache()
    averageDelays.show()
    averageDelays.orderBy("AvgArrDelay").show()

    averageDelays.write
      .format("com.databricks.spark.csv")
      .option("header", "true")
      .save("hdfs://localhost:9000/Flight")

    sc.stop()
  }
}
错误
spark-submit --master local --executor-memory 800m target/scala-2.10/projectx_2.10-1.0.jar
Error: Cannot load main class from JAR file:/home/hdadmin/target/scala-2.10/projectx_2.10-1.0.jar
Run with --help for usage help or --verbose for debug output
请帮助!
尝试添加:
--class "packageName.Projectx"
到运行命令中。（'packageName' 前缀取决于您的项目结构,可能不需要。）
编辑:针对您的第二个错误(见下面的评论),我相信您的运行命令中还需要加上:
--packages com.databricks:spark-csv_2.10:1.4.0
这样可以运行: spark-submit --class "finalproject" --master local --executor-memory 800m target/scala-2.10/finalproject_2.10-1.0.jar
您没有添加 --class 参数,因此在 jar 中找不到主类。
./bin/spark-submit
--class <main-class>
--master <master-url>
--deploy-mode <deploy-mode>
--conf <key>=<value>
... # other options
<application-jar>
[application-arguments]
http://spark.apache.org/docs/latest/submitting-applications.html