Is it possible to run a Spark job (like the WordCount sample) in local mode on Cygwin?

I am running a simple piece of Spark code that reads a file from the local file system, but I am getting the error below. This is the code, run in the Cygwin console:

val orders = sc.textFile("C:///DataResearch/retail_db/orders")
orders.first()
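
For what it's worth, I believe the same read could also be written with an explicit file:// URI, which is the usual way to point Spark at a local Windows path (same directory, just in URI form):

val orders = sc.textFile("file:///C:/DataResearch/retail_db/orders")
orders.first()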

After executing orders.first(), I get the following error:

java.lang.NullPointerException
        at java.lang.ProcessBuilder.start(Unknown Source)
        at org.apache.hadoop.util.Shell.runCommand(Shell.java:404)
        at org.apache.hadoop.util.Shell.run(Shell.java:379)
        at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:589)
        at org.apache.hadoop.util.Shell.execCommand(Shell.java:678)
        at org.apache.hadoop.util.Shell.execCommand(Shell.java:661)
        at org.apache.hadoop.fs.FileUtil.execCommand(FileUtil.java:1097)
        at org.apache.hadoop.fs.RawLocalFileSystem$RawLocalFileStatus.loadPermissionInfo(RawLocalFileSystem.java:567)
        at org.apache.hadoop.fs.RawLocalFileSystem$RawLocalFileStatus.getPermission(RawLocalFileSystem.java:542)
        at org.apache.hadoop.fs.LocatedFileStatus.<init>(LocatedFileStatus.java:42)
        at org.apache.hadoop.fs.FileSystem$4.next(FileSystem.java:1815)
        at org.apache.hadoop.fs.FileSystem$4.next(FileSystem.java:1797)
        at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:233)
        at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:270)
        at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:199)
        at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
        at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
        at scala.Option.getOrElse(Option.scala:120)
        at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
        at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
        at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
        at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
        at scala.Option.getOrElse(Option.scala:120)
        at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
        at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1307)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
        at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
        at org.apache.spark.rdd.RDD.take(RDD.scala:1302)
        at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1342)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
        at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
        at org.apache.spark.rdd.RDD.first(RDD.scala:1341)
        at .<init>(<console>:18)
        at .<clinit>(<console>)
        at .<init>(<console>:7)
        at .<clinit>(<console>)
        at $print(<console>)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
        at java.lang.reflect.Method.invoke(Unknown Source)
        at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:734)
        at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:983)
        at scala.tools.nsc.interpreter.IMain.loadAndRunReq$1(IMain.scala:573)
        at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:604)
        at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:568)
        at scala.tools.nsc.interpreter.ILoop.reallyInterpret$1(ILoop.scala:760)
        at scala.tools.nsc.interpreter.ILoop.interpretStartingWith(ILoop.scala:805)
        at scala.tools.nsc.interpreter.ILoop.command(ILoop.scala:717)
        at scala.tools.nsc.interpreter.ILoop.processLine$1(ILoop.scala:581)
        at scala.tools.nsc.interpreter.ILoop.innerLoop$1(ILoop.scala:588)
        at scala.tools.nsc.interpreter.ILoop.loop(ILoop.scala:591)
        at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply$mcZ$sp(ILoop.scala:882)
        at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:837)
        at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:837)
        at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:136)
        at scala.tools.nsc.interpreter.ILoop.process(ILoop.scala:837)
        at scala.tools.nsc.interpreter.ILoop.main(ILoop.scala:904)
        at xsbt.ConsoleInterface.run(ConsoleInterface.scala:62)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
        at java.lang.reflect.Method.invoke(Unknown Source)
        at sbt.compiler.AnalyzingCompiler.call(AnalyzingCompiler.scala:107)
        at sbt.compiler.AnalyzingCompiler.console(AnalyzingCompiler.scala:82)
        at sbt.Console.sbt$Console$$console0$1(Console.scala:22)
        at sbt.Console$$anonfun$apply$2$$anonfun$apply$1.apply$mcV$sp(Console.scala:23)
        at sbt.Console$$anonfun$apply$2$$anonfun$apply$1.apply(Console.scala:23)
        at sbt.Console$$anonfun$apply$2$$anonfun$apply$1.apply(Console.scala:23)
        at sbt.Logger$$anon$4.apply(Logger.scala:84)
        at sbt.TrapExit$App.run(TrapExit.scala:248)
        at java.lang.Thread.run(Unknown Source)
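
From what I have read so far, a NullPointerException inside org.apache.hadoop.util.Shell on Windows usually means Hadoop's native helper (winutils.exe) cannot be found. A sketch of the commonly suggested workaround, assuming winutils.exe has been placed under C:\hadoop\bin (that location is my assumption, not something from my setup):

// Assumed layout: C:\hadoop\bin\winutils.exe must exist at this path.
// Set this before the SparkContext touches the local file system.
System.setProperty("hadoop.home.dir", "C:\\hadoop")

val orders = sc.textFile("file:///C:/DataResearch/retail_db/orders")
orders.first()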

I have already referred to the posts below, but I am not sure how they apply here:

Is it possible to run Hadoop jobs (like the WordCount sample) in local mode on Windows without Cygwin?

I have also followed this documentation, which explains how to run Spark on Windows.
