Error when trying to save a parquet file with write.save()



I'm new to pyspark and I ran into a problem while trying to save a simple parquet file as an exercise.

It keeps returning this error:

Py4JJavaError: An error occurred while calling o163.save. : ExitCodeException exitCode=1: ChangeFileModeByMask error (87): The parameter is incorrect

I'm working in a Jupyter notebook. I already installed Microsoft Visual Studio C++ 2010 (that was for another problem), but now I can't find an answer to this one.

The code is simple:

#load a parquet file
parquet_file = "C:/.../file.parquet"
df = sqlContext.read.load(parquet_file)
df.show()
df2=df.select("name","favorite_color")
df2.show()
dir = "my directory"
df2.write.save(dir + "/" + "namesAndFavColors.parquet")

And then it returns this error:

---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_13816/2633554599.py in <module>
----> 1 df2.write.save(dir + "/" + "namesAndFavColors.parquet")
C:\spark\spark-3.1.2-bin-hadoop3.2\python\pyspark\sql\readwriter.py in save(self, path, format, mode, partitionBy, **options)
1107             self._jwrite.save()
1108         else:
-> 1109             self._jwrite.save(path)
1110 
1111     @since(1.4)
C:\spark\spark-3.1.2-bin-hadoop3.2\python\lib\py4j-0.10.9-src.zip\py4j\java_gateway.py in __call__(self, *args)
1302 
1303         answer = self.gateway_client.send_command(command)
-> 1304         return_value = get_return_value(
1305             answer, self.gateway_client, self.target_id, self.name)
1306 
C:\spark\spark-3.1.2-bin-hadoop3.2\python\pyspark\sql\utils.py in deco(*a, **kw)
109     def deco(*a, **kw):
110         try:
--> 111             return f(*a, **kw)
112         except py4j.protocol.Py4JJavaError as e:
113             converted = convert_exception(e.java_exception)
C:\spark\spark-3.1.2-bin-hadoop3.2\python\lib\py4j-0.10.9-src.zip\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name)
324             value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
325             if answer[1] == REFERENCE_TYPE:
--> 326                 raise Py4JJavaError(
327                     "An error occurred while calling {0}{1}{2}.\n".
328                     format(target_id, ".", name), value)
Py4JJavaError: An error occurred while calling o163.save.
: ExitCodeException exitCode=1: ChangeFileModeByMask error (87): The parameter is incorrect.

at org.apache.hadoop.util.Shell.runCommand(Shell.java:1008)
at org.apache.hadoop.util.Shell.run(Shell.java:901)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:1213)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:1307)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:1289)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:865)
at org.apache.hadoop.fs.RawLocalFileSystem.mkOneDirWithMode(RawLocalFileSystem.java:547)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:587)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:559)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:586)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:559)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:586)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:559)
at org.apache.hadoop.fs.ChecksumFileSystem.mkdirs(ChecksumFileSystem.java:705)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.setupJob(FileOutputCommitter.java:354)
at org.apache.spark.internal.io.HadoopMapReduceCommitProtocol.setupJob(HadoopMapReduceCommitProtocol.scala:178)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:173)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:188)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:108)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:106)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.doExecute(commands.scala:131)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:132)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:131)
at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:989)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:989)
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:438)
at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:415)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:293)
at sun.reflect.GeneratedMethodAccessor43.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)

Try this:

df2.write.parquet(dir + '/' + "namesAndFavColors.parquet")
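
For reference, here is a minimal, self-contained sketch of that suggestion, assuming a local SparkSession; the input path C:/data/file.parquet, the output directory C:/data/output, and the app name "parquet-example" are placeholders, not your actual locations:

# Minimal sketch, assuming a local SparkSession; the paths below are placeholders.
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("parquet-example").getOrCreate()

# Read the source parquet file (replace with your actual path).
df = spark.read.parquet("C:/data/file.parquet")

# Keep only the columns of interest.
df2 = df.select("name", "favorite_color")

# Write with the parquet writer; mode("overwrite") simply avoids a failure
# if the target directory already exists.
out_dir = "C:/data/output"
df2.write.mode("overwrite").parquet(out_dir + "/namesAndFavColors.parquet")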
