使用Scala API,如何将一个HDFS位置的所有文件复制到另一个HDFS位置



使用Scala,我想将srcFilePath中的所有文件复制到destFilePath,但以下代码引发了错误。有人能帮我修复这个错误,并给出复制文件的解决方案吗?

scala> val srcFilePath = "/development/staging/b8baf3f4-abce-11eb-8592-0242ac110032/"
srcFilePath: String = /development/staging/b8baf3f4-abce-11eb-8592-0242ac110032/
scala> val destFilePath = "/development/staging/dest_b8baf3f4-abce-11eb-8592-0242ac110032/"
destFilePath: String = /development/staging/dest_b8baf3f4-abce-11eb-8592-0242ac110032/
scala> val hadoopConf = new Configuration()
hadoopConf: org.apache.hadoop.conf.Configuration = Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml
scala> val hdfs = FileSystem.get(hadoopConf)
hdfs: org.apache.hadoop.fs.FileSystem = DFS[DFSClient[clientName=DFSClient_NONMAPREDUCE_-1792011619_1, ugi=be9dusr@INTERNAL.IMSGLOBAL.COM (auth:KERBEROS)]]
scala>
scala> val srcPath = new Path(srcFilePath)
srcPath: org.apache.hadoop.fs.Path = /development/staging/b8baf3f4-abce-11eb-8592-0242ac110032
scala> val destPath = new Path(destFilePath)
destPath: org.apache.hadoop.fs.Path = /development/staging/dest_b8baf3f4-abce-11eb-8592-0242ac110032
scala>
scala> hdfs.copy(srcPath, destPath)
<console>:52: error: value copy is not a member of org.apache.hadoop.fs.FileSystem
hdfs.copy(srcPath, destPath)

您可以参考这个 Stack Overflow 帖子中的回答:

Try Hadoop's FileUtil.copy() command, as described here: https://hadoop.apache.org/docs/r2.8.5/api/org/apache/hadoop/fs/FileUtil.html#copy(org.apache.hadoop.fs.FileSystem,%20org.apache.hadoop.fs.Path,%20org.apache.hadoop.fs.FileSystem,%20org.apache.hadoop.fs.Path,%20boolean,%20org.apache.hadoop.conf.Configuration)
// Copy one HDFS path to another with Hadoop's FileUtil.copy.
// The "hdfs://my/..." URIs are placeholders; substitute the real source and destination.
val conf = new org.apache.hadoop.conf.Configuration()
val srcPath = new org.apache.hadoop.fs.Path("hdfs://my/src/path")
val dstPath = new org.apache.hadoop.fs.Path("hdfs://my/dst/path")

// The fifth argument of FileUtil.copy is `deleteSource`. Passing `true` removes the
// source once the copy finishes (effectively a move); `false` keeps the source in
// place, which is what a plain copy requires.
val deleteSource = false

org.apache.hadoop.fs.FileUtil.copy(
  srcPath.getFileSystem(conf), // file system that owns the source path
  srcPath,
  dstPath.getFileSystem(conf), // file system that owns the destination path
  dstPath,
  deleteSource,
  conf
)

最新更新