How do I run the Hadoop DFSIO MapReduce benchmark in Docker?



Can you help me run this HDFS benchmark with TestDFSIO?

I created a Hadoop cluster with 4 live DataNodes and 1 checkpoint node, and tried to write 10 files (100 MB each) from inside the Docker NameNode container with the following command:

yarn jar $MAPRED_EXAMPLES/hadoop-mapreduce-client-jobclient-*-tests.jar TestDFSIO -write -nrFiles 10 -fileSize 100

However, the following error kept recurring in a loop:

2020-06-15 00:05:01,756 INFO fs.TestDFSIO: TestDFSIO.1.8
2020-06-15 00:05:01,765 INFO fs.TestDFSIO: nrFiles = 10
2020-06-15 00:05:01,765 INFO fs.TestDFSIO: nrBytes (MB) = 100.0
2020-06-15 00:05:01,766 INFO fs.TestDFSIO: bufferSize = 1000000
2020-06-15 00:05:01,766 INFO fs.TestDFSIO: baseDir = /benchmarks/TestDFSIO
2020-06-15 00:05:04,924 INFO fs.TestDFSIO: creating control file: 104857600 bytes, 10 files
org.apache.hadoop.hdfs.server.namenode.SafeModeException: Cannot delete /benchmarks/TestDFSIO/io_control. Name node is in safe mode.
The reported blocks 78 has reached the threshold 0,9990 of total blocks 78. The minimum number of live datanodes is not required. In safe mode extension. Safe mode will be turned off automatically in 1 seconds. NamenodeHostName:namenode
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.newSafemodeException(FSNamesystem.java:1476)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkNameNodeSafeMode(FSNamesystem.java:1463)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.delete(FSNamesystem.java:3084)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.delete(NameNodeRpcServer.java:1114)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.delete(ClientNamenodeProtocolServerSideTranslatorPB.java:705)
at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:528)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:999)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:927)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2915)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:121)
at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:88)
at org.apache.hadoop.hdfs.DFSClient.delete(DFSClient.java:1609)
at org.apache.hadoop.hdfs.DistributedFileSystem$19.doCall(DistributedFileSystem.java:946)
at org.apache.hadoop.hdfs.DistributedFileSystem$19.doCall(DistributedFileSystem.java:943)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.delete(DistributedFileSystem.java:953)
at org.apache.hadoop.fs.TestDFSIO.createControlFile(TestDFSIO.java:309)
at org.apache.hadoop.fs.TestDFSIO.run(TestDFSIO.java:868)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:90)
at org.apache.hadoop.fs.TestDFSIO.main(TestDFSIO.java:743)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.ProgramDriver$ProgramDescription.invoke(ProgramDriver.java:71)
at org.apache.hadoop.util.ProgramDriver.run(ProgramDriver.java:144)
at org.apache.hadoop.test.MapredTestDriver.run(MapredTestDriver.java:139)
at org.apache.hadoop.test.MapredTestDriver.main(MapredTestDriver.java:147)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.RunJar.run(RunJar.java:323)
at org.apache.hadoop.util.RunJar.main(RunJar.java:236)
Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.hdfs.server.namenode.SafeModeException): Cannot delete /benchmarks/TestDFSIO/io_control. Name node is in safe mode.
The reported blocks 78 has reached the threshold 0,9990 of total blocks 78. The minimum number of live datanodes is not required. In safe mode extension. Safe mode will be turned off automatically in 1 seconds. NamenodeHostName:namenode
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.newSafemodeException(FSNamesystem.java:1476)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkNameNodeSafeMode(FSNamesystem.java:1463)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.delete(FSNamesystem.java:3084)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.delete(NameNodeRpcServer.java:1114)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.delete(ClientNamenodeProtocolServerSideTranslatorPB.java:705)
at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:528)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:999)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:927)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2915)
at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1545)
at org.apache.hadoop.ipc.Client.call(Client.java:1491)
at org.apache.hadoop.ipc.Client.call(Client.java:1388)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:233)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:118)
at com.sun.proxy.$Proxy9.delete(Unknown Source)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.delete(ClientNamenodeProtocolTranslatorPB.java:641)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:422)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:165)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:157)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:359)
at com.sun.proxy.$Proxy10.delete(Unknown Source)
at org.apache.hadoop.hdfs.DFSClient.delete(DFSClient.java:1607)
... 23 more
hdadmin@namenode:~/hadoop/etc/hadoop$ hdfs --daemon stop namenode
hdadmin@namenode:~/hadoop/etc/hadoop$ yarn --daemon stop resourcemanager
hdadmin@namenode:~/hadoop/etc/hadoop$ hdfs --daemon start namenode
hdadmin@namenode:~/hadoop/etc/hadoop$ yarn --daemon stop resourcemanager
hdadmin@namenode:~/hadoop/etc/hadoop$ yarn jar $MAPRED_EXAMPLES/hadoop-mapreduce-client-jobclient-*-tests.jar TestDFSIO -write -nrFiles 10 -fileSize 100
2020-06-15 00:06:29,250 INFO fs.TestDFSIO: TestDFSIO.1.8
2020-06-15 00:06:29,261 INFO fs.TestDFSIO: nrFiles = 10
2020-06-15 00:06:29,261 INFO fs.TestDFSIO: nrBytes (MB) = 100.0
2020-06-15 00:06:29,262 INFO fs.TestDFSIO: bufferSize = 1000000
2020-06-15 00:06:29,262 INFO fs.TestDFSIO: baseDir = /benchmarks/TestDFSIO
2020-06-15 00:06:31,091 INFO fs.TestDFSIO: creating control file: 104857600 bytes, 10 files
org.apache.hadoop.hdfs.server.namenode.SafeModeException: Cannot delete /benchmarks/TestDFSIO/io_control. Name node is in safe mode.
The reported blocks 78 has reached the threshold 0,9990 of total blocks 78. The minimum number of live datanodes is not required. In safe mode extension. Safe mode will be turned off automatically in 22 seconds. NamenodeHostName:namenode
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.newSafemodeException(FSNamesystem.java:1476)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkNameNodeSafeMode(FSNamesystem.java:1463)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.delete(FSNamesystem.java:3084)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.delete(NameNodeRpcServer.java:1114)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.delete(ClientNamenodeProtocolServerSideTranslatorPB.java:705)
at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:528)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:999)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:927)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2915)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:121)
at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:88)
at org.apache.hadoop.hdfs.DFSClient.delete(DFSClient.java:1609)
at org.apache.hadoop.hdfs.DistributedFileSystem$19.doCall(DistributedFileSystem.java:946)
at org.apache.hadoop.hdfs.DistributedFileSystem$19.doCall(DistributedFileSystem.java:943)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.delete(DistributedFileSystem.java:953)
at org.apache.hadoop.fs.TestDFSIO.createControlFile(TestDFSIO.java:309)
at org.apache.hadoop.fs.TestDFSIO.run(TestDFSIO.java:868)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:90)
at org.apache.hadoop.fs.TestDFSIO.main(TestDFSIO.java:743)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.ProgramDriver$ProgramDescription.invoke(ProgramDriver.java:71)
at org.apache.hadoop.util.ProgramDriver.run(ProgramDriver.java:144)
at org.apache.hadoop.test.MapredTestDriver.run(MapredTestDriver.java:139)
at org.apache.hadoop.test.MapredTestDriver.main(MapredTestDriver.java:147)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.RunJar.run(RunJar.java:323)
at org.apache.hadoop.util.RunJar.main(RunJar.java:236)
Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.hdfs.server.namenode.SafeModeException): Cannot delete /benchmarks/TestDFSIO/io_control. Name node is in safe mode.
The reported blocks 78 has reached the threshold 0,9990 of total blocks 78. The minimum number of live datanodes is not required. In safe mode extension. Safe mode will be turned off automatically in 22 seconds. NamenodeHostName:namenode
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.newSafemodeException(FSNamesystem.java:1476)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkNameNodeSafeMode(FSNamesystem.java:1463)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.delete(FSNamesystem.java:3084)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.delete(NameNodeRpcServer.java:1114)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.delete(ClientNamenodeProtocolServerSideTranslatorPB.java:705)
at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:528)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:999)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:927)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2915)
at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1545)
at org.apache.hadoop.ipc.Client.call(Client.java:1491)
at org.apache.hadoop.ipc.Client.call(Client.java:1388)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:233)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:118)
at com.sun.proxy.$Proxy9.delete(Unknown Source)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.delete(ClientNamenodeProtocolTranslatorPB.java:641)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:422)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:165)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:157)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:359)
at com.sun.proxy.$Proxy10.delete(Unknown Source)
at org.apache.hadoop.hdfs.DFSClient.delete(DFSClient.java:1607)
... 23 more
hdadmin@namenode:~/hadoop/etc/hadoop$ hdfs --daemon start namenode
namenode is running as process 10517.  Stop it first.
hdadmin@namenode:~/hadoop/etc/hadoop$ hdfs --daemon stop namenode
hdadmin@namenode:~/hadoop/etc/hadoop$ yarn --daemon stop resourcemanager
hdadmin@namenode:~/hadoop/etc/hadoop$ hdfs --daemon start namenode
hdadmin@namenode:~/hadoop/etc/hadoop$ yarn --daemon start resourcemanager
hdadmin@namenode:~/hadoop/etc/hadoop$ export MAPRED_EXAMPLES=$HADOOP_HOME/share/hadoop/mapreduce
hdadmin@namenode:~/hadoop/etc/hadoop$ yarn jar $MAPRED_EXAMPLES/hadoop-mapreduce-client-jobclient-*-tests.jar TestDFSIO -write -nrFiles 10 -fileSize 100
2020-06-15 00:14:38,119 INFO fs.TestDFSIO: TestDFSIO.1.8
2020-06-15 00:14:38,123 INFO fs.TestDFSIO: nrFiles = 10
2020-06-15 00:14:38,124 INFO fs.TestDFSIO: nrBytes (MB) = 100.0
2020-06-15 00:14:38,124 INFO fs.TestDFSIO: bufferSize = 1000000
2020-06-15 00:14:38,124 INFO fs.TestDFSIO: baseDir = /benchmarks/TestDFSIO
2020-06-15 00:14:40,975 INFO fs.TestDFSIO: creating control file: 104857600 bytes, 10 files
2020-06-15 00:14:41,996 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-06-15 00:14:45,521 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-06-15 00:14:45,700 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-06-15 00:14:46,387 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-06-15 00:14:46,571 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-06-15 00:14:46,807 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-06-15 00:14:47,606 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-06-15 00:14:47,763 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-06-15 00:14:47,955 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-06-15 00:14:48,052 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-06-15 00:14:48,189 INFO fs.TestDFSIO: created control files for: 10 files
2020-06-15 00:14:49,015 INFO client.RMProxy: Connecting to ResourceManager at resourcemanager/172.18.0.7:8032
2020-06-15 00:14:50,046 INFO client.AHSProxy: Connecting to Application History server at timelineserver/172.18.0.2:10200
2020-06-15 00:14:50,165 INFO client.RMProxy: Connecting to ResourceManager at resourcemanager/172.18.0.7:8032
2020-06-15 00:14:50,166 INFO client.AHSProxy: Connecting to Application History server at timelineserver/172.18.0.2:10200
2020-06-15 00:14:50,750 INFO mapreduce.JobResourceUploader: Disabling Erasure Coding for path: /tmp/hadoop-yarn/staging/hdadmin/.staging/job_1592172809365_0001
2020-06-15 00:14:50,996 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-06-15 00:14:51,419 INFO mapred.FileInputFormat: Total input files to process : 10
2020-06-15 00:14:51,547 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-06-15 00:14:51,750 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-06-15 00:14:51,805 INFO mapreduce.JobSubmitter: number of splits:10
2020-06-15 00:14:52,969 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-06-15 00:14:53,150 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1592172809365_0001
2020-06-15 00:14:53,151 INFO mapreduce.JobSubmitter: Executing with tokens: []
2020-06-15 00:14:53,679 INFO conf.Configuration: resource-types.xml not found
2020-06-15 00:14:53,680 INFO resource.ResourceUtils: Unable to find 'resource-types.xml'.
2020-06-15 00:14:55,014 INFO impl.YarnClientImpl: Submitted application application_1592172809365_0001
2020-06-15 00:14:55,398 INFO mapreduce.Job: The url to track the job: http://resourcemanager:8088/proxy/application_1592172809365_0001/
2020-06-15 00:14:55,421 INFO mapreduce.Job: Running job: job_1592172809365_0001
2020-06-15 00:18:23,317 INFO mapreduce.Job: Job job_1592172809365_0001 running in uber mode : false
2020-06-15 00:18:23,492 INFO mapreduce.Job:  map 0% reduce 0%
2020-06-15 00:30:16,725 INFO mapreduce.Job: Task Id : attempt_1592172809365_0001_m_000001_0, Status : FAILED
AttemptID:attempt_1592172809365_0001_m_000001_0 Timed out after 600 secs
2020-06-15 00:30:16,759 INFO mapreduce.Job: Task Id : attempt_1592172809365_0001_m_000000_0, Status : FAILED
AttemptID:attempt_1592172809365_0001_m_000000_0 Timed out after 600 secs
2020-06-15 00:48:48,482 INFO ipc.Client: Retrying connect to server: resourcemanager/172.18.0.7:8032. Already tried 0 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
2020-06-15 00:48:49,488 INFO ipc.Client: Retrying connect to server: resourcemanager/172.18.0.7:8032. Already tried 1 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
2020-06-15 00:48:50,489 INFO ipc.Client: Retrying connect to server: resourcemanager/172.18.0.7:8032. Already tried 2 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
2020-06-15 00:48:51,493 INFO ipc.Client: Retrying connect to server: resourcemanager/172.18.0.7:8032. Already tried 3 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
2020-06-15 00:48:52,496 INFO ipc.Client: Retrying connect to server: resourcemanager/172.18.0.7:8032. Already tried 4 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
2020-06-15 00:48:53,500 INFO ipc.Client: Retrying connect to server: resourcemanager/172.18.0.7:8032. Already tried 5 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
2020-06-15 00:48:54,503 INFO ipc.Client: Retrying connect to server: resourcemanager/172.18.0.7:8032. Already tried 6 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
2020-06-15 00:48:55,505 INFO ipc.Client: Retrying connect to server: resourcemanager/172.18.0.7:8032. Already tried 7 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
2020-06-15 00:48:56,509 INFO ipc.Client: Retrying connect to server: resourcemanager/172.18.0.7:8032. Already tried 8 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
2020-06-15 00:48:57,511 INFO ipc.Client: Retrying connect to server: resourcemanager/172.18.0.7:8032. Already tried 9 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
2020-06-15 00:48:58,359 INFO retry.RetryInvocationHandler: java.net.ConnectException: Call From namenode/172.18.0.7 to resourcemanager:8032 failed on connection exception: java.net.ConnectException: Connection refused; For more details see:  http://wiki.apache.org/hadoop/ConnectionRefused, while invoking ApplicationClientProtocolPBClientImpl.getApplicationReport over null. Trying to failover immediately.
2020-06-15 00:48:59,361 INFO ipc.Client: Retrying connect to server: resourcemanager/172.18.0.7:8032.
java.io.IOException: Call From namenode/172.18.0.7 to resourcemanager:8032 failed on connection exception: java.net.ConnectException: Connection refused; For more details see:  http://wiki.apache.org/hadoop/ConnectionRefused
at org.apache.hadoop.mapred.ClientServiceDelegate.invoke(ClientServiceDelegate.java:359)
at org.apache.hadoop.mapred.ClientServiceDelegate.getTaskCompletionEvents(ClientServiceDelegate.java:398)
at org.apache.hadoop.mapred.YARNRunner.getTaskCompletionEvents(YARNRunner.java:878)
at org.apache.hadoop.mapreduce.Job$6.run(Job.java:732)
at org.apache.hadoop.mapreduce.Job$6.run(Job.java:729)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
at org.apache.hadoop.mapreduce.Job.getTaskCompletionEvents(Job.java:729)
at org.apache.hadoop.mapreduce.Job.monitorAndPrintJob(Job.java:1652)
at org.apache.hadoop.mapred.JobClient$NetworkedJob.monitorAndPrintJob(JobClient.java:412)
at org.apache.hadoop.mapred.JobClient.monitorAndPrintJob(JobClient.java:895)
at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:875)
at org.apache.hadoop.fs.TestDFSIO.runIOTest(TestDFSIO.java:476)
at org.apache.hadoop.fs.TestDFSIO.writeTest(TestDFSIO.java:455)
at org.apache.hadoop.fs.TestDFSIO.run(TestDFSIO.java:872)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:90)
at org.apache.hadoop.fs.TestDFSIO.main(TestDFSIO.java:743)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.ProgramDriver$ProgramDescription.invoke(ProgramDriver.java:71)
at org.apache.hadoop.util.ProgramDriver.run(ProgramDriver.java:144)
at org.apache.hadoop.test.MapredTestDriver.run(MapredTestDriver.java:139)
at org.apache.hadoop.test.MapredTestDriver.main(MapredTestDriver.java:147)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.RunJar.run(RunJar.java:323)
at org.apache.hadoop.util.RunJar.main(RunJar.java:236)

It is worth mentioning that the same job completed successfully with a file size of 10 MB.

What should I do to get this job to run successfully?

Thanks!

Besides the NameNode and DataNodes, you also need a ResourceManager and NodeManagers set up on the cluster in order to run YARN applications, and judging from the exceptions, these are missing.
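Note that the SafeModeException itself was transient: your own log says "Safe mode will be turned off automatically in 1 seconds", meaning the NameNode was simply still starting up. Rather than restarting daemons, you can block until safe mode ends and confirm which daemons are actually running before submitting the job. A minimal sketch using standard Hadoop commands (run inside the relevant containers):

hdfs dfsadmin -safemode get      # show current safe mode status
hdfs dfsadmin -safemode wait     # block until the NameNode leaves safe mode
jps                              # list the Hadoop JVM daemons running in this container
yarn node -list                  # NodeManagers registered with the ResourceManager
hdfs dfsadmin -report            # live DataNodes as seen by the NameNode

If yarn node -list fails to connect or returns no nodes, the ResourceManager/NodeManagers are the problem, not HDFS.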

You can refer to the https://github.com/big-data-europe/docker-hadoop project for an example of how to set up a Hadoop cluster in Docker.
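For reference, the ResourceManager and NodeManager services in that project are defined roughly like the sketch below. The image tags, service names, and hadoop.env file follow that repository's docker-compose setup and may differ in your version, so treat them as illustrative:

resourcemanager:
  image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.2.1-java8   # illustrative tag
  container_name: resourcemanager
  restart: always
  environment:
    SERVICE_PRECONDITION: "namenode:9870 datanode:9864"           # wait for HDFS before starting
  env_file:
    - ./hadoop.env
  ports:
    - 8088:8088                                                   # ResourceManager web UI

nodemanager:
  image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.2.1-java8       # illustrative tag
  container_name: nodemanager
  restart: always
  environment:
    SERVICE_PRECONDITION: "namenode:9870 datanode:9864 resourcemanager:8088"
  env_file:
    - ./hadoop.env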

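Once a ResourceManager and NodeManagers are up and reachable, the usual TestDFSIO cycle is write, then read, then clean, using the same jar and environment variable as in the question:

yarn jar $MAPRED_EXAMPLES/hadoop-mapreduce-client-jobclient-*-tests.jar TestDFSIO -write -nrFiles 10 -fileSize 100
yarn jar $MAPRED_EXAMPLES/hadoop-mapreduce-client-jobclient-*-tests.jar TestDFSIO -read -nrFiles 10 -fileSize 100
yarn jar $MAPRED_EXAMPLES/hadoop-mapreduce-client-jobclient-*-tests.jar TestDFSIO -clean

The -read pass must run after a -write pass with the same -nrFiles/-fileSize, since it reads the files that the write pass created; -clean removes /benchmarks/TestDFSIO afterwards.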