最近 Amazon 更改了 EMR 作业的策略,要求为其定义 IAM 角色。我获取了最新的 awssdk.dll 并升级了代码。我创建了所需的策略,旧的 Pig 作业已经可以成功运行。
但我的 Ruby 作业无法运行。我一直收到找不到 mapper 文件的错误,尽管该文件确实在 S3 上。我已经确认 mapper 文件对所有人公开可读,并且路径正确、文件存在。
如能提供任何帮助,不胜感激。
Caused by: java.lang.RuntimeException: configuration exception
at org.apache.hadoop.streaming.PipeMapRed.configure(PipeMapRed.java:232)
at org.apache.hadoop.streaming.PipeMapper.configure(PipeMapper.java:66)
... 22 more
Caused by: java.io.IOException: Cannot run program "s3://er-hadoop/Scripts/Audit/Debugging/UserId_Picker/WebPageAudit_GetDataForSpecifiedUsers_Mapper.rb": error=2, No such file or directory
// Failing configuration: builds a streaming step whose Mapper and Reducer are
// given as full s3:// URIs, then converts it to a HadoopJarStepConfig.
StreamingStep stepConfig = new StreamingStep
{
Inputs = new List<string>
{
"s3://er-upload-filestore-east/WebPageAudit/2015/04/01/*.gz",
},
Output = "s3://er-hadoop/Output/Mangal/testawschanges",
// NOTE(review): this exact string is what the reported IOException
// ("Cannot run program ... error=2") names as the program it failed to run.
Mapper = "s3://er-hadoop/Scripts/Audit/Debugging/UserId_Picker/WebPageAudit_GetDataForSpecifiedUsers_Mapper.rb",
Reducer = "s3://er-hadoop/Scripts/Audit/Debugging/UserId_Picker/WebPageAudit_GetDataForSpecifiedUsers_Reducer.rb",
};
// Adds a "-jobconf mapred.output.compress=true" setting to the step
// (presumably enables compressed job output; confirm against Hadoop docs).
stepConfig.AddHadoopConfig("-jobconf", "mapred.output.compress=true");
HadoopJarStepConfig config = stepConfig.ToHadoopJarStepConfig();
// Working configuration: the step refers to the mapper/reducer scripts by bare
// file name, and their full S3 locations are handed to Hadoop through a leading
// "-files" argument.
// NOTE(review): "-files" is presumably the Hadoop generic option that copies the
// listed files to the task nodes; confirm against the Hadoop streaming docs.
const string scriptFolder = "FULL S3 Path ";
const string mapperFileName = "YourMapper.rb";
const string reducerFileName = "YourReducer.rb";

// Full locations of both scripts: "<folder>/<file name>".
string mapperUri = scriptFolder + "/" + mapperFileName;
string reducerUri = scriptFolder + "/" + reducerFileName;

var stepConfig = new StreamingStep
{
    Inputs = new List<string> { "YOURInput" },
    Output = "YOUrOUTPUT",
    // File names only; the full paths travel in the "-files" argument below.
    Mapper = mapperFileName,
    Reducer = reducerFileName,
};

HadoopJarStepConfig config = stepConfig.ToHadoopJarStepConfig();

// Put "-files <mapper>,<reducer>" at the very front of the argument list,
// ahead of the arguments generated by ToHadoopJarStepConfig().
config.Args.InsertRange(0, new[]
{
    "-files",
    string.Join(",", mapperUri, reducerUri),
});