当我尝试在Hadoop上执行MR程序时,我收到以下错误。
样本输入数据:
CA_25-Jan-2014 00:12:345 15.7 01:19:345 23.1 02:34:542 12.3 03:12:187 16 04:00:093 -14 05:12:345 35.7 06:19:345 23.1 07:34:542 12.3 08:12:187 16 09:00:093 -7 10:12:345 15.7 11:19:345 23.1 12:34:542 -22.3 13:12:187 16 14:00:093 -7 15:12:345 15.7 16:19:345 23.1 19:34:542 12.3 20:12:187 16 22:00:093 -7
CA_26-Jan-2014 00:54:245 15.7 01:19:345 23.1 02:34:542 12.3 03:12:187 16 04:00:093 -14 05:12:345 55.7 06:19:345 23.1 07:34:542 12.3 08:12:187 16 09:00:093 -7 10:12:345 15.7 11:19:345 23.1 12:34:542 12.3 13:12:187 16 14:00:093 -7 15:12:345 15.7 16:19:345 23.1 19:34:542 12.3 20:12:187 16 22:00:093 -7
CA_27-Jan-2014 00:14:045 35.7 01:19:345 23.1 02:34:542 -22.3 03:12:187 16 04:00:093 -14 05:12:345 35.7 06:19:345 23.1 07:34:542 12.3 08:12:187 16 09:00:093 -7 10:12:345 15.7 11:19:345 23.1 12:34:542 12.3 13:12:187 16 14:00:093 -7 15:12:345 15.7 16:19:345 23.1 19:34:542 12.3 20:12:187 16 22:00:093 -7
CA_28-Jan-2014 00:22:315 15.7 01:19:345 23.1 02:34:542 12.3 03:12:187 16 04:00:093 -14 05:12:345 35.7 06:19:345 23.1 07:34:542 12.3 08:12:187 16 09:00:093 -7 10:12:345 15.7 11:19:345 -23.3 12:34:542 12.3 13:12:187 16 14:00:093 -7 15:12:345 15.7 16:19:345 23.1 19:34:542 12.3 20:12:187 16 22:00:093 -7
CA_29-Jan-2014 00:15:345 15.7 01:19:345 23.1 02:34:542 52.9 03:12:187 16 04:00:093 -14 05:12:345 45.0 06:19:345 23.1 07:34:542 -2.3 08:12:187 16 09:00:093 -7 10:12:345 15.7 11:19:345 23.1 12:34:542 12.3 13:12:187 16 14:00:093 -17 15:12:345 15.7 16:19:345 23.1 19:34:542 12.3 20:12:187 16 22:00:093 -7
NJ_29-Jan-2014 00:15:345 15.7 01:19:345 23.1 02:34:542 52.9 03:12:187 16 04:00:093 -14 05:12:345 45.0 06:19:345 23.1 07:34:542 -2.3 08:12:187 16 09:00:093 -7 10:12:345 15.7 11:19:345 23.1 12:34:542 12.3 13:12:187 16 14:00:093 -17 15:12:345 15.7 16:19:345 23.1 19:34:542 12.3 20:12:187 16 22:00:093 -7
CA_30-Jan-2014 00:22:445 15.7 01:19:345 23.1 02:34:542 12.3 03:12:187 56 04:00:093 -14 05:12:345 35.7 06:19:345 39.6 07:34:542 12.3 08:12:187 16 09:00:093 -7 10:12:345 15.7 11:19:345 23.1 12:34:542 12.3 13:12:187 16 14:00:093 -7 15:12:345 -15.7 16:19:345 23.1 19:34:542 12.3 20:12:187 16 22:00:093 -7
CA_31-Jan-2014 00:42:245 15.7 01:19:345 23.1 02:34:542 12.3 03:12:187 16 04:00:093 -14 05:12:345 49.2 06:19:345 23.1 07:34:542 12.3 08:12:187 16 09:00:093 -7 10:12:345 15.7 11:19:345 23.1 12:34:542 12.3 13:12:187 16 14:00:093 -7 15:12:345 15.7 16:19:345 23.1 19:34:542 12.3 20:12:187 16 22:00:093 -27
NY_29-Jan-2014 00:15:345 15.7 01:19:345 23.1 02:34:542 52.9 03:12:187 16 04:00:093 -14 05:12:345 45.0 06:19:345 23.1 07:34:542 -2.3 08:12:187 16 09:00:093 -7 10:12:345 15.7 11:19:345 23.1 12:34:542 12.3 13:12:187 16 14:00:093 -17 15:12:345 15.7 16:19:345 23.1 19:34:542 12.3 20:12:187 16 22:00:093 -7
NY_30-Jan-2014 00:22:445 15.7 01:19:345 23.1 02:34:542 12.3 03:12:187 56 04:00:093 -14 05:12:345 35.7 06:19:345 39.6 07:34:542 12.3 08:12:187 16 09:00:093 -7 10:12:345 15.7 11:19:345 23.1 12:34:542 12.3 13:12:187 16 14:00:093 -7 15:12:345 -15.7 16:19:345 23.1 19:34:542 12.3 20:12:187 16 22:00:093 -7
NY_31-Jan-2014 00:42:245 15.7 01:19:345 23.1 02:34:542 12.3 03:12:187 16 04:00:093 -14 05:12:345 49.2 06:19:345 23.1 07:34:542 12.3 08:12:187 16 09:00:093 -7 10:12:345 15.7 11:19:345 23.1 12:34:542 12.3 13:12:187 16 14:00:093 -7 15:12:345 15.7 16:19:345 23.1 19:34:542 12.3 20:12:187 16 22:00:093 -27
NJ_30-Jan-2014 00:22:445 15.7 01:19:345 23.1 02:34:542 12.3 03:12:187 56 04:00:093 -14 05:12:345 35.7 06:19:345 39.6 07:34:542 12.3 08:12:187 16 09:00:093 -7 10:12:345 15.7 11:19:345 23.1 12:34:542 12.3 13:12:187 16 14:00:093 -7 15:12:345 -15.7 16:19:345 23.1 19:34:542 12.3 20:12:187 16 22:00:093 -7
期望输出:
日期  最高温度
我想显示当天的日期和最高温度。
代码:
``import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MaxTemp {
public static class TempMapper extends Mapper<LongWritable,Text,Text,FloatWritable>{
String date;
public void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException{
String line=value.toString();
date=line.substring(3,14);
String items[]=line.split("t");
for(int i=2;i<items.length;i=i+2){
Float temp=Float.parseFloat(items[i]);
context.write(new Text(date),new FloatWritable(temp));
}
}
}
public static class TempReducer extends Reducer<Text,FloatWritable,Text,FloatWritable>{
public void reduce(Text key,Iterable<FloatWritable> values,Context context) throws IOException, InterruptedException{
Float maxvalue=Float.MIN_VALUE;
for(FloatWritable value:values){
maxvalue=Math.max(maxvalue,value.get());
}
context.write(key,new FloatWritable(maxvalue));
}
}
public static void main(String args[]) throws Exception{
Configuration conf=new Configuration();
Job job=new Job(conf,"Temp job");
job.setJarByClass(MaxTemp.class);
job.setMapperClass(TempMapper.class);
job.setReducerClass(TempReducer.class);
//job.setCombinerClass(TempReducer.class);
FileInputFormat.addInputPath(job,new Path(args[0]));
FileOutputFormat.setOutputPath(job,new Path(args[1]));
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
System.exit(job.waitForCompletion(true)?0:1);
}
}
错误:
16/04/23 05:25:07 INFO mapred.JobClient: Task Id : attempt_201605162225_0029_m_000000_1, Status : FAILED
java.io.IOException: Type mismatch in value from map: expected org.apache.hadoop.io.Text, recieved org.apache.hadoop.io.FloatWritable
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:876)
at org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:574)
at org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:80)
at MaxTemp$TempMapper.map(MaxTemp.java:29)
at MaxTemp$TempMapper.map(MaxTemp.java:1)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:647)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:323)
at org.apache.hadoop.mapred.Child$4.run(Child.java:266)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1278)
at org.apache.hadoop.mapred.Child.main(Child.java:260)
将 setOutputValueClass 设置为 FloatWritable 而不是 Text。
job.setOutputValueClass(FloatWritable.class);
还要在作业配置中添加 MapOutputKeyClass 和 MapOutputValueClass
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(FloatWritable.class);