mapReduce得到所需的输出



请告诉我如何得到我想要的输出

当前输出给定:

阿尔巴尼亚 3607 ++ Country minPopulation

阿尔巴尼亚418495 ++ Country maxPopulation

期望输出值

国家 城市 minPopulation

国家 城市 maxPopulation

Reducer类:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reduces all population values received for one key to their minimum and maximum.
 *
 * <p>For every key two records are written, in this order: {@code (key, minimum)} and
 * {@code (key, maximum)}. The key is passed through unchanged.
 */
public class Handson3Reducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Scans the values of a single key once and emits the smallest and the largest one.
     *
     * @param key     grouping key; written back unchanged for both output records
     * @param values  population values collected for {@code key}
     * @param context Hadoop context used to emit the two output records
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Start from the opposite extremes so the first value replaces both.
        int maxValue = Integer.MIN_VALUE;
        int minValue = Integer.MAX_VALUE;
        for (IntWritable value : values) {
            int population = value.get();
            maxValue = Math.max(maxValue, population);
            minValue = Math.min(minValue, population);
        }
        context.write(key, new IntWritable(minValue));
        context.write(key, new IntWritable(maxValue));
    }
}

Mapper类:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class handson3Mapper extends  Mapper<LongWritable, Text, Text, IntWritable> {
private static final int MISSING = 9999;

@Override

public void map(LongWritable key, Text value, Context context)  throws IOException, InterruptedException {

int populationVal;
String line = value.toString();
String field[] = line.split(",");
String country = field[4].substring(1, field[4].length()-1);
String newString = country.concat(field[0].substring(1, field[0].length()-1));

String population = field[9].substring(1, field[9].length()-1);
String city = field[0].substring(1, field[0].length()-1);

if (!population.matches(".*\d.*") || population.equals("")||
population.matches("([0-9].*)\.([0-9].*)") ){
return;
}else{
populationVal = Integer.parseInt(population);
context.write(new Text(country),new IntWritable(populationVal));
}
}

}

Runner类(作业驱动类):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Command-line driver that configures and submits the "Handson 3" MapReduce job.
 *
 * <p>Expects exactly two arguments: the input path and the output path.
 */
public class handsonJobRunner {

    /**
     * Configures the job, submits it and waits for it to finish.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 if the job succeeded, 1 if it failed, -1 if the arguments are invalid
     * @throws Exception if the job cannot be configured, submitted or monitored
     */
    public int run(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: Handson3 <input path> <outputpath>");
            return -1;
        }

        Job job = Job.getInstance();
        job.setJarByClass(handsonJobRunner.class);
        job.setJobName("Handson 3");

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(handson3Mapper.class);
        job.setReducerClass(Handson3Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Wait exactly once and hand the result to the caller instead of exiting here.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Runs the driver and terminates the JVM with the status returned by {@link #run}.
     *
     * @param args command-line arguments, passed to {@link #run} unchanged
     * @throws Exception if {@link #run} fails
     */
    public static void main(String[] args) throws Exception {
        handsonJobRunner driver = new handsonJobRunner();
        System.exit(driver.run(args));
    }
}

提前谢谢你,如有任何建议,我将不胜感激。

您应该将城市和人口作为值发送给reducer,并在reducer中选择每个国家人口最多和最少的城市。

你的映射器应该是这样的:

public class Handson3Mapper extends Mapper<LongWritable, Text, Text, Text> {
private static final int MISSING = 9999;
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
int populationVal;
String line = value.toString();
String field[] = line.split(",");
String country = field[4].substring(1, field[4].length() - 1);
String newString = country.concat(field[0].substring(1, field[0].length() - 1));
String population = field[9].substring(1, field[9].length() - 1);
String city = field[0].substring(1, field[0].length() - 1);

if (!population.matches(".*\d.*") || population.equals("") ||
population.matches("([0-9].*)\.([0-9].*)")) {
return;
} else {
populationVal = Integer.parseInt(population);
context.write(new Text(country), new Text(city + "-" + populationVal));
}
}
}

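你的Reducer应该改成这样(由于Mapper现在输出的值类型是Text,而作业最终输出的值类型仍是IntWritable,还需要在驱动类中调用 job.setMapOutputValueClass(Text.class)):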

/**
 * Picks, for each key, the city with the smallest and the city with the largest population.
 *
 * <p>Each incoming value is expected to have the form {@code city-population}. Two records
 * are written per key, in this order: {@code (key + " " + minCity, minPopulation)} and
 * {@code (key + " " + maxCity, maxPopulation)}.
 */
public class Handson3Reducer extends Reducer<Text, Text, Text, IntWritable> {

    /**
     * Scans the values of a single key once and emits its least and most populated city.
     *
     * <p>Values that contain no {@code '-'} or whose population part is not an integer are
     * ignored; if no value can be parsed, nothing is written for the key.
     *
     * @param key     grouping key; used as the prefix of both output keys
     * @param values  {@code city-population} strings collected for {@code key}
     * @param context Hadoop context used to emit the two output records
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        String maxPopulationCityName = "";
        String minPopulationCityName = "";
        int maxValue = Integer.MIN_VALUE;
        int minValue = Integer.MAX_VALUE;
        boolean sawValue = false;

        for (Text value : values) {
            String record = value.toString();
            // Split at the LAST '-' so that hyphenated city names stay intact.
            // Assumes the population part is non-negative (no leading '-').
            int separator = record.lastIndexOf('-');
            if (separator < 0) {
                continue;
            }
            String city = record.substring(0, separator);
            int population;
            try {
                population = Integer.parseInt(record.substring(separator + 1));
            } catch (NumberFormatException ignored) {
                // Population part is not a valid int: ignore this value.
                continue;
            }

            sawValue = true;
            if (population > maxValue) {
                maxPopulationCityName = city;
                maxValue = population;
            }
            if (population < minValue) {
                minPopulationCityName = city;
                minValue = population;
            }
        }

        if (!sawValue) {
            return; // avoid emitting the Integer.MAX_VALUE / MIN_VALUE sentinels
        }
        context.write(new Text(key + " " + minPopulationCityName), new IntWritable(minValue));
        context.write(new Text(key + " " + maxPopulationCityName), new IntWritable(maxValue));
    }
}