我是hadoop mapreduce的新手。我正在尝试在地图减少中实现搜索,所以我的输入文件是这样的
key1 value1,value3
key2 value2,value6
我想找到用户将作为命令行参数传递的密钥的值列表。为此,我的主(驱动程序)类类似于
public static void main(String[] args) {
JobClient client = new JobClient();
JobConf conf = new JobConf(NameSearchJava.class);
// write now I am trying with writing search key in code (Joy),later I'll
//try to pass argument while running job from hadoop.
conf.set("searcKey", "Joy");
conf.setJobName("Search");
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
conf.setMapperClass(SearchMapper.class);
conf.setReducerClass(SearchReducer.class);
client.setConf(conf);
try {
JobClient.runJob(conf);
} catch (Exception e) {
e.printStackTrace();
}
}
}
and my configure function is:
String item ;
public void configure(JobConf job) {
{
item = job.get("test");
System.out.println(item);
System.err.println("search" + item);
}
在Mapper或Reducer中,我应该在哪里写configure函数。如何使用这个项参数在Reducer中进行比较。这是hadoop中获取参数的正确方法吗?
添加到Hadooper的Answer。
这是完整的代码。
你可以参考哈杜珀的答案来解释。
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* @author Unmesha sreeveni
* @Date 23 sep 2014
*/
public class StringSearchDriver extends Configured implements Tool {
public static class Map extends
Mapper<LongWritable, Text, Text, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
String line = value.toString();
String searchString = conf.get("word");
StringTokenizer tokenizer = new StringTokenizer(line);
while (tokenizer.hasMoreTokens()) {
String token = tokenizer.nextToken();
if(token.equals(searchString)){
word.set(token);
context.write(word, one);
}
}
}
}
public static class Reduce extends
Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
context.write(key, new IntWritable(sum));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
int res = ToolRunner.run(conf, new StringSearchDriver(), args);
System.exit(res);
}
@Override
public int run(String[] args) throws Exception {
// TODO Auto-generated method stub
if (args.length != 3) {
System.out
.printf("Usage: Search String <input dir> <output dir> <search word> n");
System.exit(-1);
}
String source = args[0];
String dest = args[1];
String searchword = args[2];
Configuration conf = new Configuration();
conf.set("word", searchword);
Job job = new Job(conf, "Search String");
job.setJarByClass(StringSearchDriver.class);
FileSystem fs = FileSystem.get(conf);
Path in =new Path(source);
Path out =new Path(dest);
if (fs.exists(out)) {
fs.delete(out, true);
}
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, in);
FileOutputFormat.setOutputPath(job, out);
boolean sucess = job.waitForCompletion(true);
return (sucess ? 0 : 1);
}
}
如下读取Driver类中的命令行参数-
conf.set("searchKey", args[2]);
其中args[2]将是作为第三个参数传递的搜索关键字。
配置方法应在映射器中编码如下-
String searchWord;
public void configure(JobConf jc)
{
searchWord = jc.get("searchKey");
}
这将使您的密钥在Mapper函数中进行搜索。
您可以使用以下逻辑在Mapper本身中执行比较-
public void map(LongWritable key, Text value,
OutputCollector<Text, IntWritable> out, Reporter reporter)
throws IOException
{
String[] input = value.toString().split(" ");
for(String word:input)
{
if (word.equalsIgnoreCase(searchWord))
out.collect(new Text(word), new IntWritable(1));
}
}
如果这有帮助,请告诉我!