疯狂java


您现在的位置: 疯狂软件 >> 新闻资讯 >> 正文

Hadoop 类Grep源代码注释


 

  关于 Hadoop 类 Grep 的源代码注释问题,我们用一段源码给大家演示一下:

  Java代码

  package org.apache.hadoop.examples;

  import java.util.Random;

  import org.apache.hadoop.conf.Configuration;

  import org.apache.hadoop.conf.Configured;

  import org.apache.hadoop.fs.FileSystem;

  import org.apache.hadoop.fs.Path;

  import org.apache.hadoop.io.LongWritable;

  import org.apache.hadoop.io.Text;

  import org.apache.hadoop.mapred.*;

  import org.apache.hadoop.mapred.lib.*;

  import org.apache.hadoop.util.Tool;

  import org.apache.hadoop.util.ToolRunner;

  /* 对输入文件按正则表达式查找,把结果写到输出文件上。

  查找用到了RegexMapper,LongSumReducer,InverseMapper系统自带的工具类。这个源代码运行了两个job,一个查找,一个是排序。

  Extracts matching regexs from input files and counts them. */

  public class Grep extends Configured implements Tool {

  private Grep() {} // 单例模式singleton

  public int run(String[] args) throws Exception {

  if (args.length < 3) {

  System.out.println("Grep []");

  ToolRunner.printGenericCommandUsage(System.out);

  return -1;

  }

  Path tempDir =

  new Path("grep-temp-"+

  Integer.toString(new Random()。nextInt(Integer.MAX_VALUE)));

  JobConf grepJob = new JobConf(getConf(), Grep.class);

  try {

  grepJob.setJobName("grep-search");

  FileInputFormat.setInputPaths(grepJob, args[0]);

  grepJob.setMapperClass(RegexMapper.class);//设置系统自带的mapper类来查找

  grepJob.set("mapred.mapper.regex", args[2]);

  if (args.length == 4)

  grepJob.set("mapred.mapper.regex.group", args[3]);

  grepJob.setCombinerClass(LongSumReducer.class);//设置系统自带的reducer来做合并

  grepJob.setReducerClass(LongSumReducer.class);

  //设置系统自带的reducer.

  FileOutputFormat.setOutputPath(grepJob, tempDir);

  grepJob.setOutputFormat(SequenceFileOutputFormat.class);//设置输出格式是二进制文件

  grepJob.setOutputKeyClass(Text.class);//输出的key是Text类型

  grepJob.setOutputValueClass(LongWritable.class);//输出的value是long类型

  JobClient.runJob(grepJob);

  JobConf sortJob = new JobConf(Grep.class);

  sortJob.setJobName("grep-sort");

  FileInputFormat.setInputPaths(sortJob, tempDir);

  sortJob.setInputFormat(SequenceFileInputFormat.class);//设置输入的文件格式二进制文件

  sortJob.setMapperClass(InverseMapper.class);//设置自带的排序mapper

  sortJob.setNumReduceTasks(1); // write a single file

  FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));

  sortJob.setOutputKeyComparatorClass // 输出结果是降序排列sort by decreasing freq

  (LongWritable.DecreasingComparator.class);

  JobClient.runJob(sortJob);

  }

  finally {

  FileSystem.get(grepJob)。delete(tempDir, true);

  }

  return 0;

  }

  public static void main(String[] args) throws Exception {

  int res = ToolRunner.run(new Configuration(), new Grep(), args);

  System.exit(res);

  }

  }