eclipse开发MapReduce_Eclipse

eclipse开发mapreduce

使用eclipse开发MapReduce

1.增加插件
    将插件hadoop-eclipse-plugin-1.0.4.jar放入/usr/lib/eclipse/plugins目录下
    （完成后重新启动eclipse）
2.配置hadoop的安装路径
    Window—Preferences，在左边栏中找到Hadoop Map/Reduce，将hadoop的目录设置为hadoop的安装目录
3.建立MapReduce工程
    创建一个MapReduce Project：点击eclipse主菜单上的File—New—Project，在弹出的对话框中选择MapReduce Project，之后输入Project的名称
4.建立MapReduce程序
    和建立普通的java程序是一样的

package com.sun.mapreduce;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

/**
 * Word-count MapReduce job: counts how many times each whitespace-separated
 * token occurs in the text files under the input path and writes
 * {@code token<TAB>count} records to the output path.
 *
 * <p>Usage: {@code WordCount <input path> <output path>}
 */
public class WordCount {

    /**
     * Splits each input line into tokens and emits {@code (token, 1)} for every token.
     */
    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable one = new IntWritable(1);

        // Reused across calls so that no new Text is allocated per token.
        private final Text word = new Text();

        /**
         * Tokenizes one line of input and writes a count of 1 for each token.
         *
         * @param key     input key supplied by the input format (not used here)
         * @param value   one line of input text
         * @param context sink for the emitted {@code (token, 1)} pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            // StringTokenizer's default delimiters are space, tab, newline, CR and form feed.
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, one);
            }
        }
    }

    /**
     * Sums all counts received for a token and emits {@code (token, total)}.
     */
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {

        /**
         * Adds up the values for one key and writes the total.
         *
         * @param key     the token being counted
         * @param values  partial counts for that token
         * @param context sink for the emitted {@code (token, total)} pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    /**
     * Configures and runs the job, then exits with 0 on success, 1 if the job
     * failed, or 2 if the command-line arguments are missing.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path;
     *             any further arguments are ignored
     * @throws Exception if job setup or submission fails
     */
    public static void main(String[] args) throws Exception {
        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: WordCount <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = new Job(conf, "wordcount");
        job.setJarByClass(WordCount.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setMapperClass(Map.class);
        // Summation is associative and commutative and Reduce's input and output
        // types match, so it can also pre-aggregate map output as a combiner.
        job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Propagate the job outcome to the caller through the process exit status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

5.执行程序

    执行程序时要附加一定的参数
    点击Run—Run Configurations，在Arguments中填写参数，参数依次为输入文件的目录和输出文件的目录
    例如：/home/asheng/hadoop/in /home/asheng/hadoop/out（in目录下应该放置需要分析的文件；out目录不需要手工建立，且运行前不能已经存在，否则作业会报错）
    设置完成后点击Run即可，通过控制台可以观察运行状态，具体的运行结果在out目录下