平均值计算
平均值计算是对同一属性的一组值求平均,常见的例子有平均成绩、平均温度。
思想
平均值的计算其实是在MapReduce入门案例词频统计wordcount的基础上,再进化一步。
词频统计wordcount是在Map阶段将每个单词作为key,value固定为1;在Reduce阶段,将同一个key的多个1相加,得到词频。
平均值计算,是在Map阶段将属性作为key,同一个属性的值作为value;在Reduce阶段,将同一个key的多个值相加,再除以值的个数,得到平均值。
举例
姓名 语文 数学 英语
lh 92 68 70
zyt 94 88 75
ls 96 78 78
hgw 90 70 56
yxx 80 88 73
hz 90 98 70
xyd 60 88 73
hj 90 58 70
cs 50 58 11
求语文、数学、英语的平均值
数学 77.11
英语 64.00
语文 82.44
代码
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.io.IOException;
import java.text.DecimalFormat;public class AwkTest2 {public static class myMapper extends Mapper<Object, Text, Text, LongWritable>{Text k = new Text();LongWritable v = new LongWritable();@Overrideprotected void map(Object key, Text value, Context context) throws IOException, InterruptedException {String row = value.toString();String[] values = row.split(" ");int yuwen = Integer.valueOf(values[1]);int shuxue = Integer.valueOf(values[2]);int yingyu = Integer.valueOf(values[3]);k.set("语文");v.set(yuwen);context.write(k, v);k.set("数学");v.set(shuxue);context.write(k, v);k.set("英语");v.set(yingyu);context.write(k, v);}}public static class myReducer extends Reducer<Text, LongWritable, Text, Text>{public Text v = new Text();@Overrideprotected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {int sum = 0;int count = 0;for (LongWritable value : values) {sum += value.get();count++;}DecimalFormat df = new DecimalFormat( "0.00");v.set(String.valueOf(df.format(1.0*sum/count)));context.write(key, v);}}public static void main(String[] args) throws IOException {try {Job job = Job.getInstance(new Configuration(),"awktest2");job.setJarByClass(AwkTest2.class);job.setMapperClass(myMapper.class);job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(LongWritable.class);job.setReducerClass(myReducer.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(Text.class);FileInputFormat.addInputPath(job,new Path("/input/data.txt"));FileOutputFormat.setOutputPath(job,new Path("/output/mr/awktest/test2/"));int success = job.waitForCompletion(true) ? 0 : 1;System.exit(success);} catch (InterruptedException e) {e.printStackTrace();} catch (ClassNotFoundException e) {e.printStackTrace();}}
}