更新时间:2019年07月26日 11时12分34秒 来源:黑马程序员论坛
运行效果如下 1.输入数据 目标输出 一、创建项目 打开eclipse,新建一个工程。“file” ->“New” ->“other”,select a wizard中选择“Map/Reduce Project”,输入工程名 二、编写程序 选择src,单击右键,选择“New”→“ Package”,输入包名,点击“Finish” 选择包名,点击右键,选择“New”→“Class”,输入类名,点击“Finish” 一共要创建四个类,分别是:“User”、“SecondaryJob”、“SecondaryMapper”、“SecondaryReducer” User.java package sort; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import org.apache.hadoop.io.WritableComparable; public class User implements WritableComparable<User>{ private String addr; private String name; private int age; public String getAddr() { return addr; } public void setAddr(String addr) { this.addr = addr; } public String getName() { return name; } public void setName(String name) { this.name = name; } public int getAge() { return age; } public void setAge(int age) { this.age = age; } public void set(String addr,String name, int age) { this.addr = addr; this.name = name; this.age = age; } @Override public void readFields(DataInput in) throws IOException { // TODO Auto-generated method stub this.addr = in.readUTF(); this.name = in.readUTF(); this.age = in.readInt(); } @Override public void write(DataOutput out) throws IOException { // TODO Auto-generated method stub out.writeUTF(addr); out.writeUTF(name); out.writeInt(age); } @Override public int compareTo(User o) { // TODO Auto-generated method stub //比较第一个字符 int comp = this.addr.compareTo(o.getAddr()); //如果不相同,上面comp不为0 if(0!=comp) { return comp; } return Integer.valueOf(o.getAge()).compareTo(Integer.valueOf(age)); } } SecondaryMapper.java package sort; import java.io.IOException; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; public class SecondaryMapper extends Mapper<LongWritable, Text, User, IntWritable>{ private User mapOutputKey = new User(); private IntWritable mapOutputValue = new IntWritable(); @Override protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, User, IntWritable>.Context context) throws IOException, InterruptedException { // TODO Auto-generated method stub String[] strs = value.toString().split(","); mapOutputKey.set(strs[0],strs[1],Integer.valueOf(strs[2])); mapOutputValue.set(Integer.valueOf(strs[2])); context.write(mapOutputKey, mapOutputValue); } } SecondaryReducer.java package sort; import java.io.IOException; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; public class SecondaryReducer extends Reducer<User, IntWritable, Text, IntWritable>{ private Text outputKey = new Text(); @Override protected void reduce(User key, Iterable<IntWritable> values, Reducer<User, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException { // TODO Auto-generated method stub for(IntWritable value:values) { outputKey.set(key.getAddr()+key.getName()); context.write(outputKey, value); } } } SecondaryJob.java package sort; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public class SecondaryJob { public static void main(String[] args)throws IOException, ClassNotFoundException, InterruptedException { // TODO Auto-generated method stub Configuration conf = new Configuration(); Job job = Job.getInstance(conf); job.setJarByClass(SecondaryJob.class); job.setMapperClass(SecondaryMapper.class); job.setReducerClass(SecondaryReducer.class); job.setOutputKeyClass(User.class); job.setMapOutputKeyClass(User.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); //txt文件路径 FileInputFormat.setInputPaths(job, new Path("D:\\hadoopprjs\\sort\\sort.txt")); //注意,D盘这个目录下预先不能有output文件夹,否则会报错 FileOutputFormat.setOutputPath(job, new Path("D:\\hadoopprjs\\sort\\output")); job.waitForCompletion(true); } } txt文件内容如下 输出文件,运行程序后会多出来一个output文件夹 output文件夹 打开part-r-00000文件(记事本) 结束 --------------------- 原文: |