05-Hadoop实战URL流量分析
准备工作
准备数据
GET / HTTP/1.0 POST /home HTTP/1.0 POST /Study HTTP/1.0 POST /Study HTTP/1.0 POST / HTTP/1.0 POST /news HTTP/1.0 POST /news HTTP/1.0
功能描述
- 通过逐行编写MapReduce程序，实战分析并实现Hadoop的URL流量统计
代码实现
package com.opensourceteams.modeles.common.bigdata.hadoop.hadoop2.mapreduce.wordcount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import java.io.IOException;
public class URLLog {
public static class DataMapper
extends Mapper<Object, Text, Text, LongWritable>{
private final static LongWritable empValue = new LongWritable(1);
private Text lineData = new Text();
public void map(Object key, Text value, Context context
) throws IOException, InterruptedException {
String result = handleLine(value.toString());
if(result != null && result.length() >0){
lineData.set(result);
context.write(lineData,empValue);
}
}
private String handleLine(String line) {
StringBuffer buffer = new StringBuffer();
if(line.length() >0 ){
if(line.contains("GET")){
buffer.append(line.substring(line.indexOf("GET"),line.indexOf("HTTP/1.0")).trim());
}else if(line.contains("POST")){
buffer.append(line.substring(line.indexOf("POST"),line.indexOf("HTTP/1.0")).trim());
}
}
System.out.printf("buffer:" + buffer.toString());
return buffer.toString();
}
}
public static class DataReducer
extends Reducer<Text, LongWritable,Text, LongWritable> {
private LongWritable result = new LongWritable();
public void reduce(Text key, Iterable<LongWritable> values,
Context context
) throws IOException, InterruptedException {
long count =0 ;
for(LongWritable v : values){
count = count + v.get();
}
result.set(count);
context.write(key, result);
}
}
public static void main(String[] args) throws Exception {
if(args == null || args.length ==0){
args = new String[2];
args[0] = "/opt/workspace/bigdata/all_frame_intellij/hadoop-maven-idea/src/tutorial/resources/URLLog.txt";
args[1] = "/opt/workspace/bigdata/all_frame_intellij/hadoop-maven-idea/src/tutorial/resources/output";
}
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length < 2) {
System.err.println("Usage: wordcount <in> [<in>...] <out>");
System.exit(2);
}
Job job = Job.getInstance(conf, "URLLog");
job.setJarByClass(URLLog.class);
job.setMapperClass(DataMapper.class);
job.setCombinerClass(DataReducer.class);
job.setReducerClass(DataReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
for (int i = 0; i < otherArgs.length - 1; ++i) {
FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
}
FileOutputFormat.setOutputPath(job,
new Path(otherArgs[otherArgs.length - 1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}