hadoop-cluster
MapReduce programs to perform data processing on the Hadoop cluster
Science Score: 18.0%
This score indicates how likely this project is to be science-related based on various indicators:
-
✓CITATION.cff file
Found CITATION.cff file -
○codemeta.json file
-
○.zenodo.json file
-
○DOI references
-
○Academic links in README
-
○Academic email domains
-
○Institutional organization owner
-
○JOSS paper metadata
-
○Scientific vocabulary similarity
Low similarity (0.6%) to scientific vocabulary
Last synced: 10 months ago
·
JSON representation
·
Repository
MapReduce programs to perform data processing on the Hadoop cluster
Basic Info
- Host: GitHub
- Owner: axp7911
- Language: Java
- Default Branch: master
- Size: 97.7 KB
Statistics
- Stars: 0
- Watchers: 1
- Forks: 0
- Open Issues: 0
- Releases: 0
Created over 12 years ago
· Last pushed over 12 years ago
Metadata Files
Citation
Owner
- Name: A. Patil
- Login: axp7911
- Kind: user
- Repositories: 8
- Profile: https://github.com/axp7911
Citation (CitationHistogram.java)
/*
* @author Amrut Patil
* Project: Processing Patent Citation Data set.
* This program counts how many input records have each citation count, producing the distribution (histogram) of citation counts.
* Date: July 21, 2013
*
*/
import java.io.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class CitationHistogram{
public static class Map extends Mapper<Text, Text, IntWritable, IntWritable> {
private final static IntWritable countone = new IntWritable(1);
private IntWritable citationCount = new IntWritable();
public void map(Text key, Text value,
Context context) throws InterruptedException, IOException {
citationCount.set(Integer.parseInt(value.toString()));
context.write(citationCount,countone);
}
}
public static class Reduce extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
public void reduce(IntWritable key, Iterable<IntWritable> values,
Context context) throws InterruptedException, IOException {
int count = 0;
//Counting the 1s for each citation count.
for(IntWritable val:values){
count+=val.get();
}
context.write(key,new IntWritable(count));
}
}
public static void main(String[] args) throws Exception {
//Configuring a MapReduce job
Configuration conf = new Configuration();
//Object which stores configuration parameters for a job.
//Define and control execution of a job
Job job = new Job(conf, "CitationHistogram");
job.setJarByClass(CitationHistogram.class);
//Takes care of file splitting
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
//Set Mapper and Reducer
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
//The way an input file should be split up and read by Hadoop.
//Each record split in key/value pair separated by tab character(default)
job.setInputFormatClass(KeyValueTextInputFormat.class);
//The way output should be written to files
job.setOutputFormatClass(TextOutputFormat.class);
//Should be the same as type of <K2,V2>
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(IntWritable.class);
job.waitForCompletion(true);
}
}