在不使用 JobConf 的情况下运行 Hadoop 作业
我找不到任何一个不使用已弃用的 JobConf 类来提交 Hadoop 作业的示例。JobClient 类尚未被弃用,但它仍然只支持接收 JobConf 参数的方法。
有人可以给我指出一个 Java 代码的例子吗?它只使用 Configuration 类(而不是 JobConf)来提交 Hadoop map/reduce 作业,并使用 mapreduce.lib.input 包而不是 mapred.input。
我找不到任何一个不使用已弃用的 JobConf 类来提交 Hadoop 作业的示例。JobClient 类尚未被弃用,但它仍然只支持接收 JobConf 参数的方法。
有人可以给我指出一个 Java 代码的例子吗?它只使用 Configuration 类(而不是 JobConf)来提交 Hadoop map/reduce 作业,并使用 mapreduce.lib.input 包而不是 mapred.input。
希望这有帮助
import java.io.File;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Example Hadoop job written against the {@code org.apache.hadoop.mapreduce} API
 * (no {@code JobConf} / {@code JobClient}).
 *
 * <p>The job reads the input path, passes every record through {@link MyMapper}
 * unchanged while counting records, and writes the result to the output path.
 * It is driven through {@link ToolRunner}, so the generic Hadoop command-line
 * options handled by {@code ToolRunner} are applied to the job configuration.
 */
public class MapReduceExample extends Configured implements Tool {

    /** Input path used by {@link #main(String[])} when no arguments are given. */
    private static final String DEFAULT_INPUT = "data/input";

    /** Output path used by {@link #main(String[])} when no arguments are given. */
    private static final String DEFAULT_OUTPUT = "data/output";

    /**
     * Pass-through mapper: emits each input record unchanged and increments
     * the {@code mygroup/jeff} counter once per record.
     */
    static class MyMapper extends Mapper<LongWritable, Text, LongWritable, Text> {

        /** Explicit no-argument constructor, kept from the original example. */
        public MyMapper() {
        }

        /**
         * Counts the record and forwards it as-is.
         *
         * @param key     key of the input record
         * @param value   value of the input record
         * @param context task context used to update the counter and emit output
         * @throws java.io.IOException  if writing the output record fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws java.io.IOException, InterruptedException {
            context.getCounter("mygroup", "jeff").increment(1);
            context.write(key, value);
        }
    }

    /**
     * Configures and runs the job, blocking until it finishes.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 if the job succeeded, 1 if it failed, 2 if the arguments are invalid
     * @throws Exception if the job cannot be set up or submitted
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: MapReduceExample <input path> <output path>");
            return 2;
        }

        // Build the job from the configuration injected by ToolRunner so that
        // generic options are honoured; fall back to a default configuration
        // when run() is invoked without setConf() having been called.
        Job job = (getConf() != null) ? Job.getInstance(getConf()) : Job.getInstance();

        // Lets Hadoop locate the jar containing this class when run on a cluster.
        job.setJarByClass(MapReduceExample.class);
        job.setMapperClass(MyMapper.class);

        // Make the emitted key/value types explicit instead of relying on defaults.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Propagate the job outcome instead of always reporting success.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point.
     *
     * <p>With no arguments the job runs on {@code data/input} and writes to
     * {@code data/output}, deleting any previous {@code data/output} directory
     * first. Otherwise the given arguments are passed on to {@link ToolRunner}.
     * The process exit code is the value returned by {@link #run(String[])}.
     *
     * @param args optional generic Hadoop options followed by input and output paths
     * @throws Exception if the default output directory cannot be deleted or the job fails to run
     */
    public static void main(String[] args) throws Exception {
        String[] jobArgs = args;
        if (args.length == 0) {
            // Only the built-in demo output directory is ever removed automatically.
            FileUtils.deleteDirectory(new File(DEFAULT_OUTPUT));
            jobArgs = new String[] { DEFAULT_INPUT, DEFAULT_OUTPUT };
        }
        System.exit(ToolRunner.run(new MapReduceExample(), jobArgs));
    }
}