org.archive.access.nutch
Class NutchwaxIndexer

java.lang.Object
  extended by org.apache.hadoop.util.ToolBase
      extended by org.apache.nutch.indexer.Indexer
          extended by org.archive.access.nutch.NutchwaxIndexer
All Implemented Interfaces:
org.apache.hadoop.conf.Configurable, org.apache.hadoop.io.Closeable, org.apache.hadoop.mapred.JobConfigurable, org.apache.hadoop.mapred.Mapper, org.apache.hadoop.mapred.Reducer, org.apache.hadoop.util.Tool

public class NutchwaxIndexer
extends org.apache.nutch.indexer.Indexer

Subclass of the Nutch Indexer that handles keys that are not just URLs.

Author:
stack

Nested Class Summary
 
Nested classes/interfaces inherited from class org.apache.nutch.indexer.Indexer
org.apache.nutch.indexer.Indexer.OutputFormat
 
Field Summary
 org.apache.commons.logging.Log LOG
           
 
Fields inherited from class org.apache.nutch.indexer.Indexer
DONE_NAME
 
Fields inherited from class org.apache.hadoop.util.ToolBase
conf
 
Constructor Summary
NutchwaxIndexer()
           
NutchwaxIndexer(org.apache.hadoop.conf.Configuration c)
           
 
Method Summary
 void index(org.apache.hadoop.fs.Path indexDir, org.apache.hadoop.fs.Path crawlDb, org.apache.hadoop.fs.Path linkDb, org.apache.hadoop.fs.Path[] segments)
           
static void main(java.lang.String[] args)
           
 void reduce(org.apache.hadoop.io.WritableComparable key, java.util.Iterator values, org.apache.hadoop.mapred.OutputCollector output, org.apache.hadoop.mapred.Reporter reporter)
           
 
Methods inherited from class org.apache.nutch.indexer.Indexer
close, configure, map, run
 
Methods inherited from class org.apache.hadoop.util.ToolBase
doMain, getConf, setConf
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

LOG

public final org.apache.commons.logging.Log LOG
Constructor Detail

NutchwaxIndexer

public NutchwaxIndexer()

NutchwaxIndexer

public NutchwaxIndexer(org.apache.hadoop.conf.Configuration c)
Method Detail

reduce

public void reduce(org.apache.hadoop.io.WritableComparable key,
                   java.util.Iterator values,
                   org.apache.hadoop.mapred.OutputCollector output,
                   org.apache.hadoop.mapred.Reporter reporter)
            throws java.io.IOException
Specified by:
reduce in interface org.apache.hadoop.mapred.Reducer
Overrides:
reduce in class org.apache.nutch.indexer.Indexer
Throws:
java.io.IOException

index

public void index(org.apache.hadoop.fs.Path indexDir,
                  org.apache.hadoop.fs.Path crawlDb,
                  org.apache.hadoop.fs.Path linkDb,
                  org.apache.hadoop.fs.Path[] segments)
           throws java.io.IOException
Overrides:
index in class org.apache.nutch.indexer.Indexer
Throws:
java.io.IOException

main

public static void main(java.lang.String[] args)
                 throws java.lang.Exception
Throws:
java.lang.Exception


Copyright © 2005-2007 Internet Archive. All Rights Reserved.