org.archive.wayback.resourcestore.indexer
Class WarcIndexer

java.lang.Object
  extended by org.archive.wayback.resourcestore.indexer.WarcIndexer

public class WarcIndexer
extends java.lang.Object


Field Summary
static java.lang.String CDX_HEADER_MAGIC
          CDX Header line for these fields.
 
Constructor Summary
WarcIndexer()
           
 
Method Summary
 UrlCanonicalizer getCanonicalizer()
           
 boolean isProcessAll()
           
 CloseableIterator<CaptureSearchResult> iterator(java.io.File warc)
           
 CloseableIterator<CaptureSearchResult> iterator(java.lang.String pathOrUrl)
           
 CloseableIterator<CaptureSearchResult> iterator(org.archive.io.warc.WARCReader reader)
           
static void main(java.lang.String[] args)
           
 void setCanonicalizer(UrlCanonicalizer canonicalizer)
           
 void setProcessAll(boolean processAll)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

CDX_HEADER_MAGIC

public static final java.lang.String CDX_HEADER_MAGIC
CDX Header line for these fields. Not very configurable.

See Also:
Constant Field Values
Constructor Detail

WarcIndexer

public WarcIndexer()
Method Detail

isProcessAll

public boolean isProcessAll()

setProcessAll

public void setProcessAll(boolean processAll)

iterator

public CloseableIterator<CaptureSearchResult> iterator(java.io.File warc)
                                                throws java.io.IOException
Parameters:
warc - the WARC file to iterate over
Returns:
Iterator of SearchResults for input WARC File
Throws:
java.io.IOException

iterator

public CloseableIterator<CaptureSearchResult> iterator(java.lang.String pathOrUrl)
                                                throws java.io.IOException
Parameters:
pathOrUrl - local path or URL of the WARC to iterate over
Returns:
Iterator of SearchResults for input pathOrUrl
Throws:
java.io.IOException

iterator

public CloseableIterator<CaptureSearchResult> iterator(org.archive.io.warc.WARCReader reader)
                                                throws java.io.IOException
Parameters:
reader -
Returns:
Iterator of SearchResults for input WARCReader
Throws:
java.io.IOException

getCanonicalizer

public UrlCanonicalizer getCanonicalizer()

setCanonicalizer

public void setCanonicalizer(UrlCanonicalizer canonicalizer)

main

public static void main(java.lang.String[] args)
Parameters:
args -


Copyright © 2005-2009 Internet Archive. All Rights Reserved.