org.archive.wayback.core
Class CaptureSearchResult

java.lang.Object
  extended by org.archive.wayback.core.SearchResult
      extended by org.archive.wayback.core.CaptureSearchResult

public class CaptureSearchResult
extends SearchResult

Version:
$Date: 2010-09-29 05:28:38 +0700 (Wed, 29 Sep 2010) $, $Revision: 3262 $
Author:
brad

Field Summary
static String CAPTURE_CAPTURE_TIMESTAMP
          Result: 14-digit timestamp when document was captured
static String CAPTURE_CLOSEST_INDICATOR
          Result: flag within a SearchResult that indicates this is the closest to a particular requested date.
static String CAPTURE_CLOSEST_VALUE
           
static String CAPTURE_DIGEST
          Result: some form of document fingerprint.
static String CAPTURE_DUPLICATE_ANNOTATION
          Result: this key being present indicates that this particular capture was not actually stored, and that other values within this SearchResult are actually values from a different record which *should* be identical to this capture, had it been stored.
static String CAPTURE_DUPLICATE_DIGEST
          flag indicates that this document was downloaded and verified as identical to a previous capture by digest.
static String CAPTURE_DUPLICATE_HTTP
          flag indicates that this document was NOT downloaded, but that the origin server indicated that the document had not changed, based on If-Modified HTTP request headers.
static String CAPTURE_DUPLICATE_STORED_TS
          Result: this key is present when the CAPTURE_DUPLICATE_ANNOTATION is also present, with the value indicating the last date that was actually stored for this duplicate.
static String CAPTURE_END_OFFSET
          Result: compressed byte offset within ARC/WARC file where this document's gzip envelope ends.
static String CAPTURE_FILE
          Result: basename of ARC/WARC file containing this document.
static String CAPTURE_HTTP_CODE
          Result: 3-digit integer HTTP response code.
static String CAPTURE_MIME_TYPE
          Result: best-guess at mime-type of this document.
static String CAPTURE_OFFSET
          Result: compressed byte offset within ARC/WARC file where this document's gzip envelope begins.
static String CAPTURE_ORIGINAL_HOST
           
static String CAPTURE_ORIGINAL_URL
           
static String CAPTURE_REDIRECT_URL
          Result: URL that this document redirected to, or '-' if it does not redirect
static String CAPTURE_ROBOT_FLAGS
          Result: String flags which indicate robot instructions found in an HTML page.
static String CAPTURE_ROBOT_NOARCHIVE
           
static String CAPTURE_ROBOT_NOFOLLOW
           
static String CAPTURE_ROBOT_NOINDEX
           
static String CAPTURE_URL_KEY
          Result: canonicalized (lookup key) form of URL of captured document
 
Fields inherited from class org.archive.wayback.core.SearchResult
data, RESULT_TRUE_VALUE
 
Constructor Summary
CaptureSearchResult()
           
 
Method Summary
 void flagDuplicateDigest(Date storedDate)
           
 void flagDuplicateDigest(String storedTS)
           
 void flagDuplicateHTTP(Date storedDate)
           
 void flagDuplicateHTTP(String storedTS)
           
 Date getCaptureDate()
           
 String getCaptureTimestamp()
           
 String getDigest()
           
 Date getDuplicateDigestStoredDate()
           
 String getDuplicateDigestStoredTimestamp()
           
 Date getDuplicateHTTPStoredDate()
           
 String getDuplicateHTTPStoredTimestamp()
           
 long getEndOffset()
           
 String getFile()
           
 String getHttpCode()
           
 String getMimeType()
           
 long getOffset()
           
 String getOriginalHost()
           
 String getOriginalUrl()
           
 String getRedirectUrl()
           
 String getRobotFlags()
           
 String getUrlKey()
           
 boolean isClosest()
           
 boolean isDuplicateDigest()
           
 boolean isDuplicateHTTP()
           
 boolean isRobotFlagSet(String flag)
           
 boolean isRobotNoArchive()
           
 boolean isRobotNoFollow()
           
 boolean isRobotNoIndex()
           
 void setCaptureDate(Date date)
           
 void setCaptureTimestamp(String timestamp)
           
 void setClosest(boolean value)
           
 void setDigest(String digest)
           
 void setEndOffset(long offset)
           
 void setFile(String file)
           
 void setHttpCode(String httpCode)
           
 void setMimeType(String mimeType)
           
 void setOffset(long offset)
           
 void setOriginalHost(String originalHost)
           
 void setOriginalUrl(String originalUrl)
           
 void setRedirectUrl(String url)
           
 void setRobotFlag(String flag)
           
 void setRobotFlags(String robotFlags)
           
 void setRobotNoArchive()
           
 void setRobotNoFollow()
           
 void setRobotNoIndex()
           
 void setUrlKey(String urlKey)
           
 
Methods inherited from class org.archive.wayback.core.SearchResult
dateToTS, fromCanonicalStringMap, get, getBoolean, put, putBoolean, toCanonicalStringMap, tsToDate
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

CAPTURE_ORIGINAL_URL

public static final String CAPTURE_ORIGINAL_URL
See Also:
Constant Field Values

CAPTURE_ORIGINAL_HOST

public static final String CAPTURE_ORIGINAL_HOST
See Also:
Constant Field Values

CAPTURE_URL_KEY

public static final String CAPTURE_URL_KEY
Result: canonicalized (lookup key) form of URL of captured document

See Also:
Constant Field Values

CAPTURE_CAPTURE_TIMESTAMP

public static final String CAPTURE_CAPTURE_TIMESTAMP
Result: 14-digit timestamp when document was captured

See Also:
Constant Field Values

CAPTURE_FILE

public static final String CAPTURE_FILE
Result: basename of ARC/WARC file containing this document.

See Also:
Constant Field Values

CAPTURE_OFFSET

public static final String CAPTURE_OFFSET
Result: compressed byte offset within ARC/WARC file where this document's gzip envelope begins.

See Also:
Constant Field Values

CAPTURE_END_OFFSET

public static final String CAPTURE_END_OFFSET
Result: compressed byte offset within ARC/WARC file where this document's gzip envelope ends.

See Also:
Constant Field Values

CAPTURE_MIME_TYPE

public static final String CAPTURE_MIME_TYPE
Result: best-guess at mime-type of this document.

See Also:
Constant Field Values

CAPTURE_HTTP_CODE

public static final String CAPTURE_HTTP_CODE
Result: 3-digit integer HTTP response code. May be '0' in some fringe conditions (old ARCs, bug in crawler, etc.).

See Also:
Constant Field Values

CAPTURE_DIGEST

public static final String CAPTURE_DIGEST
Result: some form of document fingerprint. This should represent the HTTP payload only for HTTP captured resources. It may represent an MD5, a SHA1, and may be a fragment of the full representation of the digest.

See Also:
Constant Field Values

CAPTURE_REDIRECT_URL

public static final String CAPTURE_REDIRECT_URL
Result: URL that this document redirected to, or '-' if it does not redirect

See Also:
Constant Field Values

CAPTURE_ROBOT_FLAGS

public static final String CAPTURE_ROBOT_FLAGS
Result: String flags which indicate robot instructions found in an HTML page. Currently one or more of:
  • "A" - noarchive
  • "F" - nofollow
  • "I" - noindex

See Also:
"http://noarchive.net/", Constant Field Values

    CAPTURE_ROBOT_NOARCHIVE

    public static final String CAPTURE_ROBOT_NOARCHIVE
    See Also:
    Constant Field Values

    CAPTURE_ROBOT_NOFOLLOW

    public static final String CAPTURE_ROBOT_NOFOLLOW
    See Also:
    Constant Field Values

    CAPTURE_ROBOT_NOINDEX

    public static final String CAPTURE_ROBOT_NOINDEX
    See Also:
    Constant Field Values

    CAPTURE_CLOSEST_INDICATOR

    public static final String CAPTURE_CLOSEST_INDICATOR
    Result: flag within a SearchResult that indicates this is the closest to a particular requested date.

    See Also:
    Constant Field Values

    CAPTURE_CLOSEST_VALUE

    public static final String CAPTURE_CLOSEST_VALUE
    See Also:
    Constant Field Values

    CAPTURE_DUPLICATE_ANNOTATION

    public static final String CAPTURE_DUPLICATE_ANNOTATION
    Result: this key being present indicates that this particular capture was not actually stored, and that other values within this SearchResult are actually values from a different record which *should* be identical to this capture, had it been stored.

    See Also:
    Constant Field Values

    CAPTURE_DUPLICATE_STORED_TS

    public static final String CAPTURE_DUPLICATE_STORED_TS
    Result: this key is present when the CAPTURE_DUPLICATE_ANNOTATION is also present, with the value indicating the last date that was actually stored for this duplicate.

    See Also:
    Constant Field Values

    CAPTURE_DUPLICATE_DIGEST

    public static final String CAPTURE_DUPLICATE_DIGEST
    flag indicates that this document was downloaded and verified as identical to a previous capture by digest.

    See Also:
    Constant Field Values

    CAPTURE_DUPLICATE_HTTP

    public static final String CAPTURE_DUPLICATE_HTTP
    flag indicates that this document was NOT downloaded, but that the origin server indicated that the document had not changed, based on If-Modified HTTP request headers.

    See Also:
    Constant Field Values
    Constructor Detail

    CaptureSearchResult

    public CaptureSearchResult()
    Method Detail

    getOriginalUrl

    public String getOriginalUrl()
    Returns:
    the original URL which resulted in the capture. If it is not available, the urlKey and original host will be used to reconstruct something possibly closer to the original URL than the urlKey.

    setOriginalUrl

    public void setOriginalUrl(String originalUrl)
    Parameters:
    originalUrl - as close to the original URL by which this Resource was captured as is possible

    getOriginalHost

    public String getOriginalHost()

    setOriginalHost

    public void setOriginalHost(String originalHost)

    getUrlKey

    public String getUrlKey()

    setUrlKey

    public void setUrlKey(String urlKey)

    getCaptureDate

    public Date getCaptureDate()

    setCaptureDate

    public void setCaptureDate(Date date)

    getCaptureTimestamp

    public String getCaptureTimestamp()

    setCaptureTimestamp

    public void setCaptureTimestamp(String timestamp)

    getFile

    public String getFile()

    setFile

    public void setFile(String file)

    getOffset

    public long getOffset()

    setOffset

    public void setOffset(long offset)

    getEndOffset

    public long getEndOffset()

    setEndOffset

    public void setEndOffset(long offset)

    getMimeType

    public String getMimeType()

    setMimeType

    public void setMimeType(String mimeType)

    getHttpCode

    public String getHttpCode()

    setHttpCode

    public void setHttpCode(String httpCode)

    getDigest

    public String getDigest()

    setDigest

    public void setDigest(String digest)

    getRedirectUrl

    public String getRedirectUrl()

    setRedirectUrl

    public void setRedirectUrl(String url)

    isClosest

    public boolean isClosest()

    setClosest

    public void setClosest(boolean value)

    flagDuplicateDigest

    public void flagDuplicateDigest(Date storedDate)

    flagDuplicateDigest

    public void flagDuplicateDigest(String storedTS)

    isDuplicateDigest

    public boolean isDuplicateDigest()

    getDuplicateDigestStoredDate

    public Date getDuplicateDigestStoredDate()

    getDuplicateDigestStoredTimestamp

    public String getDuplicateDigestStoredTimestamp()

    flagDuplicateHTTP

    public void flagDuplicateHTTP(Date storedDate)

    flagDuplicateHTTP

    public void flagDuplicateHTTP(String storedTS)

    isDuplicateHTTP

    public boolean isDuplicateHTTP()

    getDuplicateHTTPStoredDate

    public Date getDuplicateHTTPStoredDate()

    getDuplicateHTTPStoredTimestamp

    public String getDuplicateHTTPStoredTimestamp()

    getRobotFlags

    public String getRobotFlags()

    setRobotFlags

    public void setRobotFlags(String robotFlags)

    setRobotFlag

    public void setRobotFlag(String flag)

    isRobotFlagSet

    public boolean isRobotFlagSet(String flag)

    isRobotNoArchive

    public boolean isRobotNoArchive()

    isRobotNoIndex

    public boolean isRobotNoIndex()

    isRobotNoFollow

    public boolean isRobotNoFollow()

    setRobotNoArchive

    public void setRobotNoArchive()

    setRobotNoIndex

    public void setRobotNoIndex()

    setRobotNoFollow

    public void setRobotNoFollow()


    Copyright © 2005-2011 Internet Archive. All Rights Reserved.