1 package org.archive.access.nutch;
2
3 import java.io.IOException;
4
5 import javax.servlet.ServletContext;
6
7 import org.apache.hadoop.conf.Configuration;
8 import org.apache.hadoop.fs.Path;
9 import org.apache.nutch.crawl.Inlinks;
10 import org.apache.nutch.searcher.HitDetails;
11 import org.apache.nutch.searcher.NutchBean;
12 import org.apache.nutch.searcher.Query;
13 import org.apache.nutch.searcher.Summary;
14
15 /***
16 * Proxy that allows us intercept getSummary so we can change key used.
17 * @author stack
18 */
19 public class NutchwaxBean extends NutchBean {
20 public NutchwaxBean(Configuration conf, Path dir) throws IOException {
21 super(conf, dir);
22 }
23
24 public NutchwaxBean(Configuration conf) throws IOException {
25 super(conf);
26 }
27
28 public static NutchBean get(ServletContext app, Configuration conf)
29 throws IOException {
30 NutchBean bean = (NutchBean)app.getAttribute("nutchBean");
31 if (bean == null) {
32 if (LOG.isInfoEnabled()) { LOG.info("creating new bean"); }
33
34 bean = new NutchwaxBean(conf);
35 app.setAttribute("nutchBean", bean);
36 }
37 return bean;
38 }
39
40 public Summary[] getSummary(HitDetails[] hits, Query query)
41 throws IOException {
42
43
44
45
46
47 HitDetails[] amendedHits = new HitDetails[hits.length];
48 for (int j = 0; j < hits.length; j++) {
49 HitDetails h = hits[j];
50 amendedHits[j] = getCollectionQualifiedHitDetails(h);
51 }
52 return super.getSummary(amendedHits, query);
53 }
54
55 public String[] getAnchors(HitDetails h) throws IOException {
56 return super.getAnchors(getCollectionQualifiedHitDetails(h));
57 }
58
59 public Inlinks getInlinks(HitDetails h) throws IOException {
60 return super.getInlinks(getCollectionQualifiedHitDetails(h));
61 }
62
63 /***
64 * TODO: Make it so I don't have to create a new HitDetails changing
65 * the key used doing lookup.
66 * @param h
67 * @return
68 */
69 protected HitDetails getCollectionQualifiedHitDetails(final HitDetails h) {
70 return new HitDetails(h.getValue("segment"),
71 Nutchwax.generateWaxKey(h.getValue("url"),
72 h.getValue("collection")).toString());
73 }
74 }