1   /* $Id: NutchwaxTest.java 1372 2006-12-15 00:49:38Z stack-sf $
2    *
3    * Created Sep 5, 2006
4    *
5    * Copyright (C) 2006 Internet Archive.
6    *
7    * This file is part of the Heritrix web crawler (crawler.archive.org).
8    *
9    * Heritrix is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser Public License as published by
11   * the Free Software Foundation; either version 2.1 of the License, or
12   * any later version.
13   *
14   * Heritrix is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU Lesser Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser Public License
20   * along with Heritrix; if not, write to the Free Software
21   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22   */
23  package org.archive.access.nutch;
24  
25  import java.io.IOException;
26  
27  import org.apache.hadoop.io.Text;
28  
29  import junit.framework.TestCase;
30  
31  /***
32   * @author stack
33   * @version $Date: 2006-12-15 00:49:38 +0000 (Fri, 15 Dec 2006) $ $Version$
34   */
35  public class NutchwaxTest extends TestCase {
36  	public void testGetCollectionFromWaxKey() throws IOException {
37  		String key = " c=nla2005,u=http://www.funkmymobile.com.au/en/" +
38  		    "download.php?f=c_14&a=khamega&type=14&id=5247&son_nom=" +
39  		    "??%20Janta%20??%20Ja&son_wav=jantaja.mp3&lang=EN&c=GB\n";
40  		String collection = "nla2005";
41  		assertEquals(Nutchwax.getCollectionFromWaxKey(new Text(key)),
42  			collection);
43  		key = "c=nla2005,u=http://www.mobilewallpapers.com.au/en/download." +
44  			"php?f=c_14&a=mowall&type=14&id=5247&son_nom=??%20Janta%20??%20" +
45  			"Ja&son_wav=jantaja.mp3&lang=EN&c=GB";
46  		assertEquals(Nutchwax.getCollectionFromWaxKey(new Text(key)),
47  			collection);
48  		key = "c=5b5c430260d421a0ac8fdd461142e867,u=http://history." +
49  			"sacentral.sa.gov.au/site/page.cfm?u=47&listMode=" +
50  			"listLinks&path=\n 4873,4884,4894";
51  		collection = "5b5c430260d421a0ac8fdd461142e867";
52  		assertEquals(Nutchwax.getCollectionFromWaxKey(new Text(key)),
53  				collection);
54  		String url = "http://history." +
55  			"sacentral.sa.gov.au/site/page.cfm?u=47&listMode=" +
56  			"listLinks&path=\n 4873,4884,4894";
57  		assertEquals(Nutchwax.getUrlFromWaxKey(new Text(key)),
58  				url);
59  	}
60  }