codepad
[
create a new paste
]
login
|
about
Language:
C
C++
D
Haskell
Lua
OCaml
PHP
Perl
Plain Text
Python
Ruby
Scheme
Tcl
import org.apache.commons.digester.Digester; import org.xml.sax.SAXException; import org.apache.lucene.index.IndexWriter; //import org.apache.lucene.analysis.standard.StandardAnalyzer; //import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.analysis.core.*; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.apache.lucene.index.CorruptIndexException; import java.io.File; import java.io.IOException; import java.io.FileNotFoundException; /** * Parses the contents of collection XML file. The id of the file to * parse must be specified as the first command line argument. */ public class CollectionIndexer { private static IndexWriter writer; static final File INDEX_DIR = new File("data/index"); /** * Prints the document information to standard output. * * @param document the <code>Document</code> to print out */ public void addDocument(FlickrDoc flickrDoc) { System.out.println("Adding " + flickrDoc.getId()); Document document = new Document(); document.add(new Field("id", flickrDoc.getId(), Field.Store.YES, Field.Index.ANALYZED)); document.add(new Field("title", flickrDoc.getTitle(), Field.Store.YES, Field.Index.ANALYZED)); document.add(new Field("description", flickrDoc.getDescription(), Field.Store.YES, Field.Index.ANALYZED)); document.add(new Field("time", flickrDoc.getTime(), Field.Store.YES, Field.Index.ANALYZED)); document.add(new Field("tags", flickrDoc.getTags(), Field.Store.YES, Field.Index.ANALYZED)); if ( (flickrDoc.getLatitude() != null) && (flickrDoc.getLongitude() != null) ) { document.add(new Field("latitude", flickrDoc.getLatitude(), Field.Store.YES, Field.Index.ANALYZED)); document.add(new Field("longitude", flickrDoc.getLongitude(), Field.Store.YES, Field.Index.ANALYZED)); } document.add(new Field("event", flickrDoc.getEvent(), Field.Store.YES, Field.Index.ANALYZED)); try { writer.addDocument(document); } catch (CorruptIndexException cie) { ; } catch (IOException ioe) { ; } } /** * Configures Digester rules and actions, parses the XML file specified * as the first argument. * * @param args command line arguments */ public static void main(String[] args) throws IOException, SAXException { if (INDEX_DIR.exists()) { System.out.println("Cannot save index to '" +INDEX_DIR+ "' directory, please delete it first"); System.exit(1); } // IndexWriter to use for adding contacts to the index //writer = new IndexWriter(FSDirectory.open(INDEX_DIR), new StandardAnalyzer(Version.LUCENE_30 ), true, IndexWriter.MaxFieldLength.LIMITED); writer = new IndexWriter(FSDirectory.open(INDEX_DIR), new IndexWriterConfig(org.apache.lucene.util.Version.LUCENE_40, new WhitespaceAnalyzer(org.apache.lucene.util.Version.LUCENE_40))); // instantiate Digester and disable XML validation Digester digester = new Digester(); digester.setValidating(false); // instantiate CollectionIndexer class digester.addObjectCreate("collection", CollectionIndexer.class ); // instantiate Document class digester.addObjectCreate("collection/doc", FlickrDoc.class ); // set type property of Document instance when 'type' attribute is found //digester.addSetProperties("collection/doc", "type", "type" ); // set different properties of Document instance using specified methods digester.addCallMethod("collection/doc/id", "setId", 0); digester.addCallMethod("collection/doc/title", "setTitle", 0); digester.addCallMethod("collection/doc/description", "setDescription", 0); digester.addCallMethod("collection/doc/time", "setTime", 0); digester.addCallMethod("collection/doc/tags", "setTags", 0); digester.addCallMethod("collection/doc/geo/latitude", "setLatitude", 0); digester.addCallMethod("collection/doc/geo/longitude", "setLongitude", 0); digester.addCallMethod("collection/doc/event", "setEvent", 0); // call 'addDocument' method when the next 'collection/document' pattern is seen digester.addSetNext("collection/doc", "addDocument" ); // now that rules and actions are configured, start the parsing process CollectionIndexer abp = (CollectionIndexer) digester.parse(new File(args[0])); //System.out.println( abp.toString() ); // optimize and close the index writer.optimize(); writer.close(); } /** * JavaBean class that holds properties of each Document entry. * It is important that this class be public and static, in order for * Digester to be able to instantiate it. */ public static class FlickrDoc { //private String type; private String id; private String title; private String description; private String time; private String tags; private String latitude; private String longitude; private String event; /* public void setType(String newType) { type = newType; } public String getType() { return type; }*/ public void setId(String newId) { id = newId; } public String getId() { return id; } public void setTitle(String newTitle) { title = newTitle; } public String getTitle() { return title; } public void setDescription(String newDescription) { description = newDescription; } public String getDescription() { return description; } public void setTime(String newTime) { time = newTime; } public String getTime() { return time; } public void setTags(String newTags) { tags = newTags; } public String getTags() { return tags; } public void setLatitude(String newLatitude) { latitude = newLatitude; } public String getLatitude() { return latitude; } public void setLongitude(String newLongitude) { longitude = newLongitude; } public String getLongitude() { return longitude; } public void setEvent(String newEvent) { event = newEvent; } public String getEvent() { return event; } } }
Private
[
?
]
Run code
Submit