codepad
[
create a new paste
]
login
|
about
Language:
C
C++
D
Haskell
Lua
OCaml
PHP
Perl
Plain Text
Python
Ruby
Scheme
Tcl
import org.apache.commons.digester.Digester; import org.xml.sax.SAXException; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.apache.lucene.index.CorruptIndexException; import java.io.File; import java.io.IOException; import java.io.FileNotFoundException; /** * Parses the contents of collection XML file. The id of the file to * parse must be specified as the first command line argument. */ public class CollectionIndexer { private static IndexWriter writer; static final File INDEX_DIR = new File("data/index"); /** * Prints the document information to standard output. * * @param document the <code>Document</code> to print out */ public void addDocument(FlickrDoc flickrDoc) { System.out.println("Adding " + flickrDoc.getId()); Document document = new Document(); document.add(new Field("type", flickrDoc.getType().getBytes(), Field.Store.YES)); document.add(new Field("id", flickrDoc.getId().getBytes(), Field.Store.YES)); document.add(new Field("title", flickrDoc.getTitle().getBytes(), Field.Store.YES)); document.add(new Field("description", flickrDoc.getDescription().getBytes(), Field.Store.YES)); document.add(new Field("time", flickrDoc.getTime().getBytes(), Field.Store.YES)); document.add(new Field("tags", flickrDoc.getTags().getBytes(), Field.Store.YES)); document.add(new Field("geo", flickrDoc.getGeo().getBytes(), Field.Store.YES)); document.add(new Field("event", flickrDoc.getEvent().getBytes(), Field.Store.YES)); try { writer.addDocument(document); } catch (CorruptIndexException cie) { ; } catch (IOException ioe) { ; } } /** * Configures Digester rules and actions, parses the XML file specified * as the first argument. * * @param args command line arguments */ public static void main(String[] args) throws IOException, SAXException { if (INDEX_DIR.exists()) { System.out.println("Cannot save index to '" +INDEX_DIR+ "' directory, please delete it first"); System.exit(1); } // IndexWriter to use for adding contacts to the index //writer = new IndexWriter(FSDirectory.open(INDEX_DIR), new StandardAnalyzer(Version.LUCENE_30 ), true, IndexWriter.MaxFieldLength.LIMITED); writer = new IndexWriter(FSDirectory.open(INDEX_DIR), new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); // instantiate Digester and disable XML validation Digester digester = new Digester(); digester.setValidating(false); // instantiate CollectionIndexer class digester.addObjectCreate("collection", CollectionIndexer.class ); // instantiate Document class digester.addObjectCreate("collection/flickrdoc", FlickrDoc.class ); // set type property of Document instance when 'type' attribute is found digester.addSetProperties("collection/flickrdoc", "type", "type" ); // set different properties of Document instance using specified methods digester.addCallMethod("collection/flickrdoc/id", "setId", 0); digester.addCallMethod("collection/flickrdoc/title", "setTitle", 0); digester.addCallMethod("collection/flickrdoc/description", "setDescription", 0); digester.addCallMethod("collection/flickrdoc/time", "setTime", 0); digester.addCallMethod("collection/flickrdoc/tags", "setTags", 0); digester.addCallMethod("collection/flickrdoc/geo", "setGeo", 0); digester.addCallMethod("collection/flickrdoc/event", "setEvent", 0); // call 'addDocument' method when the next 'collection/document' pattern is seen digester.addSetNext("collection/flickrdoc", "addDocument" ); // now that rules and actions are configured, start the parsing process CollectionIndexer abp = (CollectionIndexer) digester.parse(new File(args[0])); System.out.println( abp.toString() ); // optimize and close the index writer.optimize(); writer.close(); } /** * JavaBean class that holds properties of each Document entry. * It is important that this class be public and static, in order for * Digester to be able to instantiate it. */ public static class FlickrDoc { private String type; private String id; private String title; private String description; private String time; private String tags; private String geo; private String event; public void setType(String newType) { type = newType; } public String getType() { return type; } public void setId(String newId) { id = newId; } public String getId() { return id; } public void setTitle(String newTitle) { title = newTitle; } public String getTitle() { return title; } public void setDescription(String newDescription) { description = newDescription; } public String getDescription() { return description; } public void setTime(String newTime) { time = newTime; } public String getTime() { return time; } public void setTags(String newTags) { tags = newTags; } public String getTags() { return tags; } public void setGeo(String newGeo) { geo = newGeo; } public String getGeo() { return geo; } public void setEvent(String newEvent) { event = newEvent; } public String getEvent() { return event; } } }
Private
[
?
]
Run code
Submit