Useful links for getting started with Lucene:
http://www.lucenetutorial.com/lucene-in-5-minutes.html
http://stackoverflow.com/questions/468405/how-to-incorporate-multiple-fields-in-queryparser
Here is an example of how to index and search text:
package ch.prait.lucene;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
/**
 * Small Lucene 3.6 demonstration: indexes lines of text together with the name of the
 * speaker ("teller") and a per-call line counter, then runs a query against the text
 * field and prints the matching lines.
 */
public class PrettyLucene {

    /** Lucene compatibility version used for the analyzer, writer config and query parser. */
    private static final Version LUCENE_VERSION = Version.LUCENE_36;

    /** Field holding the indexed line of text; also the default field for queries. */
    private static final String FIELD_TEXT = "myString";

    /** Field holding the position of the line within a single {@link #index} call. */
    private static final String FIELD_ID = "id";

    /** Field holding the speaker of the line; stored only, not searchable. */
    private static final String FIELD_TELLER = "teller";

    /**
     * Adds one document per entry of {@code stringsToIndex} to the given index.
     *
     * <p>Each document gets three fields: the text itself (stored and analyzed), a
     * zero-based counter that restarts on every call (stored and analyzed), and the
     * teller (stored but not indexed).
     *
     * @param index          directory the index is written to
     * @param stringsToIndex lines of text to index, in order
     * @param teller         name stored alongside every line of this call
     * @throws Exception if the writer cannot be opened, a document cannot be added, or
     *                   the writer cannot be closed
     */
    public void index(Directory index, List<String> stringsToIndex, String teller) throws Exception {
        StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
        IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
        IndexWriter w = new IndexWriter(index, config);
        try {
            int idCounter = 0;
            for (String stringToIndex : stringsToIndex) {
                Document myDocument = new Document();
                myDocument.add(new Field(FIELD_TEXT, stringToIndex, Field.Store.YES, Field.Index.ANALYZED));
                myDocument.add(new Field(FIELD_ID, String.valueOf(idCounter), Field.Store.YES,
                        Field.Index.ANALYZED));
                myDocument.add(new Field(FIELD_TELLER, teller, Field.Store.YES, Field.Index.NO));
                w.addDocument(myDocument);
                // Advance the counter so each line gets its own id; previously it stayed
                // at 0 and every hit was reported as "Line 0".
                idCounter++;
            }
        } finally {
            // Always release the writer, even when adding a document fails.
            w.close();
        }
    }

    /**
     * Parses {@code searchTerm} against the text field, collects up to ten top-scoring
     * documents and prints them to standard output.
     *
     * @param index      directory containing the index to search
     * @param searchTerm query in Lucene query-parser syntax
     * @throws Exception if the index cannot be opened, the query cannot be parsed, or
     *                   the search fails
     */
    public void searchIndex(Directory index, String searchTerm) throws Exception {
        // Use the same analyzer as index() so that query terms are normalised the same
        // way as the indexed tokens.
        StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
        int hitsPerPage = 10;

        IndexReader reader = IndexReader.open(index);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
            QueryParser parser = new QueryParser(LUCENE_VERSION, FIELD_TEXT, analyzer);
            Query q = parser.parse(searchTerm);
            searcher.search(q, collector);

            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            System.out.println("Found " + hits.length + " hits.");
            for (int i = 0; i < hits.length; ++i) {
                int docId = hits[i].doc;
                Document d = searcher.doc(docId);
                System.out.println((i + 1) + ". " + "Line " + d.get(FIELD_ID) + " told by "
                        + d.get(FIELD_TELLER) + ": " + d.get(FIELD_TEXT));
            }
        } finally {
            // Always release the reader, even when parsing or searching fails.
            reader.close();
        }
    }

    /**
     * Indexes two short speeches into the {@code luceneIndex} directory (relative to the
     * working directory) and searches them for {@code speak*}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        PrettyLucene prettyLucene = new PrettyLucene();
        try {
            // Lucene puts its index files into the "luceneIndex" folder, relative to
            // where the program is started.
            Directory index = new SimpleFSDirectory(new File("luceneIndex"));
            try {
                List<String> marcellus = new ArrayList<String>();
                marcellus.add("Horatio says 'tis but our fantasy,");
                marcellus.add("And will not let belief take hold of him");
                marcellus.add("Touching this dreaded sight, twice seen of us:");
                marcellus.add("Therefore I have entreated him along");
                marcellus.add("With us to watch the minutes of this night;");
                marcellus.add("That if again this apparition come,");
                marcellus.add("He may approve our eyes and speak to it.");
                prettyLucene.index(index, marcellus, "Marcellus");

                List<String> horatio = new ArrayList<String>();
                horatio.add("What art thou that usurp'st this time of night,");
                horatio.add("Together with that fair and warlike form");
                horatio.add("In which the majesty of buried Denmark");
                horatio.add("Did sometimes march? by heaven I charge thee, speak!");
                prettyLucene.index(index, horatio, "Horatio");

                prettyLucene.searchIndex(index, "speak*");
            } finally {
                // Close the directory whether or not indexing/searching succeeded.
                index.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Recent Comments