Lucene 4.6.0 初学

来源:转载


lucene初学,

创建索引,查询等

需要 lucene-4.6.0 的 jar 包,我直接导入了全部的 jar 包!


lucene-4.6.0.jar包

Util 自己写的

package com.test.lucene;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Static helpers for building, searching and highlighting a Lucene 4.6 index.
 */
public class LuceneUtils {

    /**
     * Opens an {@link IndexWriter} on a file-system index directory, using a
     * {@link StandardAnalyzer} to split field text into terms.
     *
     * @param indexpath path of the directory that holds the index files
     * @param version   Lucene compatibility version (e.g. {@code Version.LUCENE_46})
     * @param openMode  how to open the index (create, append, create-or-append)
     * @return the writer, or {@code null} if the index could not be opened
     *         (the stack trace is printed); the caller must close the writer
     */
    public static IndexWriter createIndexWriter(String indexpath,
            Version version, OpenMode openMode) {
        Directory dir = null;
        try {
            dir = FSDirectory.open(new File(indexpath));
            Analyzer analyzer = new StandardAnalyzer(version);
            IndexWriterConfig iwc = new IndexWriterConfig(version, analyzer);
            iwc.setOpenMode(openMode);
            return new IndexWriter(dir, iwc);
        } catch (IOException e) {
            e.printStackTrace();
            // The writer was never created, so nobody else will release the directory.
            if (dir != null) {
                try {
                    dir.close();
                } catch (IOException closeFailure) {
                    closeFailure.printStackTrace();
                }
            }
            return null;
        }
    }

    /**
     * Indexes every {@code *.txt} file directly inside {@code dirpath}. Each file
     * becomes one document with a tokenized, unstored {@code content} field and a
     * stored {@code path} field.
     *
     * @param dirpath   directory containing the text files to index
     * @param indexpath directory in which the index files are stored
     * @param openMode  how to open the index, e.g. {@code OpenMode.CREATE_OR_APPEND}
     * @return {@code true} if all files were indexed and the writer closed cleanly,
     *         {@code false} otherwise
     */
    public static boolean createDocumentIndex(String dirpath, String indexpath,
            OpenMode openMode) {
        // listFiles() yields null when the path is missing or not a directory;
        // check before opening the writer so a bad path cannot touch the index.
        File[] txtfiles = new File(dirpath).listFiles();
        if (txtfiles == null) {
            System.out.println("被索引的目录不存在或无法读取:" + dirpath);
            return false;
        }

        IndexWriter writer = createIndexWriter(indexpath, Version.LUCENE_46, openMode);
        if (writer == null) {
            return false;
        }

        boolean indexed = false;
        long startTime = System.currentTimeMillis();
        try {
            for (File txtfile : txtfiles) {
                if (!txtfile.isFile() || !txtfile.getName().endsWith(".txt")) {
                    continue;
                }
                System.out.println("文件" + txtfile.getCanonicalPath() + "正在索引中。。。");
                // NOTE: FileReader decodes with the platform default charset.
                Reader read = new FileReader(txtfile);
                try {
                    Document doc = new Document();
                    doc.add(new TextField("content", read));
                    doc.add(new TextField("path", txtfile.getAbsolutePath(), Store.YES));
                    writer.addDocument(doc);
                } finally {
                    // Release the file handle even if indexing this file failed.
                    read.close();
                }
            }
            System.out.println("numDocs" + writer.numDocs());
            // Replacement for the removed optimize(): merges the index down to a
            // single segment. This is an expensive operation.
            writer.forceMerge(1);
            indexed = true;
        } catch (IOException e) {
            // Also covers CorruptIndexException and LockObtainFailedException.
            e.printStackTrace();
        } finally {
            // Always close so the write lock is released.
            try {
                writer.close();
            } catch (IOException e) {
                e.printStackTrace();
                indexed = false;
            }
        }

        if (indexed) {
            long endTime = System.currentTimeMillis();
            System.out.println("共花了" + (endTime - startTime) + "毫秒将文档增加到索引中" + indexpath);
        }
        return indexed;
    }

    /**
     * Parses {@code queryStr} against the {@code content} field and returns up to
     * 10000 top hits.
     *
     * @param queryStr query text in Lucene classic query-parser syntax
     * @param searcher searcher to run the query on; may be {@code null}
     * @return the hits, or {@code null} if {@code searcher} is {@code null}, the
     *         query cannot be parsed, or the search fails with an I/O error
     */
    public static TopDocs queryString(String queryStr, IndexSearcher searcher) {
        if (searcher == null) {
            System.out.println("索引目录不存在");
            return null;
        }
        try {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
            QueryParser qp = new QueryParser(Version.LUCENE_46, "content", analyzer);
            Query query = qp.parse(queryStr);
            return searcher.search(query, null, 10000);
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Opens a searcher over the index stored in {@code indexpath}.
     *
     * <p>The caller should close {@code searcher.getIndexReader()} when done.
     *
     * @param indexpath directory that holds the index files
     * @return the searcher, or {@code null} if {@code indexpath} does not exist
     * @throws IOException if the index cannot be opened
     */
    public static IndexSearcher getSearcher(String indexpath) throws IOException {
        File indexDir = new File(indexpath);
        if (!indexDir.exists()) {
            System.out.println("索引目录不存在");
            return null;
        }
        FSDirectory directory = FSDirectory.open(indexDir);
        try {
            IndexReader irIndexReader = DirectoryReader.open(directory);
            return new IndexSearcher(irIndexReader);
        } catch (IOException e) {
            // No reader was created, so release the directory before propagating.
            directory.close();
            throw e;
        }
    }

    /**
     * Returns the best-matching fragment of a stored field with the query terms
     * wrapped in {@code <font color="red">...</font>}.
     *
     * @param query query whose terms should be highlighted
     * @param doc   document holding the stored field
     * @param field name of the stored field to highlight
     * @return the highlighted fragment; the raw field value if nothing matched;
     *         {@code null} if the field has no stored value or highlighting failed
     */
    public static String toHighlighter(Query query, Document doc, String field) {
        String text = doc.get(field);
        if (text == null) {
            // StringReader rejects null, and there is nothing to highlight anyway.
            return null;
        }
        try {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
            SimpleHTMLFormatter simpleHtmlFormatter =
                    new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
            Highlighter highlighter =
                    new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
            // Tokenize under the real field name rather than a hard-coded one.
            TokenStream tokenStream1 = analyzer.tokenStream(field, new StringReader(text));
            String highlighterStr = highlighter.getBestFragment(tokenStream1, text);
            return highlighterStr == null ? text : highlighterStr;
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Demo: reads the {@code name} column of a MySQL query result and indexes each
     * non-null value as a stored {@code name} field.
     *
     * <p>{@code indexpath} and {@code sql} are empty placeholders and must be
     * filled in before this method can do anything useful.
     */
    public void sqlTest() {
        String indexpath = "";
        String sql = "";
        String url = "jdbc:mysql://localhost:3306/myuser";

        Connection conn = null;
        PreparedStatement stmt = null;
        ResultSet rs = null;
        IndexWriter indexWriter = null;
        try {
            Class.forName("com.mysql.jdbc.Driver");
            conn = DriverManager.getConnection(url, "root", "root");
            stmt = conn.prepareStatement(sql);
            rs = stmt.executeQuery();
            indexWriter = createIndexWriter(indexpath, Version.LUCENE_46, OpenMode.CREATE);
            if (indexWriter == null) {
                return;
            }
            while (rs.next()) {
                String name = rs.getString("name");
                System.out.println(name);
                if (name == null) {
                    // TextField does not accept a null value.
                    continue;
                }
                Document doc = new Document();
                doc.add(new TextField("name", name, Store.YES));
                indexWriter.addDocument(doc);
            }
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Closing the writer is what commits the added documents.
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (rs != null) {
                try {
                    rs.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
            if (stmt != null) {
                try {
                    stmt.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
            if (conn != null) {
                try {
                    conn.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}

测试类

package com.test.lucene;

import java.io.IOException;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;

/**
 * Manual smoke test: indexes the text files in a directory, runs one query and
 * prints the hits.
 */
public class LuceneTest {

    /**
     * Rebuilds the index at {@code E://aa} from {@code E://aaDocumentTest},
     * searches it for a fixed query string, prints every returned hit and then
     * the fields of the last hit.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String dirpath = "E://aaDocumentTest";
        String indexpath = "E://aa";
        String queryStr = "怎样";

        if (!LuceneUtils.createDocumentIndex(dirpath, indexpath, OpenMode.CREATE)) {
            return;
        }

        IndexSearcher searcher = null;
        try {
            searcher = LuceneUtils.getSearcher(indexpath);
            TopDocs tDocs = LuceneUtils.queryString(queryStr, searcher);
            if (tDocs == null) {
                // Index missing, unparsable query, or search I/O failure.
                return;
            }
            System.out.println("一共索引出:" + tDocs.totalHits + "个文件!");

            List<IndexableField> fList = null;
            // totalHits counts every match, but scoreDocs only holds the returned
            // top hits, so the array length is the safe loop bound.
            for (int j = 0; j < tDocs.scoreDocs.length; j++) {
                Document doc = searcher.doc(tDocs.scoreDocs[j].doc);
                System.out.println("File:" + doc.toString());
                System.out.println("File:" + doc.getField("path"));
                fList = doc.getFields();
            }

            // fList stays null when the query matched nothing.
            if (fList != null) {
                for (IndexableField indexableField : fList) {
                    System.out.println(indexableField.name());
                    System.out.println(indexableField.stringValue());
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (searcher != null) {
                try {
                    searcher.getIndexReader().close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}




分享给朋友:
您可能感兴趣的文章:
随机阅读: