Lucene 解析器分析

来源:转载


import java.io.IOException;import java.io.StringReader;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.TokenStream;import org.apache.lucene.analysis.core.SimpleAnalyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;import org.apache.lucene.util.Version;public class AnalyzerTest { public static void analysis(Analyzer analyzer, String txt) throws IOException { System.out.println("analyzer:" + analyzer.getClass()); TokenStream stream = analyzer.tokenStream("content", new StringReader(txt)); stream.reset(); // while (stream.incrementToken()) { CharTermAttribute attribute = stream.getAttribute(CharTermAttribute.class); OffsetAttribute offsetAttribute = stream.getAttribute(OffsetAttribute.class); System.out.println("off:" + offsetAttribute.startOffset() + "----" + offsetAttribute.endOffset()); System.out.println("attr:" + attribute.toString()); } } public static void main(String[] args) throws IOException { Analyzer a = new StandardAnalyzer(Version.LUCENE_48); a = new SimpleAnalyzer(Version.LUCENE_48); // a = new CJKAnalyzer(Version.LUCENE_48); //a = new MyStopAnalyzer(); String txt = "this is a txt"; System.out.println("textLength:" + txt.length()); System.out.println("0-4:" + txt.substring(5, 7)); String zhTxt = "这是中文测试,hello 中文 The i am i am"; //analysis(a, txt); analysis(a, zhTxt); }}



分享给朋友:
您可能感兴趣的文章:
随机阅读: