Lucene 自定义分析器(Analyzer)

来源:转载


import java.io.Reader;import java.util.Set;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.Tokenizer;import org.apache.lucene.analysis.core.LowerCaseTokenizer;import org.apache.lucene.analysis.core.StopAnalyzer;import org.apache.lucene.analysis.core.StopFilter;import org.apache.lucene.analysis.util.CharArraySet;import org.apache.lucene.util.Version;public class MyStopAnalyzer extends Analyzer { private Set stops; public MyStopAnalyzer() { stops = StopAnalyzer.ENGLISH_STOP_WORDS_SET; } public MyStopAnalyzer(String[] stopwords) { //lucene自动将数组置换为set stops = StopFilter.makeStopSet(Version.LUCENE_48, stopwords, true); //添加所有的停止词 stops.addAll(StopAnalyzer.ENGLISH_STOP_WORDS_SET); } @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { //首先转换为小写 Tokenizer t = new LowerCaseTokenizer(Version.LUCENE_48, reader); //转换后去除顿词 return new TokenStreamComponents(t, new StopFilter(Version.LUCENE_48, t, (CharArraySet) stops)); }}



分享给朋友:
您可能感兴趣的文章:
随机阅读: