Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
StopFilter.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using Lucene.Net.Analysis.Tokenattributes;
21 using Lucene.Net.Util;
22 using QueryParser = Lucene.Net.QueryParsers.QueryParser;
23 using Version = Lucene.Net.Util.Version;
24 
25 namespace Lucene.Net.Analysis
26 {
27 
29 
/// <summary>
/// Token filter that drops every token whose term text is contained in a
/// stop-word set and passes all other tokens through unchanged.
/// </summary>
public sealed class StopFilter : TokenFilter
{
    private readonly CharArraySet stopWords;

    private readonly ITermAttribute termAtt;
    private readonly IPositionIncrementAttribute posIncrAtt;

    /// <summary>
    /// Creates a filter that removes the given stop words from <paramref name="input"/>.
    /// </summary>
    /// <param name="enablePositionIncrements">Initial value of <see cref="EnablePositionIncrements"/>.</param>
    /// <param name="input">The token stream to filter.</param>
    /// <param name="stopWords">
    /// The stop words. When this is already a <see cref="CharArraySet"/> it is used
    /// directly (not copied) and <paramref name="ignoreCase"/> is not consulted;
    /// otherwise its contents are copied into a new <see cref="CharArraySet"/>.
    /// </param>
    /// <param name="ignoreCase">
    /// Passed to the <see cref="CharArraySet"/> constructor when a copy of
    /// <paramref name="stopWords"/> has to be made.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="stopWords"/> is <c>null</c>.</exception>
    public StopFilter(bool enablePositionIncrements, TokenStream input, ISet<string> stopWords, bool ignoreCase)
        : base(input)
    {
        if (stopWords == null)
        {
            throw new ArgumentNullException("stopWords");
        }

        // Reuse the caller's set when it already has the right type; only copy otherwise.
        var charArraySet = stopWords as CharArraySet;
        if (charArraySet == null)
        {
            charArraySet = new CharArraySet(stopWords.Count, ignoreCase);
            charArraySet.AddAll(stopWords);
        }

        this.stopWords = charArraySet;
        EnablePositionIncrements = enablePositionIncrements;
        termAtt = AddAttribute<ITermAttribute>();
        posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    }

    /// <summary>
    /// Creates a filter that removes the given stop words from <paramref name="in"/>,
    /// passing <c>false</c> for the <c>ignoreCase</c> argument of the four-argument constructor.
    /// </summary>
    /// <param name="enablePositionIncrements">Initial value of <see cref="EnablePositionIncrements"/>.</param>
    /// <param name="in">The token stream to filter.</param>
    /// <param name="stopWords">The stop words.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stopWords"/> is <c>null</c>.</exception>
    public StopFilter(bool enablePositionIncrements, TokenStream @in, ISet<string> stopWords)
        : this(enablePositionIncrements, @in, stopWords, false)
    {
    }

    /// <summary>
    /// Builds a stop-word set from the given words, with <c>ignoreCase</c> set to <c>false</c>.
    /// </summary>
    /// <param name="stopWords">The words to put into the set.</param>
    /// <returns>A <see cref="CharArraySet"/> containing the words.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stopWords"/> is <c>null</c>.</exception>
    public static ISet<string> MakeStopSet(params string[] stopWords)
    {
        return MakeStopSet(stopWords, false);
    }

    /// <summary>
    /// Builds a stop-word set from the string form of each list element,
    /// with <c>ignoreCase</c> set to <c>false</c>.
    /// </summary>
    /// <param name="stopWords">The items whose <c>ToString()</c> values go into the set.</param>
    /// <returns>A <see cref="CharArraySet"/> containing the words.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stopWords"/> is <c>null</c>.</exception>
    public static ISet<string> MakeStopSet(IList<object> stopWords)
    {
        return MakeStopSet(stopWords, false);
    }

    /// <summary>
    /// Builds a stop-word set from the given words.
    /// </summary>
    /// <param name="stopWords">The words to put into the set.</param>
    /// <param name="ignoreCase">Passed to the <see cref="CharArraySet"/> constructor.</param>
    /// <returns>A <see cref="CharArraySet"/> containing the words.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stopWords"/> is <c>null</c>.</exception>
    public static ISet<string> MakeStopSet(string[] stopWords, bool ignoreCase)
    {
        if (stopWords == null)
        {
            throw new ArgumentNullException("stopWords");
        }

        var stopSet = new CharArraySet(stopWords.Length, ignoreCase);
        stopSet.AddAll(stopWords);
        return stopSet;
    }

    /// <summary>
    /// Builds a stop-word set from the string form of each list element.
    /// </summary>
    /// <param name="stopWords">
    /// The items whose <c>ToString()</c> values go into the set. Elements are
    /// dereferenced without a null check, so every element must be non-null.
    /// </param>
    /// <param name="ignoreCase">Passed to the <see cref="CharArraySet"/> constructor.</param>
    /// <returns>A <see cref="CharArraySet"/> containing the words.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stopWords"/> is <c>null</c>.</exception>
    public static ISet<string> MakeStopSet(IList<object> stopWords, bool ignoreCase)
    {
        if (stopWords == null)
        {
            throw new ArgumentNullException("stopWords");
        }

        var stopSet = new CharArraySet(stopWords.Count, ignoreCase);
        foreach (var word in stopWords)
        {
            stopSet.Add(word.ToString());
        }
        return stopSet;
    }

    /// <summary>
    /// Advances the wrapped stream to the next token that is not a stop word.
    /// </summary>
    /// <returns>
    /// <c>true</c> when a non-stop-word token was found; <c>false</c> once the
    /// wrapped stream is exhausted.
    /// </returns>
    public override bool IncrementToken()
    {
        // Sum of the position increments of the stop words dropped since the last emitted token.
        int skippedPositions = 0;
        while (input.IncrementToken())
        {
            if (stopWords.Contains(termAtt.TermBuffer(), 0, termAtt.TermLength()))
            {
                skippedPositions += posIncrAtt.PositionIncrement;
                continue;
            }

            if (EnablePositionIncrements)
            {
                // Carry the dropped tokens' increments over to the token being emitted.
                posIncrAtt.PositionIncrement += skippedPositions;
            }
            return true;
        }

        // Reached end of stream.
        return false;
    }

    /// <summary>
    /// Returns the version-dependent default for <see cref="EnablePositionIncrements"/>.
    /// </summary>
    /// <param name="matchVersion">The compatibility version to test.</param>
    /// <returns><c>true</c> when <paramref name="matchVersion"/> is on or after <c>Version.LUCENE_29</c>.</returns>
    public static bool GetEnablePositionIncrementsVersionDefault(Version matchVersion)
    {
        return matchVersion.OnOrAfter(Version.LUCENE_29);
    }

    /// <summary>
    /// Gets or sets whether the position increments of removed stop words are
    /// added to the position increment of the next emitted token. When
    /// <c>false</c>, emitted tokens keep the increment they arrived with.
    /// </summary>
    public bool EnablePositionIncrements { get; set; }
}
178 }