using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Lucene.Net.Index;
using Lucene.Net.Support;
using Lucene.Net.Util;
using Version = Lucene.Net.Util.Version;

namespace Lucene.Net.Analysis.Query
{
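    /*
     * Wraps a delegate Analyzer and filters out terms that occur in "too many" of an index's
     * documents, as sampled from an IndexReader via the AddStopWords overloads below.
     * A minimal usage sketch, assuming an open IndexReader `reader` and a StandardAnalyzer
     * as the wrapped analyzer:
     *
     *   var analyzer = new QueryAutoStopWordAnalyzer(Version.LUCENE_30,
     *                                                new StandardAnalyzer(Version.LUCENE_30));
     *   analyzer.AddStopWords(reader);   // sample all indexed fields at the default 40% threshold
     *   // hand 'analyzer' to the QueryParser used at query time
     */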
    public class QueryAutoStopWordAnalyzer : Analyzer
    {
        private readonly Analyzer _delegate;
        private readonly HashMap<String, ISet<String>> stopWordsPerField = new HashMap<String, ISet<String>>();
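
        // Default maximum fraction (40%) of the index's documents that may contain a term
        // before that term is treated as a stop word.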
        public const float defaultMaxDocFreqPercent = 0.4f;
        private readonly Version matchVersion;

        public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer _delegate)
        {
            this._delegate = _delegate;
            SetOverridesTokenStreamMethod<QueryAutoStopWordAnalyzer>();
            this.matchVersion = matchVersion;
        }
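
        // Identifies and records stop words for every indexed field, using the default
        // document-frequency threshold defaultMaxDocFreqPercent.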
        public int AddStopWords(IndexReader reader)
        {
            return AddStopWords(reader, defaultMaxDocFreqPercent);
        }
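
        // Identifies stop words for every indexed field: a term that appears in more than
        // maxDocFreq documents becomes a stop word for that field. Field enumeration here
        // assumes the Lucene.Net 3.x IndexReader.GetFieldNames(FieldOption.INDEXED) API.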
        public int AddStopWords(IndexReader reader, int maxDocFreq)
        {
            int numStopWords = 0;
            var fieldNames = reader.GetFieldNames(IndexReader.FieldOption.INDEXED);
            for (IEnumerator<String> iter = fieldNames.GetEnumerator(); iter.MoveNext();) {
                String fieldName = iter.Current;
                numStopWords += AddStopWords(reader, fieldName, maxDocFreq);
            }
            return numStopWords;
        }
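
        // Same as above, but the threshold is expressed as a fraction of the index's documents.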
        public int AddStopWords(IndexReader reader, float maxPercentDocs)
        {
            int numStopWords = 0;
            var fieldNames = reader.GetFieldNames(IndexReader.FieldOption.INDEXED);
            for (IEnumerator<String> iter = fieldNames.GetEnumerator(); iter.MoveNext();) {
                String fieldName = iter.Current;
                numStopWords += AddStopWords(reader, fieldName, maxPercentDocs);
            }
            return numStopWords;
        }
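
        // Per-field variant: the fractional threshold is converted to an absolute document count.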
        public int AddStopWords(IndexReader reader, String fieldName, float maxPercentDocs)
        {
            return AddStopWords(reader, fieldName, (int) (reader.NumDocs() * maxPercentDocs));
        }
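
        // Per-field variant with an absolute threshold: every term of the field whose document
        // frequency exceeds maxDocFreq is recorded as a stop word. The term walk below assumes
        // the Lucene.Net 3.x IndexReader.Terms/TermEnum API; the field name is interned so terms
        // of other fields can be detected by reference comparison.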
        public int AddStopWords(IndexReader reader, String fieldName, int maxDocFreq)
        {
            var stopWords = Support.Compatibility.SetFactory.CreateHashSet<string>();
            String internedFieldName = StringHelper.Intern(fieldName);
            TermEnum te = reader.Terms(new Term(fieldName));
            Term term = te.Term;
            while (term != null) {
                if (term.Field != internedFieldName) {
                    break;
                }
                if (te.DocFreq() > maxDocFreq) {
                    stopWords.Add(term.Text);
                }
                if (!te.Next()) {
                    break;
                }
                term = te.Term;
            }
            stopWordsPerField.Add(fieldName, stopWords);
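
            /* if the stop words for a field change, any cached token stream for that field is discarded */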
            IDictionary<String, SavedStreams> streamMap = (IDictionary<String, SavedStreams>) PreviousTokenStream;
            if (streamMap != null)
                streamMap.Remove(fieldName);

            return stopWords.Count;
        }
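
        // Builds the token stream for a field and, when stop words have been identified for it,
        // wraps the stream in a StopFilter. The enable-position-increments default is assumed to
        // come from StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), which is
        // why matchVersion is stored by the constructor.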
        public override TokenStream TokenStream(String fieldName, TextReader reader)
        {
            TokenStream result;
            try {
                result = _delegate.ReusableTokenStream(fieldName, reader);
            }
            catch (IOException) {
                result = _delegate.TokenStream(fieldName, reader);
            }
            var stopWords = stopWordsPerField[fieldName];
            if (stopWords != null) {
                result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                        result, stopWords);
            }
            return result;
        }
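
        // Per-field pair of cached streams, stored per field name in the map kept in PreviousTokenStream.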
        private class SavedStreams {
            internal TokenStream Wrapped;          // the stream produced by the delegate analyzer
            internal TokenStream WithStopFilter;   // Wrapped itself, or a StopFilter around it
        }
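
        // Reusable-stream variant: keeps one SavedStreams entry per field in PreviousTokenStream,
        // so the StopFilter only has to be rebuilt when the delegate hands back a new stream.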
        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
        {
            if (overridesTokenStreamMethod) {
                // a subclass overrode TokenStream(); fall back to it so that behaviour is preserved
                return TokenStream(fieldName, reader);
            }

            IDictionary<String, SavedStreams> streamMap = (IDictionary<String, SavedStreams>) PreviousTokenStream;
            if (streamMap == null) {
                streamMap = new HashMap<String, SavedStreams>();
                PreviousTokenStream = streamMap;
            }

            SavedStreams streams = streamMap[fieldName];
            if (streams == null) {
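                // first request for this field: cache the delegate's stream and its filtered wrapper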
                streams = new SavedStreams();
                streamMap.Add(fieldName, streams);
                streams.Wrapped = _delegate.ReusableTokenStream(fieldName, reader);
                var stopWords = stopWordsPerField[fieldName];
                if (stopWords != null)
                    streams.WithStopFilter = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                                            streams.Wrapped, stopWords);
                else
                    streams.WithStopFilter = streams.Wrapped;
            }
            else {
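                // an entry already exists: check whether the delegate reused its stream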
                TokenStream result = _delegate.ReusableTokenStream(fieldName, reader);
                if (result == streams.Wrapped) {
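                    // the delegate reused its stream, so the cached filter is still valid; just reset it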
                    streams.WithStopFilter.Reset();
                }
                else {
                    // the delegate created a new stream; rebuild the stop-filtered wrapper around it
                    streams.Wrapped = result;
                    var stopWords = stopWordsPerField[fieldName];
                    if (stopWords != null)
                        streams.WithStopFilter = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                                                streams.Wrapped, stopWords);
                    else
                        streams.WithStopFilter = streams.Wrapped;
                }
            }

            return streams.WithStopFilter;
        }
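
        // Returns the stop words recorded for a single field (empty when none have been added).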
        public String[] GetStopWords(String fieldName)
        {
            String[] result;
            var stopWords = stopWordsPerField[fieldName];
            if (stopWords != null) {
                result = stopWords.ToArray();
            }
            else {
                result = new String[0];
            }
            return result;
        }
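
        // Returns every recorded stop word across all fields, as field-qualified Terms.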
        public Term[] GetStopWords()
        {
            List<Term> allStopWords = new List<Term>();
            foreach (var fieldName in stopWordsPerField.Keys) {
                var stopWords = stopWordsPerField[fieldName];
                foreach (var text in stopWords) {
                    allStopWords.Add(new Term(fieldName, text));
                }
            }
            return allStopWords.ToArray();
        }
    }
}