Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
SnowballAnalyzer.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using System.IO;
21 using Lucene.Net.Analysis;
22 using Lucene.Net.Analysis.Standard;
23 using SF.Snowball.Ext;
24 using Version = Lucene.Net.Util.Version;
25 
namespace Lucene.Net.Analysis.Snowball
{
    /// <summary>
    /// Filters <see cref="StandardTokenizer"/> with <see cref="StandardFilter"/>,
    /// <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/> and <see cref="SnowballFilter"/>.
    ///
    /// Available stemmers are listed in <see cref="SF.Snowball.Ext"/>. The name of a
    /// stemmer is the part of the class name before "Stemmer", e.g., the stemmer in
    /// <see cref="EnglishStemmer"/> is named "English".
    ///
    /// <para><b>NOTE:</b> This class uses the same <see cref="Version"/>
    /// dependent settings as <see cref="StandardAnalyzer"/>.</para>
    /// </summary>
    public class SnowballAnalyzer : Analyzer
    {
        // Stemmer name handed to every SnowballFilter this analyzer creates.
        private readonly string name;

        // Stop words to remove; stays null when the analyzer was built without
        // stop words, in which case no StopFilter is added to the chain.
        private readonly ISet<string> stopSet;

        // Version passed to StandardTokenizer and used to pick the StopFilter
        // position-increment default.
        private readonly Version matchVersion;

        /// <summary>Builds the named analyzer with no stop words.</summary>
        /// <param name="matchVersion">Version used for the version-dependent settings of the filter chain.</param>
        /// <param name="name">Name of the stemmer, passed to <see cref="SnowballFilter"/>.</param>
        public SnowballAnalyzer(Version matchVersion, string name)
        {
            this.name = name;
            SetOverridesTokenStreamMethod<SnowballAnalyzer>();
            this.matchVersion = matchVersion;
        }

        /// <summary>Builds the named analyzer with the given stop words.</summary>
        /// <param name="matchVersion">Version used for the version-dependent settings of the filter chain.</param>
        /// <param name="name">Name of the stemmer, passed to <see cref="SnowballFilter"/>.</param>
        /// <param name="stopWords">Stop words, converted to a set via <see cref="StopFilter.MakeStopSet(string[])"/>.</param>
        [Obsolete("Use SnowballAnalyzer(Version, string, ISet) instead.")]
        public SnowballAnalyzer(Version matchVersion, string name, string[] stopWords)
            : this(matchVersion, name)
        {
            stopSet = StopFilter.MakeStopSet(stopWords);
        }

        /// <summary>
        /// Builds the named analyzer with the given stop words.
        /// </summary>
        /// <param name="matchVersion">Version used for the version-dependent settings of the filter chain.</param>
        /// <param name="name">Name of the stemmer, passed to <see cref="SnowballFilter"/>.</param>
        /// <param name="stopWords">Stop words; the analyzer keeps an unmodifiable copy of this set.</param>
        public SnowballAnalyzer(Version matchVersion, string name, ISet<string> stopWords)
            : this(matchVersion, name)
        {
            // Copy first so later changes to the caller's set do not affect this analyzer.
            stopSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopWords));
        }

        /// <summary>
        /// Constructs a <see cref="StandardTokenizer"/> filtered by a
        /// <see cref="StandardFilter"/>, a <see cref="LowerCaseFilter"/>, a
        /// <see cref="StopFilter"/> (only when stop words were supplied) and a
        /// <see cref="SnowballFilter"/>.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The outermost stream of the newly built filter chain.</returns>
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            TokenStream result = new StandardTokenizer(matchVersion, reader);
            result = new StandardFilter(result);
            result = new LowerCaseFilter(result);
            if (stopSet != null)
            {
                // Same StopFilter construction as in ReusableTokenStream.
                result = new StopFilter(
                    StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                    result,
                    stopSet);
            }
            result = new SnowballFilter(result, name);
            return result;
        }

        /// <summary>
        /// Holds the tokenizer at the head of a filter chain together with the
        /// outermost stream of that chain, so both can be kept for reuse.
        /// </summary>
        private class SavedStreams
        {
            internal Tokenizer source;
            internal TokenStream result;
        }

        /// <summary>
        /// Returns a (possibly reused) <see cref="StandardTokenizer"/> filtered by a
        /// <see cref="StandardFilter"/>, a <see cref="LowerCaseFilter"/>,
        /// a <see cref="StopFilter"/> (only when stop words were supplied), and a
        /// <see cref="SnowballFilter"/>.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; only forwarded to <see cref="TokenStream"/> on the fallback path.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>
        /// The outermost stream of the saved chain, or a fresh chain from
        /// <see cref="TokenStream"/> when the fallback path is taken.
        /// </returns>
        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
        {
            if (overridesTokenStreamMethod)
            {
                // LUCENE-1678: force fallback to tokenStream() if we
                // have been subclassed and that subclass overrides
                // tokenStream but not reusableTokenStream
                return TokenStream(fieldName, reader);
            }

            SavedStreams streams = (SavedStreams)PreviousTokenStream;
            if (streams == null)
            {
                // Nothing saved yet: build the chain once and remember it.
                streams = new SavedStreams();
                streams.source = new StandardTokenizer(matchVersion, reader);
                streams.result = new StandardFilter(streams.source);
                streams.result = new LowerCaseFilter(streams.result);
                if (stopSet != null)
                {
                    streams.result = new StopFilter(
                        StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                        streams.result,
                        stopSet);
                }
                streams.result = new SnowballFilter(streams.result, name);
                PreviousTokenStream = streams;
            }
            else
            {
                // Reuse the saved chain; only the tokenizer is pointed at the new reader.
                streams.source.Reset(reader);
            }
            return streams.result;
        }
    }
}