Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
SnowballAnalyzer.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using System.IO;
21 using Lucene.Net.Analysis;
22 using Lucene.Net.Analysis.Standard;
23 using SF.Snowball.Ext;
24 using Version = Lucene.Net.Util.Version;
25 
namespace Lucene.Net.Analysis.Snowball
{
    /// <summary>
    /// Analyzer that tokenizes with <see cref="StandardTokenizer"/> and then applies, in order,
    /// <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, an optional
    /// <see cref="StopFilter"/> (only when a stop-word set was supplied) and
    /// <see cref="SnowballFilter"/>.
    /// </summary>
    /// <remarks>
    /// The <c>name</c> given to the constructor is passed unchanged to <see cref="SnowballFilter"/>;
    /// per the Lucene.Net Snowball documentation it identifies the stemmer to use (for example
    /// "English"). The <see cref="Version"/> given to the constructor is forwarded to
    /// <see cref="StandardTokenizer"/> and to the <see cref="StopFilter"/> position-increment default.
    /// </remarks>
    public class SnowballAnalyzer : Analyzer
    {
        // All three fields are assigned only during construction.
        private readonly string name;
        private readonly ISet<string> stopSet; // null => no StopFilter is added to the chain
        private readonly Version matchVersion;

        /// <summary>Builds the named analyzer with no stop words.</summary>
        /// <param name="matchVersion">Version forwarded to the tokenizer and stop filter.</param>
        /// <param name="name">Name forwarded to <see cref="SnowballFilter"/>.</param>
        public SnowballAnalyzer(Version matchVersion, string name)
        {
            this.name = name;
            this.matchVersion = matchVersion;
            // Lets ReusableTokenStream detect subclasses that override TokenStream (LUCENE-1678).
            SetOverridesTokenStreamMethod<SnowballAnalyzer>();
        }

        /// <summary>Builds the named analyzer with the given stop words.</summary>
        /// <param name="matchVersion">Version forwarded to the tokenizer and stop filter.</param>
        /// <param name="name">Name forwarded to <see cref="SnowballFilter"/>.</param>
        /// <param name="stopWords">Stop words, converted with <see cref="StopFilter.MakeStopSet(string[])"/>.</param>
        [Obsolete("Use SnowballAnalyzer(Version, string, ISet) instead.")]
        public SnowballAnalyzer(Version matchVersion, string name, string[] stopWords)
            : this(matchVersion, name)
        {
            stopSet = StopFilter.MakeStopSet(stopWords);
        }

        /// <summary>Builds the named analyzer with the given stop words.</summary>
        /// <param name="matchVersion">Version forwarded to the tokenizer and stop filter.</param>
        /// <param name="name">Name forwarded to <see cref="SnowballFilter"/>.</param>
        /// <param name="stopWords">
        /// Stop words. The set is passed through <c>CharArraySet.Copy</c> and then
        /// <c>CharArraySet.UnmodifiableSet</c>, so later changes to the caller's set are
        /// presumably not observed by this analyzer.
        /// </param>
        public SnowballAnalyzer(Version matchVersion, string name, ISet<string> stopWords)
            : this(matchVersion, name)
        {
            stopSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopWords));
        }

        /// <summary>
        /// Constructs a new <see cref="StandardTokenizer"/> over <paramref name="reader"/> filtered by
        /// a <see cref="StandardFilter"/>, a <see cref="LowerCaseFilter"/>, a <see cref="StopFilter"/>
        /// (if stop words were supplied) and a <see cref="SnowballFilter"/>.
        /// </summary>
        /// <param name="fieldName">Field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The outermost filter of the newly built chain.</returns>
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            return BuildFilterChain(new StandardTokenizer(matchVersion, reader));
        }

        /// <summary>Per-thread cache entry: the tokenizer to reset plus the outermost filter to return.</summary>
        private sealed class SavedStreams
        {
            internal Tokenizer source;
            internal TokenStream result;
        }

        /// <summary>
        /// Returns a (possibly reused) <see cref="StandardTokenizer"/> filtered by a
        /// <see cref="StandardFilter"/>, a <see cref="LowerCaseFilter"/>, a <see cref="StopFilter"/>
        /// (if stop words were supplied) and a <see cref="SnowballFilter"/>.
        /// </summary>
        /// <param name="fieldName">Field being analyzed; only used on the fallback path.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>
        /// The cached chain, re-pointed at <paramref name="reader"/>, or a freshly built chain
        /// when nothing has been cached yet.
        /// </returns>
        public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
        {
            if (overridesTokenStreamMethod)
            {
                // LUCENE-1678: force fallback to TokenStream() if we
                // have been subclassed and that subclass overrides
                // TokenStream but not ReusableTokenStream.
                return TokenStream(fieldName, reader);
            }

            SavedStreams streams = (SavedStreams)PreviousTokenStream;
            if (streams == null)
            {
                // First use: build the chain once and remember it.
                streams = new SavedStreams();
                streams.source = new StandardTokenizer(matchVersion, reader);
                streams.result = BuildFilterChain(streams.source);
                PreviousTokenStream = streams;
            }
            else
            {
                // Reuse: only the tokenizer at the head of the chain is pointed at the new reader.
                streams.source.Reset(reader);
            }

            return streams.result;
        }

        /// <summary>
        /// Wraps <paramref name="source"/> in the filter chain shared by
        /// <see cref="TokenStream(string, TextReader)"/> and
        /// <see cref="ReusableTokenStream(string, TextReader)"/>, so both always agree.
        /// </summary>
        /// <param name="source">Tokenizer at the head of the chain.</param>
        /// <returns>The outermost filter (<see cref="SnowballFilter"/>).</returns>
        private TokenStream BuildFilterChain(Tokenizer source)
        {
            TokenStream chain = new StandardFilter(source);
            chain = new LowerCaseFilter(chain);

            if (stopSet != null)
            {
                // Stop-word removal is optional and runs before stemming.
                chain = new StopFilter(
                    StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                    chain,
                    stopSet);
            }

            return new SnowballFilter(chain, name);
        }
    }
}