Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
GermanAnalyzer.cs
Go to the documentation of this file.
1 /*
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements. See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership. The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License. You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied. See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20 */
21 
22 using System;
23 using System.Collections.Generic;
24 using System.IO;
25 using System.Collections;
26 using System.Linq;
27 using Lucene.Net.Analysis.Standard;
28 using Lucene.Net.Analysis;
29 using Version = Lucene.Net.Util.Version;
30 
31 namespace Lucene.Net.Analysis.De
32 {
/// <summary>
/// Analyzer for German text. The token stream it builds runs, in order, a
/// <c>StandardTokenizer</c>, a <c>StandardFilter</c>, a <c>LowerCaseFilter</c>,
/// a <c>StopFilter</c> and a <c>GermanStemFilter</c>.
/// A stop-word set and a stem-exclusion set (words handed to the stem filter
/// as exclusions) can be supplied through the constructors.
/// </summary>
public class GermanAnalyzer : Analyzer
{
    /// <summary>
    /// Built-in German stop words; the source of <see cref="GetDefaultStopSet"/>.
    /// </summary>
    private static readonly String[] GERMAN_STOP_WORDS =
    {
        "einer", "eine", "eines", "einem", "einen",
        "der", "die", "das", "dass", "daß",
        "du", "er", "sie", "es",
        "was", "wer", "wie", "wir",
        "und", "oder", "ohne", "mit",
        "am", "im", "in", "aus", "auf",
        "ist", "sein", "war", "wird",
        "ihr", "ihre", "ihres",
        "als", "für", "von",
        "dich", "dir", "mich", "mir",
        "mein", "kein",
        "durch", "wegen"
    };

    /// <summary>
    /// Returns the default German stop-word set, wrapped by
    /// <c>CharArraySet.UnmodifiableSet</c>.
    /// </summary>
    /// <returns>The shared default stop-word set.</returns>
    public static ISet<string> GetDefaultStopSet()
    {
        return DefaultSetHolder.DEFAULT_SET;
    }

    /// <summary>
    /// Holder class so the default set is only built when this nested type is
    /// first touched rather than as part of <see cref="GermanAnalyzer"/>'s own
    /// static initialization.
    /// </summary>
    private static class DefaultSetHolder
    {
        // The cast pins the IEnumerable<string> overload of the CharArraySet
        // constructor; the second argument is passed through unchanged
        // (presumably the ignore-case flag - confirm against CharArraySet).
        internal static readonly ISet<string> DEFAULT_SET = CharArraySet.UnmodifiableSet(new CharArraySet(
            (IEnumerable<string>)GERMAN_STOP_WORDS,
            false));
    }

    /// <summary>
    /// Stop words removed by the <c>StopFilter</c>. Assigned once in the
    /// primary constructor.
    /// </summary>
    private readonly ISet<string> stopSet;

    /// <summary>
    /// Words handed to <c>GermanStemFilter</c> as its exclusion set.
    /// Not readonly: the obsolete <c>SetStemExclusionTable</c> overloads
    /// still replace it after construction.
    /// </summary>
    private ISet<string> exclusionSet;

    /// <summary>
    /// Compatibility version given to the tokenizer and used to pick the
    /// stop filter's position-increment default.
    /// </summary>
    private readonly Version matchVersion;

    /// <summary>
    /// Flag forwarded verbatim to <c>GermanStemFilter</c>.
    /// NOTE(review): the name suggests DIN-5007-2 umlaut normalization -
    /// confirm against GermanStemFilter.
    /// </summary>
    private readonly bool _normalizeDin2;

    /// <summary>
    /// Builds an analyzer with the default stop words, using
    /// <c>Version.LUCENE_CURRENT</c>.
    /// </summary>
    [Obsolete("Use GermanAnalyzer(Version) instead")]
    public GermanAnalyzer()
        : this(Version.LUCENE_CURRENT)
    {
    }

    /// <summary>
    /// Builds an analyzer with the default stop words.
    /// </summary>
    /// <param name="matchVersion">Lucene compatibility version.</param>
    public GermanAnalyzer(Version matchVersion)
        : this(matchVersion, DefaultSetHolder.DEFAULT_SET)
    {
    }

    /// <summary>
    /// Builds an analyzer with the default stop words and an explicit
    /// <paramref name="normalizeDin2"/> setting.
    /// </summary>
    /// <param name="matchVersion">Lucene compatibility version.</param>
    /// <param name="normalizeDin2">Forwarded to <c>GermanStemFilter</c>.</param>
    public GermanAnalyzer(Version matchVersion, bool normalizeDin2)
        : this(matchVersion, DefaultSetHolder.DEFAULT_SET, normalizeDin2)
    {
    }

    /// <summary>
    /// Builds an analyzer with the given stop words and an empty
    /// stem-exclusion set.
    /// </summary>
    /// <param name="matchVersion">Lucene compatibility version.</param>
    /// <param name="stopwords">Stop words to remove.</param>
    public GermanAnalyzer(Version matchVersion, ISet<string> stopwords)
        : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
    {
    }

    /// <summary>
    /// Builds an analyzer with the given stop words, an empty stem-exclusion
    /// set and an explicit <paramref name="normalizeDin2"/> setting.
    /// </summary>
    /// <param name="matchVersion">Lucene compatibility version.</param>
    /// <param name="stopwords">Stop words to remove.</param>
    /// <param name="normalizeDin2">Forwarded to <c>GermanStemFilter</c>.</param>
    public GermanAnalyzer(Version matchVersion, ISet<string> stopwords, bool normalizeDin2)
        : this(matchVersion, stopwords, CharArraySet.EMPTY_SET, normalizeDin2)
    {
    }

    /// <summary>
    /// Builds an analyzer with the given stop words and stem-exclusion set;
    /// <c>normalizeDin2</c> is <c>false</c>.
    /// </summary>
    /// <param name="matchVersion">Lucene compatibility version.</param>
    /// <param name="stopwords">Stop words to remove.</param>
    /// <param name="stemExclusionSet">Words passed to the stem filter as exclusions.</param>
    public GermanAnalyzer(Version matchVersion, ISet<string> stopwords, ISet<string> stemExclusionSet)
        : this(matchVersion, stopwords, stemExclusionSet, false)
    {
    }

    /// <summary>
    /// Primary constructor; every other constructor ends up here.
    /// Both sets are copied and wrapped as unmodifiable, so later changes to
    /// the caller's sets do not reach this analyzer.
    /// </summary>
    /// <param name="matchVersion">Lucene compatibility version.</param>
    /// <param name="stopwords">Stop words to remove.</param>
    /// <param name="stemExclusionSet">Words passed to the stem filter as exclusions.</param>
    /// <param name="normalizeDin2">Forwarded to <c>GermanStemFilter</c>.</param>
    public GermanAnalyzer(Version matchVersion, ISet<string> stopwords, ISet<string> stemExclusionSet, bool normalizeDin2)
    {
        stopSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords));
        exclusionSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stemExclusionSet));
        this.matchVersion = matchVersion;
        _normalizeDin2 = normalizeDin2;

        // Inherited Analyzer hook, called exactly as before.
        SetOverridesTokenStreamMethod<GermanAnalyzer>();
    }

    /// <summary>
    /// Builds an analyzer from an array of stop words.
    /// </summary>
    /// <param name="matchVersion">Lucene compatibility version.</param>
    /// <param name="stopwords">Stop words to remove.</param>
    [Obsolete("Use GermanAnalyzer(Version, ISet) instead")]
    public GermanAnalyzer(Version matchVersion, params string[] stopwords)
        : this(matchVersion, StopFilter.MakeStopSet(stopwords))
    {
    }

    /// <summary>
    /// Builds an analyzer whose stop words are the keys of
    /// <paramref name="stopwords"/>; the values are ignored.
    /// </summary>
    /// <param name="matchVersion">Lucene compatibility version.</param>
    /// <param name="stopwords">Dictionary whose keys are the stop words.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="stopwords"/> is <c>null</c>.
    /// </exception>
    [Obsolete("Use GermanAnalyzer(Version, ISet)")]
    public GermanAnalyzer(Version matchVersion, IDictionary<string, string> stopwords)
        : this(matchVersion, KeysToArray(stopwords, "stopwords"))
    {
    }

    /// <summary>
    /// Builds an analyzer whose stop words are loaded from a file through
    /// <c>WordlistLoader.GetWordSet</c>.
    /// </summary>
    /// <param name="matchVersion">Lucene compatibility version.</param>
    /// <param name="stopwords">File containing the stop words.</param>
    [Obsolete("Use GermanAnalyzer(Version, ISet)")]
    public GermanAnalyzer(Version matchVersion, FileInfo stopwords)
        : this(matchVersion, WordlistLoader.GetWordSet(stopwords))
    {
    }

    /// <summary>
    /// Replaces the stem-exclusion set with one built from an array of words.
    /// </summary>
    /// <param name="exclusionlist">Words to exclude from stemming.</param>
    [Obsolete("Use GermanAnalyzer(Version, ISet, ISet) instead")]
    public void SetStemExclusionTable(String[] exclusionlist)
    {
        ReplaceExclusionSet(StopFilter.MakeStopSet(exclusionlist));
    }

    /// <summary>
    /// Replaces the stem-exclusion set with the keys of
    /// <paramref name="exclusionlist"/>; the values are ignored.
    /// </summary>
    /// <param name="exclusionlist">Dictionary whose keys are the excluded words.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="exclusionlist"/> is <c>null</c>.
    /// </exception>
    [Obsolete("Use GermanAnalyzer(Version, ISet, ISet) instead")]
    public void SetStemExclusionTable(IDictionary<string, string> exclusionlist)
    {
        if (exclusionlist == null)
        {
            throw new ArgumentNullException("exclusionlist");
        }

        ReplaceExclusionSet(Support.Compatibility.SetFactory.CreateHashSet(exclusionlist.Keys));
    }

    /// <summary>
    /// Replaces the stem-exclusion set with words loaded from a file through
    /// <c>WordlistLoader.GetWordSet</c>.
    /// </summary>
    /// <param name="exclusionlist">File containing the excluded words.</param>
    [Obsolete("Use GermanAnalyzer(Version, ISet, ISet) instead")]
    public void SetStemExclusionTable(FileInfo exclusionlist)
    {
        ReplaceExclusionSet(WordlistLoader.GetWordSet(exclusionlist));
    }

    /// <summary>
    /// Builds the analysis chain for <paramref name="reader"/>:
    /// tokenize, standard filter, lower-case, remove stop words, stem.
    /// </summary>
    /// <param name="fieldName">Field name; not used by this implementation.</param>
    /// <param name="reader">Source of the text to analyze.</param>
    /// <returns>The outermost filter of the chain (the German stem filter).</returns>
    public override TokenStream TokenStream(String fieldName, TextReader reader)
    {
        TokenStream result = new StandardTokenizer(matchVersion, reader);
        result = new StandardFilter(result);
        result = new LowerCaseFilter(result);
        result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet);
        result = new GermanStemFilter(result, exclusionSet, _normalizeDin2);
        return result;
    }

    /// <summary>
    /// Shared tail of the <c>SetStemExclusionTable</c> overloads: stores the
    /// new set and resets the inherited <c>PreviousTokenStream</c> to null
    /// (presumably so a stream built with the old set is not reused).
    /// </summary>
    /// <param name="newExclusions">The set that becomes the exclusion set.</param>
    private void ReplaceExclusionSet(ISet<string> newExclusions)
    {
        exclusionSet = newExclusions;
        PreviousTokenStream = null;
    }

    /// <summary>
    /// Copies the keys of <paramref name="words"/> into an array, rejecting a
    /// null dictionary with an exception that names the caller's parameter.
    /// Static so it can be used inside a constructor initializer.
    /// </summary>
    /// <param name="words">Dictionary whose keys are wanted.</param>
    /// <param name="paramName">Parameter name reported on failure.</param>
    /// <returns>The dictionary keys as an array.</returns>
    private static string[] KeysToArray(IDictionary<string, string> words, string paramName)
    {
        if (words == null)
        {
            throw new ArgumentNullException(paramName);
        }

        return words.Keys.ToArray();
    }
}
250 }