Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
GermanStemFilter.cs
Go to the documentation of this file.
1 /*
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements. See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership. The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License. You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied. See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20 */
21 
22 using System;
23 using System.Collections.Generic;
24 using System.IO;
25 using System.Collections;
26 using Lucene.Net.Analysis.Tokenattributes;
27 
namespace Lucene.Net.Analysis.De
{
    /// <summary>
    /// A filter that stems German words. It supports a table of words that should
    /// not be stemmed at all. The stemmer used can be changed at runtime after the
    /// filter object is created (as long as it is a GermanStemmer).
    /// </summary>
    public sealed class GermanStemFilter : TokenFilter
    {
        /// <summary>
        /// The stemmer applied to every non-excluded term. Replaceable through
        /// <see cref="SetStemmer"/>.
        /// </summary>
        private GermanStemmer stemmer;

        /// <summary>
        /// Terms that are passed through without stemming. A null set means that
        /// no term is excluded.
        /// </summary>
        private ISet<string> exclusionSet;

        /// <summary>
        /// Term attribute obtained from AddAttribute in the constructor; it is read
        /// and, when the stem differs, rewritten by <see cref="IncrementToken"/>.
        /// </summary>
        private readonly ITermAttribute termAtt;

        /// <summary>
        /// Builds a GermanStemFilter without an exclusion table, using the default
        /// (non-DIN2) stemmer.
        /// </summary>
        /// <param name="_in">The token stream to filter.</param>
        public GermanStemFilter(TokenStream _in)
            : this(_in, false)
        { }

        /// <summary>
        /// Builds a GermanStemFilter without an exclusion table.
        /// </summary>
        /// <param name="_in">The token stream to filter.</param>
        /// <param name="useDin2Stemmer">If true a GermanDIN2Stemmer is used, otherwise a plain GermanStemmer.</param>
        public GermanStemFilter(TokenStream _in, bool useDin2Stemmer)
            : this(_in, null, useDin2Stemmer)
        { }

        /// <summary>
        /// Builds a GermanStemFilter that uses an exclusiontable.
        /// </summary>
        /// <param name="_in">The token stream to filter.</param>
        /// <param name="exclusiontable">Terms that must not be stemmed; may be null.</param>
        public GermanStemFilter(TokenStream _in, ISet<string> exclusiontable)
            : this(_in, exclusiontable, false)
        { }

        /// <summary>
        /// Builds a GermanStemFilter that uses an exclusiontable.
        /// </summary>
        /// <param name="_in">The token stream to filter.</param>
        /// <param name="exclusiontable">Terms that must not be stemmed; may be null.</param>
        /// <param name="normalizeDin2">Specifies if the DIN-2007-2 style stemmer should be used in addition to DIN1. This
        /// will cause words with 'ae', 'ue', or 'oe' in them (expanded umlauts) to be first converted to 'a', 'u', and 'o'
        /// respectively, before the DIN1 stemmer is invoked.</param>
        public GermanStemFilter(TokenStream _in, ISet<string> exclusiontable, bool normalizeDin2)
            : base(_in)
        {
            exclusionSet = exclusiontable;
            stemmer = normalizeDin2 ? new GermanDIN2Stemmer() : new GermanStemmer();
            termAtt = AddAttribute<ITermAttribute>();
        }

        /// <summary>
        /// Advances the wrapped stream by one token and, unless the term is listed in
        /// the exclusion set, replaces the term with its stem.
        /// </summary>
        /// <returns>
        /// Returns true for next token in the stream, or false at EOS
        /// </returns>
        public override bool IncrementToken()
        {
            if (!input.IncrementToken())
            {
                return false;
            }

            string term = termAtt.Term;

            // Check the exclusion table: listed terms pass through untouched.
            if (exclusionSet != null && exclusionSet.Contains(term))
            {
                return true;
            }

            string stemmed = stemmer.Stem(term);

            // If not stemmed, don't waste the time adjusting the token.
            if (stemmed != null && !stemmed.Equals(term))
            {
                termAtt.SetTermBuffer(stemmed);
            }

            return true;
        }

        /// <summary>
        /// Set a alternative/custom GermanStemmer for this filter.
        /// </summary>
        /// <param name="stemmer">The stemmer to use from now on. A null argument is
        /// ignored and the current stemmer is kept.</param>
        public void SetStemmer(GermanStemmer stemmer)
        {
            if (stemmer != null)
            {
                this.stemmer = stemmer;
            }
        }

        /// <summary>
        /// Set an alternative exclusion list for this filter.
        /// </summary>
        /// <param name="exclusiontable">The new set of terms that must not be stemmed.
        /// Passing null disables exclusion.</param>
        public void SetExclusionTable(ISet<string> exclusiontable)
        {
            exclusionSet = exclusiontable;
        }
    }
}