Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
DutchStemFilter.cs
Go to the documentation of this file.
1 /*
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements. See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership. The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License. You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied. See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20 */
21 
22 using System;
23 using System.Collections.Generic;
24 using System.IO;
25 using System.Collections;
26 using Lucene.Net.Analysis.Tokenattributes;
27 using Lucene.Net.Support;
28 
29 namespace Lucene.Net.Analysis.Nl
30 {
/*
 * A TokenFilter that stems Dutch words.
 *
 * Each term produced by the wrapped stream is passed to a DutchStemmer and,
 * when the stemmer returns a different non-null string, the term buffer is
 * overwritten with that result. Terms contained in the optional exclusion
 * set are passed through untouched. The stemmer, the exclusion set and the
 * stem dictionary can all be replaced after the filter has been created.
 *
 * NOTE: This stemmer does not implement the Snowball algorithm correctly,
 * specifically doubled consonants. It is recommended that you consider using
 * the "Dutch" stemmer in the snowball package instead. This stemmer will likely
 * be deprecated in a future release.
 */
public sealed class DutchStemFilter : TokenFilter
{
    /*
     * Stemmer applied to every non-excluded term. Assigned in the base
     * constructor and only ever replaced by a non-null instance.
     */
    private DutchStemmer stemmer;

    /*
     * Terms that must not be stemmed; null means "no exclusions".
     */
    private ISet<string> exclusions;

    /*
     * Term attribute shared with the wrapped stream; read and rewritten
     * in IncrementToken.
     */
    private readonly ITermAttribute termAtt;

    /*
     * Builds a DutchStemFilter with a default DutchStemmer and no exclusion table.
     *
     * @param _in The token stream whose terms are stemmed
     */
    public DutchStemFilter(TokenStream _in)
        : base(_in)
    {
        stemmer = new DutchStemmer();
        termAtt = AddAttribute<ITermAttribute>();
    }

    /*
     * Builds a DutchStemFilter that uses an exclusion table.
     *
     * @param _in The token stream whose terms are stemmed
     * @param exclusiontable Terms that are never stemmed; may be null
     */
    public DutchStemFilter(TokenStream _in, ISet<string> exclusiontable)
        : this(_in)
    {
        exclusions = exclusiontable;
    }

    /*
     * Builds a DutchStemFilter that uses an exclusion table and a stem dictionary.
     *
     * @param _in The token stream whose terms are stemmed
     * @param exclusiontable Terms that are never stemmed; may be null
     * @param stemdictionary Dictionary of word stem pairs, that overrule the algorithm
     */
    public DutchStemFilter(TokenStream _in, ISet<string> exclusiontable, IDictionary<string, string> stemdictionary)
        : this(_in, exclusiontable)
    {
        stemmer.SetStemDictionary(stemdictionary);
    }

    /*
     * Advances the wrapped stream to its next token and stems the term
     * unless it is listed in the exclusion table.
     *
     * @return true if a token is available, false at the end of the stream
     */
    public override bool IncrementToken()
    {
        if (!input.IncrementToken())
        {
            return false;
        }

        string term = termAtt.Term;

        // Check the exclusion table.
        if (exclusions == null || !exclusions.Contains(term))
        {
            string stemmed = stemmer.Stem(term);

            // If not stemmed, don't waste the time adjusting the token.
            if (stemmed != null && !stemmed.Equals(term))
            {
                termAtt.SetTermBuffer(stemmed);
            }
        }

        return true;
    }

    /*
     * Set an alternative/custom DutchStemmer for this filter.
     * A null argument is ignored and the current stemmer is kept.
     */
    public void SetStemmer(DutchStemmer stemmer)
    {
        if (stemmer != null)
        {
            this.stemmer = stemmer;
        }
    }

    /*
     * Set an alternative exclusion list for this filter.
     * Passing null removes the exclusion list.
     */
    public void SetExclusionTable(ISet<string> exclusiontable)
    {
        exclusions = exclusiontable;
    }

    /*
     * Set dictionary for stemming, this dictionary overrules the algorithm,
     * so you can correct for a particular unwanted word-stem pair.
     * The dictionary is handed to the stemmer currently in use.
     */
    public void SetStemDictionary(IDictionary<string, string> dict)
    {
        if (stemmer != null)
        {
            stemmer.SetStemDictionary(dict);
        }
    }
}
132 }