Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
FrenchStemFilter.cs
Go to the documentation of this file.
1 /*
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements. See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership. The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License. You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied. See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20 */
21 
22 using System;
23 using System.Collections.Generic;
24 using System.IO;
25 using System.Text;
26 using System.Collections;
27 
28 using Lucene.Net.Analysis;
29 using Lucene.Net.Analysis.Tokenattributes;
30 
namespace Lucene.Net.Analysis.Fr
{
    /// <summary>
    /// A <see cref="TokenFilter"/> that stems French words.
    /// <para>
    /// It supports a table of words that should not be stemmed at all. The stemmer
    /// in use can be replaced at runtime after the filter object is created (as long
    /// as it is a <see cref="FrenchStemmer"/>).
    /// </para>
    /// <para>
    /// NOTE: This stemmer does not implement the Snowball algorithm correctly,
    /// especially involving case problems. It is recommended that you consider using
    /// the "French" stemmer in the snowball package instead. This stemmer will likely
    /// be deprecated in a future release.
    /// </para>
    /// </summary>
    public sealed class FrenchStemFilter : TokenFilter
    {
        /// <summary>
        /// Stemmer applied to every term that is not excluded. Initialised in the
        /// constructor and only ever replaced by a non-null instance.
        /// </summary>
        private FrenchStemmer stemmer;

        /// <summary>
        /// Terms that must pass through unstemmed; <c>null</c> means nothing is excluded.
        /// </summary>
        private ISet<string> exclusions;

        /// <summary>
        /// Term attribute of this stream, used to read and rewrite the current term.
        /// </summary>
        private readonly ITermAttribute termAtt;

        /// <summary>
        /// Creates a filter over <paramref name="_in"/> with a default
        /// <see cref="FrenchStemmer"/> and no exclusion table.
        /// </summary>
        /// <param name="_in">The token stream whose terms are stemmed.</param>
        public FrenchStemFilter(TokenStream _in)
            : base(_in)
        {
            stemmer = new FrenchStemmer();
            termAtt = AddAttribute<ITermAttribute>();
        }

        /// <summary>
        /// Creates a filter over <paramref name="_in"/> that leaves the terms in
        /// <paramref name="exclusiontable"/> unstemmed.
        /// </summary>
        /// <param name="_in">The token stream whose terms are stemmed.</param>
        /// <param name="exclusiontable">
        /// Terms that must not be stemmed; <c>null</c> disables exclusion.
        /// </param>
        public FrenchStemFilter(TokenStream _in, ISet<string> exclusiontable)
            : this(_in)
        {
            exclusions = exclusiontable;
        }

        /// <summary>
        /// Advances the wrapped stream by one token and stems its term unless the
        /// term is listed in the exclusion table.
        /// </summary>
        /// <returns>
        /// <c>true</c> for the next token in the stream, or <c>false</c> at end of stream.
        /// </returns>
        public override bool IncrementToken()
        {
            if (!input.IncrementToken())
            {
                return false;
            }

            string term = termAtt.Term;

            // Terms found in the exclusion table pass through untouched.
            if (exclusions == null || !exclusions.Contains(term))
            {
                string stemmed = stemmer.Stem(term);

                // If not stemmed, don't waste the time adjusting the token.
                if (stemmed != null && !stemmed.Equals(term))
                {
                    termAtt.SetTermBuffer(stemmed);
                }
            }

            return true;
        }

        /// <summary>
        /// Sets an alternative/custom <see cref="FrenchStemmer"/> for this filter.
        /// </summary>
        /// <param name="stemmer">
        /// The stemmer to use from now on; a <c>null</c> value is ignored and the
        /// current stemmer is kept.
        /// </param>
        public void SetStemmer(FrenchStemmer stemmer)
        {
            if (stemmer != null)
            {
                this.stemmer = stemmer;
            }
        }

        /// <summary>
        /// Sets an alternative exclusion list for this filter, built from the keys of
        /// <paramref name="exclusiontable"/>.
        /// </summary>
        /// <param name="exclusiontable">
        /// Dictionary whose keys are the terms that must not be stemmed. Passing
        /// <c>null</c> clears the exclusion list, matching how the constructor treats
        /// a <c>null</c> exclusion set.
        /// </param>
        public void SetExclusionTable(IDictionary<string, string> exclusiontable)
        {
            if (exclusiontable == null)
            {
                // No table supplied: fall back to "nothing excluded" instead of
                // failing on the Keys dereference below.
                exclusions = null;
            }
            else
            {
                exclusions = Support.Compatibility.SetFactory.CreateHashSet(exclusiontable.Keys);
            }
        }
    }
}