Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
StandardFilter.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using Lucene.Net.Analysis.Tokenattributes;
20 using Token = Lucene.Net.Analysis.Token;
21 using TokenFilter = Lucene.Net.Analysis.TokenFilter;
22 using TokenStream = Lucene.Net.Analysis.TokenStream;
23 
namespace Lucene.Net.Analysis.Standard
{
    /// <summary>Normalizes tokens extracted with <see cref="StandardTokenizer" />.
    /// <p/>Strips a trailing <tt>'s</tt> from apostrophe-typed tokens and removes
    /// dots from acronym-typed tokens; every other token is passed through unchanged.
    /// </summary>
    public sealed class StandardFilter : TokenFilter
    {
        // Token type labels this filter reacts to. They are resolved once, in the
        // static constructor, from the tokenizer's own type table so that both
        // sides always agree on the label text.
        private static readonly System.String APOSTROPHE_TYPE;
        private static readonly System.String ACRONYM_TYPE;

        // Attributes of the current token: its type label and its term text.
        private readonly ITypeAttribute typeAtt;
        private readonly ITermAttribute termAtt;

        /// <summary>Resolves the type labels compared against in <see cref="IncrementToken" />.</summary>
        static StandardFilter()
        {
            // Without these assignments both labels stay null and no token type
            // ever matches, which silently turns the filter into a no-op.
            APOSTROPHE_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.APOSTROPHE];
            ACRONYM_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ACRONYM];
        }

        /// <summary>Construct filtering <i>in</i>. </summary>
        /// <param name="in_Renamed">The token stream to normalize; forwarded to the base filter.</param>
        public StandardFilter(TokenStream in_Renamed) : base(in_Renamed)
        {
            termAtt = AddAttribute<ITermAttribute>();
            typeAtt = AddAttribute<ITypeAttribute>();
        }

        /// <summary>Advances the wrapped stream to its next token and normalizes it in place.
        /// <p/>Removes <tt>'s</tt> (or <tt>'S</tt>) from the end of apostrophe-typed words.
        /// <p/>Removes dots from acronym-typed tokens.
        /// </summary>
        /// <returns><c>true</c> if a token is available; <c>false</c> once the
        /// wrapped stream reports that it has no more tokens.</returns>
        public override bool IncrementToken()
        {
            if (!input.IncrementToken())
            {
                return false;
            }

            char[] buffer = termAtt.TermBuffer();
            int bufferLength = termAtt.TermLength();
            System.String type = typeAtt.Type;

            // Ordinal value comparison: still matches the tokenizer's own label
            // instances, and also any equal label that is a different string object.
            if (System.String.Equals(type, APOSTROPHE_TYPE, System.StringComparison.Ordinal))
            {
                bool endsWithApostropheS =
                    bufferLength >= 2
                    && buffer[bufferLength - 2] == '\''
                    && (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S');

                if (endsWithApostropheS)
                {
                    // Strip last 2 characters off
                    termAtt.SetTermLength(bufferLength - 2);
                }
            }
            else if (System.String.Equals(type, ACRONYM_TYPE, System.StringComparison.Ordinal))
            {
                // Remove dots by compacting the term buffer in place; 'upto' is
                // the write position and ends up as the new term length.
                int upto = 0;
                for (int i = 0; i < bufferLength; i++)
                {
                    char c = buffer[i];
                    if (c != '.')
                    {
                        buffer[upto++] = c;
                    }
                }
                termAtt.SetTermLength(upto);
            }

            return true;
        }
    }
}