Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
LetterTokenizer.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using AttributeSource = Lucene.Net.Util.AttributeSource;
19 
20 namespace Lucene.Net.Analysis
21 {
22 
23  /// <summary>A LetterTokenizer is a tokenizer that divides text at non-letters. That
24  /// is to say, it defines tokens as maximal strings of adjacent letters, as defined
25  /// by the <see cref="System.Char.IsLetter(char)" /> predicate.
26  /// Note: this does a decent job for most European languages, but does a terrible
27  /// job for some Asian languages, where words are not separated by spaces.
28  /// </summary>
29 
30  public class LetterTokenizer:CharTokenizer
31  {
32  /// <summary>Construct a new LetterTokenizer over the given reader, which is passed unchanged to the base constructor. </summary>
33  public LetterTokenizer(System.IO.TextReader @in):base(@in)
34  {
35  }
36 
37  /// <summary>Construct a new LetterTokenizer using a given <see cref="AttributeSource" />; the source and the reader are both forwarded to the base constructor. </summary>
38  public LetterTokenizer(AttributeSource source, System.IO.TextReader @in)
39  : base(source, @in)
40  {
41  }
42 
43  /// <summary>Construct a new LetterTokenizer using a given <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />; the factory and the reader are both forwarded to the base constructor. </summary>
44  public LetterTokenizer(AttributeFactory factory, System.IO.TextReader @in)
45  : base(factory, @in)
46  {
47  }
48 
49  /// <summary>Decides whether <paramref name="c" /> belongs to a token: returns the result of
50  /// <see cref="char.IsLetter(char)" /> for that character, so only letters are collected.
51  /// </summary>
52  protected internal override bool IsTokenChar(char c)
53  {
54  return System.Char.IsLetter(c);
55  }
56  }
57 }