Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
ArabicLetterTokenizer.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System.IO;
19 using System.Collections;
20 
21 using Lucene.Net.Analysis;
22 using Lucene.Net.Util;
23 
namespace Lucene.Net.Analysis.AR
{
    /// <summary>
    /// Tokenizer that breaks text into runs of letters and diacritics.
    /// <para>
    /// The problem with the standard Letter tokenizer is that it fails on diacritics.
    /// Handling similar to this is necessary for Indic Scripts, Hebrew, Thaana, etc.
    /// </para>
    /// </summary>
    // NOTE(review): the class declaration line was absent from the extracted text.
    // It is reconstructed here from the constructor names, the base(...) calls and
    // the cref to LetterTokenizer.IsTokenChar below - confirm against upstream.
    public class ArabicLetterTokenizer : LetterTokenizer
    {
        /// <summary>
        /// Creates a tokenizer that reads from the given reader.
        /// </summary>
        /// <param name="in">Reader handed unchanged to the base constructor.</param>
        public ArabicLetterTokenizer(TextReader @in)
            : base(@in)
        {
        }

        /// <summary>
        /// Creates a tokenizer that reads from the given reader and is built on the
        /// supplied attribute source.
        /// </summary>
        /// <param name="source">Attribute source handed unchanged to the base constructor.</param>
        /// <param name="in">Reader handed unchanged to the base constructor.</param>
        public ArabicLetterTokenizer(AttributeSource source, TextReader @in)
            : base(source, @in)
        {
        }

        /// <summary>
        /// Creates a tokenizer that reads from the given reader and is built on the
        /// supplied attribute factory.
        /// </summary>
        /// <param name="factory">Attribute factory handed unchanged to the base constructor.</param>
        /// <param name="in">Reader handed unchanged to the base constructor.</param>
        public ArabicLetterTokenizer(AttributeFactory factory, TextReader @in)
            : base(factory, @in)
        {
        }

        /// <summary>
        /// Allows for Letter category or NonspacingMark category.
        /// </summary>
        /// <param name="c">Character to classify.</param>
        /// <returns>
        /// <c>true</c> when the base implementation accepts <paramref name="c"/>, or when
        /// its Unicode category is <see cref="System.Globalization.UnicodeCategory.NonSpacingMark"/>;
        /// otherwise <c>false</c>.
        /// </returns>
        /// <seealso cref="LetterTokenizer.IsTokenChar(char)"/>
        protected override bool IsTokenChar(char c)
        {
            // Non-spacing marks are accepted in addition to whatever the base class
            // accepts, so that diacritics stay inside the token.
            return base.IsTokenChar(c)
                || char.GetUnicodeCategory(c) == System.Globalization.UnicodeCategory.NonSpacingMark;
        }
    }
}