Class IndicTokenizer
Simple Tokenizer for text in Indian Languages.
Implements
IDisposable
Inherited Members
TokenStream.Dispose()
AttributeSource.GetAttributeFactory()
AttributeSource.GetAttributeClassesEnumerator()
AttributeSource.GetAttributeImplsEnumerator()
AttributeSource.AddAttributeImpl(Attribute)
AttributeSource.AddAttribute<T>()
AttributeSource.HasAttributes
AttributeSource.HasAttribute<T>()
AttributeSource.GetAttribute<T>()
AttributeSource.ClearAttributes()
AttributeSource.CaptureState()
AttributeSource.RestoreState(AttributeSource.State)
AttributeSource.GetHashCode()
AttributeSource.ReflectWith(IAttributeReflector)
AttributeSource.CloneAttributes()
AttributeSource.CopyTo(AttributeSource)
AttributeSource.ToString()
Namespace: Lucene.Net.Analysis.In
Assembly: Lucene.Net.Analysis.Common.dll
Syntax
[Obsolete("(3.6) Use StandardTokenizer instead.")]
public sealed class IndicTokenizer : CharTokenizer, IDisposable
Constructors
IndicTokenizer(LuceneVersion, AttributeFactory, TextReader)
Simple Tokenizer for text in Indian Languages.
Declaration
public IndicTokenizer(LuceneVersion matchVersion, AttributeSource.AttributeFactory factory, TextReader input)
Parameters
Type | Name | Description |
---|---|---|
LuceneVersion | matchVersion | |
AttributeSource.AttributeFactory | factory | |
TextReader | input | |
IndicTokenizer(LuceneVersion, TextReader)
Simple Tokenizer for text in Indian Languages.
Declaration
public IndicTokenizer(LuceneVersion matchVersion, TextReader input)
Parameters
Type | Name | Description |
---|---|---|
LuceneVersion | matchVersion | |
TextReader | input | |
Methods
IsTokenChar(int)
Returns true if and only if a code point should be included in a token. This tokenizer generates, as tokens, sequences of adjacent code points that satisfy this predicate. Code points for which this returns false define token boundaries and are not included in tokens.
Declaration
protected override bool IsTokenChar(int c)
Parameters
Type | Name | Description |
---|---|---|
int | c | |
Returns
Type | Description |
---|---|
bool | |