Class CompoundWordTokenFilterBase
Base class for decomposition token filters.
You must specify the required Lucene.Net.Util.LuceneVersion compatibility when creating CompoundWordTokenFilterBase:
- As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0 supplementary characters in strings and char arrays provided as compound word dictionaries.
- As of 4.4, CompoundWordTokenFilterBase doesn't update offsets.
Inheritance
System.Object
Lucene.Net.Util.AttributeSource
Lucene.Net.Analysis.TokenStream
Lucene.Net.Analysis.TokenFilter
CompoundWordTokenFilterBase
Implements
System.IDisposable
Inherited Members
Lucene.Net.Analysis.TokenFilter.m_input
Lucene.Net.Analysis.TokenFilter.End()
Lucene.Net.Analysis.TokenStream.Dispose()
Lucene.Net.Util.AttributeSource.GetAttributeFactory()
Lucene.Net.Util.AttributeSource.GetAttributeClassesEnumerator()
Lucene.Net.Util.AttributeSource.GetAttributeImplsEnumerator()
Lucene.Net.Util.AttributeSource.AddAttributeImpl(Lucene.Net.Util.Attribute)
Lucene.Net.Util.AttributeSource.AddAttribute<T>()
Lucene.Net.Util.AttributeSource.HasAttributes
Lucene.Net.Util.AttributeSource.HasAttribute<T>()
Lucene.Net.Util.AttributeSource.GetAttribute<T>()
Lucene.Net.Util.AttributeSource.ClearAttributes()
Lucene.Net.Util.AttributeSource.CaptureState()
Lucene.Net.Util.AttributeSource.RestoreState(Lucene.Net.Util.AttributeSource.State)
Lucene.Net.Util.AttributeSource.GetHashCode()
Lucene.Net.Util.AttributeSource.ReflectWith(Lucene.Net.Util.IAttributeReflector)
Lucene.Net.Util.AttributeSource.CloneAttributes()
Lucene.Net.Util.AttributeSource.CopyTo(Lucene.Net.Util.AttributeSource)
Lucene.Net.Util.AttributeSource.ToString()
System.Object.Equals(System.Object, System.Object)
System.Object.GetType()
System.Object.MemberwiseClone()
System.Object.ReferenceEquals(System.Object, System.Object)
Namespace: Lucene.Net.Analysis.Compound
Assembly: Lucene.Net.Analysis.Common.dll
Syntax
public abstract class CompoundWordTokenFilterBase : TokenFilter, IDisposable
Constructors
|
Improve this Doc
View Source
CompoundWordTokenFilterBase(LuceneVersion, TokenStream, CharArraySet)
Declaration
protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary)
Parameters
Type | Name | Description |
---|---|---|
Lucene.Net.Util.LuceneVersion | matchVersion | |
Lucene.Net.Analysis.TokenStream | input | |
CharArraySet | dictionary | |
CompoundWordTokenFilterBase(LuceneVersion, TokenStream, CharArraySet, Boolean)
Declaration
protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary, bool onlyLongestMatch)
Parameters
Type | Name | Description |
---|---|---|
Lucene.Net.Util.LuceneVersion | matchVersion | |
Lucene.Net.Analysis.TokenStream | input | |
CharArraySet | dictionary | |
System.Boolean | onlyLongestMatch | |
CompoundWordTokenFilterBase(LuceneVersion, TokenStream, CharArraySet, Int32, Int32, Int32, Boolean)
Declaration
protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
Parameters
Type | Name | Description |
---|---|---|
Lucene.Net.Util.LuceneVersion | matchVersion | |
Lucene.Net.Analysis.TokenStream | input | |
CharArraySet | dictionary | |
System.Int32 | minWordSize | |
System.Int32 | minSubwordSize | |
System.Int32 | maxSubwordSize | |
System.Boolean | onlyLongestMatch | |
Fields
|
Improve this Doc
View Source
DEFAULT_MAX_SUBWORD_SIZE
The default maximum length of subwords that are propagated to the output of this filter.
Declaration
public const int DEFAULT_MAX_SUBWORD_SIZE = 15
Field Value
Type | Description |
---|---|
System.Int32 |
DEFAULT_MIN_SUBWORD_SIZE
The default minimum length of subwords that are propagated to the output of this filter.
Declaration
public const int DEFAULT_MIN_SUBWORD_SIZE = 2
Field Value
Type | Description |
---|---|
System.Int32 |
DEFAULT_MIN_WORD_SIZE
The default minimum length a word must have in order to be decomposed.
Declaration
public const int DEFAULT_MIN_WORD_SIZE = 5
Field Value
Type | Description |
---|---|
System.Int32 |
m_dictionary
Declaration
protected readonly CharArraySet m_dictionary
Field Value
Type | Description |
---|---|
CharArraySet |
m_matchVersion
Declaration
protected readonly LuceneVersion m_matchVersion
Field Value
Type | Description |
---|---|
Lucene.Net.Util.LuceneVersion |
m_maxSubwordSize
Declaration
protected readonly int m_maxSubwordSize
Field Value
Type | Description |
---|---|
System.Int32 |
m_minSubwordSize
Declaration
protected readonly int m_minSubwordSize
Field Value
Type | Description |
---|---|
System.Int32 |
m_minWordSize
Declaration
protected readonly int m_minWordSize
Field Value
Type | Description |
---|---|
System.Int32 |
m_offsetAtt
Declaration
protected readonly IOffsetAttribute m_offsetAtt
Field Value
Type | Description |
---|---|
Lucene.Net.Analysis.TokenAttributes.IOffsetAttribute |
m_onlyLongestMatch
Declaration
protected readonly bool m_onlyLongestMatch
Field Value
Type | Description |
---|---|
System.Boolean |
m_termAtt
Declaration
protected readonly ICharTermAttribute m_termAtt
Field Value
Type | Description |
---|---|
Lucene.Net.Analysis.TokenAttributes.ICharTermAttribute |
m_tokens
Declaration
protected readonly Queue<CompoundWordTokenFilterBase.CompoundToken> m_tokens
Field Value
Type | Description |
---|---|
System.Collections.Generic.Queue<CompoundWordTokenFilterBase.CompoundToken> |
Methods
|
Improve this Doc
View Source
Decompose()
Decomposes the current m_termAtt and places CompoundWordTokenFilterBase.CompoundToken instances in the m_tokens queue. The original token must not be placed in the queue, because it is automatically passed through this filter.
Declaration
protected abstract void Decompose()
IncrementToken()
Declaration
public sealed override bool IncrementToken()
Returns
Type | Description |
---|---|
System.Boolean |
Overrides
Lucene.Net.Analysis.TokenStream.IncrementToken()
|
Improve this Doc
View Source
Reset()
Declaration
public override void Reset()
Overrides
Lucene.Net.Analysis.TokenFilter.Reset()
Implements
System.IDisposable