Class CompoundWordTokenFilterBase
Base class for decomposition token filters.
You must specify the required Lucene.Net.Util.LuceneVersion compatibility when creating a CompoundWordTokenFilterBase:
- As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0 supplementary characters in strings and char arrays provided as compound word dictionaries.
- As of 4.4, CompoundWordTokenFilterBase doesn't update offsets.
Inheritance
System.Object
Lucene.Net.Util.AttributeSource
Lucene.Net.Analysis.TokenStream
Lucene.Net.Analysis.TokenFilter
CompoundWordTokenFilterBase
Implements
System.IDisposable
Inherited Members
Lucene.Net.Analysis.TokenFilter.m_input
Lucene.Net.Analysis.TokenFilter.End()
Lucene.Net.Analysis.TokenStream.Dispose()
Lucene.Net.Util.AttributeSource.GetAttributeFactory()
Lucene.Net.Util.AttributeSource.GetAttributeClassesEnumerator()
Lucene.Net.Util.AttributeSource.GetAttributeImplsEnumerator()
Lucene.Net.Util.AttributeSource.AddAttributeImpl(Lucene.Net.Util.Attribute)
Lucene.Net.Util.AttributeSource.AddAttribute<T>()
Lucene.Net.Util.AttributeSource.HasAttributes
Lucene.Net.Util.AttributeSource.HasAttribute<T>()
Lucene.Net.Util.AttributeSource.GetAttribute<T>()
Lucene.Net.Util.AttributeSource.ClearAttributes()
Lucene.Net.Util.AttributeSource.CaptureState()
Lucene.Net.Util.AttributeSource.RestoreState(Lucene.Net.Util.AttributeSource.State)
Lucene.Net.Util.AttributeSource.GetHashCode()
Lucene.Net.Util.AttributeSource.ReflectWith(Lucene.Net.Util.IAttributeReflector)
Lucene.Net.Util.AttributeSource.CloneAttributes()
Lucene.Net.Util.AttributeSource.CopyTo(Lucene.Net.Util.AttributeSource)
Lucene.Net.Util.AttributeSource.ToString()
System.Object.Equals(System.Object, System.Object)
System.Object.GetType()
System.Object.MemberwiseClone()
System.Object.ReferenceEquals(System.Object, System.Object)
Namespace: Lucene.Net.Analysis.Compound
Assembly: Lucene.Net.Analysis.Common.dll
Syntax
public abstract class CompoundWordTokenFilterBase : TokenFilter, IDisposable
Constructors
| Improve this Doc View Source
CompoundWordTokenFilterBase(LuceneVersion, TokenStream, CharArraySet)
Declaration
protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary)
Parameters
| Type | Name | Description |
|---|---|---|
| Lucene.Net.Util.LuceneVersion | matchVersion | |
| Lucene.Net.Analysis.TokenStream | input | |
| CharArraySet | dictionary | |
CompoundWordTokenFilterBase(LuceneVersion, TokenStream, CharArraySet, Boolean)
Declaration
protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary, bool onlyLongestMatch)
Parameters
| Type | Name | Description |
|---|---|---|
| Lucene.Net.Util.LuceneVersion | matchVersion | |
| Lucene.Net.Analysis.TokenStream | input | |
| CharArraySet | dictionary | |
| System.Boolean | onlyLongestMatch | |
CompoundWordTokenFilterBase(LuceneVersion, TokenStream, CharArraySet, Int32, Int32, Int32, Boolean)
Declaration
protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
Parameters
| Type | Name | Description |
|---|---|---|
| Lucene.Net.Util.LuceneVersion | matchVersion | |
| Lucene.Net.Analysis.TokenStream | input | |
| CharArraySet | dictionary | |
| System.Int32 | minWordSize | |
| System.Int32 | minSubwordSize | |
| System.Int32 | maxSubwordSize | |
| System.Boolean | onlyLongestMatch | |
Fields
| Improve this Doc View Source
DEFAULT_MAX_SUBWORD_SIZE
The default maximum length of subwords that are propagated to the output of this filter.
Declaration
public const int DEFAULT_MAX_SUBWORD_SIZE = 15
Field Value
| Type | Description |
|---|---|
| System.Int32 |
DEFAULT_MIN_SUBWORD_SIZE
The default minimum length of subwords that are propagated to the output of this filter.
Declaration
public const int DEFAULT_MIN_SUBWORD_SIZE = 2
Field Value
| Type | Description |
|---|---|
| System.Int32 |
DEFAULT_MIN_WORD_SIZE
The default minimum length a word must have in order to be decomposed.
Declaration
public const int DEFAULT_MIN_WORD_SIZE = 5
Field Value
| Type | Description |
|---|---|
| System.Int32 |
m_dictionary
Declaration
protected readonly CharArraySet m_dictionary
Field Value
| Type | Description |
|---|---|
| CharArraySet |
m_matchVersion
Declaration
protected readonly LuceneVersion m_matchVersion
Field Value
| Type | Description |
|---|---|
| Lucene.Net.Util.LuceneVersion |
m_maxSubwordSize
Declaration
protected readonly int m_maxSubwordSize
Field Value
| Type | Description |
|---|---|
| System.Int32 |
m_minSubwordSize
Declaration
protected readonly int m_minSubwordSize
Field Value
| Type | Description |
|---|---|
| System.Int32 |
m_minWordSize
Declaration
protected readonly int m_minWordSize
Field Value
| Type | Description |
|---|---|
| System.Int32 |
m_offsetAtt
Declaration
protected readonly IOffsetAttribute m_offsetAtt
Field Value
| Type | Description |
|---|---|
| Lucene.Net.Analysis.TokenAttributes.IOffsetAttribute |
m_onlyLongestMatch
Declaration
protected readonly bool m_onlyLongestMatch
Field Value
| Type | Description |
|---|---|
| System.Boolean |
m_termAtt
Declaration
protected readonly ICharTermAttribute m_termAtt
Field Value
| Type | Description |
|---|---|
| Lucene.Net.Analysis.TokenAttributes.ICharTermAttribute |
m_tokens
Declaration
protected readonly Queue<CompoundWordTokenFilterBase.CompoundToken> m_tokens
Field Value
| Type | Description |
|---|---|
| System.Collections.Generic.Queue<CompoundWordTokenFilterBase.CompoundToken> |
Methods
| Improve this Doc View Source
Decompose()
Decomposes the current m_termAtt and places CompoundWordTokenFilterBase.CompoundToken instances in the m_tokens queue. The original token must not be placed in the queue, as it is automatically passed through this filter.
Declaration
protected abstract void Decompose()
IncrementToken()
Declaration
public override sealed bool IncrementToken()
Returns
| Type | Description |
|---|---|
| System.Boolean |
Overrides
Lucene.Net.Analysis.TokenStream.IncrementToken()
|
Improve this Doc
View Source
Reset()
Declaration
public override void Reset()
Overrides
Lucene.Net.Analysis.TokenFilter.Reset()
Implements
System.IDisposable