Class CompoundWordTokenFilterBase
Base class for decomposition token filters.
You must specify the required LuceneVersion compatibility when creating CompoundWordTokenFilterBase:
- As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0 supplementary characters in strings and char arrays provided as compound word dictionaries.
- As of 4.4, CompoundWordTokenFilterBase doesn't update offsets.
Inheritance
System.Object
Lucene.Net.Util.AttributeSource
Lucene.Net.Analysis.TokenStream
Lucene.Net.Analysis.TokenFilter
CompoundWordTokenFilterBase
Implements
System.IDisposable
Inherited Members
Lucene.Net.Analysis.TokenFilter.m_input
Lucene.Net.Analysis.TokenFilter.End()
Lucene.Net.Analysis.TokenStream.Dispose()
Lucene.Net.Util.AttributeSource.GetAttributeFactory()
Lucene.Net.Util.AttributeSource.GetAttributeClassesEnumerator()
Lucene.Net.Util.AttributeSource.GetAttributeImplsEnumerator()
Lucene.Net.Util.AttributeSource.AddAttributeImpl(Lucene.Net.Util.Attribute)
Lucene.Net.Util.AttributeSource.AddAttribute<T>()
Lucene.Net.Util.AttributeSource.HasAttributes
Lucene.Net.Util.AttributeSource.HasAttribute<T>()
Lucene.Net.Util.AttributeSource.GetAttribute<T>()
Lucene.Net.Util.AttributeSource.ClearAttributes()
Lucene.Net.Util.AttributeSource.CaptureState()
Lucene.Net.Util.AttributeSource.RestoreState(Lucene.Net.Util.AttributeSource.State)
Lucene.Net.Util.AttributeSource.GetHashCode()
Lucene.Net.Util.AttributeSource.ReflectWith(Lucene.Net.Util.IAttributeReflector)
Lucene.Net.Util.AttributeSource.CloneAttributes()
Lucene.Net.Util.AttributeSource.CopyTo(Lucene.Net.Util.AttributeSource)
Lucene.Net.Util.AttributeSource.ToString()
System.Object.Equals(System.Object, System.Object)
System.Object.GetType()
System.Object.MemberwiseClone()
System.Object.ReferenceEquals(System.Object, System.Object)
Namespace: Lucene.Net.Analysis.Compound
Assembly: Lucene.Net.Analysis.Common.dll
Syntax
public abstract class CompoundWordTokenFilterBase : TokenFilter, IDisposable
Constructors
| Improve this Doc | View Source
CompoundWordTokenFilterBase(LuceneVersion, TokenStream, CharArraySet)
Declaration
protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary)
Parameters
| Type | Name | Description |
|---|---|---|
| Lucene.Net.Util.LuceneVersion | matchVersion | |
| Lucene.Net.Analysis.TokenStream | input | |
| CharArraySet | dictionary | |
CompoundWordTokenFilterBase(LuceneVersion, TokenStream, CharArraySet, Boolean)
Declaration
protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary, bool onlyLongestMatch)
Parameters
| Type | Name | Description |
|---|---|---|
| Lucene.Net.Util.LuceneVersion | matchVersion | |
| Lucene.Net.Analysis.TokenStream | input | |
| CharArraySet | dictionary | |
| System.Boolean | onlyLongestMatch | |
CompoundWordTokenFilterBase(LuceneVersion, TokenStream, CharArraySet, Int32, Int32, Int32, Boolean)
Declaration
protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
Parameters
| Type | Name | Description |
|---|---|---|
| Lucene.Net.Util.LuceneVersion | matchVersion | |
| Lucene.Net.Analysis.TokenStream | input | |
| CharArraySet | dictionary | |
| System.Int32 | minWordSize | |
| System.Int32 | minSubwordSize | |
| System.Int32 | maxSubwordSize | |
| System.Boolean | onlyLongestMatch | |
Fields
| Improve this Doc | View Source
DEFAULT_MAX_SUBWORD_SIZE
The default for maximal length of subwords that get propagated to the output of this filter
Declaration
public const int DEFAULT_MAX_SUBWORD_SIZE = 15
Field Value
| Type | Description |
|---|---|
| System.Int32 | |
DEFAULT_MIN_SUBWORD_SIZE
The default for minimal length of subwords that get propagated to the output of this filter
Declaration
public const int DEFAULT_MIN_SUBWORD_SIZE = 2
Field Value
| Type | Description |
|---|---|
| System.Int32 | |
DEFAULT_MIN_WORD_SIZE
The default for minimal word length that gets decomposed
Declaration
public const int DEFAULT_MIN_WORD_SIZE = 5
Field Value
| Type | Description |
|---|---|
| System.Int32 | |
m_dictionary
Declaration
protected readonly CharArraySet m_dictionary
Field Value
| Type | Description |
|---|---|
| CharArraySet | |
m_matchVersion
Declaration
protected readonly LuceneVersion m_matchVersion
Field Value
| Type | Description |
|---|---|
| Lucene.Net.Util.LuceneVersion | |
m_maxSubwordSize
Declaration
protected readonly int m_maxSubwordSize
Field Value
| Type | Description |
|---|---|
| System.Int32 | |
m_minSubwordSize
Declaration
protected readonly int m_minSubwordSize
Field Value
| Type | Description |
|---|---|
| System.Int32 | |
m_minWordSize
Declaration
protected readonly int m_minWordSize
Field Value
| Type | Description |
|---|---|
| System.Int32 | |
m_offsetAtt
Declaration
protected readonly IOffsetAttribute m_offsetAtt
Field Value
| Type | Description |
|---|---|
| Lucene.Net.Analysis.TokenAttributes.IOffsetAttribute | |
m_onlyLongestMatch
Declaration
protected readonly bool m_onlyLongestMatch
Field Value
| Type | Description |
|---|---|
| System.Boolean | |
m_termAtt
Declaration
protected readonly ICharTermAttribute m_termAtt
Field Value
| Type | Description |
|---|---|
| Lucene.Net.Analysis.TokenAttributes.ICharTermAttribute | |
m_tokens
Declaration
protected readonly Queue<CompoundWordTokenFilterBase.CompoundToken> m_tokens
Field Value
| Type | Description |
|---|---|
| System.Collections.Generic.Queue<CompoundWordTokenFilterBase.CompoundToken> | |
Methods
| Improve this Doc | View Source
Decompose()
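Decomposes the current m_termAtt and places CompoundWordTokenFilterBase.CompoundToken instances in the m_tokens list. The original token may not be placed in the list, as it is automatically passed through this filter.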
Declaration
protected abstract void Decompose()
IncrementToken()
Declaration
public override sealed bool IncrementToken()
Returns
| Type | Description |
|---|---|
| System.Boolean | |
Overrides
Lucene.Net.Analysis.TokenStream.IncrementToken()
|
Improve this Doc
View Source
Reset()
Declaration
public override void Reset()
Overrides
Lucene.Net.Analysis.TokenFilter.Reset()
Implements
System.IDisposable