Class WikipediaTokenizer
Inheritance
System.Object
Lucene.Net.Util.AttributeSource
Lucene.Net.Analysis.TokenStream
Lucene.Net.Analysis.Tokenizer
WikipediaTokenizer
Implements
System.IDisposable
Inherited Members
Lucene.Net.Analysis.Tokenizer.m_input
Lucene.Net.Analysis.TokenStream.Dispose()
Lucene.Net.Util.AttributeSource.GetAttributeFactory()
Lucene.Net.Util.AttributeSource.GetAttributeClassesEnumerator()
Lucene.Net.Util.AttributeSource.GetAttributeImplsEnumerator()
Lucene.Net.Util.AttributeSource.AddAttributeImpl(Lucene.Net.Util.Attribute)
Lucene.Net.Util.AttributeSource.AddAttribute<T>()
Lucene.Net.Util.AttributeSource.HasAttributes
Lucene.Net.Util.AttributeSource.HasAttribute<T>()
Lucene.Net.Util.AttributeSource.GetAttribute<T>()
Lucene.Net.Util.AttributeSource.ClearAttributes()
Lucene.Net.Util.AttributeSource.CaptureState()
Lucene.Net.Util.AttributeSource.RestoreState(Lucene.Net.Util.AttributeSource.State)
Lucene.Net.Util.AttributeSource.GetHashCode()
Lucene.Net.Util.AttributeSource.ReflectWith(Lucene.Net.Util.IAttributeReflector)
Lucene.Net.Util.AttributeSource.CloneAttributes()
Lucene.Net.Util.AttributeSource.CopyTo(Lucene.Net.Util.AttributeSource)
Lucene.Net.Util.AttributeSource.ToString()
System.Object.Equals(System.Object, System.Object)
System.Object.GetType()
System.Object.MemberwiseClone()
System.Object.ReferenceEquals(System.Object, System.Object)
Assembly: Lucene.Net.Analysis.Common.dll
Syntax
public sealed class WikipediaTokenizer : Tokenizer, IDisposable
Constructors
|
Improve this Doc
View Source
WikipediaTokenizer(AttributeSource.AttributeFactory, TextReader, Int32, ICollection<String>)
Creates a new instance of the WikipediaTokenizer. Attaches the
input
to the newly created JFlex scanner. Uses the given Lucene.Net.Util.AttributeSource.AttributeFactory.
Declaration
public WikipediaTokenizer(AttributeSource.AttributeFactory factory, TextReader input, int tokenOutput, ICollection<string> untokenizedTypes)
Parameters
Type |
Name |
Description |
Lucene.Net.Util.AttributeSource.AttributeFactory |
factory |
The Lucene.Net.Util.AttributeSource.AttributeFactory
|
System.IO.TextReader |
input |
The input
|
System.Int32 |
tokenOutput |
One of TOKENS_ONLY, UNTOKENIZED_ONLY, BOTH
|
System.Collections.Generic.ICollection<System.String> |
untokenizedTypes |
Untokenized types
|
|
Improve this Doc
View Source
WikipediaTokenizer(TextReader)
Creates a new instance of the WikipediaTokenizer. Attaches the
input
to a newly created JFlex scanner.
Declaration
public WikipediaTokenizer(TextReader input)
Parameters
Type |
Name |
Description |
System.IO.TextReader |
input |
The input System.IO.TextReader
|
|
Improve this Doc
View Source
WikipediaTokenizer(TextReader, Int32, ICollection<String>)
Creates a new instance of the WikipediaTokenizer. Attaches the
input
to the newly created JFlex scanner.
Declaration
public WikipediaTokenizer(TextReader input, int tokenOutput, ICollection<string> untokenizedTypes)
Parameters
Type |
Name |
Description |
System.IO.TextReader |
input |
The input
|
System.Int32 |
tokenOutput |
One of TOKENS_ONLY, UNTOKENIZED_ONLY, BOTH
|
System.Collections.Generic.ICollection<System.String> |
untokenizedTypes |
Untokenized types
|
Fields
|
Improve this Doc
View Source
ACRONYM_ID
Declaration
public const int ACRONYM_ID = 2
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
ALPHANUM_ID
Declaration
public const int ALPHANUM_ID = 0
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
APOSTROPHE_ID
Declaration
public const int APOSTROPHE_ID = 1
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
BOLD
Declaration
public const string BOLD = "b"
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
BOLD_ID
Declaration
public const int BOLD_ID = 12
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
BOLD_ITALICS
Declaration
public const string BOLD_ITALICS = "bi"
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
BOLD_ITALICS_ID
Declaration
public const int BOLD_ITALICS_ID = 14
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
BOTH
Output both the untokenized token and the splits
Declaration
public const int BOTH = 2
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
CATEGORY
Declaration
public const string CATEGORY = "c"
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
CATEGORY_ID
Declaration
public const int CATEGORY_ID = 11
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
CITATION
Declaration
public const string CITATION = "ci"
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
CITATION_ID
Declaration
public const int CITATION_ID = 10
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
CJ_ID
Declaration
public const int CJ_ID = 7
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
COMPANY_ID
Declaration
public const int COMPANY_ID = 3
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
EMAIL_ID
Declaration
public const int EMAIL_ID = 4
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
EXTERNAL_LINK
Declaration
public const string EXTERNAL_LINK = "el"
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
EXTERNAL_LINK_ID
Declaration
public const int EXTERNAL_LINK_ID = 9
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
EXTERNAL_LINK_URL
Declaration
public const string EXTERNAL_LINK_URL = "elu"
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
EXTERNAL_LINK_URL_ID
Declaration
public const int EXTERNAL_LINK_URL_ID = 17
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
HEADING
Declaration
public const string HEADING = "h"
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
HEADING_ID
Declaration
public const int HEADING_ID = 15
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
HOST_ID
Declaration
public const int HOST_ID = 5
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
INTERNAL_LINK
Declaration
public const string INTERNAL_LINK = "il"
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
INTERNAL_LINK_ID
Declaration
public const int INTERNAL_LINK_ID = 8
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
ITALICS
Declaration
public const string ITALICS = "i"
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
ITALICS_ID
Declaration
public const int ITALICS_ID = 13
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
NUM_ID
Declaration
public const int NUM_ID = 6
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
SUB_HEADING
Declaration
public const string SUB_HEADING = "sh"
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
SUB_HEADING_ID
Declaration
public const int SUB_HEADING_ID = 16
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
TOKEN_TYPES
String token types that correspond to token type int constants
Declaration
public static readonly string[] TOKEN_TYPES
Field Value
Type |
Description |
System.String[] |
|
|
Improve this Doc
View Source
TOKENS_ONLY
Only output tokens
Declaration
public const int TOKENS_ONLY = 0
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
UNTOKENIZED_ONLY
Only output untokenized tokens, which are tokens that would normally be split into several tokens
Declaration
public const int UNTOKENIZED_ONLY = 1
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
UNTOKENIZED_TOKEN_FLAG
This flag indicates that the produced "Token" would have been split into multiple tokens if TOKENS_ONLY had been used.
Declaration
public const int UNTOKENIZED_TOKEN_FLAG = 1
Field Value
Type |
Description |
System.Int32 |
|
Methods
|
Improve this Doc
View Source
Dispose(Boolean)
Declaration
protected override void Dispose(bool disposing)
Parameters
Type |
Name |
Description |
System.Boolean |
disposing |
|
Overrides
|
Improve this Doc
View Source
End()
Declaration
public override void End()
Overrides
Lucene.Net.Analysis.TokenStream.End()
|
Improve this Doc
View Source
IncrementToken()
Lucene.Net.Analysis.TokenStream.IncrementToken()
Declaration
public sealed override bool IncrementToken()
Returns
Type |
Description |
System.Boolean |
|
Overrides
Lucene.Net.Analysis.TokenStream.IncrementToken()
|
Improve this Doc
View Source
Reset()
Lucene.Net.Analysis.TokenStream.Reset()
Declaration
public override void Reset()
Overrides
Lucene.Net.Analysis.Tokenizer.Reset()
Implements
System.IDisposable