Class WikipediaTokenizer
Inheritance
System.Object
WikipediaTokenizer
Assembly: Lucene.Net.Analysis.Common.dll
Syntax
public sealed class WikipediaTokenizer : Tokenizer, IDisposable
Constructors
|
Improve this Doc
View Source
WikipediaTokenizer(AttributeSource.AttributeFactory, TextReader, Int32, ICollection<String>)
Declaration
public WikipediaTokenizer(AttributeSource.AttributeFactory factory, TextReader input, int tokenOutput, ICollection<string> untokenizedTypes)
Parameters
|
Improve this Doc
View Source
WikipediaTokenizer(TextReader)
Creates a new instance of the WikipediaTokenizer. Attaches the
input
to a newly created JFlex scanner.
Declaration
public WikipediaTokenizer(TextReader input)
Parameters
Type |
Name |
Description |
TextReader |
input |
The input
|
|
Improve this Doc
View Source
WikipediaTokenizer(TextReader, Int32, ICollection<String>)
Creates a new instance of the WikipediaTokenizer. Attaches the
input
to the newly created JFlex scanner.
Declaration
public WikipediaTokenizer(TextReader input, int tokenOutput, ICollection<string> untokenizedTypes)
Parameters
Type |
Name |
Description |
TextReader |
input |
The input
|
System.Int32 |
tokenOutput |
One of TOKENS_ONLY, UNTOKENIZED_ONLY, or BOTH
|
ICollection<System.String> |
untokenizedTypes |
Untokenized types
|
Fields
|
Improve this Doc
View Source
ACRONYM_ID
Declaration
public const int ACRONYM_ID = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
ALPHANUM_ID
Declaration
public const int ALPHANUM_ID = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
APOSTROPHE_ID
Declaration
public const int APOSTROPHE_ID = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
BOLD
Declaration
public const string BOLD = null
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
BOLD_ID
Declaration
public const int BOLD_ID = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
BOLD_ITALICS
Declaration
public const string BOLD_ITALICS = null
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
BOLD_ITALICS_ID
Declaration
public const int BOLD_ITALICS_ID = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
BOTH
Output both the untokenized token and the splits
Declaration
public const int BOTH = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
CATEGORY
Declaration
public const string CATEGORY = null
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
CATEGORY_ID
Declaration
public const int CATEGORY_ID = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
CITATION
Declaration
public const string CITATION = null
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
CITATION_ID
Declaration
public const int CITATION_ID = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
CJ_ID
Declaration
public const int CJ_ID = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
COMPANY_ID
Declaration
public const int COMPANY_ID = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
EMAIL_ID
Declaration
public const int EMAIL_ID = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
EXTERNAL_LINK
Declaration
public const string EXTERNAL_LINK = null
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
EXTERNAL_LINK_ID
Declaration
public const int EXTERNAL_LINK_ID = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
EXTERNAL_LINK_URL
Declaration
public const string EXTERNAL_LINK_URL = null
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
EXTERNAL_LINK_URL_ID
Declaration
public const int EXTERNAL_LINK_URL_ID = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
HEADING
Declaration
public const string HEADING = null
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
HEADING_ID
Declaration
public const int HEADING_ID = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
HOST_ID
Declaration
public const int HOST_ID = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
INTERNAL_LINK
Declaration
public const string INTERNAL_LINK = null
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
INTERNAL_LINK_ID
Declaration
public const int INTERNAL_LINK_ID = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
ITALICS
Declaration
public const string ITALICS = null
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
ITALICS_ID
Declaration
public const int ITALICS_ID = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
NUM_ID
Declaration
public const int NUM_ID = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
SUB_HEADING
Declaration
public const string SUB_HEADING = null
Field Value
Type |
Description |
System.String |
|
|
Improve this Doc
View Source
SUB_HEADING_ID
Declaration
public const int SUB_HEADING_ID = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
TOKEN_TYPES
String token types that correspond to token type int constants
Declaration
public static readonly string[] TOKEN_TYPES
Field Value
Type |
Description |
System.String[] |
|
|
Improve this Doc
View Source
TOKENS_ONLY
Declaration
public const int TOKENS_ONLY = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
UNTOKENIZED_ONLY
Only output untokenized tokens, which are tokens that would normally be split into several tokens
Declaration
public const int UNTOKENIZED_ONLY = null
Field Value
Type |
Description |
System.Int32 |
|
|
Improve this Doc
View Source
UNTOKENIZED_TOKEN_FLAG
This flag is used to indicate that the produced "Token" would, if TOKENS_ONLY were used, produce multiple tokens.
Declaration
public const int UNTOKENIZED_TOKEN_FLAG = null
Field Value
Type |
Description |
System.Int32 |
|
Methods
|
Improve this Doc
View Source
Dispose(Boolean)
Declaration
protected override void Dispose(bool disposing)
Parameters
Type |
Name |
Description |
System.Boolean |
disposing |
|
|
Improve this Doc
View Source
End()
Declaration
public override void End()
Overrides
|
Improve this Doc
View Source
IncrementToken()
Declaration
public override sealed bool IncrementToken()
Returns
Type |
Description |
System.Boolean |
|
Overrides
|
Improve this Doc
View Source
Reset()
Declaration
public override void Reset()
Overrides
Implements
IDisposable