Class WikipediaTokenizer
  
  
  
  
    Inheritance
    System.Object
    Lucene.Net.Util.AttributeSource
    Lucene.Net.Analysis.TokenStream
    Lucene.Net.Analysis.Tokenizer
    WikipediaTokenizer
   
  
    Implements
    System.IDisposable
   
  
    Inherited Members
    
      Lucene.Net.Analysis.Tokenizer.m_input
    
    
    
    
      Lucene.Net.Analysis.TokenStream.Dispose()
    
    
      Lucene.Net.Util.AttributeSource.GetAttributeFactory()
    
    
      Lucene.Net.Util.AttributeSource.GetAttributeClassesEnumerator()
    
    
      Lucene.Net.Util.AttributeSource.GetAttributeImplsEnumerator()
    
    
      Lucene.Net.Util.AttributeSource.AddAttributeImpl(Lucene.Net.Util.Attribute)
    
    
      Lucene.Net.Util.AttributeSource.AddAttribute<T>()
    
    
      Lucene.Net.Util.AttributeSource.HasAttributes
    
    
      Lucene.Net.Util.AttributeSource.HasAttribute<T>()
    
    
      Lucene.Net.Util.AttributeSource.GetAttribute<T>()
    
    
      Lucene.Net.Util.AttributeSource.ClearAttributes()
    
    
      Lucene.Net.Util.AttributeSource.CaptureState()
    
    
      Lucene.Net.Util.AttributeSource.RestoreState(Lucene.Net.Util.AttributeSource.State)
    
    
      Lucene.Net.Util.AttributeSource.GetHashCode()
    
    
    
    
      Lucene.Net.Util.AttributeSource.ReflectWith(Lucene.Net.Util.IAttributeReflector)
    
    
      Lucene.Net.Util.AttributeSource.CloneAttributes()
    
    
      Lucene.Net.Util.AttributeSource.CopyTo(Lucene.Net.Util.AttributeSource)
    
    
      Lucene.Net.Util.AttributeSource.ToString()
    
    
      System.Object.Equals(System.Object, System.Object)
    
    
      System.Object.GetType()
    
    
      System.Object.MemberwiseClone()
    
    
      System.Object.ReferenceEquals(System.Object, System.Object)
    
   
  
  Assembly: Lucene.Net.Analysis.Common.dll
  Syntax
  
    public sealed class WikipediaTokenizer : Tokenizer, IDisposable
   
  Constructors
  
  
    |
    Improve this Doc
  
  
    View Source
  
  
  WikipediaTokenizer(AttributeSource.AttributeFactory, TextReader, Int32, ICollection<String>)
  Creates a new instance of the WikipediaTokenizer.  Attaches the
input to the newly created JFlex scanner. Uses the given Lucene.Net.Util.AttributeSource.AttributeFactory.
Declaration
  
    public WikipediaTokenizer(AttributeSource.AttributeFactory factory, TextReader input, int tokenOutput, ICollection<string> untokenizedTypes)
   
  Parameters
  
    
      
        | Type | Name | Description | 
    
    
      
        | Lucene.Net.Util.AttributeSource.AttributeFactory | factory | The Lucene.Net.Util.AttributeSource.AttributeFactory  | 
      
        | System.IO.TextReader | input | The input  | 
      
        | System.Int32 | tokenOutput | One of TOKENS_ONLY, UNTOKENIZED_ONLY, BOTH  | 
      
        | System.Collections.Generic.ICollection<System.String> | untokenizedTypes | Untokenized types  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  
  WikipediaTokenizer(TextReader)
  Creates a new instance of the WikipediaTokenizer. Attaches the
input to a newly created JFlex scanner.
Declaration
  
    public WikipediaTokenizer(TextReader input)
   
  Parameters
  
    
      
        | Type | Name | Description | 
    
    
      
        | System.IO.TextReader | input | The input System.IO.TextReader  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  
  WikipediaTokenizer(TextReader, Int32, ICollection<String>)
  Creates a new instance of the WikipediaTokenizer.  Attaches the
input to the newly created JFlex scanner.
Declaration
  
    public WikipediaTokenizer(TextReader input, int tokenOutput, ICollection<string> untokenizedTypes)
   
  Parameters
  
    
      
        | Type | Name | Description | 
    
    
      
        | System.IO.TextReader | input | The input  | 
      
        | System.Int32 | tokenOutput | One of TOKENS_ONLY, UNTOKENIZED_ONLY, BOTH  | 
      
        | System.Collections.Generic.ICollection<System.String> | untokenizedTypes | Untokenized types  | 
    
  
  Fields
  
  
    |
    Improve this Doc
  
  
    View Source
  
  ACRONYM_ID
  
  
  Declaration
  
    public const int ACRONYM_ID = 2
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  ALPHANUM_ID
  
  
  Declaration
  
    public const int ALPHANUM_ID = 0
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  APOSTROPHE_ID
  
  
  Declaration
  
    public const int APOSTROPHE_ID = 1
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  BOLD
  
  
  Declaration
  
    public const string BOLD = "b"
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.String |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  BOLD_ID
  
  
  Declaration
  
    public const int BOLD_ID = 12
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  BOLD_ITALICS
  
  
  Declaration
  
    public const string BOLD_ITALICS = "bi"
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.String |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  BOLD_ITALICS_ID
  
  
  Declaration
  
    public const int BOLD_ITALICS_ID = 14
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  BOTH
  Output both the untokenized token and the splits
Declaration
  
    public const int BOTH = 2
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  CATEGORY
  
  
  Declaration
  
    public const string CATEGORY = "c"
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.String |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  CATEGORY_ID
  
  
  Declaration
  
    public const int CATEGORY_ID = 11
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  CITATION
  
  
  Declaration
  
    public const string CITATION = "ci"
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.String |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  CITATION_ID
  
  
  Declaration
  
    public const int CITATION_ID = 10
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  CJ_ID
  
  
  Declaration
  
    public const int CJ_ID = 7
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  COMPANY_ID
  
  
  Declaration
  
    public const int COMPANY_ID = 3
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  EMAIL_ID
  
  
  Declaration
  
    public const int EMAIL_ID = 4
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  EXTERNAL_LINK
  
  
  Declaration
  
    public const string EXTERNAL_LINK = "el"
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.String |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  EXTERNAL_LINK_ID
  
  
  Declaration
  
    public const int EXTERNAL_LINK_ID = 9
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  EXTERNAL_LINK_URL
  
  
  Declaration
  
    public const string EXTERNAL_LINK_URL = "elu"
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.String |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  EXTERNAL_LINK_URL_ID
  
  
  Declaration
  
    public const int EXTERNAL_LINK_URL_ID = 17
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  HEADING
  
  
  Declaration
  
    public const string HEADING = "h"
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.String |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  HEADING_ID
  
  
  Declaration
  
    public const int HEADING_ID = 15
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  HOST_ID
  
  
  Declaration
  
    public const int HOST_ID = 5
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  INTERNAL_LINK
  
  
  Declaration
  
    public const string INTERNAL_LINK = "il"
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.String |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  INTERNAL_LINK_ID
  
  
  Declaration
  
    public const int INTERNAL_LINK_ID = 8
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  ITALICS
  
  
  Declaration
  
    public const string ITALICS = "i"
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.String |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  ITALICS_ID
  
  
  Declaration
  
    public const int ITALICS_ID = 13
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  NUM_ID
  
  
  Declaration
  
    public const int NUM_ID = 6
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  SUB_HEADING
  
  
  Declaration
  
    public const string SUB_HEADING = "sh"
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.String |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  SUB_HEADING_ID
  
  
  Declaration
  
    public const int SUB_HEADING_ID = 16
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  TOKEN_TYPES
  String token types that correspond to token type int constants 
Declaration
  
    public static readonly string[] TOKEN_TYPES
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.String[] |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  TOKENS_ONLY
  
  
  Declaration
  
    public const int TOKENS_ONLY = 0
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  UNTOKENIZED_ONLY
  Only output untokenized tokens, which are tokens that would normally be split into several tokens
Declaration
  
    public const int UNTOKENIZED_ONLY = 1
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  UNTOKENIZED_TOKEN_FLAG
  This flag is used to indicate that the produced "Token" would, if TOKENS_ONLY was used, produce multiple tokens.
Declaration
  
    public const int UNTOKENIZED_TOKEN_FLAG = 1
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  Methods
  
  
    |
    Improve this Doc
  
  
    View Source
  
  
  Dispose(Boolean)
  
  
  Declaration
  
    protected override void Dispose(bool disposing)
   
  Parameters
  
    
      
        | Type | Name | Description | 
    
    
      
        | System.Boolean | disposing |  | 
    
  
  Overrides
  
  
    |
    Improve this Doc
  
  
    View Source
  
  
  End()
  
  
  Declaration
  
    public override void End()
   
  Overrides
  Lucene.Net.Analysis.TokenStream.End()
  
    |
    Improve this Doc
  
  
    View Source
  
  
  IncrementToken()
  Lucene.Net.Analysis.TokenStream.IncrementToken()
  
  Declaration
  
    public override sealed bool IncrementToken()
   
  Returns
  
    
      
        | Type | Description | 
    
    
      
        | System.Boolean |  | 
    
  
  Overrides
  Lucene.Net.Analysis.TokenStream.IncrementToken()
  
    |
    Improve this Doc
  
  
    View Source
  
  
  Reset()
  Lucene.Net.Analysis.TokenStream.Reset()
  
  Declaration
  
    public override void Reset()
   
  Overrides
  Lucene.Net.Analysis.Tokenizer.Reset()
  Implements
  
      System.IDisposable