Class UAX29URLEmailTokenizer
  
  This class implements the Word Break rules from the Unicode Text Segmentation
algorithm, as specified in
Unicode Standard Annex #29.
URLs and email addresses are also tokenized according to the relevant RFCs.
Tokens produced are of the following types:
- <ALPHANUM>: A sequence of alphabetic and numeric characters
- <NUM>: A number
- <URL>: A URL
- <EMAIL>: An email address
- <SOUTHEAST_ASIAN>: A sequence of characters from South and Southeast
        Asian languages, including Thai, Lao, Myanmar, and Khmer
- <IDEOGRAPHIC>: A single CJKV ideographic character
- <HIRAGANA>: A single hiragana character
You must specify the required Lucene.Net.Util.LuceneVersion
compatibility when creating UAX29URLEmailTokenizer:
-  As of 3.4, Hiragana and Han characters are no longer wrongly split
        from their combining characters. If you specify an earlier version number,
        you get the previous (broken) behavior, which is preserved for backwards compatibility.
    Inheritance
    System.Object
    Lucene.Net.Util.AttributeSource
    Lucene.Net.Analysis.TokenStream
    Lucene.Net.Analysis.Tokenizer
    UAX29URLEmailTokenizer
   
  
    Implements
    System.IDisposable
   
  
    Inherited Members
    
      Lucene.Net.Analysis.Tokenizer.m_input
    
    
    
    
      Lucene.Net.Analysis.TokenStream.Dispose()
    
    
      Lucene.Net.Util.AttributeSource.GetAttributeFactory()
    
    
      Lucene.Net.Util.AttributeSource.GetAttributeClassesEnumerator()
    
    
      Lucene.Net.Util.AttributeSource.GetAttributeImplsEnumerator()
    
    
      Lucene.Net.Util.AttributeSource.AddAttributeImpl(Lucene.Net.Util.Attribute)
    
    
      Lucene.Net.Util.AttributeSource.AddAttribute<T>()
    
    
      Lucene.Net.Util.AttributeSource.HasAttributes
    
    
      Lucene.Net.Util.AttributeSource.HasAttribute<T>()
    
    
      Lucene.Net.Util.AttributeSource.GetAttribute<T>()
    
    
      Lucene.Net.Util.AttributeSource.ClearAttributes()
    
    
      Lucene.Net.Util.AttributeSource.CaptureState()
    
    
      Lucene.Net.Util.AttributeSource.RestoreState(Lucene.Net.Util.AttributeSource.State)
    
    
      Lucene.Net.Util.AttributeSource.GetHashCode()
    
    
    
    
      Lucene.Net.Util.AttributeSource.ReflectWith(Lucene.Net.Util.IAttributeReflector)
    
    
      Lucene.Net.Util.AttributeSource.CloneAttributes()
    
    
      Lucene.Net.Util.AttributeSource.CopyTo(Lucene.Net.Util.AttributeSource)
    
    
      Lucene.Net.Util.AttributeSource.ToString()
    
    
      System.Object.Equals(System.Object, System.Object)
    
    
      System.Object.GetType()
    
    
      System.Object.MemberwiseClone()
    
    
      System.Object.ReferenceEquals(System.Object, System.Object)
    
   
  
  Assembly: Lucene.Net.Analysis.Common.dll
  Syntax
  
    public sealed class UAX29URLEmailTokenizer : Tokenizer, IDisposable
   
  Constructors
  
  
    |
    Improve this Doc
  
  
    View Source
  
  
  UAX29URLEmailTokenizer(LuceneVersion, AttributeSource.AttributeFactory, TextReader)
  
  
  Declaration
  
    public UAX29URLEmailTokenizer(LuceneVersion matchVersion, AttributeSource.AttributeFactory factory, TextReader input)
   
  Parameters
  
    
      
        | Type | Name | Description | 
    
    
      
        | Lucene.Net.Util.LuceneVersion | matchVersion | Lucene compatibility version  | 
      
        | Lucene.Net.Util.AttributeSource.AttributeFactory | factory | The attribute factory to use  | 
      
        | System.IO.TextReader | input | The input reader  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  
  UAX29URLEmailTokenizer(LuceneVersion, TextReader)
  
  
  Declaration
  
    public UAX29URLEmailTokenizer(LuceneVersion matchVersion, TextReader input)
   
  Parameters
  
    
      
        | Type | Name | Description | 
    
    
      
        | Lucene.Net.Util.LuceneVersion | matchVersion | Lucene compatibility version  | 
      
        | System.IO.TextReader | input | The input reader  | 
    
  
  Fields
  
  
    |
    Improve this Doc
  
  
    View Source
  
  ALPHANUM
  
  
  Declaration
  
    public const int ALPHANUM = 0
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  EMAIL
  
  
  Declaration
  
    public const int EMAIL = 8
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  HANGUL
  
  
  Declaration
  
    public const int HANGUL = 6
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  HIRAGANA
  
  
  Declaration
  
    public const int HIRAGANA = 4
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  IDEOGRAPHIC
  
  
  Declaration
  
    public const int IDEOGRAPHIC = 3
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  KATAKANA
  
  
  Declaration
  
    public const int KATAKANA = 5
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  NUM
  
  
  Declaration
  
    public const int NUM = 1
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  SOUTHEAST_ASIAN
  
  
  Declaration
  
    public const int SOUTHEAST_ASIAN = 2
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  TOKEN_TYPES
  String token types that correspond to token type int constants.
Declaration
  
    public static readonly string[] TOKEN_TYPES
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.String[] |  | 
    
  
  
    |
    Improve this Doc
  
  
    View Source
  
  URL
  
  
  Declaration
  
    public const int URL = 7
   
  Field Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  Properties
  
  
    |
    Improve this Doc
  
  
    View Source
  
  
  MaxTokenLength
  Gets or sets the maximum allowed token length. Any token longer
than this is skipped.
Declaration
  
    public int MaxTokenLength { get; set; }
   
  Property Value
  
    
      
        | Type | Description | 
    
    
      
        | System.Int32 |  | 
    
  
  Methods
  
  
    |
    Improve this Doc
  
  
    View Source
  
  
  Dispose(Boolean)
  
  
  Declaration
  
    protected override void Dispose(bool disposing)
   
  Parameters
  
    
      
        | Type | Name | Description | 
    
    
      
        | System.Boolean | disposing |  | 
    
  
  Overrides
  Lucene.Net.Analysis.Tokenizer.Dispose(System.Boolean)
  
    |
    Improve this Doc
  
  
    View Source
  
  
  End()
  
  
  Declaration
  
    public override sealed void End()
   
  Overrides
  Lucene.Net.Analysis.TokenStream.End()
  
    |
    Improve this Doc
  
  
    View Source
  
  
  IncrementToken()
  
  
  Declaration
  
    public override sealed bool IncrementToken()
   
  Returns
  
    
      
        | Type | Description | 
    
    
      
        | System.Boolean |  | 
    
  
  Overrides
  Lucene.Net.Analysis.TokenStream.IncrementToken()
  
    |
    Improve this Doc
  
  
    View Source
  
  
  Reset()
  
  
  Declaration
  
    public override void Reset()
   
  Overrides
  Lucene.Net.Analysis.Tokenizer.Reset()
  Implements
  
      System.IDisposable