Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
StandardTokenizerImpl.cs
Go to the documentation of this file.
1 /* The following code was generated by JFlex 1.4.1 on 9/4/08 6:49 PM */
2 /*
3  * Licensed to the Apache Software Foundation (ASF) under one or more
4  * contributor license agreements. See the NOTICE file distributed with
5  * this work for additional information regarding copyright ownership.
6  * The ASF licenses this file to You under the Apache License, Version 2.0
7  * (the "License"); you may not use this file except in compliance with
8  * the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 
19 
20 /*
21  NOTE: if you change StandardTokenizerImpl.jflex and need to regenerate
22  the tokenizer, only use Java 1.4 !!!
23  This grammar currently uses constructs (eg :digit:, :letter:) whose
24  meaning can vary according to the JRE used to run jflex. See
25  https://issues.apache.org/jira/browse/LUCENE-1126 for details.
26  For current backwards compatibility it is needed to support
27  only Java 1.4 - this will change in Lucene 3.1.
28 */
29 
30 using System;
31 using Lucene.Net.Analysis.Tokenattributes;
32 using Token = Lucene.Net.Analysis.Token;
33 
34 namespace Lucene.Net.Analysis.Standard
35 {
36 
37 
38  /// <summary> This class is a scanner generated by
39  /// <a href="http://www.jflex.de/">JFlex</a> 1.4.1
40  /// on 9/4/08 6:49 PM from the specification file
41  /// <tt>/tango/mike/src/lucene.standarddigit/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex</tt>
42  /// </summary>
44  {
45 
46  /// <summary>This character denotes the end of file </summary>
47  public const int YYEOF = - 1;
48 
49  /// <summary>initial size of the lookahead buffer </summary>
50  private const int ZZ_BUFFERSIZE = 16384;
51 
52  /// <summary>lexical states </summary>
53  public const int YYINITIAL = 0;
54 
55  /// <summary> Translates characters to character classes</summary>
56  private const System.String ZZ_CMAP_PACKED = "\x0009\x0000\x0001\x0000\x0001\x000D\x0001\x0000\x0001\x0000\x0001\x000C\x0012\x0000\x0001\x0000\x0005\x0000\x0001\x0005" + "\x0001\x0003\x0004\x0000\x0001\x0009\x0001\x0007\x0001\x0004\x0001\x0009\x000A\x0002\x0006\x0000\x0001\x0006\x001A\x000A" + "\x0004\x0000\x0001\x0008\x0001\x0000\x001A\x000A\x002F\x0000\x0001\x000A\x000A\x0000\x0001\x000A\x0004\x0000\x0001\x000A" + "\x0005\x0000\x0017\x000A\x0001\x0000\x001F\x000A\x0001\x0000\u0128\x000A\x0002\x0000\x0012\x000A\x001C\x0000\x005E\x000A" + "\x0002\x0000\x0009\x000A\x0002\x0000\x0007\x000A\x000E\x0000\x0002\x000A\x000E\x0000\x0005\x000A\x0009\x0000\x0001\x000A" + "\x008B\x0000\x0001\x000A\x000B\x0000\x0001\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0014\x000A" + "\x0001\x0000\x002C\x000A\x0001\x0000\x0008\x000A\x0002\x0000\x001A\x000A\x000C\x0000\x0082\x000A\x000A\x0000\x0039\x000A" + "\x0002\x0000\x0002\x000A\x0002\x0000\x0002\x000A\x0003\x0000\x0026\x000A\x0002\x0000\x0002\x000A\x0037\x0000\x0026\x000A" + "\x0002\x0000\x0001\x000A\x0007\x0000\x0027\x000A\x0048\x0000\x001B\x000A\x0005\x0000\x0003\x000A\x002E\x0000\x001A\x000A" + "\x0005\x0000\x000B\x000A\x0015\x0000\x000A\x0002\x0007\x0000\x0063\x000A\x0001\x0000\x0001\x000A\x000F\x0000\x0002\x000A" + "\x0009\x0000\x000A\x0002\x0003\x000A\x0013\x0000\x0001\x000A\x0001\x0000\x001B\x000A\x0053\x0000\x0026\x000A\u015f\x0000" + "\x0035\x000A\x0003\x0000\x0001\x000A\x0012\x0000\x0001\x000A\x0007\x0000\x000A\x000A\x0004\x0000\x000A\x0002\x0015\x0000" + "\x0008\x000A\x0002\x0000\x0002\x000A\x0002\x0000\x0016\x000A\x0001\x0000\x0007\x000A\x0001\x0000\x0001\x000A\x0003\x0000" + "\x0004\x000A\x0022\x0000\x0002\x000A\x0001\x0000\x0003\x000A\x0004\x0000\x000A\x0002\x0002\x000A\x0013\x0000\x0006\x000A" + "\x0004\x0000\x0002\x000A\x0002\x0000\x0016\x000A\x0001\x0000\x0007\x000A\x0001\x0000\x0002\x000A\x0001\x0000\x0002\x000A" +
57  "\x0001\x0000\x0002\x000A\x001F\x0000\x0004\x000A\x0001\x0000\x0001\x000A\x0007\x0000\x000A\x0002\x0002\x0000\x0003\x000A" + "\x0010\x0000\x0007\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0016\x000A\x0001\x0000\x0007\x000A" + "\x0001\x0000\x0002\x000A\x0001\x0000\x0005\x000A\x0003\x0000\x0001\x000A\x0012\x0000\x0001\x000A\x000F\x0000\x0001\x000A" + "\x0005\x0000\x000A\x0002\x0015\x0000\x0008\x000A\x0002\x0000\x0002\x000A\x0002\x0000\x0016\x000A\x0001\x0000\x0007\x000A" + "\x0001\x0000\x0002\x000A\x0002\x0000\x0004\x000A\x0003\x0000\x0001\x000A\x001E\x0000\x0002\x000A\x0001\x0000\x0003\x000A" + "\x0004\x0000\x000A\x0002\x0015\x0000\x0006\x000A\x0003\x0000\x0003\x000A\x0001\x0000\x0004\x000A\x0003\x0000\x0002\x000A" + "\x0001\x0000\x0001\x000A\x0001\x0000\x0002\x000A\x0003\x0000\x0002\x000A\x0003\x0000\x0003\x000A\x0003\x0000\x0008\x000A" + "\x0001\x0000\x0003\x000A\x002D\x0000\x0009\x0002\x0015\x0000\x0008\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0017\x000A" + "\x0001\x0000\x000A\x000A\x0001\x0000\x0005\x000A\x0026\x0000\x0002\x000A\x0004\x0000\x000A\x0002\x0015\x0000\x0008\x000A" + "\x0001\x0000\x0003\x000A\x0001\x0000\x0017\x000A\x0001\x0000\x000A\x000A\x0001\x0000\x0005\x000A\x0024\x0000\x0001\x000A" + "\x0001\x0000\x0002\x000A\x0004\x0000\x000A\x0002\x0015\x0000\x0008\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0017\x000A" + "\x0001\x0000\x0010\x000A\x0026\x0000\x0002\x000A\x0004\x0000\x000A\x0002\x0015\x0000\x0012\x000A\x0003\x0000\x0018\x000A" + "\x0001\x0000\x0009\x000A\x0001\x0000\x0001\x000A\x0002\x0000\x0007\x000A\x0039\x0000\x0001\x0001\x0030\x000A\x0001\x0001" + "\x0002\x000A\x000C\x0001\x0007\x000A\x0009\x0001\x000A\x0002\x0027\x0000\x0002\x000A\x0001\x0000\x0001\x000A\x0002\x0000" + "\x0002\x000A\x0001\x0000\x0001\x000A\x0002\x0000\x0001\x000A\x0006\x0000\x0004\x000A\x0001\x0000\x0007\x000A\x0001\x0000" + 
"\x0003\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0001\x000A\x0002\x0000\x0002\x000A\x0001\x0000\x0004\x000A\x0001\x0000" +
58  "\x0002\x000A\x0009\x0000\x0001\x000A\x0002\x0000\x0005\x000A\x0001\x0000\x0001\x000A\x0009\x0000\x000A\x0002\x0002\x0000" + "\x0002\x000A\x0022\x0000\x0001\x000A\x001F\x0000\x000A\x0002\x0016\x0000\x0008\x000A\x0001\x0000\x0022\x000A\x001D\x0000" + "\x0004\x000A\x0074\x0000\x0022\x000A\x0001\x0000\x0005\x000A\x0001\x0000\x0002\x000A\x0015\x0000\x000A\x0002\x0006\x0000" + "\x0006\x000A\x004A\x0000\x0026\x000A\x000A\x0000\x0027\x000A\x0009\x0000\x005A\x000A\x0005\x0000\x0044\x000A\x0005\x0000" + "\x0052\x000A\x0006\x0000\x0007\x000A\x0001\x0000\x003F\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000" + "\x0007\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0027\x000A\x0001\x0000\x0001\x000A\x0001\x0000" + "\x0004\x000A\x0002\x0000\x001F\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0007\x000A\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0007\x000A\x0001\x0000\x0007\x000A\x0001\x0000\x0017\x000A\x0001\x0000" + "\x001F\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0007\x000A\x0001\x0000\x0027\x000A\x0001\x0000" + "\x0013\x000A\x000E\x0000\x0009\x0002\x002E\x0000\x0055\x000A\x000C\x0000\u026c\x000A\x0002\x0000\x0008\x000A\x000A\x0000" + "\x001A\x000A\x0005\x0000\x004B\x000A\x0095\x0000\x0034\x000A\x002C\x0000\x000A\x0002\x0026\x0000\x000A\x0002\x0006\x0000" + "\x0058\x000A\x0008\x0000\x0029\x000A\u0557\x0000\x009C\x000A\x0004\x0000\x005A\x000A\x0006\x0000\x0016\x000A\x0002\x0000" + "\x0006\x000A\x0002\x0000\x0026\x000A\x0002\x0000\x0006\x000A\x0002\x0000\x0008\x000A\x0001\x0000\x0001\x000A\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x001F\x000A\x0002\x0000\x0035\x000A\x0001\x0000\x0007\x000A\x0001\x0000" + "\x0001\x000A\x0003\x0000\x0003\x000A\x0001\x0000\x0007\x000A\x0003\x0000\x0004\x000A\x0002\x0000\x0006\x000A\x0004\x0000" + 
"\x000D\x000A\x0005\x0000\x0003\x000A\x0001\x0000\x0007\x000A\x0082\x0000\x0001\x000A\x0082\x0000\x0001\x000A\x0004\x0000" +
59  "\x0001\x000A\x0002\x0000\x000A\x000A\x0001\x0000\x0001\x000A\x0003\x0000\x0005\x000A\x0006\x0000\x0001\x000A\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0007\x000A\u0ecb\x0000" + "\x0002\x000A\x002A\x0000\x0005\x000A\x000A\x0000\x0001\x000B\x0054\x000B\x0008\x000B\x0002\x000B\x0002\x000B\x005A\x000B" + "\x0001\x000B\x0003\x000B\x0006\x000B\x0028\x000B\x0003\x000B\x0001\x0000\x005E\x000A\x0011\x0000\x0018\x000A\x0038\x0000" + "\x0010\x000B\u0100\x0000\x0080\x000B\x0080\x0000\u19b6\x000B\x000A\x000B\x0040\x0000\u51a6\x000B\x005A\x000B\u048d\x000A" + "\u0773\x0000\u2ba4\x000A\u215c\x0000\u012e\x000B\x00D2\x000B\x0007\x000A\x000C\x0000\x0005\x000A\x0005\x0000\x0001\x000A" + "\x0001\x0000\x000A\x000A\x0001\x0000\x000D\x000A\x0001\x0000\x0005\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0002\x000A" + "\x0001\x0000\x0002\x000A\x0001\x0000\x006C\x000A\x0021\x0000\u016b\x000A\x0012\x0000\x0040\x000A\x0002\x0000\x0036\x000A" + "\x0028\x0000\x000C\x000A\x0074\x0000\x0003\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0087\x000A\x0013\x0000\x000A\x0002" + "\x0007\x0000\x001A\x000A\x0006\x0000\x001A\x000A\x000A\x0000\x0001\x000B\x003A\x000B\x001F\x000A\x0003\x0000\x0006\x000A" + "\x0002\x0000\x0006\x000A\x0002\x0000\x0006\x000A\x0002\x0000\x0003\x000A\x0023\x0000";
60 
61  /// <summary> Translates characters to character classes</summary>
62  private static readonly char[] ZZ_CMAP = ZzUnpackCMap(ZZ_CMAP_PACKED);
63 
64  /// <summary> Translates DFA states to action switch labels.</summary>
65  private static readonly int[] ZZ_ACTION = ZzUnpackAction();
66 
67  private const System.String ZZ_ACTION_PACKED_0 = "\x0001\x0000\x0001\x0001\x0003\x0002\x0001\x0003\x0001\x0001\x000B\x0000\x0001\x0002\x0003\x0004" + "\x0002\x0000\x0001\x0005\x0001\x0000\x0001\x0005\x0003\x0004\x0006\x0005\x0001\x0006\x0001\x0004" + "\x0002\x0007\x0001\x0008\x0001\x0000\x0001\x0008\x0003\x0000\x0002\x0008\x0001\x0009\x0001\x000A" + "\x0001\x0004";
68 
/// <summary> Builds the action table by expanding <c>ZZ_ACTION_PACKED_0</c>. </summary>
/// <returns> a new 51-entry array holding the unpacked values </returns>
private static int[] ZzUnpackAction()
{
    int[] actions = new int[51];
    // There is only one packed segment, so the end offset returned by the helper is not needed.
    ZzUnpackAction(ZZ_ACTION_PACKED_0, 0, actions);
    return actions;
}
76 
/// <summary> Expands a string of (count, value) character pairs into <c>result</c>. </summary>
/// <param name="packed"> alternating count and value characters </param>
/// <param name="offset"> index in <c>result</c> at which writing starts </param>
/// <param name="result"> array that receives the expanded values </param>
/// <returns> the index one past the last element written </returns>
private static int ZzUnpackAction(System.String packed, int offset, int[] result)
{
    int writePos = offset;
    int packedLength = packed.Length;
    for (int readPos = 0; readPos < packedLength; )
    {
        int runLength = packed[readPos++];
        int entry = packed[readPos++];

        // Each pair produces at least one entry, even when its count is zero.
        result[writePos++] = entry;
        for (int extra = 1; extra < runLength; extra++)
        {
            result[writePos++] = entry;
        }
    }
    return writePos;
}
92 
93 
94  /// <summary> Translates a state to a row index in the transition table</summary>
95  private static readonly int[] ZZ_ROWMAP = ZzUnpackRowMap();
96 
97  private const System.String ZZ_ROWMAP_PACKED_0 = "\x0000\x0000\x0000\x000E\x0000\x001C\x0000\x002A\x0000\x0038\x0000\x000E\x0000\x0046\x0000\x0054" + "\x0000\x0062\x0000\x0070\x0000\x007E\x0000\x008C\x0000\x009A\x0000\x00A8\x0000\x00B6\x0000\x00C4" + "\x0000\x00D2\x0000\x00E0\x0000\x00EE\x0000\x00FC\x0000\u010a\x0000\u0118\x0000\u0126\x0000\u0134" + "\x0000\u0142\x0000\u0150\x0000\u015e\x0000\u016c\x0000\u017a\x0000\u0188\x0000\u0196\x0000\u01a4" + "\x0000\u01b2\x0000\u01c0\x0000\u01ce\x0000\u01dc\x0000\u01ea\x0000\u01f8\x0000\x00D2\x0000\u0206" + "\x0000\u0214\x0000\u0222\x0000\u0230\x0000\u023e\x0000\u024c\x0000\u025a\x0000\x0054\x0000\x008C" + "\x0000\u0268\x0000\u0276\x0000\u0284";
98 
/// <summary> Builds the row map table by expanding <c>ZZ_ROWMAP_PACKED_0</c>. </summary>
/// <returns> a new 51-entry array holding the unpacked values </returns>
private static int[] ZzUnpackRowMap()
{
    int[] rowMap = new int[51];
    // There is only one packed segment, so the end offset returned by the helper is not needed.
    ZzUnpackRowMap(ZZ_ROWMAP_PACKED_0, 0, rowMap);
    return rowMap;
}
106 
/// <summary> Combines consecutive character pairs of <c>packed</c> into 32-bit values. </summary>
/// <param name="packed"> characters taken two at a time: upper 16 bits, then lower 16 bits </param>
/// <param name="offset"> index in <c>result</c> at which writing starts </param>
/// <param name="result"> array that receives one value per character pair </param>
/// <returns> the index one past the last element written </returns>
private static int ZzUnpackRowMap(System.String packed, int offset, int[] result)
{
    int writePos = offset;
    int packedLength = packed.Length;
    for (int readPos = 0; readPos < packedLength; readPos += 2)
    {
        int upper = packed[readPos] << 16;
        int lower = packed[readPos + 1];
        result[writePos++] = upper | lower;
    }
    return writePos;
}
119 
120  /// <summary> The transition table of the DFA</summary>
121  private static readonly int[] ZZ_TRANS = ZzUnpackTrans();
122 
123  private const System.String ZZ_TRANS_PACKED_0 = "\x0001\x0002\x0001\x0003\x0001\x0004\x0007\x0002\x0001\x0005\x0001\x0006\x0001\x0007\x0001\x0002" + "\x000F\x0000\x0002\x0003\x0001\x0000\x0001\x0008\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B" + "\x0001\x0003\x0004\x0000\x0001\x0003\x0001\x0004\x0001\x0000\x0001\x000C\x0001\x0000\x0001\x0009" + "\x0002\x000D\x0001\x000E\x0001\x0004\x0004\x0000\x0001\x0003\x0001\x0004\x0001\x000F\x0001\x0010" + "\x0001\x0011\x0001\x0012\x0002\x000A\x0001\x000B\x0001\x0013\x0010\x0000\x0001\x0002\x0001\x0000" + "\x0001\x0014\x0001\x0015\x0007\x0000\x0001\x0016\x0004\x0000\x0002\x0017\x0007\x0000\x0001\x0017" + "\x0004\x0000\x0001\x0018\x0001\x0019\x0007\x0000\x0001\x001A\x0005\x0000\x0001\x001B\x0007\x0000" + "\x0001\x000B\x0004\x0000\x0001\x001C\x0001\x001D\x0007\x0000\x0001\x001E\x0004\x0000\x0001\x001F" + "\x0001\x0020\x0007\x0000\x0001\x0021\x0004\x0000\x0001\x0022\x0001\x0023\x0007\x0000\x0001\x0024" + "\x000D\x0000\x0001\x0025\x0004\x0000\x0001\x0014\x0001\x0015\x0007\x0000\x0001\x0026\x000D\x0000" + "\x0001\x0027\x0004\x0000\x0002\x0017\x0007\x0000\x0001\x0028\x0004\x0000\x0001\x0003\x0001\x0004" + "\x0001\x000F\x0001\x0008\x0001\x0011\x0001\x0012\x0002\x000A\x0001\x000B\x0001\x0013\x0004\x0000" + "\x0002\x0014\x0001\x0000\x0001\x0029\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0014" + "\x0004\x0000\x0001\x0014\x0001\x0015\x0001\x0000\x0001\x002B\x0001\x0000\x0001\x0009\x0002\x002C" + "\x0001\x002D\x0001\x0015\x0004\x0000\x0001\x0014\x0001\x0015\x0001\x0000\x0001\x0029\x0001\x0000" + "\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0016\x0004\x0000\x0002\x0017\x0001\x0000\x0001\x002E" + "\x0002\x0000\x0001\x002E\x0002\x0000\x0001\x0017\x0004\x0000\x0002\x0018\x0001\x0000\x0001\x002A" + "\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0018\x0004\x0000\x0001\x0018\x0001\x0019" + "\x0001\x0000\x0001\x002C\x0001\x0000\x0001\x0009\x0002\x002C\x0001\x002D\x0001\x0019\x0004\x0000" +
124  "\x0001\x0018\x0001\x0019\x0001\x0000\x0001\x002A\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000" + "\x0001\x001A\x0005\x0000\x0001\x001B\x0001\x0000\x0001\x002D\x0002\x0000\x0003\x002D\x0001\x001B" + "\x0004\x0000\x0002\x001C\x0001\x0000\x0001\x002F\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B" + "\x0001\x001C\x0004\x0000\x0001\x001C\x0001\x001D\x0001\x0000\x0001\x0030\x0001\x0000\x0001\x0009" + "\x0002\x000D\x0001\x000E\x0001\x001D\x0004\x0000\x0001\x001C\x0001\x001D\x0001\x0000\x0001\x002F" + "\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B\x0001\x001E\x0004\x0000\x0002\x001F\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B\x0001\x001F\x0004\x0000\x0001\x001F" + "\x0001\x0020\x0001\x0000\x0001\x000D\x0001\x0000\x0001\x0009\x0002\x000D\x0001\x000E\x0001\x0020" + "\x0004\x0000\x0001\x001F\x0001\x0020\x0001\x0000\x0001\x000A\x0001\x0000\x0001\x0009\x0002\x000A" + "\x0001\x000B\x0001\x0021\x0004\x0000\x0002\x0022\x0001\x0000\x0001\x000B\x0002\x0000\x0003\x000B" + "\x0001\x0022\x0004\x0000\x0001\x0022\x0001\x0023\x0001\x0000\x0001\x000E\x0002\x0000\x0003\x000E" + "\x0001\x0023\x0004\x0000\x0001\x0022\x0001\x0023\x0001\x0000\x0001\x000B\x0002\x0000\x0003\x000B" + "\x0001\x0024\x0006\x0000\x0001\x000F\x0006\x0000\x0001\x0025\x0004\x0000\x0001\x0014\x0001\x0015" + "\x0001\x0000\x0001\x0031\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0016\x0004\x0000" + "\x0002\x0017\x0001\x0000\x0001\x002E\x0002\x0000\x0001\x002E\x0002\x0000\x0001\x0028\x0004\x0000" + "\x0002\x0014\x0007\x0000\x0001\x0014\x0004\x0000\x0002\x0018\x0007\x0000\x0001\x0018\x0004\x0000" + "\x0002\x001C\x0007\x0000\x0001\x001C\x0004\x0000\x0002\x001F\x0007\x0000\x0001\x001F\x0004\x0000" + "\x0002\x0022\x0007\x0000\x0001\x0022\x0004\x0000\x0002\x0032\x0007\x0000\x0001\x0032\x0004\x0000" + "\x0002\x0014\x0007\x0000\x0001\x0033\x0004\x0000\x0002\x0032\x0001\x0000\x0001\x002E\x0002\x0000" + 
"\x0001\x002E\x0002\x0000\x0001\x0032\x0004\x0000\x0002\x0014\x0001\x0000\x0001\x0031\x0001\x0000" +
125  "\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0014\x0003\x0000";
126 
/// <summary> Builds the transition table by expanding <c>ZZ_TRANS_PACKED_0</c>. </summary>
/// <returns> a new 658-entry array holding the unpacked values </returns>
private static int[] ZzUnpackTrans()
{
    int[] transitions = new int[658];
    // There is only one packed segment, so the end offset returned by the helper is not needed.
    ZzUnpackTrans(ZZ_TRANS_PACKED_0, 0, transitions);
    return transitions;
}
134 
/// <summary> Expands a string of (count, value) character pairs into <c>result</c>,
/// storing each value decremented by one (so a packed 0 becomes -1).
/// </summary>
/// <param name="packed"> alternating count and value characters </param>
/// <param name="offset"> index in <c>result</c> at which writing starts </param>
/// <param name="result"> array that receives the expanded values </param>
/// <returns> the index one past the last element written </returns>
private static int ZzUnpackTrans(System.String packed, int offset, int[] result)
{
    int writePos = offset;
    int packedLength = packed.Length;
    for (int readPos = 0; readPos < packedLength; )
    {
        int runLength = packed[readPos++];
        int target = packed[readPos++] - 1;

        // Each pair produces at least one entry, even when its count is zero.
        result[writePos++] = target;
        for (int extra = 1; extra < runLength; extra++)
        {
            result[writePos++] = target;
        }
    }
    return writePos;
}
151 
152 
/* error codes; each one is also the index of its message in ZZ_ERROR_MSG */
private const int ZZ_UNKNOWN_ERROR = 0;
private const int ZZ_NO_MATCH = 1;
private const int ZZ_PUSHBACK_2BIG = 2;

/* error messages for the codes above (spelling of "Unknown" corrected) */
private static readonly System.String[] ZZ_ERROR_MSG = new System.String[]
{
    "Unknown internal scanner error",
    "Error: could not match input",
    "Error: pushback value was too large"
};
160 
161  /// <summary> ZZ_ATTRIBUTE[aState] contains the attributes of state <c>aState</c></summary>
162  private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();
163 
164  private const System.String ZZ_ATTRIBUTE_PACKED_0 = "\x0001\x0000\x0001\x0009\x0003\x0001\x0001\x0009\x0001\x0001\x000B\x0000\x0004\x0001\x0002\x0000" + "\x0001\x0001\x0001\x0000\x000F\x0001\x0001\x0000\x0001\x0001\x0003\x0000\x0005\x0001";
165 
/// <summary> Builds the state attribute table by expanding <c>ZZ_ATTRIBUTE_PACKED_0</c>. </summary>
/// <returns> a new 51-entry array holding the unpacked values </returns>
private static int[] ZzUnpackAttribute()
{
    int[] attributes = new int[51];
    // There is only one packed segment, so the end offset returned by the helper is not needed.
    ZzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, 0, attributes);
    return attributes;
}
173 
/// <summary> Expands a string of (count, value) character pairs into <c>result</c>. </summary>
/// <param name="packed"> alternating count and value characters </param>
/// <param name="offset"> index in <c>result</c> at which writing starts </param>
/// <param name="result"> array that receives the expanded values </param>
/// <returns> the index one past the last element written </returns>
private static int ZzUnpackAttribute(System.String packed, int offset, int[] result)
{
    int writePos = offset;
    int packedLength = packed.Length;
    for (int readPos = 0; readPos < packedLength; )
    {
        int runLength = packed[readPos++];
        int attribute = packed[readPos++];

        // Each pair produces at least one entry, even when its count is zero.
        result[writePos++] = attribute;
        for (int extra = 1; extra < runLength; extra++)
        {
            result[writePos++] = attribute;
        }
    }
    return writePos;
}
189 
190  /// <summary>the input device </summary>
191  private System.IO.TextReader zzReader;
192 
193  /// <summary>the current state of the DFA </summary>
194  private int zzState;
195 
196  /// <summary>the current lexical state </summary>
197  private int zzLexicalState = YYINITIAL;
198 
199  /// <summary>this buffer contains the current text to be matched and is
200  /// the source of the yytext() string
201  /// </summary>
202  private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
203 
204  /// <summary>the textposition at the last accepting state </summary>
205  private int zzMarkedPos;
206 
207  /// <summary>the textposition at the last state to be included in yytext </summary>
208  private int zzPushbackPos;
209 
210  /// <summary>the current text position in the buffer </summary>
211  private int zzCurrentPos;
212 
213  /// <summary>startRead marks the beginning of the yytext() string in the buffer </summary>
214  private int zzStartRead;
215 
216  /// <summary>endRead marks the last character in the buffer, that has been read
217  /// from input
218  /// </summary>
219  private int zzEndRead;
220 
221  /// <summary>number of newlines encountered up to the start of the matched text </summary>
222  private int yyline;
223 
224  /// <summary>the number of characters up to the start of the matched text </summary>
225  private int yychar;
226 
227  /// <summary> the number of characters from the last newline up to the start of the
228  /// matched text
229  /// </summary>
230  private int yycolumn;
231 
232  /// <summary> zzAtBOL == true &lt;=&gt; the scanner is currently at the beginning of a line</summary>
233  private bool zzAtBOL = true;
234 
235  /// <summary>zzAtEOF == true &lt;=&gt; the scanner is at the EOF </summary>
236  private bool zzAtEOF;
237 
238  /* user code: */
239 
240  public static readonly int ALPHANUM;
241  public static readonly int APOSTROPHE;
242  public static readonly int ACRONYM;
243  public static readonly int COMPANY;
244  public static readonly int EMAIL;
245  public static readonly int HOST;
246  public static readonly int NUM;
247  public static readonly int CJ;
248  /// <deprecated> this solves a bug where HOSTs that end with '.' are identified
249  /// as ACRONYMs.
250  /// </deprecated>
251  [Obsolete("this solves a bug where HOSTs that end with '.' are identified as ACRONYMs")]
252  public static readonly int ACRONYM_DEP;
253 
254  public static readonly System.String[] TOKEN_TYPES;
255 
/// <summary> Returns the current value of the <c>yychar</c> counter. </summary>
public int Yychar()
{
    return this.yychar;
}
260 
/// <summary> Resets the tokenizer so that it reads from a new reader. </summary>
/// <param name="r"> the reader that supplies the new input </param>
internal void Reset(System.IO.TextReader r)
{
    // Go back to the default buffer size if the buffer has grown beyond it.
    bool bufferHasGrown = zzBuffer.Length > ZZ_BUFFERSIZE;
    if (bufferHasGrown)
    {
        zzBuffer = new char[ZZ_BUFFERSIZE];
    }

    Yyreset(r);
}
273 
/// <summary> Hands the text of the current match to the given Lucene token
/// via its <c>SetTermBuffer</c> method.
/// </summary>
/// <param name="t"> the token to fill </param>
internal void GetText(Token t)
{
    int matchStart = zzStartRead;
    int matchLength = zzMarkedPos - matchStart;
    t.SetTermBuffer(zzBuffer, matchStart, matchLength);
}
279 
/// <summary> Hands the text of the current match to the given term attribute
/// via its <c>SetTermBuffer</c> method.
/// </summary>
/// <param name="t"> the term attribute to fill </param>
internal void GetText(ITermAttribute t)
{
    int matchStart = zzStartRead;
    int matchLength = zzMarkedPos - matchStart;
    t.SetTermBuffer(zzBuffer, matchStart, matchLength);
}
285 
286 
/// <summary> Creates a new scanner that reads its input from a text reader.
/// A <c>System.IO.Stream</c> overload of this constructor also exists.
/// </summary>
/// <param name="in_Renamed"> the reader to read input from </param>
internal StandardTokenizerImpl(System.IO.TextReader in_Renamed)
{
    zzReader = in_Renamed;
}
297 
/// <summary> Creates a new scanner that reads its input from a stream.
/// The stream is wrapped in a <c>StreamReader</c> using <c>System.Text.Encoding.Default</c>
/// and passed to the <c>TextReader</c> constructor.
/// </summary>
/// <param name="in_Renamed"> the stream to read input from </param>
internal StandardTokenizerImpl(System.IO.Stream in_Renamed)
    : this(new System.IO.StreamReader(in_Renamed, System.Text.Encoding.Default))
{
}
307 
/// <summary> Expands the packed character translation table into a table with
/// one entry per UTF-16 code unit (0x10000 entries).
/// </summary>
/// <param name="packed"> the packed table: alternating count and value characters;
/// exactly the first 1154 characters are consumed
/// </param>
/// <returns> the unpacked character translation table </returns>
private static char[] ZzUnpackCMap(System.String packed)
{
    // Number of packed characters consumed; fixed rather than taken from packed.Length.
    const int packedLength = 1154;

    char[] table = new char[0x10000];
    int writePos = 0;
    for (int readPos = 0; readPos < packedLength; )
    {
        int runLength = packed[readPos++];
        char charClass = packed[readPos++];

        // Each pair produces at least one entry, even when its count is zero.
        table[writePos++] = charClass;
        for (int extra = 1; extra < runLength; extra++)
        {
            table[writePos++] = charClass;
        }
    }
    return table;
}
330 
331 
/// <summary> Refills the input buffer from the reader. </summary>
/// <returns> <c>false</c> if new characters were added to the buffer,
/// <c>true</c> if the reader delivered no more input
/// </returns>
/// <exception cref="System.IO.IOException"> if any I/O-Error occurs
/// </exception>
private bool ZzRefill()
{
    /* first: make room by moving the text from zzStartRead onwards to the buffer start */
    int shift = zzStartRead;
    if (shift > 0)
    {
        Array.Copy(zzBuffer, shift, zzBuffer, 0, zzEndRead - shift);

        /* translate stored positions by the same amount */
        zzEndRead -= shift;
        zzCurrentPos -= shift;
        zzMarkedPos -= shift;
        zzPushbackPos -= shift;
        zzStartRead = 0;
    }

    /* grow the buffer to twice the current position once that position reaches its end */
    if (zzCurrentPos >= zzBuffer.Length)
    {
        char[] grown = new char[zzCurrentPos * 2];
        Array.Copy(zzBuffer, 0, grown, 0, zzBuffer.Length);
        zzBuffer = grown;
    }

    /* finally: append new input after the characters already buffered */
    int freeSpace = zzBuffer.Length - zzEndRead;
    int charsRead = zzReader.Read(zzBuffer, zzEndRead, freeSpace);
    if (charsRead > 0)
    {
        zzEndRead += charsRead;
        return false;
    }

    return true;
}
377 
378 
/// <summary> Marks the scanner as being at end of file, invalidates the buffered
/// input and closes the reader if one is set.
/// </summary>
public void Yyclose()
{
    /* indicate end of file */
    zzAtEOF = true;
    /* invalidate buffer */
    zzEndRead = zzStartRead;

    if (zzReader == null)
    {
        return;
    }
    zzReader.Close();
}
388 
389 
/// <summary> Resets the scanner to read from a new input source.
/// Does not close the old reader.
///
/// All buffer positions and counters are set back to zero, so input that was
/// buffered from the old reader is abandoned.
/// The lexical state is set to <c>YYINITIAL</c>.
/// </summary>
/// <param name="reader"> the new input source </param>
public void Yyreset(System.IO.TextReader reader)
{
    zzReader = reader;

    // line/file boundary flags
    zzAtBOL = true;
    zzAtEOF = false;

    // buffer positions
    zzStartRead = 0;
    zzEndRead = 0;
    zzPushbackPos = 0;
    zzMarkedPos = 0;
    zzCurrentPos = 0;

    // position counters
    yycolumn = 0;
    yychar = 0;
    yyline = 0;

    zzLexicalState = YYINITIAL;
}
410 
411 
/// <summary> Gets the lexical state the scanner is currently in. </summary>
/// <returns> the value last stored in <c>zzLexicalState</c> </returns>
public int Yystate()
{
    return this.zzLexicalState;
}
417 
418 
/// <summary> Switches the scanner to a new lexical state. </summary>
/// <param name="newState"> the lexical state to enter </param>
public void Yybegin(int newState)
{
    this.zzLexicalState = newState;
}
428 
429 
/// <summary> Returns the text of the current match as a new string. </summary>
/// <returns> the buffer characters from <c>zzStartRead</c> up to (not including) <c>zzMarkedPos</c> </returns>
public System.String Yytext()
{
    int matchStart = zzStartRead;
    int matchLength = zzMarkedPos - matchStart;
    return new System.String(zzBuffer, matchStart, matchLength);
}
435 
436 
/// <summary> Returns a single character of the matched text.
///
/// Gives the same character as <c>Yytext()[pos]</c> without building the string.
/// </summary>
/// <param name="pos"> the position of the character to fetch, relative to the
/// start of the match; a value from 0 to Yylength()-1
/// </param>
/// <returns> the character at position <c>pos</c> </returns>
public char Yycharat(int pos)
{
    int bufferIndex = zzStartRead + pos;
    return zzBuffer[bufferIndex];
}
453 
454 
/// <summary> Gets the number of characters in the matched text region. </summary>
/// <returns> the distance from <c>zzStartRead</c> to <c>zzMarkedPos</c> </returns>
public int Yylength()
{
    int matchEnd = zzMarkedPos;
    return matchEnd - zzStartRead;
}
460 
461 
/// <summary> Reports an error that occurred while scanning by throwing an exception.
///
/// In a well-formed scanner (no or only correct usage of
/// Yypushback(int) and a match-all fallback rule) this method
/// will only be called with things that "Can't Possibly Happen".
/// If this method is called, something is seriously wrong
/// (e.g. a JFlex bug producing a faulty scanner etc.).
///
/// Usual syntax/scanner level error handling should be done
/// in error fallback rules.
/// </summary>
/// <param name="errorCode"> the code of the error message to report; a code with
/// no entry in <c>ZZ_ERROR_MSG</c> is reported with the <c>ZZ_UNKNOWN_ERROR</c> message
/// </param>
/// <exception cref="System.ApplicationException"> always thrown, carrying the selected message
/// </exception>
private void ZzScanError(int errorCode)
{
    // Range-check the code up front instead of catching IndexOutOfRangeException:
    // an unrecognised code is an expected input here, not an exceptional one.
    bool isKnownCode = errorCode >= 0 && errorCode < ZZ_ERROR_MSG.Length;
    System.String message = ZZ_ERROR_MSG[isKnownCode ? errorCode : ZZ_UNKNOWN_ERROR];

    throw new System.ApplicationException(message);
}
490 
491 
/// <summary> Pushes the given number of characters back into the input by moving
/// the end of the match backwards.
///
/// They will be read again by the next call of the scanning method.
/// </summary>
/// <param name="number"> the number of characters to be read again.
/// This number must not be greater than Yylength(); a larger value is
/// reported through ZzScanError with ZZ_PUSHBACK_2BIG.
/// </param>
public virtual void Yypushback(int number)
{
    int matchedLength = Yylength();
    if (number > matchedLength)
    {
        ZzScanError(ZZ_PUSHBACK_2BIG);
    }

    zzMarkedPos -= number;
}
507 
508 
509  /// <summary> Resumes scanning until the next regular expression is matched,
510  /// the end of input is encountered or an I/O-Error occurs.
511  ///
512  /// </summary>
513  /// <returns> the type of the next token (one of the token type fields such as ALPHANUM, HOST or NUM), or YYEOF at end of input
514  /// </returns>
515  /// <exception cref="System.IO.IOException"> if any I/O-Error occurs
516  /// </exception>
517  public virtual int GetNextToken()
518  {
519  int zzInput;
520  int zzAction;
521 
522  // cached fields:
523  int zzCurrentPosL;
524  int zzMarkedPosL;
525  int zzEndReadL = zzEndRead;
526  char[] zzBufferL = zzBuffer;
527  char[] zzCMapL = ZZ_CMAP;
528 
529  int[] zzTransL = ZZ_TRANS;
530  int[] zzRowMapL = ZZ_ROWMAP;
531  int[] zzAttrL = ZZ_ATTRIBUTE;
532 
// Outer loop: one iteration per match attempt. It repeats when the matched
// action only breaks out of the switch (e.g. the "ignore" action) instead of returning.
533  while (true)
534  {
535  zzMarkedPosL = zzMarkedPos;
536 
// advance the character counter by the length of the previous match
537  yychar += zzMarkedPosL - zzStartRead;
538 
// -1 means no accepting state has been recorded for this attempt yet
539  zzAction = - 1;
540 
// the new match starts where the previous one ended
541  zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
542 
543  zzState = zzLexicalState;
544 
545 
546  {
// Inner loop: consume one input character per iteration and follow the transition table.
547  while (true)
548  {
549 
550  if (zzCurrentPosL < zzEndReadL)
551  zzInput = zzBufferL[zzCurrentPosL++];
552  else if (zzAtEOF)
553  {
554  zzInput = YYEOF;
555  goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' maybe in the wrong place
556  }
557  else
558  {
// buffer exhausted: ZzRefill works on the fields, so sync the cached locals around the call
559  // store back cached positions
560  zzCurrentPos = zzCurrentPosL;
561  zzMarkedPos = zzMarkedPosL;
562  bool eof = ZzRefill();
563  // get translated positions and possibly new buffer
564  zzCurrentPosL = zzCurrentPos;
565  zzMarkedPosL = zzMarkedPos;
566  zzBufferL = zzBuffer;
567  zzEndReadL = zzEndRead;
568  if (eof)
569  {
570  zzInput = YYEOF;
571  goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' maybe in the wrong place
572  }
573  else
574  {
575  zzInput = zzBufferL[zzCurrentPosL++];
576  }
577  }
// look up the next state: row offset of the current state plus the character class of the input
578  int zzNext = zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]];
// -1 marks "no transition": stop and act on the last recorded accepting state, if any
579  if (zzNext == - 1)
580  {
581  goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' maybe in the wrong place
582  }
583  zzState = zzNext;
584 
585  int zzAttributes = zzAttrL[zzState];
// attribute bit 1: remember this state as the action to run and mark the match end here
586  if ((zzAttributes & 1) == 1)
587  {
588  zzAction = zzState;
589  zzMarkedPosL = zzCurrentPosL;
// attribute bit 8: stop scanning immediately once this state is reached
590  if ((zzAttributes & 8) == 8)
591  {
592  goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' maybe in the wrong place
593  }
594  }
595  }
596  }
597 
598 zzForAction_brk: ; // {{Aroush-2.9}} this 'label' maybe in the wrong place
599 
600 
601  // store back cached position
602  zzMarkedPos = zzMarkedPosL;
603 
// Map the recorded state to its action number; a negative zzAction (nothing matched)
// is passed through unchanged and therefore lands in the default branch.
604  switch (zzAction < 0?zzAction:ZZ_ACTION[zzAction])
605  {
606 
607  case 4:
608  {
609  return HOST;
610  }
611 
612  case 11: break;
613 
614  case 9:
615  {
616  return ACRONYM;
617  }
618 
619  case 12: break;
620 
621  case 8:
622  {
623  return ACRONYM_DEP;
624  }
625 
626  case 13: break;
627 
// action 1 returns nothing: the matched text is skipped and the outer loop scans again
628  case 1:
629  {
630  /* ignore */
631  }
632  goto case 14;
633 
634  case 14: break;
635 
636  case 5:
637  {
638  return NUM;
639  }
640 
641  case 15: break;
642 
643  case 3:
644  {
645  return CJ;
646  }
647 
648  case 16: break;
649 
650  case 2:
651  {
652  return ALPHANUM;
653  }
654 
655  case 17: break;
656 
657  case 7:
658  {
659  return COMPANY;
660  }
661 
662  case 18: break;
663 
664  case 6:
665  {
666  return APOSTROPHE;
667  }
668 
669  case 19: break;
670 
671  case 10:
672  {
673  return EMAIL;
674  }
675 
676  case 20: break;
677 
// no listed action applies: either the input ended exactly at the start of this attempt
// (report YYEOF and remember it in zzAtEOF) or the input could not be matched
// (ZzScanError throws)
678  default:
679  if (zzInput == YYEOF && zzStartRead == zzCurrentPos)
680  {
681  zzAtEOF = true;
682  return YYEOF;
683  }
684  else
685  {
686  ZzScanError(ZZ_NO_MATCH);
687  }
688  break;
689 
690  }
691  }
692  }
/// <summary> Initializes the static token type fields of this class with the
/// values of the equally named members of <c>StandardTokenizer</c>.
/// </summary>
static StandardTokenizerImpl()
{
    // names of the token types
    TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;

    // token type values
    ALPHANUM = StandardTokenizer.ALPHANUM;
    APOSTROPHE = StandardTokenizer.APOSTROPHE;
    ACRONYM = StandardTokenizer.ACRONYM;
    COMPANY = StandardTokenizer.COMPANY;
    EMAIL = StandardTokenizer.EMAIL;
    HOST = StandardTokenizer.HOST;
    NUM = StandardTokenizer.NUM;
    CJ = StandardTokenizer.CJ;

    // obsolete token type, still mirrored here
    ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
}
706  }
707 }