25 using System.Collections;
26 using System.Globalization;
28 using Lucene.Net.Analysis;
29 using Lucene.Net.Analysis.Tokenattributes;
30 using Lucene.Net.Util;
32 namespace Lucene.Net.Analysis.Cn
// Obtain the term and offset attribute handles through AddAttribute. Elsewhere
// in this class termAtt receives the token text (SetTermBuffer) and offsetAtt
// receives the token and final offsets (SetOffset).
// NOTE(review): the enclosing member's header is not visible in this excerpt.
80 termAtt = AddAttribute<ITermAttribute>();
81 offsetAtt = AddAttribute<IOffsetAttribute>();
// Read-position bookkeeping.
//   offset      - running position used for token start and final offsets.
//   bufferIndex - index of the next unread char in ioBuffer.
//   dataLen     - count returned by the most recent input.Read into ioBuffer.
private int offset = 0;
private int bufferIndex = 0;
private int dataLen = 0;

// Capacity of the token buffer; a token is flushed once it reaches this length.
private static readonly int MAX_WORD_LEN = 255;

// Capacity of the read buffer that is filled from the input reader.
private static readonly int IO_BUFFER_SIZE = 1024;

// Characters of the token currently being assembled.
private readonly char[] buffer = new char[MAX_WORD_LEN];

// Raw characters read from the input, consumed one at a time.
private readonly char[] ioBuffer = new char[IO_BUFFER_SIZE];
/// <summary>
/// Appends one character, lower-cased, to the token currently being assembled.
/// </summary>
/// <remarks>
/// When the pending token is empty, the token start is recorded as
/// <c>offset - 1</c> before the character is stored.
/// </remarks>
/// <param name="c">The character to append to the token buffer.</param>
private void Push(char c)
{
    if (length == 0)
    {
        // First character of a new token: remember where it begins.
        // NOTE(review): "offset - 1" presumes the caller has already advanced
        // offset past this character - confirm against IncrementToken.
        start = offset - 1;
    }

    // Lower-case with invariant rules. Char.ToLower(char) follows the current
    // thread culture (under tr-TR, 'I' maps to dotless 'ı'), which would make
    // the emitted terms depend on the locale of the machine doing the indexing
    // or searching.
    buffer[length++] = char.ToLowerInvariant(c);
}
// NOTE(review): presumably part of Flush() - the method header and the
// surrounding condition are not visible in this excerpt.
// Publish the pending token: the first `length` chars of `buffer` become the term text.
107 termAtt.SetTermBuffer(buffer, 0, length);
// Token offsets are start and start + length, each mapped through CorrectOffset.
108 offsetAtt.
SetOffset(CorrectOffset(start), CorrectOffset(start + length));
/// <summary>
/// Advances the stream to the next token. Characters are read from
/// <c>input</c> into <c>ioBuffer</c>, taken one at a time, and dispatched on
/// their Unicode category; the return paths visible here go through <c>Flush()</c>.
/// </summary>
/// <returns>On the paths visible in this excerpt, the result of <c>Flush()</c>.</returns>
116 public override bool IncrementToken()
// Everything previously read has been consumed: refill ioBuffer from the reader.
// NOTE(review): if input is a System.IO.TextReader, Read returns 0 (not -1) at
// end of input - confirm the end-of-stream test on the elided lines matches.
130 if (bufferIndex >= dataLen)
132 dataLen = input.Read(ioBuffer, 0, ioBuffer.Length);
// Take the next buffered character and advance the read index.
142 c = ioBuffer[bufferIndex++];
// Dispatch on the character's Unicode general category.
145 switch (
char.GetUnicodeCategory(c))
// Decimal digits and lower/upper-case letters share one branch.
148 case UnicodeCategory.DecimalDigitNumber:
149 case UnicodeCategory.LowercaseLetter:
150 case UnicodeCategory.UppercaseLetter:
// The token buffer is full: emit what has been collected.
152 if (length == MAX_WORD_LEN)
return Flush();
// OtherLetter has its own branch; its body is not fully visible in this excerpt.
155 case UnicodeCategory.OtherLetter:
// A non-empty pending token is emitted here.
166 if (length > 0)
return Flush();
/// <summary>
/// Sets both the start and the end offset of the offset attribute to the
/// corrected value of <c>offset</c>.
/// </summary>
172 public override sealed
void End()
// Map the current offset through CorrectOffset before publishing it.
175 int finalOffset = CorrectOffset(offset);
// Start and end are the same value, so the final offset span is empty.
176 this.offsetAtt.SetOffset(finalOffset, finalOffset);
/// <summary>
/// Sets <c>offset</c>, <c>bufferIndex</c> and <c>dataLen</c> back to zero.
/// </summary>
179 public override void Reset()
// Chained assignment: all three counters become 0.
182 offset = bufferIndex = dataLen = 0;
185 public override void Reset(TextReader input)