23 using System.Collections.Generic;
25 using System.Collections;
26 using System.Globalization;
28 using Lucene.Net.Analysis;
29 using Lucene.Net.Analysis.Tokenattributes;
31 namespace Lucene.Net.Analysis.Cn
// English stop words, all written in lower case. This array is the source
// collection from which the stopTable set is built, and stopTable is what
// IncrementToken checks each term against before examining the Unicode
// category of the term's first character.
// NOTE(review): the field is public, static and not readonly, so any caller
// can replace the array or overwrite individual entries; a set that has
// already been built from it would not be expected to see such changes -
// confirm against CharArraySet's copy semantics.
51 public static String[] STOP_WORDS =
53 "and",
"are",
"as",
"at",
"be",
"but",
"by",
54 "for",
"if",
"in",
"into",
"is",
"it",
55 "no",
"not",
"of",
"on",
"or",
"such",
56 "that",
"the",
"their",
"then",
"there",
"these",
57 "they",
"this",
"to",
"was",
"will",
"with"
// Build the stop-word lookup set from STOP_WORDS. The array is cast to
// IEnumerable<string> explicitly - presumably to select a specific
// CharArraySet constructor overload; TODO confirm.
// NOTE(review): the second argument (false) looks like CharArraySet's
// ignoreCase flag, which would make stop-word matching case-sensitive -
// confirm against the CharArraySet constructor.
66 stopTable =
new CharArraySet((IEnumerable<string>)STOP_WORDS,
false);
// Obtain the term attribute; IncrementToken reads each token's character
// buffer and length through it.
67 termAtt = AddAttribute<ITermAttribute>();
70 public override bool IncrementToken()
72 while (input.IncrementToken())
74 char[] text = termAtt.TermBuffer();
75 int termLength = termAtt.TermLength();
78 if (!stopTable.Contains(text, 0, termLength))
80 switch (
char.GetUnicodeCategory(text[0]))
82 case UnicodeCategory.LowercaseLetter:
83 case UnicodeCategory.UppercaseLetter:
90 case UnicodeCategory.OtherLetter: