19 using Lucene.Net.Analysis.Tokenattributes;
20 using Lucene.Net.Util;
22 using Token = Lucene.Net.Analysis.Token;
27 namespace Lucene.Net.Analysis.Standard
58 private void InitBlock()
// Integer ids for the token types. Each value is also the index of the
// matching label in TOKEN_TYPES (ALPHANUM = 0 -> "<ALPHANUM>", and so on).
65 public const int ALPHANUM = 0;
66 public const int APOSTROPHE = 1;
67 public const int ACRONYM = 2;
68 public const int COMPANY = 3;
69 public const int EMAIL = 4;
70 public const int HOST = 5;
71 public const int NUM = 6;
72 public const int CJ = 7;
// Deprecated id. Per the attribute message it relates to a bug in which
// HOSTs ending in '.' were identified as ACRONYMs.
77 [Obsolete(
"this solves a bug where HOSTs that end with '.' are identified as ACRONYMs.")]
78 public const int ACRONYM_DEP = 8;
// Angle-bracketed labels for the token types, stored at the index equal to
// the corresponding id constant (ALPHANUM = 0 through ACRONYM_DEP = 8).
// Keep the order in sync with those constants.
81 public static readonly System.String[] TOKEN_TYPES =
new System.String[]{
"<ALPHANUM>",
"<APOSTROPHE>",
"<ACRONYM>",
"<COMPANY>",
"<EMAIL>",
"<HOST>",
"<NUM>",
"<CJ>",
"<ACRONYM_DEP>"};
// Flag tested in IncrementToken; assigned by Init (from the match version)
// and by SetReplaceInvalidAcronym.
83 private bool replaceInvalidAcronym;
// Limit that IncrementToken compares scanner.Yylength() against.
85 private int maxTokenLength;
// Reads or replaces the token length limit. The setter stores the value
// as-is, with no range check.
90 public int MaxTokenLength
92 get {
return maxTokenLength; }
93 set { this.maxTokenLength = value; }
110 Init(input, matchVersion);
118 Init(input, matchVersion);
128 Init(input, matchVersion);
// Shared setup helper: chooses the replaceInvalidAcronym default from
// matchVersion and caches the attribute instances used by IncrementToken/End.
131 private void Init(System.IO.TextReader input,
Version matchVersion)
// Versions on or after LUCENE_24 turn acronym replacement on.
133 if (matchVersion.OnOrAfter(
Version.LUCENE_24))
135 replaceInvalidAcronym =
true;
// NOTE(review): presumably the else branch for older versions - confirm.
139 replaceInvalidAcronym =
false;
// Obtain the term, offset, position-increment and type attributes through
// AddAttribute and keep them in fields.
142 termAtt = AddAttribute<ITermAttribute>();
143 offsetAtt = AddAttribute<IOffsetAttribute>();
144 posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
145 typeAtt = AddAttribute<ITypeAttribute>();
// Pulls the next token from the scanner and populates the term, offset and
// position-increment attributes for it.
159 public override bool IncrementToken()
// The scanner's return value is kept as the token's type id.
166 int tokenType = scanner.GetNextToken();
// Tokens whose scanned length is within maxTokenLength are populated below.
173 if (scanner.Yylength() <= maxTokenLength)
175 posIncrAtt.PositionIncrement = posIncr;
// NOTE(review): presumably copies the matched text into termAtt - confirm
// against the scanner implementation.
176 scanner.GetText(termAtt);
177 int start = scanner.Yychar();
// Offsets run from start to start + term length, each mapped through
// CorrectOffset.
178 offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + termAtt.TermLength()));
// NOTE(review): the shortening below appears to be gated by this flag;
// verify the exact nesting and any token-type condition.
184 if (replaceInvalidAcronym)
// Drops the last character of the term.
187 termAtt.SetTermLength(termAtt.TermLength() - 1);
// Sets the final offset: start and end are both set to the corrected value
// of scanner.Yychar() + scanner.Yylength().
207 public override void End()
210 int finalOffset = CorrectOffset(scanner.Yychar() + scanner.Yylength());
211 offsetAtt.SetOffset(finalOffset, finalOffset);
// Re-targets the tokenizer at a new reader by forwarding it to the scanner.
214 public override void Reset(System.IO.TextReader reader)
217 scanner.Reset(reader);
// Explicitly overrides the replaceInvalidAcronym flag that Init derives from
// the match version. Obsolete: per the attribute message, to be removed in
// 3.X with true as the only valid value (LUCENE-1068).
226 [Obsolete(
"Remove in 3.X and make true the only valid value. See https://issues.apache.org/jira/browse/LUCENE-1068")]
227 public void SetReplaceInvalidAcronym(
bool replaceInvalidAcronym)
// "this." is needed because the parameter shadows the field.
229 this.replaceInvalidAcronym = replaceInvalidAcronym;