Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
Token.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using Lucene.Net.Analysis.Tokenattributes;
20 using Lucene.Net.Support;
21 using Lucene.Net.Util;
22 using Payload = Lucene.Net.Index.Payload;
23 using TermPositions = Lucene.Net.Index.TermPositions;
24 using ArrayUtil = Lucene.Net.Util.ArrayUtil;
25 using Attribute = Lucene.Net.Util.Attribute;
26 
27 namespace Lucene.Net.Analysis
28 {
29 
30  /// <summary>A Token is an occurrence of a term from the text of a field. It consists of
31  /// a term's text, the start and end offset of the term in the text of the field,
32  /// and a type string.
33  /// <p/>
34  /// The start and end offsets permit applications to re-associate a token with
35  /// its source text, e.g., to display highlighted query terms in a document
36  /// browser, or to show matching text fragments in a <abbr
37  /// title="KeyWord In Context">KWIC</abbr> display, etc.
38  /// <p/>
39  /// The type is a string, assigned by a lexical analyzer
40  /// (a.k.a. tokenizer), naming the lexical or syntactic class that the token
41  /// belongs to. For example an end of sentence marker token might be implemented
42  /// with type "eos". The default token type is "word".
43  /// <p/>
44  /// A Token can optionally have metadata (a.k.a. Payload) in the form of a variable
45  /// length byte array. Use <see cref="TermPositions.PayloadLength" /> and
46  /// <see cref="TermPositions.GetPayload(byte[], int)" /> to retrieve the payloads from the index.
47  /// </summary>
48  /// <summary><br/><br/>
49  /// </summary>
50  /// <summary><p/><b>NOTE:</b> As of 2.9, Token implements all <see cref="IAttribute" /> interfaces
51  /// that are part of core Lucene and can be found in the <see cref="Lucene.Net.Analysis.Tokenattributes"/> namespace.
52  /// Even though it is not necessary to use Token anymore, with the new TokenStream API it can
53  /// be used as convenience class that implements all <see cref="IAttribute" />s, which is especially useful
54  /// to easily switch from the old to the new TokenStream API.
55  /// <br/><br/>
56  /// <p/>Tokenizers and TokenFilters should try to re-use a Token instance when
57  /// possible for best performance, by implementing the
58  /// <see cref="TokenStream.IncrementToken()" /> API.
59  /// Failing that, to create a new Token you should first use
60  /// one of the constructors that starts with null text. To load
61  /// the token from a char[] use <see cref="SetTermBuffer(char[], int, int)" />.
62  /// To load from a String use <see cref="SetTermBuffer(String)" /> or <see cref="SetTermBuffer(String, int, int)" />.
63  /// Alternatively you can get the Token's termBuffer by calling either <see cref="TermBuffer()" />,
64  /// if you know that your text is shorter than the capacity of the termBuffer
65  /// or <see cref="ResizeTermBuffer(int)" />, if there is any possibility
66  /// that you may need to grow the buffer. Fill in the characters of your term into this
67  /// buffer, with <see cref="string.ToCharArray(int, int)" /> if loading from a string,
68  /// or with <see cref="Array.Copy(Array, long, Array, long, long)" />, and finally call <see cref="SetTermLength(int)" /> to
69  /// set the length of the term text. See <a target="_top"
70  /// href="https://issues.apache.org/jira/browse/LUCENE-969">LUCENE-969</a>
71  /// for details.<p/>
72  /// <p/>Typical Token reuse patterns:
73  /// <list type="bullet">
74  /// <item> Copying text from a string (type is reset to <see cref="DEFAULT_TYPE" /> if not
75  /// specified):<br/>
76  /// <code>
77  /// return reusableToken.reinit(string, startOffset, endOffset[, type]);
78  /// </code>
79  /// </item>
80  /// <item> Copying some text from a string (type is reset to <see cref="DEFAULT_TYPE" />
81  /// if not specified):<br/>
82  /// <code>
83  /// return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]);
84  /// </code>
85  /// </item>
86  /// <item> Copying text from char[] buffer (type is reset to <see cref="DEFAULT_TYPE" />
87  /// if not specified):<br/>
88  /// <code>
89  /// return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
90  /// </code>
91  /// </item>
92  /// <item> Copying some text from a char[] buffer (type is reset to
93  /// <see cref="DEFAULT_TYPE" /> if not specified):<br/>
94  /// <code>
95  /// return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]);
96  /// </code>
97  /// </item>
 /// <item> Copying from one Token to another (type is reset to
99  /// <see cref="DEFAULT_TYPE" /> if not specified):<br/>
100  /// <code>
101  /// return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
102  /// </code>
103  /// </item>
104  /// </list>
105  /// A few things to note:
106  /// <list type="bullet">
107  /// <item>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</item>
108  /// <item>Because <c>TokenStreams</c> can be chained, one cannot assume that the <c>Token's</c> current type is correct.</item>
 /// <item>The startOffset and endOffset represent the start and end offset in the
110  /// source text, so be careful in adjusting them.</item>
111  /// <item>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</item>
112  /// </list>
113  /// <p/>
114  /// </summary>
115  /// <seealso cref="Lucene.Net.Index.Payload">
116  /// </seealso>
117  [Serializable]
119  {
        // Lexical type assigned when no explicit type is provided.
        public const String DEFAULT_TYPE = "word";

        // Smallest termBuffer ever allocated (see InitTermBuffer/GrowTermBuffer).
        private const int MIN_BUFFER_SIZE = 10;

        private char[] termBuffer;            // lazily allocated storage for the term text
        private int termLength;               // number of valid characters in termBuffer
        private int startOffset, endOffset;   // character offsets of this token in the source text
        private string type = DEFAULT_TYPE;   // lexical type, defaults to "word"
        private int flags;                    // experimental bit flags (see Flags property)
        private Payload payload;              // optional per-token metadata, may be null
        private int positionIncrement = 1;    // distance from the previous token, default 1
131 
        /// <summary>Constructs a Token with null text. </summary>
        public Token()
        {
        }
136 
137  /// <summary>Constructs a Token with null text and start &amp; end
138  /// offsets.
139  /// </summary>
140  /// <param name="start">start offset in the source text</param>
141  /// <param name="end">end offset in the source text</param>
142  public Token(int start, int end)
143  {
144  startOffset = start;
145  endOffset = end;
146  }
147 
148  /// <summary>Constructs a Token with null text and start &amp; end
149  /// offsets plus the Token type.
150  /// </summary>
151  /// <param name="start">start offset in the source text</param>
152  /// <param name="end">end offset in the source text</param>
153  /// <param name="typ">the lexical type of this Token</param>
154  public Token(int start, int end, String typ)
155  {
156  startOffset = start;
157  endOffset = end;
158  type = typ;
159  }
160 
161  /// <summary> Constructs a Token with null text and start &amp; end
162  /// offsets plus flags. NOTE: flags is EXPERIMENTAL.
163  /// </summary>
164  /// <param name="start">start offset in the source text</param>
165  /// <param name="end">end offset in the source text</param>
166  /// <param name="flags">The bits to set for this token</param>
167  public Token(int start, int end, int flags)
168  {
169  startOffset = start;
170  endOffset = end;
171  this.flags = flags;
172  }
173 
174  /// <summary>Constructs a Token with the given term text, and start
175  /// &amp; end offsets. The type defaults to "word."
176  /// <b>NOTE:</b> for better indexing speed you should
177  /// instead use the char[] termBuffer methods to set the
178  /// term text.
179  /// </summary>
180  /// <param name="text">term text</param>
181  /// <param name="start">start offset</param>
182  /// <param name="end">end offset</param>
183  public Token(String text, int start, int end)
184  {
185  SetTermBuffer(text);
186  startOffset = start;
187  endOffset = end;
188  }
189 
190  /// <summary>Constructs a Token with the given text, start and end
191  /// offsets, &amp; type. <b>NOTE:</b> for better indexing
192  /// speed you should instead use the char[] termBuffer
193  /// methods to set the term text.
194  /// </summary>
195  /// <param name="text">term text</param>
196  /// <param name="start">start offset</param>
197  /// <param name="end">end offset</param>
198  /// <param name="typ">token type</param>
199  public Token(System.String text, int start, int end, System.String typ)
200  {
201  SetTermBuffer(text);
202  startOffset = start;
203  endOffset = end;
204  type = typ;
205  }
206 
207  /// <summary> Constructs a Token with the given text, start and end
208  /// offsets, &amp; type. <b>NOTE:</b> for better indexing
209  /// speed you should instead use the char[] termBuffer
210  /// methods to set the term text.
211  /// </summary>
212  /// <param name="text"></param>
213  /// <param name="start"></param>
214  /// <param name="end"></param>
215  /// <param name="flags">token type bits</param>
216  public Token(System.String text, int start, int end, int flags)
217  {
218  SetTermBuffer(text);
219  startOffset = start;
220  endOffset = end;
221  this.flags = flags;
222  }
223 
224  /// <summary> Constructs a Token with the given term buffer (offset
225  /// &amp; length), start and end
226  /// offsets
227  /// </summary>
228  /// <param name="startTermBuffer"></param>
229  /// <param name="termBufferOffset"></param>
230  /// <param name="termBufferLength"></param>
231  /// <param name="start"></param>
232  /// <param name="end"></param>
233  public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end)
234  {
235  SetTermBuffer(startTermBuffer, termBufferOffset, termBufferLength);
236  startOffset = start;
237  endOffset = end;
238  }
239 
240  /// <summary>Set the position increment. This determines the position of this token
241  /// relative to the previous Token in a <see cref="TokenStream" />, used in phrase
242  /// searching.
243  ///
244  /// <p/>The default value is one.
245  ///
246  /// <p/>Some common uses for this are:<list>
247  ///
248  /// <item>Set it to zero to put multiple terms in the same position. This is
249  /// useful if, e.g., a word has multiple stems. Searches for phrases
250  /// including either stem will match. In this case, all but the first stem's
251  /// increment should be set to zero: the increment of the first instance
252  /// should be one. Repeating a token with an increment of zero can also be
253  /// used to boost the scores of matches on that token.</item>
254  ///
255  /// <item>Set it to values greater than one to inhibit exact phrase matches.
256  /// If, for example, one does not want phrases to match across removed stop
257  /// words, then one could build a stop word filter that removes stop words and
258  /// also sets the increment to the number of stop words removed before each
259  /// non-stop word. Then exact phrase queries will only match when the terms
260  /// occur with no intervening stop words.</item>
261  ///
262  /// </list>
263  /// </summary>
264  /// <value> the distance from the prior term </value>
265  /// <seealso cref="Lucene.Net.Index.TermPositions">
266  /// </seealso>
267  public virtual int PositionIncrement
268  {
269  set
270  {
271  if (value < 0)
272  throw new System.ArgumentException("Increment must be zero or greater: " + value);
273  this.positionIncrement = value;
274  }
275  get { return positionIncrement; }
276  }
277 
278  /// <summary>Returns the Token's term text.
279  ///
280  /// This method has a performance penalty
281  /// because the text is stored internally in a char[]. If
282  /// possible, use <see cref="TermBuffer()" /> and <see cref="TermLength()"/>
283  /// directly instead. If you really need a
284  /// String, use this method, which is nothing more than
285  /// a convenience call to <b>new String(token.termBuffer(), 0, token.termLength())</b>
286  /// </summary>
287  public string Term
288  {
289  get
290  {
291  InitTermBuffer();
292  return new System.String(termBuffer, 0, termLength);
293  }
294  }
295 
296  /// <summary>Copies the contents of buffer, starting at offset for
297  /// length characters, into the termBuffer array.
298  /// </summary>
299  /// <param name="buffer">the buffer to copy</param>
300  /// <param name="offset">the index in the buffer of the first character to copy</param>
301  /// <param name="length">the number of characters to copy</param>
302  public void SetTermBuffer(char[] buffer, int offset, int length)
303  {
304  GrowTermBuffer(length);
305  Array.Copy(buffer, offset, termBuffer, 0, length);
306  termLength = length;
307  }
308 
309  /// <summary>Copies the contents of buffer into the termBuffer array.</summary>
310  /// <param name="buffer">the buffer to copy
311  /// </param>
312  public void SetTermBuffer(System.String buffer)
313  {
314  int length = buffer.Length;
315  GrowTermBuffer(length);
316  TextSupport.GetCharsFromString(buffer, 0, length, termBuffer, 0);
317  termLength = length;
318  }
319 
320  /// <summary>Copies the contents of buffer, starting at offset and continuing
321  /// for length characters, into the termBuffer array.
322  /// </summary>
323  /// <param name="buffer">the buffer to copy
324  /// </param>
325  /// <param name="offset">the index in the buffer of the first character to copy
326  /// </param>
327  /// <param name="length">the number of characters to copy
328  /// </param>
329  public void SetTermBuffer(System.String buffer, int offset, int length)
330  {
331  System.Diagnostics.Debug.Assert(offset <= buffer.Length);
332  System.Diagnostics.Debug.Assert(offset + length <= buffer.Length);
333  GrowTermBuffer(length);
334  TextSupport.GetCharsFromString(buffer, offset, offset + length, termBuffer, 0);
335  termLength = length;
336  }
337 
338  /// <summary>Returns the internal termBuffer character array which
339  /// you can then directly alter. If the array is too
340  /// small for your token, use <see cref="ResizeTermBuffer(int)" />
341  /// to increase it. After
342  /// altering the buffer be sure to call <see cref="SetTermLength" />
343  /// to record the number of valid
344  /// characters that were placed into the termBuffer.
345  /// </summary>
346  public char[] TermBuffer()
347  {
348  InitTermBuffer();
349  return termBuffer;
350  }
351 
352  /// <summary>Grows the termBuffer to at least size newSize, preserving the
353  /// existing content. Note: If the next operation is to change
354  /// the contents of the term buffer use
355  /// <see cref="SetTermBuffer(char[], int, int)" />,
356  /// <see cref="SetTermBuffer(String)" />, or
357  /// <see cref="SetTermBuffer(String, int, int)" />
358  /// to optimally combine the resize with the setting of the termBuffer.
359  /// </summary>
360  /// <param name="newSize">minimum size of the new termBuffer
361  /// </param>
362  /// <returns> newly created termBuffer with length >= newSize
363  /// </returns>
364  public virtual char[] ResizeTermBuffer(int newSize)
365  {
366  if (termBuffer == null)
367  {
368  termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize)];
369  }
370  else
371  {
372  if (termBuffer.Length < newSize)
373  {
374  // Not big enough; create a new array with slight
375  // over allocation and preserve content
376  var newCharBuffer = new char[ArrayUtil.GetNextSize(newSize)];
377  Array.Copy(termBuffer, 0, newCharBuffer, 0, termBuffer.Length);
378  termBuffer = newCharBuffer;
379  }
380  }
381  return termBuffer;
382  }
383 
384  /// <summary>Allocates a buffer char[] of at least newSize, without preserving the existing content.
385  /// its always used in places that set the content
386  /// </summary>
387  /// <param name="newSize">minimum size of the buffer
388  /// </param>
389  private void GrowTermBuffer(int newSize)
390  {
391  if (termBuffer == null)
392  {
393  // The buffer is always at least MIN_BUFFER_SIZE
394  termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE?MIN_BUFFER_SIZE:newSize)];
395  }
396  else
397  {
398  if (termBuffer.Length < newSize)
399  {
400  // Not big enough; create a new array with slight
401  // over allocation:
402  termBuffer = new char[ArrayUtil.GetNextSize(newSize)];
403  }
404  }
405  }
406 
407  private void InitTermBuffer()
408  {
409  if (termBuffer == null)
410  {
411  termBuffer = new char[ArrayUtil.GetNextSize(MIN_BUFFER_SIZE)];
412  termLength = 0;
413  }
414  }
415 
416  /// <summary>Return number of valid characters (length of the term)
417  /// in the termBuffer array.
418  /// </summary>
419  public int TermLength()
420  {
421  InitTermBuffer();
422  return termLength;
423  }
424 
425  /// <summary>Set number of valid characters (length of the term) in
426  /// the termBuffer array. Use this to truncate the termBuffer
427  /// or to synchronize with external manipulation of the termBuffer.
428  /// Note: to grow the size of the array,
429  /// use <see cref="ResizeTermBuffer(int)" /> first.
430  /// </summary>
431  /// <param name="length">the truncated length
432  /// </param>
433  public void SetTermLength(int length)
434  {
435  InitTermBuffer();
436  if (length > termBuffer.Length)
437  throw new System.ArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.Length + ")");
438  termLength = length;
439  }
440 
441  /// <summary>Gets or sets this Token's starting offset, the position of the first character
442  /// corresponding to this token in the source text.
443  /// Note that the difference between endOffset() and startOffset() may not be
444  /// equal to <see cref="TermLength"/>, as the term text may have been altered by a
445  /// stemmer or some other filter.
446  /// </summary>
447  public virtual int StartOffset
448  {
449  get { return startOffset; }
450  set { this.startOffset = value; }
451  }
452 
453  /// <summary>Gets or sets this Token's ending offset, one greater than the position of the
454  /// last character corresponding to this token in the source text. The length
455  /// of the token in the source text is (endOffset - startOffset).
456  /// </summary>
457  public virtual int EndOffset
458  {
459  get { return endOffset; }
460  set { this.endOffset = value; }
461  }
462 
463  /// <summary>Set the starting and ending offset.
464  /// See StartOffset() and EndOffset()
465  /// </summary>
466  public virtual void SetOffset(int startOffset, int endOffset)
467  {
468  this.startOffset = startOffset;
469  this.endOffset = endOffset;
470  }
471 
472  /// <summary>Returns this Token's lexical type. Defaults to "word". </summary>
473  public string Type
474  {
475  get { return type; }
476  set { this.type = value; }
477  }
478 
479  /// <summary> EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long.
480  /// <p/>
481  ///
482  /// Get the bitset for any bits that have been set. This is completely distinct from <see cref="Type()" />, although they do share similar purposes.
483  /// The flags can be used to encode information about the token for use by other <see cref="TokenFilter"/>s.
484  ///
485  ///
486  /// </summary>
487  /// <value> The bits </value>
488  public virtual int Flags
489  {
490  get { return flags; }
491  set { flags = value; }
492  }
493 
494  /// <summary> Returns this Token's payload.</summary>
495  public virtual Payload Payload
496  {
497  get { return payload; }
498  set { payload = value; }
499  }
500 
501  public override String ToString()
502  {
503  var sb = new System.Text.StringBuilder();
504  sb.Append('(');
505  InitTermBuffer();
506  if (termBuffer == null)
507  sb.Append("null");
508  else
509  sb.Append(termBuffer, 0, termLength);
510  sb.Append(',').Append(startOffset).Append(',').Append(endOffset);
511  if (!type.Equals("word"))
512  sb.Append(",type=").Append(type);
513  if (positionIncrement != 1)
514  sb.Append(",posIncr=").Append(positionIncrement);
515  sb.Append(')');
516  return sb.ToString();
517  }
518 
        /// <summary>Resets the term text, payload, flags, positionIncrement,
        /// startOffset, endOffset and token type to their default values.
        /// The termBuffer array itself is deliberately kept (only its logical
        /// length is zeroed) so the allocation can be re-used.
        /// </summary>
        public override void Clear()
        {
            payload = null;
            // Leave termBuffer to allow re-use
            termLength = 0;
            positionIncrement = 1;
            flags = 0;
            startOffset = endOffset = 0;
            type = DEFAULT_TYPE;
        }
532 
        /// <summary>Returns a deep copy of this Token: base.Clone() performs the
        /// shallow field copy, then the termBuffer and payload are cloned so the
        /// copy shares no mutable state with this instance.
        /// </summary>
        public override System.Object Clone()
        {
            var t = (Token) base.Clone();
            // Do a deep clone
            if (termBuffer != null)
            {
                t.termBuffer = new char[termBuffer.Length];
                termBuffer.CopyTo(t.termBuffer, 0);
            }
            if (payload != null)
            {
                t.payload = (Payload) payload.Clone();
            }
            return t;
        }
548 
549  /// <summary>Makes a clone, but replaces the term buffer &amp;
550  /// start/end offset in the process. This is more
551  /// efficient than doing a full clone (and then calling
552  /// setTermBuffer) because it saves a wasted copy of the old
553  /// termBuffer.
554  /// </summary>
555  public virtual Token Clone(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset)
556  {
557  var t = new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset)
558  {positionIncrement = positionIncrement, flags = flags, type = type};
559  if (payload != null)
560  t.payload = (Payload) payload.Clone();
561  return t;
562  }
563 
564  public override bool Equals(Object obj)
565  {
566  if (obj == this)
567  return true;
568 
569  var other = obj as Token;
570  if (other == null)
571  return false;
572 
573  InitTermBuffer();
574  other.InitTermBuffer();
575 
576  if (termLength == other.termLength && startOffset == other.startOffset && endOffset == other.endOffset &&
577  flags == other.flags && positionIncrement == other.positionIncrement && SubEqual(type, other.type) &&
578  SubEqual(payload, other.payload))
579  {
580  for (int i = 0; i < termLength; i++)
581  if (termBuffer[i] != other.termBuffer[i])
582  return false;
583  return true;
584  }
585  return false;
586  }
587 
588  private bool SubEqual(System.Object o1, System.Object o2)
589  {
590  if (o1 == null)
591  return o2 == null;
592  return o1.Equals(o2);
593  }
594 
        /// <summary>Computes a hash code consistent with
        /// <see cref="Equals(Object)"/>: folds term length, offsets, flags,
        /// position increment, type, optional payload and the term characters
        /// together with the conventional *31 mixing.
        /// </summary>
        public override int GetHashCode()
        {
            InitTermBuffer();
            int code = termLength;
            code = code * 31 + startOffset;
            code = code * 31 + endOffset;
            code = code * 31 + flags;
            code = code * 31 + positionIncrement;
            code = code * 31 + type.GetHashCode();
            // A null payload simply contributes nothing (no extra *31 step).
            code = (payload == null?code:code * 31 + payload.GetHashCode());
            code = code * 31 + ArrayUtil.HashCode(termBuffer, 0, termLength);
            return code;
        }
608 
        // like clear() but doesn't clear termBuffer/text:
        // used by the Reinit overloads, which overwrite the term right afterwards.
        private void ClearNoTermBuffer()
        {
            payload = null;
            positionIncrement = 1;
            flags = 0;
            startOffset = endOffset = 0;
            type = DEFAULT_TYPE;
        }
618 
619  /// <summary>Shorthand for calling <see cref="Clear" />,
620  /// <see cref="SetTermBuffer(char[], int, int)" />,
621  /// <see cref="StartOffset" />,
622  /// <see cref="EndOffset" />,
623  /// <see cref="Type" />
624  /// </summary>
625  /// <returns> this Token instance
626  /// </returns>
627  public virtual Token Reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, System.String newType)
628  {
629  ClearNoTermBuffer();
630  payload = null;
631  positionIncrement = 1;
632  SetTermBuffer(newTermBuffer, newTermOffset, newTermLength);
633  startOffset = newStartOffset;
634  endOffset = newEndOffset;
635  type = newType;
636  return this;
637  }
638 
639  /// <summary>Shorthand for calling <see cref="Clear" />,
640  /// <see cref="SetTermBuffer(char[], int, int)" />,
641  /// <see cref="StartOffset" />,
642  /// <see cref="EndOffset" />
643  /// <see cref="Type" /> on Token.DEFAULT_TYPE
644  /// </summary>
645  /// <returns> this Token instance
646  /// </returns>
647  public virtual Token Reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset)
648  {
649  ClearNoTermBuffer();
650  SetTermBuffer(newTermBuffer, newTermOffset, newTermLength);
651  startOffset = newStartOffset;
652  endOffset = newEndOffset;
653  type = DEFAULT_TYPE;
654  return this;
655  }
656 
657  /// <summary>Shorthand for calling <see cref="Clear" />,
658  /// <see cref="SetTermBuffer(String)" />,
659  /// <see cref="StartOffset" />,
660  /// <see cref="EndOffset" />
661  /// <see cref="Type" />
662  /// </summary>
663  /// <returns> this Token instance
664  /// </returns>
665  public virtual Token Reinit(System.String newTerm, int newStartOffset, int newEndOffset, System.String newType)
666  {
667  ClearNoTermBuffer();
668  SetTermBuffer(newTerm);
669  startOffset = newStartOffset;
670  endOffset = newEndOffset;
671  type = newType;
672  return this;
673  }
674 
675  /// <summary>Shorthand for calling <see cref="Clear" />,
676  /// <see cref="SetTermBuffer(String, int, int)" />,
677  /// <see cref="StartOffset" />,
678  /// <see cref="EndOffset" />
679  /// <see cref="Type" />
680  /// </summary>
681  /// <returns> this Token instance
682  /// </returns>
683  public virtual Token Reinit(System.String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, System.String newType)
684  {
685  ClearNoTermBuffer();
686  SetTermBuffer(newTerm, newTermOffset, newTermLength);
687  startOffset = newStartOffset;
688  endOffset = newEndOffset;
689  type = newType;
690  return this;
691  }
692 
693  /// <summary>Shorthand for calling <see cref="Clear" />,
694  /// <see cref="SetTermBuffer(String)" />,
695  /// <see cref="StartOffset" />,
696  /// <see cref="EndOffset" />
697  /// <see cref="Type" /> on Token.DEFAULT_TYPE
698  /// </summary>
699  /// <returns> this Token instance
700  /// </returns>
701  public virtual Token Reinit(System.String newTerm, int newStartOffset, int newEndOffset)
702  {
703  ClearNoTermBuffer();
704  SetTermBuffer(newTerm);
705  startOffset = newStartOffset;
706  endOffset = newEndOffset;
707  type = DEFAULT_TYPE;
708  return this;
709  }
710 
711  /// <summary>Shorthand for calling <see cref="Clear" />,
712  /// <see cref="SetTermBuffer(String, int, int)" />,
713  /// <see cref="StartOffset" />,
714  /// <see cref="EndOffset" />
715  /// <see cref="Type" /> on Token.DEFAULT_TYPE
716  /// </summary>
717  /// <returns> this Token instance
718  /// </returns>
719  public virtual Token Reinit(System.String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset)
720  {
721  ClearNoTermBuffer();
722  SetTermBuffer(newTerm, newTermOffset, newTermLength);
723  startOffset = newStartOffset;
724  endOffset = newEndOffset;
725  type = DEFAULT_TYPE;
726  return this;
727  }
728 
        /// <summary> Copy the prototype token's fields into this one.
        /// The term text is copied into this token's own buffer, but the
        /// payload reference is shared, not cloned — mutating it affects both
        /// tokens.</summary>
        /// <param name="prototype">token to copy term, offsets, type, flags,
        /// position increment and payload from
        /// </param>
        public virtual void Reinit(Token prototype)
        {
            // Ensure the prototype's buffer exists before reading it.
            prototype.InitTermBuffer();
            SetTermBuffer(prototype.termBuffer, 0, prototype.termLength);
            positionIncrement = prototype.positionIncrement;
            flags = prototype.flags;
            startOffset = prototype.startOffset;
            endOffset = prototype.endOffset;
            type = prototype.type;
            payload = prototype.payload;
        }
743 
        /// <summary> Copy the prototype token's fields into this one, with a
        /// different term. The payload reference is shared, not cloned.</summary>
        /// <param name="prototype">token to copy offsets, type, flags,
        /// position increment and payload from
        /// </param>
        /// <param name="newTerm">replacement term text
        /// </param>
        public virtual void Reinit(Token prototype, System.String newTerm)
        {
            SetTermBuffer(newTerm);
            positionIncrement = prototype.positionIncrement;
            flags = prototype.flags;
            startOffset = prototype.startOffset;
            endOffset = prototype.endOffset;
            type = prototype.type;
            payload = prototype.payload;
        }
759 
        /// <summary> Copy the prototype token's fields into this one, with a
        /// different term. The payload reference is shared, not cloned.</summary>
        /// <param name="prototype">token to copy offsets, type, flags,
        /// position increment and payload from
        /// </param>
        /// <param name="newTermBuffer">buffer holding the replacement term text
        /// </param>
        /// <param name="offset">index of the first character of the new term
        /// </param>
        /// <param name="length">number of characters in the new term
        /// </param>
        public virtual void Reinit(Token prototype, char[] newTermBuffer, int offset, int length)
        {
            SetTermBuffer(newTermBuffer, offset, length);
            positionIncrement = prototype.positionIncrement;
            flags = prototype.flags;
            startOffset = prototype.startOffset;
            endOffset = prototype.endOffset;
            type = prototype.type;
            payload = prototype.payload;
        }
779 
        /// <summary>Copies this token's state into <paramref name="target"/>.
        /// If the target is itself a Token, the fast path
        /// <see cref="Reinit(Token)"/> is used and the payload is then cloned
        /// (Reinit alone would share it). Otherwise each attribute interface
        /// on the target is populated individually.
        /// </summary>
        public override void CopyTo(Attribute target)
        {
            if (target is Token)
            {
                var to = (Token) target;
                to.Reinit(this);
                // reinit shares the payload, so clone it:
                if (payload != null)
                {
                    to.payload = (Payload) payload.Clone();
                }
            }
            else
            {
                InitTermBuffer();
                ((ITermAttribute) target).SetTermBuffer(termBuffer, 0, termLength);
                ((IOffsetAttribute) target).SetOffset(startOffset, endOffset);
                ((IPositionIncrementAttribute) target).PositionIncrement = positionIncrement;
                ((IPayloadAttribute) target).Payload = (payload == null)?null:(Payload) payload.Clone();
                ((IFlagsAttribute) target).Flags = flags;
                ((ITypeAttribute) target).Type = type;
            }
        }
803 
804  ///<summary>
805  /// Convenience factory that returns <c>Token</c> as implementation for the basic
806  /// attributes and return the default impl (with &quot;Impl&quot; appended) for all other
807  /// attributes.
808  /// @since 3.0
809  /// </summary>
810  public static AttributeSource.AttributeFactory TOKEN_ATTRIBUTE_FACTORY =
811  new TokenAttributeFactory(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
812 
813  /// <summary>
814  /// <b>Expert</b>: Creates an AttributeFactory returning {@link Token} as instance for the basic attributes
815  /// and for all other attributes calls the given delegate factory.
816  /// </summary>
817  public class TokenAttributeFactory : AttributeSource.AttributeFactory
818  {
819 
820  private readonly AttributeSource.AttributeFactory _delegateFactory;
821 
822  /// <summary>
823  /// <b>Expert</b>: Creates an AttributeFactory returning {@link Token} as instance for the basic attributes
824  /// and for all other attributes calls the given delegate factory.
825  /// </summary>
826  public TokenAttributeFactory(AttributeSource.AttributeFactory delegateFactory)
827  {
828  this._delegateFactory = delegateFactory;
829  }
830 
831  public override Attribute CreateAttributeInstance<T>()
832  {
833  return typeof(T).IsAssignableFrom(typeof(Token))
834  ? new Token()
835  : _delegateFactory.CreateAttributeInstance<T>();
836  }
837 
838  public override bool Equals(Object other)
839  {
840  if (this == other) return true;
841 
842  var af = other as TokenAttributeFactory;
843  return af != null && _delegateFactory.Equals(af._delegateFactory);
844  }
845 
846  public override int GetHashCode()
847  {
848  return _delegateFactory.GetHashCode() ^ 0x0a45aa31;
849  }
850  }
851  }
852 }