Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
Token.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using Lucene.Net.Analysis.Tokenattributes;
20 using Lucene.Net.Support;
21 using Lucene.Net.Util;
22 using Payload = Lucene.Net.Index.Payload;
23 using TermPositions = Lucene.Net.Index.TermPositions;
24 using ArrayUtil = Lucene.Net.Util.ArrayUtil;
25 using Attribute = Lucene.Net.Util.Attribute;
26 
27 namespace Lucene.Net.Analysis
28 {
29 
117  [Serializable]
119  {
/// <summary>Type given to a token when no other type is supplied.</summary>
public const string DEFAULT_TYPE = "word";

// Smallest size requested from ArrayUtil.GetNextSize when the term buffer is first allocated.
private const int MIN_BUFFER_SIZE = 10;

// Term text storage. Stays null until a member needs it; only the first
// termLength chars are treated as the term.
private char[] termBuffer;
private int termLength;

private int startOffset;
private int endOffset;
private string type = DEFAULT_TYPE;
private int flags;
private Payload payload;
private int positionIncrement = 1;
131 
/// <summary>
/// Creates a token with no term text; every field keeps its declared default.
/// </summary>
public Token() { }
136 
/// <summary>Creates a token with no term text and the given offsets.</summary>
/// <param name="start">Value stored as the start offset.</param>
/// <param name="end">Value stored as the end offset.</param>
public Token(int start, int end)
{
    this.startOffset = start;
    this.endOffset = end;
}
147 
/// <summary>Creates a token with no term text, the given offsets and the given type.</summary>
/// <param name="start">Value stored as the start offset.</param>
/// <param name="end">Value stored as the end offset.</param>
/// <param name="typ">Value stored as the token type.</param>
public Token(int start, int end, String typ)
    : this(start, end)
{
    this.type = typ;
}
160 
/// <summary>Creates a token with no term text, the given offsets and the given flags.</summary>
/// <param name="start">Value stored as the start offset.</param>
/// <param name="end">Value stored as the end offset.</param>
/// <param name="flags">Value stored as the token flags.</param>
public Token(int start, int end, int flags)
    : this(start, end)
{
    this.flags = flags;
}
173 
/// <summary>
/// Creates a token whose term is a copy of <paramref name="text"/> and whose
/// offsets are the given values.
/// </summary>
/// <param name="text">Term text; copied into the internal buffer.</param>
/// <param name="start">Value stored as the start offset.</param>
/// <param name="end">Value stored as the end offset.</param>
public Token(String text, int start, int end)
{
    SetTermBuffer(text);
    this.startOffset = start;
    this.endOffset = end;
}
189 
/// <summary>
/// Creates a token whose term is a copy of <paramref name="text"/>, with the
/// given offsets and type.
/// </summary>
/// <param name="text">Term text; copied into the internal buffer.</param>
/// <param name="start">Value stored as the start offset.</param>
/// <param name="end">Value stored as the end offset.</param>
/// <param name="typ">Value stored as the token type.</param>
public Token(System.String text, int start, int end, System.String typ)
    : this(text, start, end)
{
    this.type = typ;
}
206 
/// <summary>
/// Creates a token whose term is a copy of <paramref name="text"/>, with the
/// given offsets and flags.
/// </summary>
/// <param name="text">Term text; copied into the internal buffer.</param>
/// <param name="start">Value stored as the start offset.</param>
/// <param name="end">Value stored as the end offset.</param>
/// <param name="flags">Value stored as the token flags.</param>
public Token(System.String text, int start, int end, int flags)
    : this(text, start, end)
{
    this.flags = flags;
}
223 
/// <summary>
/// Creates a token whose term is copied from a slice of
/// <paramref name="startTermBuffer"/>, with the given offsets.
/// </summary>
/// <param name="startTermBuffer">Array holding the term characters; the slice is copied.</param>
/// <param name="termBufferOffset">Index of the first character to copy.</param>
/// <param name="termBufferLength">Number of characters to copy.</param>
/// <param name="start">Value stored as the start offset.</param>
/// <param name="end">Value stored as the end offset.</param>
public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end)
{
    SetTermBuffer(startTermBuffer, termBufferOffset, termBufferLength);
    this.startOffset = start;
    this.endOffset = end;
}
239 
/// <summary>
/// Gets or sets the position increment of this token. The field starts at 1.
/// </summary>
/// <exception cref="System.ArgumentException">The assigned value is negative.</exception>
public virtual int PositionIncrement
{
    get
    {
        return positionIncrement;
    }
    set
    {
        if (value < 0)
        {
            throw new System.ArgumentException("Increment must be zero or greater: " + value);
        }
        positionIncrement = value;
    }
}
277 
/// <summary>
/// Gets the term text as a newly allocated string built from the first
/// <c>termLength</c> characters of the internal buffer. The buffer is
/// allocated first if it does not exist yet.
/// </summary>
public string Term
{
    get
    {
        InitTermBuffer();
        var text = new System.String(termBuffer, 0, termLength);
        return text;
    }
}
295 
/// <summary>
/// Replaces the term with a copy of <paramref name="length"/> characters taken
/// from <paramref name="buffer"/> starting at <paramref name="offset"/>.
/// </summary>
/// <param name="buffer">Source array; its contents are copied, not referenced.</param>
/// <param name="offset">Index in <paramref name="buffer"/> of the first character to copy.</param>
/// <param name="length">Number of characters to copy; becomes the new term length.</param>
public void SetTermBuffer(char[] buffer, int offset, int length)
{
    // Ensure capacity first; previous contents need not survive because they are overwritten.
    GrowTermBuffer(length);
    Array.Copy(buffer, offset, this.termBuffer, 0, length);
    this.termLength = length;
}
308 
/// <summary>Replaces the term with a copy of all characters of <paramref name="buffer"/>.</summary>
/// <param name="buffer">String whose characters become the term.</param>
public void SetTermBuffer(System.String buffer)
{
    int charCount = buffer.Length;
    GrowTermBuffer(charCount);
    TextSupport.GetCharsFromString(buffer, 0, charCount, this.termBuffer, 0);
    this.termLength = charCount;
}
319 
/// <summary>
/// Replaces the term with a copy of <paramref name="length"/> characters of
/// <paramref name="buffer"/> starting at <paramref name="offset"/>.
/// </summary>
/// <param name="buffer">String supplying the term characters.</param>
/// <param name="offset">Index in <paramref name="buffer"/> of the first character to copy.</param>
/// <param name="length">Number of characters to copy; becomes the new term length.</param>
public void SetTermBuffer(System.String buffer, int offset, int length)
{
    // Range checks are debug-only assertions, not runtime argument validation.
    System.Diagnostics.Debug.Assert(offset <= buffer.Length);
    System.Diagnostics.Debug.Assert(offset + length <= buffer.Length);

    GrowTermBuffer(length);
    // The third argument is passed as an end index (offset + length), not a count.
    TextSupport.GetCharsFromString(buffer, offset, offset + length, this.termBuffer, 0);
    this.termLength = length;
}
337 
/// <summary>
/// Returns the internal term buffer itself (not a copy), allocating it first
/// if it does not exist yet.
/// </summary>
/// <returns>The live internal character array.</returns>
public char[] TermBuffer()
{
    InitTermBuffer();
    return this.termBuffer;
}
351 
/// <summary>
/// Ensures the internal term buffer can hold at least <paramref name="newSize"/>
/// characters, keeping any existing content. The buffer is never shrunk.
/// </summary>
/// <param name="newSize">Minimum required buffer capacity.</param>
/// <returns>The (possibly reallocated) internal term buffer.</returns>
public virtual char[] ResizeTermBuffer(int newSize)
{
    if (termBuffer == null)
    {
        // First allocation: request at least MIN_BUFFER_SIZE.
        int initialSize = Math.Max(newSize, MIN_BUFFER_SIZE);
        termBuffer = new char[ArrayUtil.GetNextSize(initialSize)];
        return termBuffer;
    }

    if (termBuffer.Length >= newSize)
    {
        // Already large enough; nothing to do.
        return termBuffer;
    }

    // Too small: allocate the size chosen by ArrayUtil.GetNextSize and carry the old content over.
    var enlarged = new char[ArrayUtil.GetNextSize(newSize)];
    Array.Copy(termBuffer, 0, enlarged, 0, termBuffer.Length);
    termBuffer = enlarged;
    return termBuffer;
}
383 
/// <summary>
/// Ensures the internal term buffer can hold at least <paramref name="newSize"/>
/// characters. Unlike <c>ResizeTermBuffer</c>, existing content is NOT copied
/// when a new array is allocated.
/// </summary>
/// <param name="newSize">Minimum required buffer capacity.</param>
private void GrowTermBuffer(int newSize)
{
    if (termBuffer != null && termBuffer.Length >= newSize)
    {
        // Existing buffer is big enough.
        return;
    }

    // A first allocation requests at least MIN_BUFFER_SIZE; a regrow only needs newSize.
    int requested = newSize;
    if (termBuffer == null && requested < MIN_BUFFER_SIZE)
    {
        requested = MIN_BUFFER_SIZE;
    }
    termBuffer = new char[ArrayUtil.GetNextSize(requested)];
}
406 
/// <summary>
/// Allocates the term buffer and resets the term length to zero if the buffer
/// has not been created yet; does nothing otherwise.
/// </summary>
private void InitTermBuffer()
{
    if (termBuffer != null)
    {
        return;
    }

    termBuffer = new char[ArrayUtil.GetNextSize(MIN_BUFFER_SIZE)];
    termLength = 0;
}
415 
/// <summary>
/// Returns the number of valid characters in the term buffer, allocating the
/// buffer first if it does not exist yet.
/// </summary>
/// <returns>The current term length.</returns>
public int TermLength()
{
    InitTermBuffer();
    return this.termLength;
}
424 
/// <summary>
/// Sets how many characters of the term buffer are considered valid. The
/// buffer is allocated first if it does not exist yet; it is not resized.
/// </summary>
/// <param name="length">New term length; must be between 0 and the current buffer size.</param>
/// <exception cref="System.ArgumentException">
/// <paramref name="length"/> is negative or larger than the term buffer.
/// </exception>
public void SetTermLength(int length)
{
    InitTermBuffer();
    // A negative length would be stored silently and only fail later, when the
    // term is materialised (e.g. in Term or ToString), so reject it here.
    if (length < 0)
    {
        throw new System.ArgumentException("length " + length + " must not be negative");
    }
    if (length > termBuffer.Length)
    {
        throw new System.ArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.Length + ")");
    }
    termLength = length;
}
440 
/// <summary>Gets or sets the start offset stored on this token.</summary>
public virtual int StartOffset
{
    get
    {
        return startOffset;
    }
    set
    {
        startOffset = value;
    }
}
452 
/// <summary>Gets or sets the end offset stored on this token.</summary>
public virtual int EndOffset
{
    get
    {
        return endOffset;
    }
    set
    {
        endOffset = value;
    }
}
462 
/// <summary>Sets the start and end offsets in one call. No validation is performed.</summary>
/// <param name="startOffset">New start offset.</param>
/// <param name="endOffset">New end offset.</param>
public virtual void SetOffset(int startOffset, int endOffset)
{
    // "this." is required: the parameters shadow the fields of the same name.
    this.endOffset = endOffset;
    this.startOffset = startOffset;
}
471 
/// <summary>
/// Gets or sets the token type string. The field starts as <c>DEFAULT_TYPE</c>;
/// the setter performs no validation.
/// </summary>
public string Type
{
    get
    {
        return type;
    }
    set
    {
        type = value;
    }
}
478 
/// <summary>Gets or sets the integer flags value stored on this token.</summary>
public virtual int Flags
{
    get
    {
        return flags;
    }
    set
    {
        this.flags = value;
    }
}
493 
/// <summary>
/// Gets or sets the payload attached to this token. May be null; the reference
/// is stored as given, without cloning.
/// </summary>
public virtual Payload Payload
{
    get
    {
        return payload;
    }
    set
    {
        this.payload = value;
    }
}
500 
/// <summary>
/// Builds a debug representation of the form
/// <c>(term,startOffset,endOffset[,type=T][,posIncr=N])</c>. The type is
/// printed only when it differs from <c>DEFAULT_TYPE</c> and the position
/// increment only when it differs from 1.
/// </summary>
/// <returns>The formatted string.</returns>
public override String ToString()
{
    var sb = new System.Text.StringBuilder();
    sb.Append('(');

    // InitTermBuffer guarantees a non-null buffer, so no null branch is needed here.
    InitTermBuffer();
    sb.Append(termBuffer, 0, termLength);

    sb.Append(',').Append(startOffset).Append(',').Append(endOffset);

    // Static String.Equals is null-safe: the Type setter accepts null, and
    // calling type.Equals(...) on it would throw from inside ToString.
    if (!String.Equals(type, DEFAULT_TYPE))
    {
        sb.Append(",type=").Append(type ?? "null");
    }
    if (positionIncrement != 1)
    {
        sb.Append(",posIncr=").Append(positionIncrement);
    }

    sb.Append(')');
    return sb.ToString();
}
518 
/// <summary>
/// Resets this token to its initial field values: empty term, offsets 0,
/// position increment 1, flags 0, type <c>DEFAULT_TYPE</c>, no payload.
/// The term buffer array itself is kept so it can be reused.
/// </summary>
public override void Clear()
{
    // Only the length is reset; the allocated buffer is retained for reuse.
    termLength = 0;
    startOffset = 0;
    endOffset = 0;
    positionIncrement = 1;
    flags = 0;
    type = DEFAULT_TYPE;
    payload = null;
}
532 
/// <summary>
/// Creates a deep copy of this token: the copy gets its own term buffer array
/// (same length and contents) and its own clone of the payload, if any.
/// </summary>
/// <returns>The cloned <c>Token</c>.</returns>
public override System.Object Clone()
{
    var copy = (Token) base.Clone();

    // base.Clone() leaves both instances pointing at the same array and payload;
    // replace them so the copy can be modified independently.
    if (termBuffer != null)
    {
        copy.termBuffer = (char[]) termBuffer.Clone();
    }
    if (payload != null)
    {
        copy.payload = (Payload) payload.Clone();
    }

    return copy;
}
548 
/// <summary>
/// Creates a new token that takes its term and offsets from the arguments and
/// its position increment, flags, type and (cloned) payload from this token.
/// </summary>
/// <param name="newTermBuffer">Array holding the new term characters; the slice is copied.</param>
/// <param name="newTermOffset">Index of the first character to copy.</param>
/// <param name="newTermLength">Number of characters to copy.</param>
/// <param name="newStartOffset">Start offset of the new token.</param>
/// <param name="newEndOffset">End offset of the new token.</param>
/// <returns>The newly created token.</returns>
public virtual Token Clone(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset)
{
    var copy = new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset);
    copy.positionIncrement = this.positionIncrement;
    copy.flags = this.flags;
    copy.type = this.type;
    if (this.payload != null)
    {
        copy.payload = (Payload) this.payload.Clone();
    }
    return copy;
}
563 
/// <summary>
/// Compares this token with another object. Two tokens are equal when term
/// length, offsets, flags, position increment, type, payload and the first
/// <c>termLength</c> term characters all match.
/// </summary>
/// <param name="obj">Object to compare with.</param>
/// <returns><c>true</c> when <paramref name="obj"/> is an equal <c>Token</c>.</returns>
public override bool Equals(Object obj)
{
    if (ReferenceEquals(obj, this))
    {
        return true;
    }

    var other = obj as Token;
    if (other == null)
    {
        return false;
    }

    // Both buffers must exist before the per-character comparison below.
    InitTermBuffer();
    other.InitTermBuffer();

    bool sameFields =
        termLength == other.termLength
        && startOffset == other.startOffset
        && endOffset == other.endOffset
        && flags == other.flags
        && positionIncrement == other.positionIncrement
        && SubEqual(type, other.type)
        && SubEqual(payload, other.payload);
    if (!sameFields)
    {
        return false;
    }

    for (int pos = 0; pos < termLength; pos++)
    {
        if (termBuffer[pos] != other.termBuffer[pos])
        {
            return false;
        }
    }
    return true;
}
587 
/// <summary>
/// Null-tolerant equality: two nulls are equal, a null first argument never
/// equals a non-null second one, otherwise the result is <c>o1.Equals(o2)</c>.
/// </summary>
private bool SubEqual(System.Object o1, System.Object o2)
{
    return o1 == null ? o2 == null : o1.Equals(o2);
}
594 
/// <summary>
/// Computes a hash code from the same fields that <c>Equals</c> compares:
/// term length, offsets, flags, position increment, type, payload (when
/// present) and the first <c>termLength</c> term characters.
/// </summary>
/// <returns>The combined hash code.</returns>
public override int GetHashCode()
{
    InitTermBuffer();

    // Overflow is expected when mixing hash values; "unchecked" keeps this from
    // throwing OverflowException when the assembly is compiled with overflow checking.
    unchecked
    {
        int code = termLength;
        code = code * 31 + startOffset;
        code = code * 31 + endOffset;
        code = code * 31 + flags;
        code = code * 31 + positionIncrement;
        // Equals tolerates a null type (via SubEqual), so hashing must not throw on it.
        code = code * 31 + (type == null ? 0 : type.GetHashCode());
        code = (payload == null ? code : code * 31 + payload.GetHashCode());
        code = code * 31 + ArrayUtil.HashCode(termBuffer, 0, termLength);
        return code;
    }
}
608 
/// <summary>
/// Resets payload, position increment, flags, offsets and type to their
/// initial values, like <c>Clear</c>, but leaves the term buffer and term
/// length untouched.
/// </summary>
private void ClearNoTermBuffer()
{
    startOffset = 0;
    endOffset = 0;
    positionIncrement = 1;
    flags = 0;
    type = DEFAULT_TYPE;
    payload = null;
}
618 
/// <summary>
/// Resets this token (payload, position increment and flags go back to their
/// initial values) and then sets the term, offsets and type from the arguments.
/// </summary>
/// <param name="newTermBuffer">Array holding the new term characters; the slice is copied.</param>
/// <param name="newTermOffset">Index of the first character to copy.</param>
/// <param name="newTermLength">Number of characters to copy.</param>
/// <param name="newStartOffset">New start offset.</param>
/// <param name="newEndOffset">New end offset.</param>
/// <param name="newType">New token type.</param>
/// <returns>This token.</returns>
public virtual Token Reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, System.String newType)
{
    // ClearNoTermBuffer already resets payload and positionIncrement, so no
    // separate resets are needed (matches the other Reinit overloads).
    ClearNoTermBuffer();
    SetTermBuffer(newTermBuffer, newTermOffset, newTermLength);
    startOffset = newStartOffset;
    endOffset = newEndOffset;
    type = newType;
    return this;
}
638 
/// <summary>
/// Resets this token and then sets the term and offsets from the arguments;
/// the type becomes <c>DEFAULT_TYPE</c>.
/// </summary>
/// <param name="newTermBuffer">Array holding the new term characters; the slice is copied.</param>
/// <param name="newTermOffset">Index of the first character to copy.</param>
/// <param name="newTermLength">Number of characters to copy.</param>
/// <param name="newStartOffset">New start offset.</param>
/// <param name="newEndOffset">New end offset.</param>
/// <returns>This token.</returns>
public virtual Token Reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset)
{
    ClearNoTermBuffer();
    SetTermBuffer(newTermBuffer, newTermOffset, newTermLength);
    this.startOffset = newStartOffset;
    this.endOffset = newEndOffset;
    this.type = DEFAULT_TYPE;
    return this;
}
656 
/// <summary>
/// Resets this token and then sets the term (a copy of the whole string),
/// offsets and type from the arguments.
/// </summary>
/// <param name="newTerm">String whose characters become the term.</param>
/// <param name="newStartOffset">New start offset.</param>
/// <param name="newEndOffset">New end offset.</param>
/// <param name="newType">New token type.</param>
/// <returns>This token.</returns>
public virtual Token Reinit(System.String newTerm, int newStartOffset, int newEndOffset, System.String newType)
{
    ClearNoTermBuffer();
    SetTermBuffer(newTerm);
    this.startOffset = newStartOffset;
    this.endOffset = newEndOffset;
    this.type = newType;
    return this;
}
674 
/// <summary>
/// Resets this token and then sets the term (a copy of a substring of
/// <paramref name="newTerm"/>), offsets and type from the arguments.
/// </summary>
/// <param name="newTerm">String supplying the term characters.</param>
/// <param name="newTermOffset">Index in <paramref name="newTerm"/> of the first character to copy.</param>
/// <param name="newTermLength">Number of characters to copy.</param>
/// <param name="newStartOffset">New start offset.</param>
/// <param name="newEndOffset">New end offset.</param>
/// <param name="newType">New token type.</param>
/// <returns>This token.</returns>
public virtual Token Reinit(System.String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, System.String newType)
{
    ClearNoTermBuffer();
    SetTermBuffer(newTerm, newTermOffset, newTermLength);
    this.startOffset = newStartOffset;
    this.endOffset = newEndOffset;
    this.type = newType;
    return this;
}
692 
/// <summary>
/// Resets this token and then sets the term (a copy of the whole string) and
/// offsets from the arguments; the type becomes <c>DEFAULT_TYPE</c>.
/// </summary>
/// <param name="newTerm">String whose characters become the term.</param>
/// <param name="newStartOffset">New start offset.</param>
/// <param name="newEndOffset">New end offset.</param>
/// <returns>This token.</returns>
public virtual Token Reinit(System.String newTerm, int newStartOffset, int newEndOffset)
{
    ClearNoTermBuffer();
    SetTermBuffer(newTerm);
    this.startOffset = newStartOffset;
    this.endOffset = newEndOffset;
    this.type = DEFAULT_TYPE;
    return this;
}
710 
/// <summary>
/// Resets this token and then sets the term (a copy of a substring of
/// <paramref name="newTerm"/>) and offsets from the arguments; the type
/// becomes <c>DEFAULT_TYPE</c>.
/// </summary>
/// <param name="newTerm">String supplying the term characters.</param>
/// <param name="newTermOffset">Index in <paramref name="newTerm"/> of the first character to copy.</param>
/// <param name="newTermLength">Number of characters to copy.</param>
/// <param name="newStartOffset">New start offset.</param>
/// <param name="newEndOffset">New end offset.</param>
/// <returns>This token.</returns>
public virtual Token Reinit(System.String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset)
{
    ClearNoTermBuffer();
    SetTermBuffer(newTerm, newTermOffset, newTermLength);
    this.startOffset = newStartOffset;
    this.endOffset = newEndOffset;
    this.type = DEFAULT_TYPE;
    return this;
}
728 
/// <summary>
/// Copies every field of <paramref name="prototype"/> into this token. The
/// term characters are copied; the payload reference is shared, not cloned.
/// </summary>
/// <param name="prototype">Token to copy from.</param>
public virtual void Reinit(Token prototype)
{
    // Make sure the source buffer exists before copying from it.
    prototype.InitTermBuffer();
    SetTermBuffer(prototype.termBuffer, 0, prototype.termLength);

    this.positionIncrement = prototype.positionIncrement;
    this.flags = prototype.flags;
    this.startOffset = prototype.startOffset;
    this.endOffset = prototype.endOffset;
    this.type = prototype.type;
    this.payload = prototype.payload;
}
743 
/// <summary>
/// Sets the term to a copy of <paramref name="newTerm"/> and copies all other
/// fields from <paramref name="prototype"/>. The payload reference is shared,
/// not cloned.
/// </summary>
/// <param name="prototype">Token supplying every field except the term.</param>
/// <param name="newTerm">String whose characters become the term.</param>
public virtual void Reinit(Token prototype, System.String newTerm)
{
    SetTermBuffer(newTerm);

    this.positionIncrement = prototype.positionIncrement;
    this.flags = prototype.flags;
    this.startOffset = prototype.startOffset;
    this.endOffset = prototype.endOffset;
    this.type = prototype.type;
    this.payload = prototype.payload;
}
759 
/// <summary>
/// Sets the term to a copy of a slice of <paramref name="newTermBuffer"/> and
/// copies all other fields from <paramref name="prototype"/>. The payload
/// reference is shared, not cloned.
/// </summary>
/// <param name="prototype">Token supplying every field except the term.</param>
/// <param name="newTermBuffer">Array holding the new term characters.</param>
/// <param name="offset">Index of the first character to copy.</param>
/// <param name="length">Number of characters to copy.</param>
public virtual void Reinit(Token prototype, char[] newTermBuffer, int offset, int length)
{
    SetTermBuffer(newTermBuffer, offset, length);

    this.positionIncrement = prototype.positionIncrement;
    this.flags = prototype.flags;
    this.startOffset = prototype.startOffset;
    this.endOffset = prototype.endOffset;
    this.type = prototype.type;
    this.payload = prototype.payload;
}
779 
/// <summary>
/// Copies this token's state into <paramref name="target"/>. A <c>Token</c>
/// target is reinitialised from this token; any other target is cast in turn to
/// each of the term, offset, position-increment, payload, flags and type
/// attribute interfaces. In both cases the payload is cloned, not shared.
/// </summary>
/// <param name="target">Attribute receiving the copy.</param>
public override void CopyTo(Attribute target)
{
    var tokenTarget = target as Token;
    if (tokenTarget != null)
    {
        tokenTarget.Reinit(this);
        // Reinit shares the payload reference, so give the target its own clone.
        if (payload != null)
        {
            tokenTarget.payload = (Payload) payload.Clone();
        }
        return;
    }

    InitTermBuffer();
    ((ITermAttribute) target).SetTermBuffer(termBuffer, 0, termLength);
    ((IOffsetAttribute) target).SetOffset(startOffset, endOffset);
    ((IPositionIncrementAttribute) target).PositionIncrement = positionIncrement;
    ((IPayloadAttribute) target).Payload = (payload == null) ? null : (Payload) payload.Clone();
    ((IFlagsAttribute) target).Flags = flags;
    ((ITypeAttribute) target).Type = type;
}
803 
/// <summary>
/// Shared factory that creates <c>Token</c> instances for attribute types a
/// <c>Token</c> can stand in for and otherwise delegates to
/// <c>AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY</c>.
/// Declared <c>readonly</c> so the shared instance cannot be swapped out at runtime.
/// </summary>
public static readonly AttributeSource.AttributeFactory TOKEN_ATTRIBUTE_FACTORY =
    new TokenAttributeFactory(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
812 
/// <summary>
/// Attribute factory that returns a new <c>Token</c> whenever the requested
/// attribute type is assignable from <c>Token</c>, and otherwise forwards the
/// request to a delegate factory.
/// </summary>
public class TokenAttributeFactory : AttributeSource.AttributeFactory
{
    // Factory used for every attribute type that Token cannot stand in for.
    private readonly AttributeSource.AttributeFactory _delegateFactory;

    /// <summary>Creates a factory that falls back to <paramref name="delegateFactory"/>.</summary>
    /// <param name="delegateFactory">Factory used for attribute types not assignable from <c>Token</c>.</param>
    public TokenAttributeFactory(AttributeSource.AttributeFactory delegateFactory)
    {
        _delegateFactory = delegateFactory;
    }

    /// <summary>
    /// Returns a new <c>Token</c> when <typeparamref name="T"/> is assignable
    /// from <c>Token</c>; otherwise returns whatever the delegate factory creates.
    /// </summary>
    public override Attribute CreateAttributeInstance<T>()
    {
        if (typeof(T).IsAssignableFrom(typeof(Token)))
        {
            return new Token();
        }
        return _delegateFactory.CreateAttributeInstance<T>();
    }

    /// <summary>
    /// Two factories are equal when they are the same instance, or when both are
    /// <c>TokenAttributeFactory</c> objects whose delegate factories are equal.
    /// </summary>
    public override bool Equals(Object other)
    {
        if (ReferenceEquals(this, other))
        {
            return true;
        }

        var that = other as TokenAttributeFactory;
        if (that != null)
        {
            return _delegateFactory.Equals(that._delegateFactory);
        }
        return false;
    }

    /// <summary>Hash code derived from the delegate factory's hash, XORed with a fixed constant.</summary>
    public override int GetHashCode()
    {
        int delegateHash = _delegateFactory.GetHashCode();
        return delegateHash ^ 0x0a45aa31;
    }
}
851  }
852 }