Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
FieldEnumerator.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections;
20 using System.Collections.Generic;
21 using System.Linq;
22 using System.Text;
23 
24 using Lucene.Net.Util;
25 
26 namespace Lucene.Net.Index
27 {
/// <summary>
/// Selects how the text of a term is turned into a value.
/// </summary>
public enum FieldParser
{
    /// <summary>
    /// The term text is handed to the enumerator's plain-string parsing routine.
    /// </summary>
    String = 0,

    /// <summary>
    /// The term text is handed to the enumerator's encoded-numeric parsing routine.
    /// </summary>
    Numeric = 1
}
36 
/// <summary>
/// <para>Base class for the typed enumerators.</para>
///
/// <para>
/// There are five implementations of FieldEnumerator<typeparamref name="T"/> for
/// strings, integers, longs, floats, and doubles. The numeric enumerators support both
/// standard Field and NumericField implementations. The string and numeric enumerators
/// have slightly different options, but both should be used within a using statement
/// to close the underlying TermEnum/TermDocs. Refer to the unit tests for usage examples.
/// </para>
/// </summary>
/// <typeparam name="T">The type of data being enumerated.</typeparam>
public abstract class FieldEnumerator<T> : IDisposable
{
    /// <summary>
    /// Whether the enumerator will include TermDocs.
    /// </summary>
    protected bool includeDocs;

    /// <summary>
    /// The underlying TermEnum; closed by <see cref="Dispose"/>.
    /// </summary>
    private TermEnum termEnum;

    /// <summary>
    /// The optional TermDocs; null when document enumeration was not requested.
    /// </summary>
    private TermDocs termDocs;

    /// <summary>
    /// The specialized TermEnum enumerator.
    /// </summary>
    protected TermEnumerator tEnum;

    /// <summary>
    /// The specialized TermDoc enumerator; only created when TermDocs are included.
    /// </summary>
    private TermDocEnumerator.TermDocUsingTermsEnumerator tdEnum;

    /// <summary>
    /// Whether or not the instance has been disposed.
    /// </summary>
    private bool disposed;

    /// <summary>
    /// Initialization method called by subclasses to simulate a shared
    /// base constructor as generic classes cannot have a parameterized ctor.
    /// Document enumeration is enabled.
    /// </summary>
    /// <param name="reader">The index reader to read from.</param>
    /// <param name="field">The field to enumerate.</param>
    protected void Init(IndexReader reader, string field)
    {
        this.Init(reader, field, true);
    }

    /// <summary>
    /// Initialization method called by subclasses to simulate a shared
    /// base constructor as generic classes cannot have a parameterized ctor.
    /// </summary>
    /// <param name="reader">The index reader to read from.</param>
    /// <param name="fieldName">The field to enumerate.</param>
    /// <param name="includeDocs">Whether this enumerator will support TermDocs.</param>
    protected void Init(IndexReader reader, string fieldName, bool includeDocs)
    {
        // The parameter shadows the protected field of the same name, so the
        // field has to be assigned explicitly or it stays false forever.
        this.includeDocs = includeDocs;

        this.termEnum = reader.Terms(new Term(fieldName));
        if (includeDocs)
        {
            this.termDocs = reader.TermDocs();
            this.tdEnum = new TermDocEnumerator.TermDocUsingTermsEnumerator(this.termDocs, this.termEnum);
        }

        // termDocs is null here when docs were not requested; the term
        // enumerator checks for that before seeking.
        this.tEnum = new TermEnumerator(this.termEnum, this.termDocs, fieldName, this);
    }

    /// <summary>
    /// Method to attempt to parse out the value from the encoded string
    /// and sets the value of Current.
    /// </summary>
    /// <param name="s">The encoded string.</param>
    /// <returns>True if the value was successfully parsed, false if we reached the
    /// end of encoded values in the field and only the tries remain.</returns>
    protected abstract bool TryParse(string s);

    /// <summary>
    /// Access the enumerator for the terms.
    /// </summary>
    public TermEnumerator Terms
    {
        get { return this.tEnum; }
    }

    /// <summary>
    /// Access the enumerator for the TermDocs.
    /// </summary>
    /// <exception cref="NotSupportedException">
    /// Thrown when the instance was initialized without TermDocs support.
    /// </exception>
    public TermDocEnumerator.TermDocUsingTermsEnumerator Docs
    {
        get
        {
            if (this.termDocs == null)
            {
                throw new NotSupportedException("This instance does not support enumeration over the document ids.");
            }

            return this.tdEnum;
        }
    }

    #region IDisposable Members

    /// <summary>
    /// Dispose of the instance, closing the underlying TermEnum and, when
    /// present, the TermDocs. Calling it more than once has no further effect.
    /// </summary>
    public void Dispose()
    {
        if (this.disposed)
        {
            return;
        }

        this.disposed = true;
        if (this.termEnum != null)
        {
            this.termEnum.Close();
        }
        if (this.termDocs != null)
        {
            this.termDocs.Close();
        }
        GC.SuppressFinalize(this);
    }

    #endregion

    /// <summary>
    /// The enumerator over the terms in an index. It is both the enumerable and
    /// its own enumerator, so it can only be walked once.
    /// </summary>
    public class TermEnumerator : IEnumerator<T>, IEnumerable<T>
    {
        /// <summary>
        /// The underlying TermEnum.
        /// </summary>
        private TermEnum termEnum;

        /// <summary>
        /// The optional TermDocs.
        /// </summary>
        private TermDocs termDocs;

        /// <summary>
        /// The current term in the enum.
        /// </summary>
        private Term currentTerm;

        /// <summary>
        /// The field name, if any for the enum.
        /// </summary>
        protected string fieldName;

        /// <summary>
        /// True until the first call to MoveNext.
        /// </summary>
        private bool isFirst = true;

        /// <summary>
        /// The enclosing instance, called back to in order to parse the field.
        /// </summary>
        private FieldEnumerator<T> enclosing;

        /// <summary>
        /// Ctor.
        /// </summary>
        /// <param name="termEnum">The TermEnum to wrap.</param>
        /// <param name="termDocs">The TermDocs to wrap; may be null.</param>
        /// <param name="field">The name of the field whose terms are enumerated.</param>
        /// <param name="enclosing">The owning enumerator, used to parse each term's text.</param>
        public TermEnumerator(TermEnum termEnum, TermDocs termDocs, string field, FieldEnumerator<T> enclosing)
        {
            this.termEnum = termEnum;
            this.termDocs = termDocs;
            this.fieldName = field;
            this.enclosing = enclosing;
        }

        #region IEnumerator<T> Members

        /// <summary>
        /// The current item in the enumerator. Set by the enclosing
        /// enumerator's parse routine.
        /// </summary>
        public T Current
        {
            get;
            internal set;
        }

        #endregion

        #region IEnumerator Members

        /// <summary>
        /// Current item in the enumerator.
        /// </summary>
        object IEnumerator.Current
        {
            get { return (object)this.Current; }
        }

        /// <summary>
        /// Advance to the next item.
        /// </summary>
        /// <returns>True when a term of the field was parsed into Current; false
        /// when the terms are exhausted, the term belongs to another field, or
        /// the enclosing enumerator could not parse it.</returns>
        public bool MoveNext()
        {
            if (this.isFirst)
            {
                // The first call reads the term the enum is already positioned
                // on rather than advancing past it.
                this.isFirst = false;
            }
            else if (!this.termEnum.Next())
            {
                return false;
            }

            this.currentTerm = this.termEnum.Term;
            if (this.currentTerm == null || !this.currentTerm.Field.Equals(this.fieldName))
            {
                return false;
            }

            if (!this.enclosing.TryParse(this.currentTerm.Text))
            {
                return false;
            }

            // Keep the TermDocs in step with the term just accepted.
            if (this.termDocs != null)
            {
                this.termDocs.Seek(this.termEnum);
            }
            return true;
        }

        /// <summary>
        /// Reset the enumerator to the beginning. Not supported.
        /// </summary>
        /// <exception cref="NotSupportedException">Always thrown.</exception>
        public void Reset()
        {
            throw new NotSupportedException("The enumerator cannot be reset");
        }

        #endregion

        #region IDisposable Members

        /// <summary>
        /// No-op; this class does not close the wrapped TermEnum/TermDocs.
        /// </summary>
        public void Dispose()
        {
            // noop
        }

        #endregion

        #region IEnumerable<T> Members

        /// <summary>
        /// Accessor to the generic enumerator; returns this same instance.
        /// </summary>
        /// <returns>This instance.</returns>
        public IEnumerator<T> GetEnumerator()
        {
            return this;
        }

        #endregion

        #region IEnumerable Members

        /// <summary>
        /// Accessor to the non-generic IEnumerator.
        /// </summary>
        /// <returns>This instance.</returns>
        IEnumerator IEnumerable.GetEnumerator()
        {
            return this.GetEnumerator();
        }

        #endregion
    }
}
324 
/// <summary>
/// Class to handle creating a TermDocs and allowing for seeking and enumeration. Used
/// when you have a set of one or more terms for which you want to enumerate over the
/// documents that contain those terms.
/// </summary>
public class TermDocEnumerator : IEnumerable<int>, IDisposable
{
    /// <summary>
    /// The underlying TermDocs.
    /// </summary>
    private TermDocs termDocs;

    /// <summary>
    /// Ctor.
    /// </summary>
    /// <param name="termDocs">The TermDocs to wrap.</param>
    public TermDocEnumerator(TermDocs termDocs)
    {
        this.termDocs = termDocs;
    }

    /// <summary>
    /// Seek to a specific term.
    /// </summary>
    /// <param name="t">The term to position the underlying TermDocs on.</param>
    public void Seek(Term t)
    {
        this.termDocs.Seek(t);
    }

    #region IEnumerable<int> Members

    /// <summary>
    /// Get an enumerator over the document ids of the wrapped TermDocs.
    /// </summary>
    /// <returns>A new enumerator wrapping the same TermDocs.</returns>
    public IEnumerator<int> GetEnumerator()
    {
        return new TermDocUsingTermsEnumerator(this.termDocs);
    }

    #endregion

    #region IEnumerable Members

    /// <summary>
    /// Get the non-generic enumerator.
    /// </summary>
    /// <returns>The same enumerator as the generic overload.</returns>
    IEnumerator IEnumerable.GetEnumerator()
    {
        return this.GetEnumerator();
    }

    #endregion

    #region IDisposable Members

    /// <summary>
    /// Dispose of the instance, closing the termdocs.
    /// </summary>
    public void Dispose()
    {
        if (this.termDocs != null)
        {
            this.termDocs.Close();
        }
    }

    #endregion

    /// <summary>
    /// Class to handle enumeration over the TermDocs that does NOT close them
    /// on a call to Dispose!
    /// </summary>
    public class TermDocUsingTermsEnumerator : IEnumerable<int>, IEnumerator<int>
    {
        /// <summary>
        /// A reference to an outside TermEnum that is used to position
        /// the TermDocs. It can be null.
        /// </summary>
        private TermEnum termEnum;

        /// <summary>
        /// The underlying TermDocs.
        /// </summary>
        private TermDocs termDocs;

        /// <summary>
        /// Ctor.
        /// </summary>
        /// <param name="termDocs">TermDocs to wrap</param>
        internal TermDocUsingTermsEnumerator(TermDocs termDocs)
            : this(termDocs, null)
        { }

        /// <summary>
        /// Ctor.
        /// </summary>
        /// <param name="td">Underlying TermDocs.</param>
        /// <param name="termEnum">Enclosing field enum.</param>
        internal TermDocUsingTermsEnumerator(TermDocs td, TermEnum termEnum)
        {
            this.termDocs = td;
            this.termEnum = termEnum;
        }

        /// <summary>
        /// Seek to a term in the underlying TermDocs.
        /// </summary>
        /// <param name="te">The point to seek to.</param>
        internal void Seek(TermEnum te)
        {
            this.termDocs.Seek(te);
        }

        #region IEnumerable<int> Members

        /// <summary>
        /// Get the enumerator; returns this same instance.
        /// </summary>
        /// <returns>This instance.</returns>
        public IEnumerator<int> GetEnumerator()
        {
            return this;
        }

        #endregion

        #region IEnumerable Members

        /// <summary>
        /// Get the non-generic enumerator.
        /// </summary>
        /// <returns>This instance.</returns>
        IEnumerator IEnumerable.GetEnumerator()
        {
            return this.GetEnumerator();
        }

        #endregion

        #region IEnumerator<int> Members

        /// <summary>
        /// The current document id.
        /// </summary>
        public int Current
        {
            get { return this.termDocs.Doc; }
        }

        #endregion

        #region IDisposable Members

        /// <summary>
        /// Dispose impl.
        /// </summary>
        public void Dispose()
        {
            // noop as the closing of the underlying
            // TermDocs is handled by the containing class
        }

        #endregion

        #region IEnumerator Members

        /// <summary>
        /// The current item, boxed. Previously this threw
        /// NotImplementedException, which broke any non-generic enumeration
        /// of the document ids.
        /// </summary>
        object IEnumerator.Current
        {
            get { return (object)this.Current; }
        }

        /// <summary>
        /// Move to the next item.
        /// </summary>
        /// <returns>True if more, false if not.</returns>
        public bool MoveNext()
        {
            return this.termDocs.Next();
        }

        /// <summary>
        /// Not implemented. Use Seek instead.
        /// </summary>
        /// <exception cref="NotImplementedException">Always thrown.</exception>
        public void Reset()
        {
            throw new NotImplementedException();
        }

        #endregion
    }
}
514 
515 
/// <summary>
/// Enumerates the terms of a field, exposing each term's text unchanged as a string.
/// </summary>
public class StringFieldEnumerator : FieldEnumerator<string>
{
    /// <summary>
    /// Create an enumerator for a single field with document enumeration enabled.
    /// </summary>
    /// <param name="reader">Index reader.</param>
    /// <param name="fieldName">The field to read.</param>
    public StringFieldEnumerator(IndexReader reader, string fieldName)
        : this(reader, fieldName, true)
    {
    }

    /// <summary>
    /// Create an enumerator for a single field.
    /// </summary>
    /// <param name="reader">Index reader.</param>
    /// <param name="fieldName">The field to read.</param>
    /// <param name="includeDocs">Whether the TermDocs will also be enumerated.</param>
    public StringFieldEnumerator(IndexReader reader, string fieldName, bool includeDocs)
    {
        Init(reader, fieldName, includeDocs);
    }

    /// <summary>
    /// Stores the term text as the current value; no conversion is involved.
    /// </summary>
    /// <param name="s">The string.</param>
    /// <returns>Always true.</returns>
    protected override bool TryParse(string s)
    {
        tEnum.Current = s;
        return true;
    }
}
553 
/// <summary>
/// Common base for the numeric enumerators. Routes each term's text to either
/// the plain-string or the encoded-numeric parsing routine, depending on the
/// parser chosen at initialization.
/// </summary>
/// <typeparam name="T">The numeric type produced by the enumerator.</typeparam>
public abstract class NumericFieldEnum<T> : FieldEnumerator<T>
{
    /// <summary>
    /// Which parsing routine is applied to the terms of the field.
    /// </summary>
    private FieldParser parser;

    /// <summary>
    /// Set up the underlying enumerators and remember the parser type.
    /// </summary>
    /// <param name="reader">The reader.</param>
    /// <param name="field">The field name.</param>
    /// <param name="includeDocs">Whether to include a TermDoc enum.</param>
    /// <param name="parser">The parser to use on the field.</param>
    protected void Init(IndexReader reader, string field, bool includeDocs, FieldParser parser)
    {
        base.Init(reader, field, includeDocs);
        this.parser = parser;
    }

    /// <summary>
    /// Dispatches to the routine matching the configured parser.
    /// </summary>
    /// <param name="s">String to parse.</param>
    /// <returns>Whatever the selected parsing routine returns.</returns>
    protected override bool TryParse(string s)
    {
        return this.parser == FieldParser.Numeric
            ? this.TryParseNumeric(s)
            : this.TryParseString(s);
    }

    /// <summary>
    /// Parse out a standard string and set the value of current.
    /// </summary>
    /// <param name="s">String to parse.</param>
    /// <returns>True on success.</returns>
    protected abstract bool TryParseString(string s);

    /// <summary>
    /// Parse out an encoded numeric string and set the value of current.
    /// </summary>
    /// <param name="s">String to parse.</param>
    /// <returns>True on success.</returns>
    protected abstract bool TryParseNumeric(string s);
}
609 
/// <summary>
/// Implementation for enumerating over all of the terms in an int numeric field.
/// </summary>
public class IntFieldEnumerator : NumericFieldEnum<int>
{
    /// <summary>
    /// Construct an enumerator over one field, with document enumeration enabled.
    /// </summary>
    /// <param name="reader">Index reader.</param>
    /// <param name="fieldName">The field to read.</param>
    /// <param name="parser">How the term text is to be parsed.</param>
    public IntFieldEnumerator(IndexReader reader, string fieldName, FieldParser parser)
    {
        this.Init(reader, fieldName, true, parser);
    }

    /// <summary>
    /// Construct an enumerator over one field.
    /// </summary>
    /// <param name="reader">Index reader.</param>
    /// <param name="fieldName">The field to read.</param>
    /// <param name="parser">How the term text is to be parsed.</param>
    /// <param name="includeDocs">Whether the TermDocs will also be enumerated.</param>
    public IntFieldEnumerator(IndexReader reader, string fieldName, FieldParser parser, bool includeDocs)
    {
        this.Init(reader, fieldName, includeDocs, parser);
    }

    /// <summary>
    /// Parse the int from the string.
    /// </summary>
    /// <param name="s">String to parse.</param>
    /// <returns>Always true; malformed text surfaces as an exception from Int32.Parse.</returns>
    protected override bool TryParseString(string s)
    {
        // Index terms are machine data: parse them independently of the
        // thread's current culture so results do not vary per machine.
        this.tEnum.Current = Int32.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
        return true;
    }

    /// <summary>
    /// Parse the int from an encoded string.
    /// </summary>
    /// <param name="s">String to parse.</param>
    /// <returns>True if the parse was successful, false indicating failure
    /// and the end of useful terms in the numeric field.</returns>
    protected override bool TryParseNumeric(string s)
    {
        // The first character carries the shift of the encoded term; a shift
        // in 1..31 signals the end of the useful terms for this field.
        int shift = s[0] - NumericUtils.SHIFT_START_INT;
        if (shift > 0 && shift <= 31)
        {
            return false;
        }

        this.tEnum.Current = NumericUtils.PrefixCodedToInt(s);
        return true;
    }
}
667 
/// <summary>
/// Implementation for enumerating over all of the terms in a float numeric field.
/// </summary>
public class FloatFieldEnumerator : NumericFieldEnum<float>
{
    /// <summary>
    /// Construct an enumerator over one field, with document enumeration enabled.
    /// </summary>
    /// <param name="reader">Index reader.</param>
    /// <param name="fieldName">The field to read.</param>
    /// <param name="parser">How the term text is to be parsed.</param>
    public FloatFieldEnumerator(IndexReader reader, string fieldName, FieldParser parser)
    {
        this.Init(reader, fieldName, true, parser);
    }

    /// <summary>
    /// Construct an enumerator over one field.
    /// </summary>
    /// <param name="reader">Index reader.</param>
    /// <param name="fieldName">The field to read.</param>
    /// <param name="parser">How the term text is to be parsed.</param>
    /// <param name="includeDocs">Whether the TermDocs will also be enumerated.</param>
    public FloatFieldEnumerator(IndexReader reader, string fieldName, FieldParser parser, bool includeDocs)
    {
        this.Init(reader, fieldName, includeDocs, parser);
    }

    /// <summary>
    /// Parse the float from a string.
    /// </summary>
    /// <param name="s">The string to parse.</param>
    /// <returns>Always true; malformed text surfaces as an exception from float.Parse.</returns>
    protected override bool TryParseString(string s)
    {
        // Parse with the invariant culture: under a culture whose decimal
        // separator is ',' the default overload would misread "1.5".
        this.tEnum.Current = float.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
        return true;
    }

    /// <summary>
    /// Parse the float from a numeric field.
    /// </summary>
    /// <param name="s">The string to parse.</param>
    /// <returns>True if the string was parsed, false to signify the
    /// end of usable terms in a numeric field.</returns>
    protected override bool TryParseNumeric(string s)
    {
        // The first character carries the shift of the encoded term; a shift
        // in 1..31 signals the end of the usable terms for this field.
        int shift = s[0] - NumericUtils.SHIFT_START_INT;
        if (shift > 0 && shift <= 31)
        {
            return false;
        }

        // Decode the prefix-coded int and convert it back to a float.
        // Without this assignment Current was never updated on success.
        this.tEnum.Current = NumericUtils.SortableIntToFloat(NumericUtils.PrefixCodedToInt(s));
        return true;
    }
}
726 
/// <summary>
/// Implementation for enumerating over all of the terms in a double numeric field.
/// </summary>
public class DoubleFieldEnumerator : NumericFieldEnum<double>
{
    /// <summary>
    /// Construct an enumerator over one field, with document enumeration enabled.
    /// </summary>
    /// <param name="reader">Index reader.</param>
    /// <param name="fieldName">The field to read.</param>
    /// <param name="parser">How the term text is to be parsed.</param>
    public DoubleFieldEnumerator(IndexReader reader, string fieldName, FieldParser parser)
    {
        this.Init(reader, fieldName, true, parser);
    }

    /// <summary>
    /// Construct an enumerator over one field.
    /// </summary>
    /// <param name="reader">Index reader.</param>
    /// <param name="fieldName">The field to read.</param>
    /// <param name="parser">How the term text is to be parsed.</param>
    /// <param name="includeDocs">Whether the TermDocs will also be enumerated.</param>
    public DoubleFieldEnumerator(IndexReader reader, string fieldName, FieldParser parser, bool includeDocs)
    {
        this.Init(reader, fieldName, includeDocs, parser);
    }

    /// <summary>
    /// Parse the double from a string.
    /// </summary>
    /// <param name="s">The string to parse.</param>
    /// <returns>Always true; malformed text surfaces as an exception from Double.Parse.</returns>
    protected override bool TryParseString(string s)
    {
        // Parse with the invariant culture: under a culture whose decimal
        // separator is ',' the default overload would misread "1.5".
        this.tEnum.Current = Double.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
        return true;
    }

    /// <summary>
    /// Parse the double from a numeric field.
    /// </summary>
    /// <param name="s">The string to parse.</param>
    /// <returns>True if the string was parsed, false to indicate the end
    /// of usable numeric terms.</returns>
    protected override bool TryParseNumeric(string s)
    {
        // The first character carries the shift of the encoded term; a shift
        // in 1..63 signals the end of the usable terms for this field.
        int shift = s[0] - NumericUtils.SHIFT_START_LONG;
        if (shift > 0 && shift <= 63)
        {
            return false;
        }

        // Decode the prefix-coded long and convert it back to a double.
        // Without this assignment Current was never updated on success.
        this.tEnum.Current = NumericUtils.SortableLongToDouble(NumericUtils.PrefixCodedToLong(s));
        return true;
    }
}
784 
/// <summary>
/// Implementation for enumerating over all of the terms in a long numeric field.
/// </summary>
public class LongFieldEnumerator : NumericFieldEnum<long>
{
    /// <summary>
    /// Construct an enumerator over one field, with document enumeration enabled.
    /// </summary>
    /// <param name="reader">Index reader.</param>
    /// <param name="fieldName">The field to read.</param>
    /// <param name="parser">How the term text is to be parsed.</param>
    public LongFieldEnumerator(IndexReader reader, string fieldName, FieldParser parser)
    {
        this.Init(reader, fieldName, true, parser);
    }

    /// <summary>
    /// Construct an enumerator over one field.
    /// </summary>
    /// <param name="reader">Index reader.</param>
    /// <param name="fieldName">The field to read.</param>
    /// <param name="parser">How the term text is to be parsed.</param>
    /// <param name="includeDocs">Whether the TermDocs will also be enumerated.</param>
    public LongFieldEnumerator(IndexReader reader, string fieldName, FieldParser parser, bool includeDocs)
    {
        this.Init(reader, fieldName, includeDocs, parser);
    }

    /// <summary>
    /// Parse the long from a string.
    /// </summary>
    /// <param name="s">The string to parse.</param>
    /// <returns>Always true; malformed text surfaces as an exception from long.Parse.</returns>
    protected override bool TryParseString(string s)
    {
        // Index terms are machine data: parse them independently of the
        // thread's current culture so results do not vary per machine.
        this.tEnum.Current = long.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
        return true;
    }

    /// <summary>
    /// Parse the long from a numeric field.
    /// </summary>
    /// <param name="s">The string to parse.</param>
    /// <returns>True if the string was parsed, false to indicate
    /// the end of usable terms in a numeric field.</returns>
    protected override bool TryParseNumeric(string s)
    {
        // The first character carries the shift of the encoded term; a shift
        // in 1..63 signals the end of the usable terms for this field.
        int shift = s[0] - NumericUtils.SHIFT_START_LONG;
        if (shift > 0 && shift <= 63)
        {
            return false;
        }

        this.tEnum.Current = NumericUtils.PrefixCodedToLong(s);
        return true;
    }
}
842 }