Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
ParallelReader.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using System.Linq;
21 using Lucene.Net.Support;
22 using Document = Lucene.Net.Documents.Document;
23 using FieldSelector = Lucene.Net.Documents.FieldSelector;
24 using FieldSelectorResult = Lucene.Net.Documents.FieldSelectorResult;
25 
26 namespace Lucene.Net.Index
27 {
28  /// <summary>An IndexReader which reads multiple, parallel indexes. Each index added
29  /// must have the same number of documents, but typically each contains
30  /// different fields. Each document contains the union of the fields of all
31  /// documents with the same document number. When searching, matches for a
32  /// query term are from the first index added that has the field.
33  ///
34  /// <p/>This is useful, e.g., with collections that have large fields which
35  /// change rarely and small fields that change more frequently. The smaller
36  /// fields may be re-indexed in a new index and both indexes may be searched
37  /// together.
38  ///
39  /// <p/><strong>Warning:</strong> It is up to you to make sure all indexes
40  /// are created and modified the same way. For example, if you add
41  /// documents to one index, you need to add the same documents in the
42  /// same order to the other indexes. <em>Failure to do so will result in
43  /// undefined behavior</em>.
44  /// </summary>
45  public class ParallelReader:IndexReader, System.ICloneable
46  {
// Sub-readers in the order they were added.
private List<IndexReader> readers = new List<IndexReader>();

// Parallel to 'readers': true means DoClose() calls DecRef() on that sub-reader, false means it calls Close().
private List<bool> decrefOnClose = new List<bool>();

// When true, Add() takes an extra reference on each sub-reader (set to !closeSubReaders by the constructor).
internal bool incRefReaders;

// Field name -> the first reader added that reported the field; sorted by field name.
private SortedDictionary<string, IndexReader> fieldToReader = new SortedDictionary<string, IndexReader>();

// Reader -> the field names it reported when it was added.
private IDictionary<IndexReader, ICollection<string>> readerToFields = new HashMap<IndexReader, ICollection<string>>();

// Readers whose stored fields are merged by Document().
private List<IndexReader> storedFieldReaders = new List<IndexReader>();

// Values captured from the first reader added; every later reader must match maxDoc and numDocs.
private int maxDoc;
private int numDocs;
private bool hasDeletions;
57 
/// <summary>
/// Construct a ParallelReader that closes its subreaders when it is closed
/// (equivalent to <c>new ParallelReader(true)</c>).
/// </summary>
public ParallelReader() : this(true)
{
}
64 
/// <summary>Construct a ParallelReader.</summary>
/// <param name="closeSubReaders">
/// <c>true</c> if the subreaders should be closed when this ParallelReader is closed;
/// <c>false</c> to take a reference on each subreader when it is added and only
/// release that reference on close.
/// </param>
public ParallelReader(bool closeSubReaders) : base()
{
    // Reference counting of subreaders is only needed when we do not own them.
    incRefReaders = (closeSubReaders == false);
}
73 
/// <summary>
/// Add an IndexReader whose stored fields are included in the documents
/// returned by this reader.
/// </summary>
/// <param name="reader">the subreader to add</param>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public virtual void Add(IndexReader reader)
{
    EnsureOpen();

    const bool ignoreStoredFields = false;
    Add(reader, ignoreStoredFields);
}
81 
/// <summary>
/// Add an IndexReader, optionally excluding its stored fields from the documents
/// returned by this reader. Ignoring stored fields can accelerate search when
/// stored fields are only needed from a subset of the IndexReaders.
/// </summary>
/// <param name="reader">the subreader to add; must not be null</param>
/// <param name="ignoreStoredFields">
/// <c>true</c> to leave this reader out of the stored-field readers consulted by
/// <c>Document(int, FieldSelector)</c>
/// </param>
/// <exception cref="System.ArgumentNullException">if <paramref name="reader"/> is null</exception>
/// <exception cref="System.ArgumentException">
/// if not all indexes contain the same number of documents, or not all indexes
/// have the same value of <see cref="IndexReader.MaxDoc" />
/// </exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public virtual void Add(IndexReader reader, bool ignoreStoredFields)
{
    EnsureOpen();

    // Fail with a descriptive exception instead of a NullReferenceException below.
    if (reader == null)
    {
        throw new System.ArgumentNullException("reader");
    }

    // The first reader added defines the values every later reader must match.
    if (readers.Count == 0)
    {
        this.maxDoc = reader.MaxDoc;
        this.numDocs = reader.NumDocs();
        this.hasDeletions = reader.HasDeletions;
    }

    // check compatibility
    if (reader.MaxDoc != maxDoc)
    {
        throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc);
    }
    if (reader.NumDocs() != numDocs)
    {
        throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
    }

    ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
    readerToFields[reader] = fields;
    foreach (var field in fields)
    {
        // update fieldToReader map: the first reader that has a field keeps it
        // (single lookup; mirrors the Java "get(field) == null" check)
        IndexReader existing;
        if (!fieldToReader.TryGetValue(field, out existing) || existing == null)
        {
            fieldToReader[field] = reader;
        }
    }

    if (!ignoreStoredFields)
    {
        storedFieldReaders.Add(reader); // add to storedFieldReaders
    }
    readers.Add(reader);

    if (incRefReaders)
    {
        reader.IncRef();
    }
    // remember whether DoClose() must DecRef (true) or Close (false) this reader
    decrefOnClose.Add(incRefReaders);
}
131 
/// <summary>
/// Creates a copy of this reader by running <c>DoReopen(true)</c>, which clones
/// every subreader.
/// </summary>
/// <returns>the reader produced by <c>DoReopen(true)</c></returns>
/// <exception cref="System.SystemException">
/// wraps any exception raised while cloning; the original is available as InnerException
/// </exception>
public override System.Object Clone()
{
    // Take the same monitor as Reopen() so the two entry points into DoReopen
    // are serialized against each other and against DoClose().
    lock (this)
    {
        try
        {
            return DoReopen(true);
        }
        catch (System.Exception ex)
        {
            // Exception type kept as-is for callers that already catch SystemException.
            throw new System.SystemException(ex.Message, ex);
        }
    }
}
143 
/// <summary>
/// Tries to reopen the subreaders.
/// <br/>
/// If one or more subreaders could be re-opened (i.e. subReader.Reopen()
/// returned a new instance != subReader), then a new ParallelReader instance
/// is returned, otherwise this instance is returned.
/// <p/>
/// A re-opened instance might share one or more subreaders with the old
/// instance. Index modification operations result in undefined behavior
/// when performed before the old instance is closed
/// (see <see cref="IndexReader.Reopen()" />).
/// <p/>
/// If subreaders are shared, then the reference count of those
/// readers is increased to ensure that the subreaders remain open
/// until the last referring reader is closed.
/// </summary>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public override IndexReader Reopen()
{
    lock (this)
    {
        IndexReader refreshed = DoReopen(false);
        return refreshed;
    }
}
169 
/// <summary>
/// Shared implementation of reopen and clone: asks every subreader for a clone
/// (<paramref name="doClone"/> is true) or a reopened instance (false) and, if at
/// least one subreader came back as a different instance, assembles a new
/// ParallelReader from the results.
/// </summary>
/// <param name="doClone">true to call Clone() on each subreader, false to call Reopen()</param>
/// <returns>a new ParallelReader if any subreader changed, otherwise this instance</returns>
protected internal virtual IndexReader DoReopen(bool doClone)
{
    EnsureOpen();

    IList<IndexReader> candidates = new List<IndexReader>();
    bool anyChanged = false;
    bool completed = false;

    try
    {
        foreach (IndexReader current in readers)
        {
            IndexReader candidate = doClone ? (IndexReader) current.Clone() : current.Reopen();
            candidates.Add(candidate);

            // if at least one of the subreaders was updated we remember that
            // and return a new ParallelReader
            anyChanged |= (candidate != current);
        }
        completed = true;
    }
    finally
    {
        // A subreader failed part-way through: close the new instances obtained so far.
        if (!completed && anyChanged)
        {
            for (int idx = 0; idx < candidates.Count; idx++)
            {
                IndexReader candidate = candidates[idx];
                if (candidate == readers[idx])
                {
                    continue; // unchanged instance, still owned by this reader
                }

                try
                {
                    candidate.Close();
                }
                catch (System.IO.IOException)
                {
                    // keep going - we want to clean up as much as possible
                }
            }
        }
    }

    if (!anyChanged)
    {
        // No subreader was refreshed
        return this;
    }

    List<bool> replacementDecref = new List<bool>();
    ParallelReader replacement = new ParallelReader();
    for (int idx = 0; idx < readers.Count; idx++)
    {
        IndexReader previous = readers[idx];
        IndexReader candidate = candidates[idx];

        // A shared instance gets an extra reference and is DecRef'd on close;
        // a new subreader instance is closed outright on close.
        bool shared = (candidate == previous);
        replacementDecref.Add(shared);
        if (shared)
        {
            candidate.IncRef();
        }

        // Preserve whether the old subreader contributed stored fields.
        replacement.Add(candidate, !storedFieldReaders.Contains(previous));
    }

    replacement.decrefOnClose = replacementDecref;
    replacement.incRefReaders = incRefReaders;
    return replacement;
}
255 
256 
/// <summary>Returns the document count captured from the first reader added.</summary>
public override int NumDocs()
{
    // Deliberately no EnsureOpen() call here (it could affect performance)
    int count = numDocs;
    return count;
}
262 
/// <summary>Gets the maxDoc value captured from the first reader added.</summary>
public override int MaxDoc
{
    // Deliberately no EnsureOpen() call here (it could affect performance)
    get { return maxDoc; }
}
271 
/// <summary>
/// Gets the deletions flag: initialised from the first reader added, set by
/// DoDelete and cleared by DoUndeleteAll.
/// </summary>
public override bool HasDeletions
{
    // Deliberately no EnsureOpen() call here (it could affect performance)
    get { return hasDeletions; }
}
280 
/// <summary>
/// Reports whether document <paramref name="n"/> is deleted, as seen by the first
/// subreader; returns false when no subreader has been added.
/// </summary>
public override bool IsDeleted(int n)
{
    // Deliberately no EnsureOpen() call here (it could affect performance)
    return readers.Count > 0 && readers[0].IsDeleted(n);
}
289 
/// <summary>Deletes document <paramref name="n"/> in every subreader and records that deletions exist.</summary>
protected internal override void DoDelete(int n)
{
    for (int i = 0; i < readers.Count; i++)
    {
        readers[i].DeleteDocument(n);
    }

    hasDeletions = true;
}
299 
/// <summary>Undeletes all documents in every subreader and clears the deletions flag.</summary>
protected internal override void DoUndeleteAll()
{
    for (int i = 0; i < readers.Count; i++)
    {
        readers[i].UndeleteAll();
    }

    hasDeletions = false;
}
309 
/// <summary>
/// Builds document <paramref name="n"/> by appending the stored fields from each
/// stored-field reader, in the order the readers were added.
/// </summary>
/// <param name="n">the document number</param>
/// <param name="fieldSelector">
/// optional selector; when non-null, a reader is only consulted if the selector
/// accepts at least one of that reader's fields with a result other than NO_LOAD
/// </param>
/// <returns>a new Document containing the collected fields</returns>
public override Document Document(int n, FieldSelector fieldSelector)
{
    EnsureOpen();

    Document merged = new Document();
    foreach (IndexReader storedReader in storedFieldReaders)
    {
        // Without a selector every reader contributes; otherwise skip readers
        // for which the selector would load nothing.
        bool wanted = fieldSelector == null
            || readerToFields[storedReader].Any(name => fieldSelector.Accept(name) != FieldSelectorResult.NO_LOAD);
        if (!wanted)
        {
            continue;
        }

        foreach (var storedField in storedReader.Document(n, fieldSelector).GetFields())
        {
            merged.Add(storedField);
        }
    }

    return merged;
}
341 
/// <summary>
/// Collects the term frequency vectors of document <paramref name="n"/> for every
/// known field, asking the reader mapped to each field; fields without a vector
/// are skipped.
/// </summary>
/// <returns>the non-null vectors, in field-name order of the field map</returns>
public override ITermFreqVector[] GetTermFreqVectors(int n)
{
    EnsureOpen();

    List<ITermFreqVector> vectors = new List<ITermFreqVector>();
    foreach (KeyValuePair<string, IndexReader> entry in fieldToReader)
    {
        ITermFreqVector vector = entry.Value.GetTermFreqVector(n, entry.Key);
        if (vector == null)
        {
            continue;
        }
        vectors.Add(vector);
    }

    return vectors.ToArray();
}
358 
/// <summary>
/// Returns the term frequency vector of <paramref name="field"/> for document
/// <paramref name="n"/>, taken from the reader mapped to that field.
/// </summary>
/// <returns>the vector, or null when no added reader has the field</returns>
public override ITermFreqVector GetTermFreqVector(int n, System.String field)
{
    EnsureOpen();

    // The dictionary indexer throws KeyNotFoundException for an unknown field;
    // TryGetValue makes the intended "unknown field -> null" path reachable.
    IndexReader reader;
    if (!fieldToReader.TryGetValue(field, out reader) || reader == null)
    {
        return null;
    }

    return reader.GetTermFreqVector(n, field);
}
365 
366 
/// <summary>
/// Feeds the term vector of <paramref name="field"/> for document
/// <paramref name="docNumber"/> to <paramref name="mapper"/>, using the reader
/// mapped to that field. Does nothing when no added reader has the field.
/// </summary>
public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper)
{
    EnsureOpen();

    // The dictionary indexer throws KeyNotFoundException for an unknown field;
    // TryGetValue makes the intended "unknown field -> no-op" path reachable.
    IndexReader reader;
    if (fieldToReader.TryGetValue(field, out reader) && reader != null)
    {
        reader.GetTermFreqVector(docNumber, field, mapper);
    }
}
376 
/// <summary>
/// Feeds the term vectors of document <paramref name="docNumber"/> to
/// <paramref name="mapper"/>, one field at a time, using the reader mapped to each field.
/// </summary>
public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper)
{
    EnsureOpen();

    foreach (KeyValuePair<string, IndexReader> entry in fieldToReader)
    {
        entry.Value.GetTermFreqVector(docNumber, entry.Key, mapper);
    }
}
388 
/// <summary>
/// Reports whether the reader mapped to <paramref name="field"/> has norms for it.
/// </summary>
/// <returns>false when no added reader has the field</returns>
public override bool HasNorms(System.String field)
{
    EnsureOpen();

    // The dictionary indexer throws KeyNotFoundException for an unknown field;
    // TryGetValue makes the intended "unknown field -> false" path reachable.
    IndexReader reader;
    return fieldToReader.TryGetValue(field, out reader)
        && reader != null
        && reader.HasNorms(field);
}
395 
/// <summary>
/// Returns the norms of <paramref name="field"/> from the reader mapped to that field.
/// </summary>
/// <returns>the norms, or null when no added reader has the field</returns>
public override byte[] Norms(System.String field)
{
    EnsureOpen();

    // The dictionary indexer throws KeyNotFoundException for an unknown field;
    // TryGetValue makes the intended "unknown field -> null" path reachable.
    IndexReader reader;
    if (!fieldToReader.TryGetValue(field, out reader) || reader == null)
    {
        return null;
    }

    return reader.Norms(field);
}
402 
/// <summary>
/// Asks the reader mapped to <paramref name="field"/> to write that field's norms
/// into <paramref name="result"/> starting at <paramref name="offset"/>.
/// Does nothing when no added reader has the field.
/// </summary>
public override void Norms(System.String field, byte[] result, int offset)
{
    EnsureOpen();

    // The dictionary indexer throws KeyNotFoundException for an unknown field;
    // TryGetValue makes the intended "unknown field -> no-op" path reachable.
    IndexReader reader;
    if (fieldToReader.TryGetValue(field, out reader) && reader != null)
    {
        reader.Norms(field, result, offset);
    }
}
410 
/// <summary>
/// Forwards a norm update for document <paramref name="n"/> and
/// <paramref name="field"/> to the reader mapped to that field.
/// Does nothing when no added reader has the field.
/// </summary>
protected internal override void DoSetNorm(int n, System.String field, byte value_Renamed)
{
    // The dictionary indexer throws KeyNotFoundException for an unknown field;
    // TryGetValue makes the intended "unknown field -> no-op" path reachable.
    IndexReader reader;
    if (fieldToReader.TryGetValue(field, out reader) && reader != null)
    {
        reader.DoSetNorm(n, field, value_Renamed);
    }
}
417 
/// <summary>Returns a term enumeration spanning the fields of all added readers.</summary>
public override TermEnum Terms()
{
    EnsureOpen();

    TermEnum allTerms = new ParallelTermEnum(this);
    return allTerms;
}
423 
/// <summary>
/// Returns a term enumeration created for <paramref name="term"/>, backed by the
/// reader mapped to the term's field.
/// </summary>
public override TermEnum Terms(Term term)
{
    EnsureOpen();

    TermEnum termsFrom = new ParallelTermEnum(this, term);
    return termsFrom;
}
429 
/// <summary>
/// Returns the document frequency of <paramref name="term"/> as reported by the
/// reader mapped to the term's field.
/// </summary>
/// <returns>0 when no added reader has the term's field</returns>
public override int DocFreq(Term term)
{
    EnsureOpen();

    // The dictionary indexer throws KeyNotFoundException for an unknown field;
    // TryGetValue makes the intended "unknown field -> 0" path reachable.
    IndexReader reader;
    if (!fieldToReader.TryGetValue(term.Field, out reader) || reader == null)
    {
        return 0;
    }

    return reader.DocFreq(term);
}
436 
/// <summary>Returns a TermDocs wrapper created for <paramref name="term"/>.</summary>
public override TermDocs TermDocs(Term term)
{
    EnsureOpen();

    TermDocs positioned = new ParallelTermDocs(this, term);
    return positioned;
}
442 
/// <summary>Returns a TermDocs wrapper that has not been positioned on any term yet.</summary>
public override TermDocs TermDocs()
{
    EnsureOpen();

    TermDocs unpositioned = new ParallelTermDocs(this);
    return unpositioned;
}
448 
/// <summary>Returns a TermPositions wrapper created for <paramref name="term"/>.</summary>
public override TermPositions TermPositions(Term term)
{
    EnsureOpen();

    TermPositions positioned = new ParallelTermPositions(this, term);
    return positioned;
}
454 
/// <summary>Returns a TermPositions wrapper that has not been positioned on any term yet.</summary>
public override TermPositions TermPositions()
{
    EnsureOpen();

    TermPositions unpositioned = new ParallelTermPositions(this);
    return unpositioned;
}
460 
/// <summary>Checks recursively if all subreaders are up to date.</summary>
/// <returns>true only if every subreader reports that it is current</returns>
public override bool IsCurrent()
{
    // All() stops at the first subreader that is not current.
    return readers.All(subReader => subReader.IsCurrent());
}
475 
/// <summary>Checks recursively if all subindexes are optimized.</summary>
/// <returns>true only if every subreader reports that it is optimized</returns>
public override bool IsOptimized()
{
    // All() stops at the first subindex that is not optimized.
    return readers.All(subReader => subReader.IsOptimized());
}
490 
491 
/// <summary>Not implemented.</summary>
/// <exception cref="System.NotSupportedException">always</exception>
public override long Version
{
    get
    {
        const string message = "ParallelReader does not support this method.";
        throw new System.NotSupportedException(message);
    }
}
498 
/// <summary>Returns a snapshot array of the subreaders, in the order they were added. For testing.</summary>
public /*internal*/ virtual IndexReader[] GetSubReaders()
{
    IndexReader[] snapshot = new IndexReader[readers.Count];
    readers.CopyTo(snapshot);
    return snapshot;
}
504 
/// <summary>Commits every subreader, passing <paramref name="commitUserData"/> to each.</summary>
protected internal override void DoCommit(IDictionary<string, string> commitUserData)
{
    for (int i = 0; i < readers.Count; i++)
    {
        readers[i].Commit(commitUserData);
    }
}
510 
/// <summary>
/// Releases every subreader: those flagged in decrefOnClose have their reference
/// count decremented, the others are closed. Afterwards this reader's entries are
/// purged from the default field cache.
/// </summary>
protected internal override void DoClose()
{
    lock (this)
    {
        for (int i = 0; i < readers.Count; i++)
        {
            IndexReader subReader = readers[i];
            if (!decrefOnClose[i])
            {
                subReader.Close();
            }
            else
            {
                subReader.DecRef();
            }
        }
    }

    Lucene.Net.Search.FieldCache_Fields.DEFAULT.Purge(this);
}
530 
/// <summary>
/// Returns the union of the field names that each subreader reports for
/// <paramref name="fieldNames"/>.
/// </summary>
public override System.Collections.Generic.ICollection<string> GetFieldNames(IndexReader.FieldOption fieldNames)
{
    EnsureOpen();

    ISet<string> union = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<string>();
    for (int i = 0; i < readers.Count; i++)
    {
        union.UnionWith(readers[i].GetFieldNames(fieldNames));
    }

    return union;
}
542 
/// <summary>
/// TermEnum over the enclosing ParallelReader: walks the terms of one field using
/// the reader mapped to that field, then moves on to the following fields in the
/// sorted order of the field map.
/// </summary>
private class ParallelTermEnum : TermEnum
{
    private readonly ParallelReader enclosingInstance;

    // Field currently being enumerated; null when the reader has no fields.
    private System.String field;
    // Lazily created iterator over the fields that follow the starting field.
    private IEnumerator<string> fieldIterator;
    // Enumeration of the current field's reader; null when there is nothing to enumerate.
    private TermEnum termEnum;

    private bool isDisposed;

    public ParallelReader Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    /// <summary>Starts at the first field of the field map, if there is one.</summary>
    public ParallelTermEnum(ParallelReader enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;

        // First() throws InvalidOperationException on an empty key collection (not the
        // ArgumentOutOfRangeException that used to be caught here); FirstOrDefault()
        // yields null instead, leaving field == null and termEnum == null.
        field = Enclosing_Instance.fieldToReader.Keys.FirstOrDefault();
        if (field != null)
        {
            termEnum = Enclosing_Instance.fieldToReader[field].Terms();
        }
    }

    /// <summary>
    /// Starts at <paramref name="term"/> in the reader mapped to the term's field;
    /// the enumeration is empty when no added reader has that field.
    /// </summary>
    public ParallelTermEnum(ParallelReader enclosingInstance, Term term)
    {
        this.enclosingInstance = enclosingInstance;
        field = term.Field;

        // The dictionary indexer throws KeyNotFoundException for an unknown field;
        // TryGetValue makes the intended "unknown field -> empty enumeration" path reachable.
        IndexReader reader;
        if (Enclosing_Instance.fieldToReader.TryGetValue(field, out reader) && reader != null)
        {
            termEnum = reader.Terms(term);
        }
    }

    /// <summary>Advances to the next term, switching to the next field that has terms when needed.</summary>
    /// <returns>false when there are no more terms in any remaining field</returns>
    public override bool Next()
    {
        if (termEnum == null)
        {
            return false;
        }

        // another term in this field?
        // NOTE(review): the reference comparison presumably relies on interned field names - confirm.
        if (termEnum.Next() && (System.Object) termEnum.Term.Field == (System.Object) field)
        {
            return true; // yes, keep going
        }

        termEnum.Close(); // close old termEnum

        // find the next field with terms, if any
        if (fieldIterator == null)
        {
            var remainingFields = new List<string>();
            if (Enclosing_Instance.fieldToReader != null && Enclosing_Instance.fieldToReader.Count > 0)
            {
                // Collect the current field and everything sorting after it.
                var comparer = Enclosing_Instance.fieldToReader.Comparer;
                foreach (var entry in Enclosing_Instance.fieldToReader.Keys.Where(x => comparer.Compare(x, field) >= 0))
                {
                    remainingFields.Add(entry);
                }
            }

            fieldIterator = remainingFields.Skip(1).GetEnumerator(); // Skip field to get next one
        }

        while (fieldIterator.MoveNext())
        {
            field = fieldIterator.Current;
            termEnum = Enclosing_Instance.fieldToReader[field].Terms(new Term(field));
            Term term = termEnum.Term;
            if (term != null && (System.Object) term.Field == (System.Object) field)
            {
                return true;
            }

            termEnum.Close();
        }

        return false; // no more fields
    }

    /// <summary>Gets the current term of the underlying enumeration, or null when there is none.</summary>
    public override Term Term
    {
        get { return termEnum == null ? null : termEnum.Term; }
    }

    /// <summary>Returns the document frequency of the current term, or 0 when there is no underlying enumeration.</summary>
    public override int DocFreq()
    {
        return termEnum == null ? 0 : termEnum.DocFreq();
    }

    /// <summary>Closes the underlying enumeration once; later calls are no-ops.</summary>
    protected override void Dispose(bool disposing)
    {
        if (isDisposed) return;

        if (disposing)
        {
            if (termEnum != null)
            {
                termEnum.Close();
            }
        }

        isDisposed = true;
    }
}
659 
/// <summary>
/// Wraps a TermDocs in order to support Seek(Term): each seek picks the reader
/// mapped to the term's field and delegates to that reader's TermDocs.
/// </summary>
private class ParallelTermDocs : TermDocs
{
    private readonly ParallelReader enclosingInstance;

    // Current delegate; null until positioned, or when the sought field is unknown.
    protected internal TermDocs termDocs;

    private bool isDisposed;

    public ParallelReader Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    /// <summary>Creates an unpositioned instance; call Seek before reading.</summary>
    public ParallelTermDocs(ParallelReader enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;
    }

    /// <summary>
    /// Creates an instance positioned on <paramref name="term"/>. A null term is
    /// passed on to the first subreader's TermDocs(null); with no subreaders the
    /// instance stays empty.
    /// </summary>
    public ParallelTermDocs(ParallelReader enclosingInstance, Term term)
    {
        this.enclosingInstance = enclosingInstance;
        if (term == null)
        {
            termDocs = (Enclosing_Instance.readers.Count == 0)
                ? null
                : Enclosing_Instance.readers[0].TermDocs(null);
        }
        else
        {
            Seek(term);
        }
    }

    /// <summary>Gets the current document number; only valid while a delegate is positioned.</summary>
    public virtual int Doc
    {
        get { return termDocs.Doc; }
    }

    /// <summary>Gets the frequency of the term in the current document; only valid while a delegate is positioned.</summary>
    public virtual int Freq
    {
        get { return termDocs.Freq; }
    }

    /// <summary>
    /// Positions this instance on <paramref name="term"/> using the reader mapped to
    /// the term's field; the instance becomes empty when no added reader has that field.
    /// </summary>
    public virtual void Seek(Term term)
    {
        // The dictionary indexer throws KeyNotFoundException for an unknown field;
        // TryGetValue makes the intended "unknown field -> empty" path reachable.
        IndexReader reader;
        if (Enclosing_Instance.fieldToReader.TryGetValue(term.Field, out reader) && reader != null)
        {
            termDocs = reader.TermDocs(term);
        }
        else
        {
            termDocs = null;
        }
    }

    /// <summary>Positions this instance on the current term of <paramref name="termEnum"/>.</summary>
    public virtual void Seek(TermEnum termEnum)
    {
        Seek(termEnum.Term);
    }

    /// <summary>Advances the delegate; returns false when there is no delegate.</summary>
    public virtual bool Next()
    {
        return termDocs != null && termDocs.Next();
    }

    /// <summary>Bulk-reads through the delegate; returns 0 when there is no delegate.</summary>
    public virtual int Read(int[] docs, int[] freqs)
    {
        return termDocs == null ? 0 : termDocs.Read(docs, freqs);
    }

    /// <summary>Skips through the delegate; returns false when there is no delegate.</summary>
    public virtual bool SkipTo(int target)
    {
        return termDocs != null && termDocs.SkipTo(target);
    }

    [Obsolete("Use Dispose() instead")]
    public virtual void Close()
    {
        Dispose();
    }

    public void Dispose()
    {
        Dispose(true);
    }

    /// <summary>Closes the delegate once; later calls are no-ops.</summary>
    protected virtual void Dispose(bool disposing)
    {
        if (isDisposed) return;

        if (disposing)
        {
            if (termDocs != null)
            {
                termDocs.Close();
            }
        }

        isDisposed = true;
    }
}
764 
/// <summary>
/// TermPositions counterpart of ParallelTermDocs: seeks select the reader mapped
/// to the term's field and delegate to that reader's TermPositions.
/// </summary>
private class ParallelTermPositions : ParallelTermDocs, TermPositions
{
    private readonly ParallelReader enclosingInstance;

    public new ParallelReader Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    /// <summary>Creates an unpositioned instance; call Seek before reading.</summary>
    public ParallelTermPositions(ParallelReader enclosingInstance) : base(enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;
    }

    /// <summary>Creates an instance positioned on <paramref name="term"/>.</summary>
    public ParallelTermPositions(ParallelReader enclosingInstance, Term term) : base(enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;
        Seek(term);
    }

    /// <summary>
    /// Positions this instance on <paramref name="term"/> using the reader mapped to
    /// the term's field; the instance becomes empty when no added reader has that field.
    /// </summary>
    public override void Seek(Term term)
    {
        // The dictionary indexer throws KeyNotFoundException for an unknown field;
        // TryGetValue makes the intended "unknown field -> empty" path reachable.
        IndexReader reader;
        if (Enclosing_Instance.fieldToReader.TryGetValue(term.Field, out reader) && reader != null)
        {
            termDocs = reader.TermPositions(term);
        }
        else
        {
            termDocs = null;
        }
    }

    /// <summary>Returns the next position of the current term in the current document.</summary>
    public virtual int NextPosition()
    {
        // It is an error to call this if there is no next position, e.g. if termDocs==null
        return ((TermPositions) termDocs).NextPosition();
    }

    /// <summary>Gets the payload length reported by the delegate.</summary>
    public virtual int PayloadLength
    {
        get { return ((TermPositions) termDocs).PayloadLength; }
    }

    /// <summary>Forwards the payload request to the delegate.</summary>
    public virtual byte[] GetPayload(byte[] data, int offset)
    {
        return ((TermPositions) termDocs).GetPayload(data, offset);
    }

    // TODO: Remove warning after API has been finalized

    /// <summary>Gets whether the delegate reports an available payload.</summary>
    public virtual bool IsPayloadAvailable
    {
        get { return ((TermPositions) termDocs).IsPayloadAvailable; }
    }
}
821  }
822 }