Lucene.Net 3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
IndexWriter.cs
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using System.IO;
21 using Lucene.Net.Support;
22 using Analyzer = Lucene.Net.Analysis.Analyzer;
23 using Document = Lucene.Net.Documents.Document;
24 using IndexingChain = Lucene.Net.Index.DocumentsWriter.IndexingChain;
25 using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException;
26 using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput;
27 using Directory = Lucene.Net.Store.Directory;
28 using Lock = Lucene.Net.Store.Lock;
29 using LockObtainFailedException = Lucene.Net.Store.LockObtainFailedException;
30 using Constants = Lucene.Net.Util.Constants;
31 using Query = Lucene.Net.Search.Query;
32 using Similarity = Lucene.Net.Search.Similarity;
33 
34 namespace Lucene.Net.Index
35 {
36 
37  /// <summary>An <c>IndexWriter</c> creates and maintains an index.
38  /// <p/>The <c>create</c> argument to the
39  /// <see cref="IndexWriter(Directory, Analyzer, bool, MaxFieldLength)">constructor</see> determines
40  /// whether a new index is created, or whether an existing index is
41  /// opened. Note that you can open an index with <c>create=true</c>
42  /// even while readers are using the index. The old readers will
43  /// continue to search the "point in time" snapshot they had opened,
44  /// and won't see the newly created index until they re-open. There are
45  /// also <see cref="IndexWriter(Directory, Analyzer, MaxFieldLength)">constructors</see>
46  /// with no <c>create</c> argument which will create a new index
47  /// if there is not already an index at the provided path and otherwise
48  /// open the existing index.<p/>
49  /// <p/>In either case, documents are added with <see cref="AddDocument(Document)" />
50  /// and removed with <see cref="DeleteDocuments(Term)" /> or
51  /// <see cref="DeleteDocuments(Query)" />. A document can be updated with
52  /// <see cref="UpdateDocument(Term, Document)" /> (which just deletes
53  /// and then adds the entire document). When finished adding, deleting
54  /// and updating documents, <see cref="Close()" /> should be called.<p/>
55  /// <a name="flush"></a>
56  /// <p/>These changes are buffered in memory and periodically
57  /// flushed to the <see cref="Directory" /> (during the above method
58  /// calls). A flush is triggered when there are enough
59  /// buffered deletes (see <see cref="SetMaxBufferedDeleteTerms" />)
60  /// or enough added documents since the last flush, whichever
61  /// is sooner. For the added documents, flushing is triggered
62  /// either by RAM usage of the documents (see
63  /// <see cref="SetRAMBufferSizeMB" />) or the number of added documents.
64  /// The default is to flush when RAM usage hits 16 MB. For
65  /// best indexing speed you should flush by RAM usage with a
66  /// large RAM buffer. Note that flushing just moves the
67  /// internal buffered state in IndexWriter into the index, but
68  /// these changes are not visible to IndexReader until either
69  /// <see cref="Commit()" /> or <see cref="Close()" /> is called. A flush may
70  /// also trigger one or more segment merges which by default
71  /// run with a background thread so as not to block the
72  /// addDocument calls (see <a href="#mergePolicy">below</a>
73  /// for changing the <see cref="MergeScheduler" />).
74  /// <p/>
75  /// If an index will not have more documents added for a while and optimal search
76  /// performance is desired, then either the full <see cref="Optimize()" />
77  /// method or partial <see cref="Optimize(int)" /> method should be
78  /// called before the index is closed.
79  /// <p/>
80  /// Opening an <c>IndexWriter</c> creates a lock file for the directory in use. Trying to open
81  /// another <c>IndexWriter</c> on the same directory will lead to a
82  /// <see cref="LockObtainFailedException" />. The <see cref="LockObtainFailedException" />
83  /// is also thrown if an IndexReader on the same directory is used to delete documents
84  /// from the index.<p/>
85  /// </summary>
86  /// <summary><a name="deletionPolicy"></a>
87  /// <p/>Expert: <c>IndexWriter</c> allows an optional
88  /// <see cref="IndexDeletionPolicy" /> implementation to be
89  /// specified. You can use this to control when prior commits
90  /// are deleted from the index. The default policy is <see cref="KeepOnlyLastCommitDeletionPolicy" />
91  /// which removes all prior
92  /// commits as soon as a new commit is done (this matches
93  /// behavior before 2.2). Creating your own policy can allow
94  /// you to explicitly keep previous "point in time" commits
95  /// alive in the index for some time, to allow readers to
96  /// refresh to the new commit without having the old commit
97  /// deleted out from under them. This is necessary on
98  /// filesystems like NFS that do not support "delete on last
99  /// close" semantics, which Lucene's "point in time" search
100  /// normally relies on. <p/>
101  /// <a name="mergePolicy"></a> <p/>Expert:
102  /// <c>IndexWriter</c> allows you to separately change
103  /// the <see cref="MergePolicy" /> and the <see cref="MergeScheduler" />.
104  /// The <see cref="MergePolicy" /> is invoked whenever there are
105  /// changes to the segments in the index. Its role is to
106  /// select which merges to do, if any, and return a <see cref="Index.MergePolicy.MergeSpecification" />
107  /// describing the merges. It
108  /// also selects merges to do for optimize(). (The default is
109  /// <see cref="LogByteSizeMergePolicy" />.) Then, the <see cref="MergeScheduler" />
110  /// is invoked with the requested merges and
111  /// it decides when and how to run the merges. The default is
112  /// <see cref="ConcurrentMergeScheduler" />. <p/>
113  /// <a name="OOME"></a><p/><b>NOTE</b>: if you hit an
114  /// OutOfMemoryError then IndexWriter will quietly record this
115  /// fact and block all future segment commits. This is a
116  /// defensive measure in case any internal state (buffered
117  /// documents and deletions) were corrupted. Any subsequent
118  /// calls to <see cref="Commit()" /> will throw an
119  /// IllegalStateException. The only course of action is to
120  /// call <see cref="Close()" />, which internally will call <see cref="Rollback()" />
121  ///, to undo any changes to the index since the
122  /// last commit. You can also just call <see cref="Rollback()" />
123  /// directly.<p/>
124  /// <a name="thread-safety"></a><p/><b>NOTE</b>:
125  /// <see cref="IndexWriter" /> instances are completely thread
126  /// safe, meaning multiple threads can call any of its
127  /// methods, concurrently. If your application requires
128  /// external synchronization, you should <b>not</b>
129  /// synchronize on the <c>IndexWriter</c> instance as
130  /// this may cause deadlock; use your own (non-Lucene) objects
131  /// instead. <p/>
132  /// <b>NOTE:</b> if you call
133  /// <c>Thread.Interrupt()</c> on a thread that's within
134  /// IndexWriter, IndexWriter will try to catch this (eg, if
135  /// it's in a Wait() or Thread.Sleep()), and will then throw
136  /// the unchecked exception <see cref="System.Threading.ThreadInterruptedException"/>
137  /// and <b>clear</b> the interrupt status on the thread<p/>
138  /// </summary>
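 /// <example>
 /// A minimal usage sketch (added for illustration; not part of the original source). It assumes the
 /// usual Lucene.Net.Documents, Lucene.Net.Store and Lucene.Net.Analysis.Standard namespaces:
 /// <code>
 /// Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("index"));
 /// Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
 /// using (var writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
 /// {
 ///     var doc = new Document();
 ///     doc.Add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
 ///     doc.Add(new Field("body", "hello lucene", Field.Store.NO, Field.Index.ANALYZED));
 ///     writer.AddDocument(doc);                         // buffered in RAM until a flush
 ///     writer.UpdateDocument(new Term("id", "1"), doc); // delete-then-add in a single call
 ///     writer.Commit();                                 // make the changes visible to readers
 /// } // Dispose() commits any remaining changes and releases the write lock
 /// </code>
 /// </example>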
139 
140  /*
141  * Clarification: Check Points (and commits)
142  * IndexWriter writes new index files to the directory without writing a new segments_N
143  * file which references these new files. It also means that the state of
144  * the in memory SegmentInfos object is different than the most recent
145  * segments_N file written to the directory.
146  *
147  * Each time the SegmentInfos is changed, and matches the (possibly
148  * modified) directory files, we have a new "check point".
149  * If the modified/new SegmentInfos is written to disk - as a new
150  * (generation of) segments_N file - this check point is also an
151  * IndexCommit.
152  *
153  * A new checkpoint always replaces the previous checkpoint and
154  * becomes the new "front" of the index. This allows the IndexFileDeleter
155  * to delete files that are referenced only by stale checkpoints.
156  * (files that were created since the last commit, but are no longer
157  * referenced by the "front" of the index). For this, IndexFileDeleter
158  * keeps track of the last non commit checkpoint.
159  */
160  public class IndexWriter : System.IDisposable
161  {
162  private void InitBlock()
163  {
164  similarity = Search.Similarity.Default;
165  mergePolicy = new LogByteSizeMergePolicy(this);
166  readerPool = new ReaderPool(this);
167  }
168 
169  /// <summary> Default value for the write lock timeout (1,000).</summary>
170  /// <seealso cref="DefaultWriteLockTimeout">
171  /// </seealso>
172  public static long WRITE_LOCK_TIMEOUT = 1000;
173 
174  private long writeLockTimeout = WRITE_LOCK_TIMEOUT;
175 
176  /// <summary> Name of the write lock in the index.</summary>
177  public const System.String WRITE_LOCK_NAME = "write.lock";
178 
179  /// <summary> Value to denote a flush trigger is disabled</summary>
180  public const int DISABLE_AUTO_FLUSH = - 1;
181 
182  /// <summary> Disabled by default (because IndexWriter flushes by RAM usage
183  /// by default). Change using <see cref="SetMaxBufferedDocs(int)" />.
184  /// </summary>
185  public static readonly int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH;
186 
187  /// <summary> Default value is 16 MB (which means flush when buffered
188  /// docs consume 16 MB RAM). Change using <see cref="SetRAMBufferSizeMB" />.
189  /// </summary>
190  public const double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0;
191 
192  /// <summary> Disabled by default (because IndexWriter flushes by RAM usage
193  /// by default). Change using <see cref="SetMaxBufferedDeleteTerms(int)" />.
194  /// </summary>
195  public static readonly int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH;
196 
197  /// <summary> Default value is 10,000. Change using <see cref="SetMaxFieldLength(int)" />.</summary>
198  public const int DEFAULT_MAX_FIELD_LENGTH = 10000;
199 
200  /// <summary> Default value is 128. Change using <see cref="TermIndexInterval" />.</summary>
201  public const int DEFAULT_TERM_INDEX_INTERVAL = 128;
202 
203  /// <summary> Absolute hard maximum length for a term. If a term
204  /// arrives from the analyzer longer than this length, it
205  /// is skipped and a message is printed to infoStream, if
206  /// set (see <see cref="SetInfoStream" />).
207  /// </summary>
208  public static readonly int MAX_TERM_LENGTH;
209 
210  // The normal read buffer size defaults to 1024, but
211  // increasing this during merging seems to yield
212  // performance gains. However we don't want to increase
213  // it too much because there are quite a few
214  // BufferedIndexInputs created during merging. See
215  // LUCENE-888 for details.
216  private const int MERGE_READ_BUFFER_SIZE = 4096;
217 
218  // Used for printing messages
219  private static System.Object MESSAGE_ID_LOCK = new System.Object();
220  private static int MESSAGE_ID = 0;
221  private int messageID = - 1;
222  private volatile bool hitOOM;
223 
224  private Directory directory; // where this index resides
225  private Analyzer analyzer; // how to analyze text
226 
227  private Similarity similarity; // how to normalize
228 
229  private volatile uint changeCount; // increments every time a change is completed
230  private long lastCommitChangeCount; // last changeCount that was committed
231 
232  private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
233  private HashMap<SegmentInfo, int?> rollbackSegments;
234 
235  internal volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit())
236  internal volatile uint pendingCommitChangeCount;
237 
238  private SegmentInfos localRollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
239  private int localFlushedDocCount; // saved docWriter.getFlushedDocCount during local transaction
240 
241  private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
242  private int optimizeMaxNumSegments;
243 
244  private DocumentsWriter docWriter;
245  private IndexFileDeleter deleter;
246 
247  private ISet<SegmentInfo> segmentsToOptimize = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<SegmentInfo>(); // used by optimize to note those needing optimization
248 
249  private Lock writeLock;
250 
251  private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
252 
253  private bool closed;
254  private bool closing;
255 
256  // Holds all SegmentInfo instances currently involved in
257  // merges
258  private HashSet<SegmentInfo> mergingSegments = new HashSet<SegmentInfo>();
259 
260  private MergePolicy mergePolicy;
261  private MergeScheduler mergeScheduler = new ConcurrentMergeScheduler();
262  private LinkedList<MergePolicy.OneMerge> pendingMerges = new LinkedList<MergePolicy.OneMerge>();
263  private ISet<MergePolicy.OneMerge> runningMerges = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<MergePolicy.OneMerge>();
264  private IList<MergePolicy.OneMerge> mergeExceptions = new List<MergePolicy.OneMerge>();
265  private long mergeGen;
266  private bool stopMerges;
267 
268  private int flushCount;
269  private int flushDeletesCount;
270 
271  // Used to only allow one addIndexes to proceed at once
272  // TODO: use ReadWriteLock once we are on 5.0
273  private int readCount; // count of how many threads are holding read lock
274  private ThreadClass writeThread; // non-null if any thread holds write lock
275  internal ReaderPool readerPool;
276  private int upgradeCount;
277 
278  private int readerTermsIndexDivisor = IndexReader.DEFAULT_TERMS_INDEX_DIVISOR;
279 
280  // This is a "write once" variable (like the organic dye
281  // on a DVD-R that may or may not be heated by a laser and
282  // then cooled to permanently record the event): it's
283  // false, until getReader() is called for the first time,
284  // at which point it's switched to true and never changes
285  // back to false. Once this is true, we hold open and
286  // reuse SegmentReader instances internally for applying
287  // deletes, doing merges, and reopening near real-time
288  // readers.
289  private volatile bool poolReaders;
290 
291  /// <summary> Expert: returns a readonly reader, covering all committed as well as
292  /// un-committed changes to the index. This provides "near real-time"
293  /// searching, in that changes made during an IndexWriter session can be
294  /// quickly made available for searching without closing the writer or
295  /// calling <see cref="Commit()" />.
296  ///
297  /// <p/>
298  /// Note that this is functionally equivalent to calling <see cref="Commit()" /> and then
299  /// using <see cref="IndexReader.Open(Lucene.Net.Store.Directory, bool)" /> to open a new reader. But the turnaround
300  /// time of this method should be faster since it avoids the potentially
301  /// costly <see cref="Commit()" />.
302  /// <p/>
303  ///
304  /// You must close the <see cref="IndexReader" /> returned by this method once you are done using it.
305  ///
306  /// <p/>
307  /// It's <i>near</i> real-time because there is no hard
308  /// guarantee on how quickly you can get a new reader after
309  /// making changes with IndexWriter. You'll have to
310  /// experiment in your situation to determine if it's
311  /// fast enough. As this is a new and experimental
312  /// feature, please report back on your findings so we can
313  /// learn, improve and iterate.<p/>
314  ///
315  /// <p/>The resulting reader supports <see cref="IndexReader.Reopen()" />
316  ///, but that call will simply forward
317  /// back to this method (though this may change in the
318  /// future).<p/>
319  ///
320  /// <p/>The very first time this method is called, this
321  /// writer instance will make every effort to pool the
322  /// readers that it opens for doing merges, applying
323  /// deletes, etc. This means additional resources (RAM,
324  /// file descriptors, CPU time) will be consumed.<p/>
325  ///
326  /// <p/>For lower latency on reopening a reader, you should set <see cref="MergedSegmentWarmer" /> to
328  /// pre-warm a newly merged segment before it's committed
329  /// to the index. This is important for minimizing index-to-search
330  /// delay after a large merge.
331  ///
332  /// <p/>If an addIndexes* call is running in another thread,
333  /// then this reader will only search those segments from
334  /// the foreign index that have been successfully copied
335  /// over so far.<p/>
336  ///
337  /// <p/><b>NOTE</b>: Once the writer is closed, any
338  /// outstanding readers may continue to be used. However,
339  /// if you attempt to reopen any of those readers, you'll
340  /// hit an <see cref="AlreadyClosedException" />.<p/>
341  ///
342  /// <p/><b>NOTE:</b> This API is experimental and might
343  /// change in incompatible ways in the next release.<p/>
344  ///
345  /// </summary>
346  /// <returns> IndexReader that covers entire index plus all
347  /// changes made so far by this IndexWriter instance
348  ///
349  /// </returns>
350  /// <throws> IOException </throws>
351  public virtual IndexReader GetReader()
352  {
353  return GetReader(readerTermsIndexDivisor);
354  }
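 // Illustrative near-real-time sketch (added for this listing; not part of the original source).
 // It assumes an already-open writer and Lucene.Net.Search.IndexSearcher:
 //
 //   writer.AddDocument(doc);                     // buffered change, not yet committed
 //   IndexReader nrtReader = writer.GetReader();  // flushes and opens a point-in-time reader
 //   var searcher = new IndexSearcher(nrtReader);
 //   // ... the uncommitted document above is already searchable here ...
 //   nrtReader.Close();                           // callers must close readers obtained this way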
355 
356  /// <summary>Expert: like <see cref="GetReader()" />, except you can
357  /// specify which termInfosIndexDivisor should be used for
358  /// any newly opened readers.
359  /// </summary>
360  /// <param name="termInfosIndexDivisor">Subsamples which indexed
361  /// terms are loaded into RAM. This has the same effect as <see cref="IndexWriter.TermIndexInterval" />
362  /// except that setting
363  /// must be done at indexing time while this setting can be
364  /// set per reader. When set to N, then one in every
365  /// N*termIndexInterval terms in the index is loaded into
366  /// memory. By setting this to a value > 1 you can reduce
367  /// memory usage, at the expense of higher latency when
368  /// loading a TermInfo. The default value is 1. Set this
369  /// to -1 to skip loading the terms index entirely.
370  /// </param>
371  public virtual IndexReader GetReader(int termInfosIndexDivisor)
372  {
373  EnsureOpen();
374 
375  if (infoStream != null)
376  {
377  Message("flush at getReader");
378  }
379 
380  // Do this up front before flushing so that the readers
381  // obtained during this flush are pooled, the first time
382  // this method is called:
383  poolReaders = true;
384 
385  // Prevent segmentInfos from changing while opening the
386  // reader; in theory we could do similar retry logic,
387  // just like we do when loading segments_N
388  IndexReader r;
389  lock (this)
390  {
391  Flush(false, true, true);
392  r = new ReadOnlyDirectoryReader(this, segmentInfos, termInfosIndexDivisor);
393  }
394  MaybeMerge();
395  return r;
396  }
397 
398  /// <summary>Holds shared SegmentReader instances. IndexWriter uses
399  /// SegmentReaders for 1) applying deletes, 2) doing
400  /// merges, 3) handing out a real-time reader. This pool
401  /// reuses instances of the SegmentReaders in all these
402  /// places if it is in "near real-time mode" (getReader()
403  /// has been called on this instance).
404  /// </summary>
405 
406  internal class ReaderPool : IDisposable
407  {
408  public ReaderPool(IndexWriter enclosingInstance)
409  {
410  InitBlock(enclosingInstance);
411  }
412  private void InitBlock(IndexWriter enclosingInstance)
413  {
414  this.enclosingInstance = enclosingInstance;
415  }
416  private IndexWriter enclosingInstance;
417  public IndexWriter Enclosing_Instance
418  {
419  get
420  {
421  return enclosingInstance;
422  }
423 
424  }
425 
426  private IDictionary<SegmentInfo, SegmentReader> readerMap = new HashMap<SegmentInfo, SegmentReader>();
427 
428  /// <summary>Forcefully clear changes for the specified segments,
429  /// and remove them from the pool. This is called on a successful merge.
430  /// </summary>
431  internal virtual void Clear(SegmentInfos infos)
432  {
433  lock (this)
434  {
435  if (infos == null)
436  {
437  foreach(KeyValuePair<SegmentInfo, SegmentReader> ent in readerMap)
438  {
439  ent.Value.hasChanges = false;
440  }
441  }
442  else
443  {
444  foreach(SegmentInfo info in infos)
445  {
446  if (readerMap.ContainsKey(info))
447  {
448  readerMap[info].hasChanges = false;
449  }
450  }
451  }
452  }
453  }
454 
455  // used only by asserts
456  public virtual bool InfoIsLive(SegmentInfo info)
457  {
458  lock (this)
459  {
460  int idx = Enclosing_Instance.segmentInfos.IndexOf(info);
461  System.Diagnostics.Debug.Assert(idx != -1);
462  System.Diagnostics.Debug.Assert(Enclosing_Instance.segmentInfos[idx] == info);
463  return true;
464  }
465  }
466 
467  public virtual SegmentInfo MapToLive(SegmentInfo info)
468  {
469  lock (this)
470  {
471  int idx = Enclosing_Instance.segmentInfos.IndexOf(info);
472  if (idx != - 1)
473  {
474  info = Enclosing_Instance.segmentInfos[idx];
475  }
476  return info;
477  }
478  }
479 
480  /// <summary> Release the segment reader (i.e., decRef it and close it if there
481  /// are no more references).
482  /// </summary>
483  /// <param name="sr">
484  /// </param>
485  /// <throws> IOException </throws>
486  public virtual void Release(SegmentReader sr)
487  {
488  lock (this)
489  {
490  Release(sr, false);
491  }
492  }
493 
494  /// <summary> Release the segment reader (i.e., decRef it and close it if there
495  /// are no more references).
496  /// </summary>
497  /// <param name="sr">
498  /// </param>
499  /// <param name="drop"></param>
500  /// <throws> IOException </throws>
501  public virtual void Release(SegmentReader sr, bool drop)
502  {
503  lock (this)
504  {
505 
506  bool pooled = readerMap.ContainsKey(sr.SegmentInfo);
507 
508  System.Diagnostics.Debug.Assert(!pooled || readerMap[sr.SegmentInfo] == sr);
509 
510  // Drop caller's ref; for an external reader (not
511  // pooled), this decRef will close it
512  sr.DecRef();
513 
514  if (pooled && (drop || (!Enclosing_Instance.poolReaders && sr.RefCount == 1)))
515  {
516 
517  // We invoke deleter.checkpoint below, so we must be
518  // sync'd on IW if there are changes:
519 
520  // TODO: Java 1.5 has this, .NET can't.
521  // System.Diagnostics.Debug.Assert(!sr.hasChanges || Thread.holdsLock(enclosingInstance));
522 
523  // Discard (don't save) changes when we are dropping
524  // the reader; this is used only on the sub-readers
525  // after a successful merge.
526  sr.hasChanges &= !drop;
527 
528  bool hasChanges = sr.hasChanges;
529 
530  // Drop our ref -- this will commit any pending
531  // changes to the dir
532  sr.Close();
533 
534  // We are the last ref to this reader; since we're
535  // not pooling readers, we release it:
536  readerMap.Remove(sr.SegmentInfo);
537 
538  if (hasChanges)
539  {
540  // Must checkpoint w/ deleter, because this
541  // segment reader will have created new _X_N.del
542  // file.
543  enclosingInstance.deleter.Checkpoint(enclosingInstance.segmentInfos, false);
544  }
545  }
546  }
547  }
548 
549  /// <summary>Remove all our references to readers, and commit
550  /// any pending changes.
551  /// </summary>
552  public void Dispose()
553  {
554  Dispose(true);
555  }
556 
557  protected void Dispose(bool disposing)
558  {
559  if (disposing)
560  {
561  // We invoke deleter.checkpoint below, so we must be
562  // sync'd on IW:
563  // TODO: assert Thread.holdsLock(IndexWriter.this);
564  // TODO: Should this class have bool _isDisposed?
565  lock (this)
566  {
567  //var toRemove = new List<SegmentInfo>();
568  foreach (var ent in readerMap)
569  {
570  SegmentReader sr = ent.Value;
571  if (sr.hasChanges)
572  {
573  System.Diagnostics.Debug.Assert(InfoIsLive(sr.SegmentInfo));
574  sr.DoCommit(null);
575  // Must checkpoint w/ deleter, because this
576  // segment reader will have created new _X_N.del
577  // file.
578  enclosingInstance.deleter.Checkpoint(enclosingInstance.segmentInfos, false);
579  }
580 
581  //toRemove.Add(ent.Key);
582 
583  // NOTE: it is allowed that this decRef does not
584  // actually close the SR; this can happen when a
585  // near real-time reader is kept open after the
586  // IndexWriter instance is closed
587  sr.DecRef();
588  }
589 
590  //foreach (var key in toRemove)
591  // readerMap.Remove(key);
592  readerMap.Clear();
593  }
594  }
595  }
596 
597  /// <summary> Commit all segment readers in the pool.</summary>
598  /// <throws> IOException </throws>
599  internal virtual void Commit()
600  {
601  // We invoke deleter.checkpoint below, so we must be
602  // sync'd on IW:
603  // TODO: assert Thread.holdsLock(IndexWriter.this);
604  lock (this)
605  {
606  foreach(KeyValuePair<SegmentInfo,SegmentReader> ent in readerMap)
607  {
608  SegmentReader sr = ent.Value;
609  if (sr.hasChanges)
610  {
611  System.Diagnostics.Debug.Assert(InfoIsLive(sr.SegmentInfo));
612  sr.DoCommit(null);
613  // Must checkpoint w/ deleter, because this
614  // segment reader will have created new _X_N.del
615  // file.
616  enclosingInstance.deleter.Checkpoint(enclosingInstance.segmentInfos, false);
617  }
618  }
619  }
620  }
621 
622  /// <summary> Returns a ref to a clone. NOTE: this clone is not
623  /// enrolled in the pool, so you should simply close()
624  /// it when you're done (ie, do not call release()).
625  /// </summary>
626  public virtual SegmentReader GetReadOnlyClone(SegmentInfo info, bool doOpenStores, int termInfosIndexDivisor)
627  {
628  lock (this)
629  {
630  SegmentReader sr = Get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, termInfosIndexDivisor);
631  try
632  {
633  return (SegmentReader) sr.Clone(true);
634  }
635  finally
636  {
637  sr.DecRef();
638  }
639  }
640  }
641 
642  /// <summary> Obtain a SegmentReader from the readerPool. The reader
643  /// must be returned by calling <see cref="Release(SegmentReader)" />
644  /// </summary>
645  /// <seealso cref="Release(SegmentReader)">
646  /// </seealso>
647  /// <param name="info">
648  /// </param>
649  /// <param name="doOpenStores">
650  /// </param>
651  /// <throws> IOException </throws>
652  public virtual SegmentReader Get(SegmentInfo info, bool doOpenStores)
653  {
654  lock (this)
655  {
656  return Get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, enclosingInstance.readerTermsIndexDivisor);
657  }
658  }
659  /// <summary> Obtain a SegmentReader from the readerPool. The reader
660  /// must be returned by calling <see cref="Release(SegmentReader)" />
661  ///
662  /// </summary>
663  /// <seealso cref="Release(SegmentReader)">
664  /// </seealso>
665  /// <param name="info">
666  /// </param>
667  /// <param name="doOpenStores">
668  /// </param>
669  /// <param name="readBufferSize">
670  /// </param>
671  /// <param name="termsIndexDivisor">
672  /// </param>
673  /// <throws> IOException </throws>
674  public virtual SegmentReader Get(SegmentInfo info, bool doOpenStores, int readBufferSize, int termsIndexDivisor)
675  {
676  lock (this)
677  {
678  if (Enclosing_Instance.poolReaders)
679  {
680  readBufferSize = BufferedIndexInput.BUFFER_SIZE;
681  }
682 
683  SegmentReader sr = readerMap[info];
684  if (sr == null)
685  {
686  // TODO: we may want to avoid doing this while
687  // synchronized
688  // Returns a ref, which we xfer to readerMap:
689  sr = SegmentReader.Get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor);
690  if (info.dir == enclosingInstance.directory)
691  {
692  // Only pool if reader is not external
693  readerMap[info]=sr;
694  }
695  }
696  else
697  {
698  if (doOpenStores)
699  {
700  sr.OpenDocStores();
701  }
702  if (termsIndexDivisor != - 1 && !sr.TermsIndexLoaded())
703  {
704  // If this reader was originally opened because we
705  // needed to merge it, we didn't load the terms
706  // index. But now, if the caller wants the terms
707  // index (eg because it's doing deletes, or an NRT
708  // reader is being opened) we ask the reader to
709  // load its terms index.
710  sr.LoadTermsIndex(termsIndexDivisor);
711  }
712  }
713 
714  // Return a ref to our caller
715  if (info.dir == enclosingInstance.directory)
716  {
717  // Only incRef if we pooled (reader is not external)
718  sr.IncRef();
719  }
720  return sr;
721  }
722  }
723 
724  // Returns a ref
725  public virtual SegmentReader GetIfExists(SegmentInfo info)
726  {
727  lock (this)
728  {
729  SegmentReader sr = readerMap[info];
730  if (sr != null)
731  {
732  sr.IncRef();
733  }
734  return sr;
735  }
736  }
737  }
738 
739  /// <summary> Obtain the number of deleted docs for a pooled reader.
740  /// If the reader isn't being pooled, the segmentInfo's
741  /// delCount is returned.
742  /// </summary>
743  public virtual int NumDeletedDocs(SegmentInfo info)
744  {
745  SegmentReader reader = readerPool.GetIfExists(info);
746  try
747  {
748  if (reader != null)
749  {
750  return reader.NumDeletedDocs;
751  }
752  else
753  {
754  return info.GetDelCount();
755  }
756  }
757  finally
758  {
759  if (reader != null)
760  {
761  readerPool.Release(reader);
762  }
763  }
764  }
765 
766  internal virtual void AcquireWrite()
767  {
768  lock (this)
769  {
770  System.Diagnostics.Debug.Assert(writeThread != ThreadClass.Current());
771  while (writeThread != null || readCount > 0)
772  DoWait();
773 
774  // We could have been closed while we were waiting:
775  EnsureOpen();
776 
777  writeThread = ThreadClass.Current();
778  }
779  }
780 
781  internal virtual void ReleaseWrite()
782  {
783  lock (this)
784  {
785  System.Diagnostics.Debug.Assert(ThreadClass.Current() == writeThread);
786  writeThread = null;
787  System.Threading.Monitor.PulseAll(this);
788  }
789  }
790 
791  internal virtual void AcquireRead()
792  {
793  lock (this)
794  {
795  ThreadClass current = ThreadClass.Current();
796  while (writeThread != null && writeThread != current)
797  DoWait();
798 
799  readCount++;
800  }
801  }
802 
803  // Allows one readLock to upgrade to a writeLock even if
804  // there are other readLocks as long as all other
805  // readLocks are also blocked in this method:
806  internal virtual void UpgradeReadToWrite()
807  {
808  lock (this)
809  {
810  System.Diagnostics.Debug.Assert(readCount > 0);
811  upgradeCount++;
812  while (readCount > upgradeCount || writeThread != null)
813  {
814  DoWait();
815  }
816 
817  writeThread = ThreadClass.Current();
818  readCount--;
819  upgradeCount--;
820  }
821  }
822 
823  internal virtual void ReleaseRead()
824  {
825  lock (this)
826  {
827  readCount--;
828  System.Diagnostics.Debug.Assert(readCount >= 0);
829  System.Threading.Monitor.PulseAll(this);
830  }
831  }
832 
833  internal bool IsOpen(bool includePendingClose)
834  {
835  lock (this)
836  {
837  return !(closed || (includePendingClose && closing));
838  }
839  }
840 
841  /// <summary> Used internally to throw an <see cref="AlreadyClosedException" />
842  /// if this IndexWriter has been
843  /// closed.
844  /// </summary>
845  /// <throws> AlreadyClosedException if this IndexWriter is closed </throws>
846  protected internal void EnsureOpen(bool includePendingClose)
847  {
848  lock (this)
849  {
850  if (!IsOpen(includePendingClose))
851  {
852  throw new AlreadyClosedException("this IndexWriter is closed");
853  }
854  }
855  }
856 
857  protected internal void EnsureOpen()
858  {
859  lock (this)
860  {
861  EnsureOpen(true);
862  }
863  }
864 
865  /// <summary> Prints a message to the infoStream (if non-null),
866  /// prefixed with the identifying information for this
867  /// writer and the thread that's calling it.
868  /// </summary>
869  public virtual void Message(System.String message)
870  {
871  if (infoStream != null)
872  infoStream.WriteLine("IW " + messageID + " [" + DateTime.Now.ToString() + "; " + ThreadClass.Current().Name + "]: " + message);
873  }
874 
875  private void SetMessageID(System.IO.StreamWriter infoStream)
876  {
877  lock (this)
878  {
879  if (infoStream != null && messageID == - 1)
880  {
881  lock (MESSAGE_ID_LOCK)
882  {
883  messageID = MESSAGE_ID++;
884  }
885  }
886  this.infoStream = infoStream;
887  }
888  }
889 
890  /// <summary> Casts current mergePolicy to LogMergePolicy, and throws
891  /// an exception if the mergePolicy is not a LogMergePolicy.
892  /// </summary>
893  private LogMergePolicy LogMergePolicy
894  {
895  get
896  {
897  if (mergePolicy is LogMergePolicy)
898  return (LogMergePolicy) mergePolicy;
899 
900  throw new System.ArgumentException(
901  "this method can only be called when the merge policy is the default LogMergePolicy");
902  }
903  }
904 
905  /// <summary><p/>Gets or sets the current setting of whether newly flushed
906  /// segments will use the compound file format. Note that
907  /// this just returns the value previously set through this
908  /// property, or the default value
909  /// (true). You cannot use this to query the status of
910  /// previously flushed segments.<p/>
911  ///
912  /// <p/>Note that this property is a convenience: it
913  /// just calls mergePolicy.GetUseCompoundFile as long as
914  /// mergePolicy is an instance of <see cref="LogMergePolicy" />.
915  /// Otherwise an ArgumentException is thrown.<p/>
916  ///
917  /// </summary>
918  public virtual bool UseCompoundFile
919  {
920  get { return LogMergePolicy.GetUseCompoundFile(); }
921  set
922  {
923  LogMergePolicy.SetUseCompoundFile(value);
924  LogMergePolicy.SetUseCompoundDocStore(value);
925  }
926  }
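 // Illustrative sketch (added for this listing; not part of the original source): disabling compound
 // files trades more open file handles for somewhat faster indexing. This only works while the merge
 // policy is a LogMergePolicy (the default); otherwise the property throws an ArgumentException.
 //
 //   writer.UseCompoundFile = false;   // newly flushed segments keep separate per-extension files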
927 
928  /// <summary>Expert: Set the Similarity implementation used by this IndexWriter.
929  /// </summary>
930  public virtual void SetSimilarity(Similarity similarity)
931  {
932  EnsureOpen();
933  this.similarity = similarity;
934  docWriter.SetSimilarity(similarity);
935  }
936 
937  /// <summary>Expert: Return the Similarity implementation used by this IndexWriter.
938  ///
939  /// <p/>This defaults to the current value of <see cref="Search.Similarity.Default" />.
940  /// </summary>
941  public virtual Similarity Similarity
942  {
943  get
944  {
945  EnsureOpen();
946  return this.similarity;
947  }
948  }
949 
950 
951  /// <summary>Expert: Gets or sets the interval between indexed terms. Large values cause less
952  /// memory to be used by IndexReader, but slow random-access to terms. Small
953  /// values cause more memory to be used by an IndexReader, and speed
954  /// random-access to terms.
955  ///
956  /// This parameter determines the amount of computation required per query
957  /// term, regardless of the number of documents that contain that term. In
958  /// particular, it is the maximum number of other terms that must be
959  /// scanned before a term is located and its frequency and position information
960  /// may be processed. In a large index with user-entered query terms, query
961  /// processing time is likely to be dominated not by term lookup but rather
962  /// by the processing of frequency and positional data. In a small index
963  /// or when many uncommon query terms are generated (e.g., by wildcard
964  /// queries) term lookup may become a dominant cost.
965  ///
966  /// In particular, <c>numUniqueTerms/interval</c> terms are read into
967  /// memory by an IndexReader, and, on average, <c>interval/2</c> terms
968  /// must be scanned for each random term access.
969  ///
970  /// </summary>
971  /// <seealso cref="DEFAULT_TERM_INDEX_INTERVAL">
972  /// </seealso>
973  public virtual int TermIndexInterval
974  {
975  get
976  {
977  // We pass false because this method is called by SegmentMerger while we are in the process of closing
978  EnsureOpen(false);
979  return termIndexInterval;
980  }
981  set
982  {
983  EnsureOpen();
984  this.termIndexInterval = value;
985  }
986  }
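 // Worked example (added for this listing; figures are illustrative): with the default interval of
 // 128, an index with about 10,000,000 unique terms keeps roughly 10,000,000 / 128 ≈ 78,000 index
 // terms in RAM per reader, and a random term lookup scans on average 128 / 2 = 64 terms. Doubling
 // the interval halves the resident index terms but doubles the average scan length, e.g.:
 //
 //   writer.TermIndexInterval = 256;   // smaller terms index in RAM, slower random term access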
987 
988  /// <summary> Constructs an IndexWriter for the index in <c>d</c>.
989  /// Text will be analyzed with <c>a</c>. If <c>create</c>
990  /// is true, then a new, empty index will be created in
991  /// <c>d</c>, replacing the index already there, if any.
992  ///
993  /// </summary>
994  /// <param name="d">the index directory
995  /// </param>
996  /// <param name="a">the analyzer to use
997  /// </param>
998  /// <param name="create"><c>true</c> to create the index or overwrite
999  /// the existing one; <c>false</c> to append to the existing
1000  /// index
1001  /// </param>
1002  /// <param name="mfl">Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
1003  /// via the MaxFieldLength constructor.
1004  /// </param>
1005  /// <throws> CorruptIndexException if the index is corrupt </throws>
1006  /// <throws> LockObtainFailedException if another writer </throws>
1007  /// <summary> has this index open (<c>write.lock</c> could not
1008  /// be obtained)
1009  /// </summary>
1010  /// <throws> IOException if the directory cannot be read/written to, or </throws>
1011  /// <summary> if it does not exist and <c>create</c> is
1012  /// <c>false</c> or if there is any other low-level
1013  /// IO error
1014  /// </summary>
1015  public IndexWriter(Directory d, Analyzer a, bool create, MaxFieldLength mfl)
1016  {
1017  InitBlock();
1018  Init(d, a, create, null, mfl.Limit, null, null);
1019  }
1020 
1021  /// <summary> Constructs an IndexWriter for the index in
1022  /// <c>d</c>, first creating it if it does not
1023  /// already exist.
1024  ///
1025  /// </summary>
1026  /// <param name="d">the index directory
1027  /// </param>
1028  /// <param name="a">the analyzer to use
1029  /// </param>
1030  /// <param name="mfl">Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
1031  /// via the MaxFieldLength constructor.
1032  /// </param>
1033  /// <throws> CorruptIndexException if the index is corrupt </throws>
1034  /// <throws> LockObtainFailedException if another writer </throws>
1035  /// <summary> has this index open (<c>write.lock</c> could not
1036  /// be obtained)
1037  /// </summary>
1038  /// <throws> IOException if the directory cannot be </throws>
1039  /// <summary> read/written to or if there is any other low-level
1040  /// IO error
1041  /// </summary>
1042  public IndexWriter(Directory d, Analyzer a, MaxFieldLength mfl)
1043  {
1044  InitBlock();
1045  Init(d, a, null, mfl.Limit, null, null);
1046  }
1047 
1048  /// <summary> Expert: constructs an IndexWriter with a custom <see cref="IndexDeletionPolicy" />
1049  ///, for the index in <c>d</c>,
1050  /// first creating it if it does not already exist. Text
1051  /// will be analyzed with <c>a</c>.
1052  ///
1053  /// </summary>
1054  /// <param name="d">the index directory
1055  /// </param>
1056  /// <param name="a">the analyzer to use
1057  /// </param>
1058  /// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a>
1059  /// </param>
1060  /// <param name="mfl">whether or not to limit field lengths
1061  /// </param>
1062  /// <throws> CorruptIndexException if the index is corrupt </throws>
1063  /// <throws> LockObtainFailedException if another writer </throws>
1064  /// <summary> has this index open (<c>write.lock</c> could not
1065  /// be obtained)
1066  /// </summary>
1067  /// <throws> IOException if the directory cannot be </throws>
1068  /// <summary> read/written to or if there is any other low-level
1069  /// IO error
1070  /// </summary>
1071  public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
1072  {
1073  InitBlock();
1074  Init(d, a, deletionPolicy, mfl.Limit, null, null);
1075  }
1076 
1077  /// <summary> Expert: constructs an IndexWriter with a custom <see cref="IndexDeletionPolicy" />
1078  ///, for the index in <c>d</c>.
1079  /// Text will be analyzed with <c>a</c>. If
1080  /// <c>create</c> is true, then a new, empty index
1081  /// will be created in <c>d</c>, replacing the index
1082  /// already there, if any.
1083  ///
1084  /// </summary>
1085  /// <param name="d">the index directory
1086  /// </param>
1087  /// <param name="a">the analyzer to use
1088  /// </param>
1089  /// <param name="create"><c>true</c> to create the index or overwrite
1090  /// the existing one; <c>false</c> to append to the existing
1091  /// index
1092  /// </param>
1093  /// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a>
1094  /// </param>
1095  /// <param name="mfl"><see cref="Lucene.Net.Index.IndexWriter.MaxFieldLength" />, whether or not to limit field lengths. Value is in number of terms/tokens
1096  /// </param>
1097  /// <throws> CorruptIndexException if the index is corrupt </throws>
1098  /// <throws> LockObtainFailedException if another writer </throws>
1099  /// <summary> has this index open (<c>write.lock</c> could not
1100  /// be obtained)
1101  /// </summary>
1102  /// <throws> IOException if the directory cannot be read/written to, or </throws>
1103  /// <summary> if it does not exist and <c>create</c> is
1104  /// <c>false</c> or if there is any other low-level
1105  /// IO error
1106  /// </summary>
1107  public IndexWriter(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
1108  {
1109  InitBlock();
1110  Init(d, a, create, deletionPolicy, mfl.Limit, null, null);
1111  }
1112 
1113  /// <summary> Expert: constructs an IndexWriter with a custom <see cref="IndexDeletionPolicy" />
1114  /// and <see cref="IndexingChain" />,
1115  /// for the index in <c>d</c>.
1116  /// Text will be analyzed with <c>a</c>. If
1117  /// <c>create</c> is true, then a new, empty index
1118  /// will be created in <c>d</c>, replacing the index
1119  /// already there, if any.
1120  ///
1121  /// </summary>
1122  /// <param name="d">the index directory
1123  /// </param>
1124  /// <param name="a">the analyzer to use
1125  /// </param>
1126  /// <param name="create"><c>true</c> to create the index or overwrite
1127  /// the existing one; <c>false</c> to append to the existing
1128  /// index
1129  /// </param>
1130  /// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a>
1131  /// </param>
1132  /// <param name="mfl">whether or not to limit field lengths, value is in number of terms/tokens. See <see cref="Lucene.Net.Index.IndexWriter.MaxFieldLength" />.
1133  /// </param>
1134  /// <param name="indexingChain">the <see cref="DocConsumer" /> chain to be used to
1135  /// process documents
1136  /// </param>
1137  /// <param name="commit">which commit to open
1138  /// </param>
1139  /// <throws> CorruptIndexException if the index is corrupt </throws>
1140  /// <throws> LockObtainFailedException if another writer </throws>
1141  /// <summary> has this index open (<c>write.lock</c> could not
1142  /// be obtained)
1143  /// </summary>
1144  /// <throws> IOException if the directory cannot be read/written to, or </throws>
1145  /// <summary> if it does not exist and <c>create</c> is
1146  /// <c>false</c> or if there is any other low-level
1147  /// IO error
1148  /// </summary>
1149  internal IndexWriter(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexingChain indexingChain, IndexCommit commit)
1150  {
1151  InitBlock();
1152  Init(d, a, create, deletionPolicy, mfl.Limit, indexingChain, commit);
1153  }
1154 
1155  /// <summary> Expert: constructs an IndexWriter on a specific commit
1156  /// point, with a custom <see cref="IndexDeletionPolicy" />, for
1157  /// the index in <c>d</c>. Text will be analyzed
1158  /// with <c>a</c>.
1159  ///
1160  /// <p/> This is only meaningful if you've used a <see cref="IndexDeletionPolicy" />
1161  /// in the past that keeps more than
1162  /// just the last commit.
1163  ///
1164  /// <p/>This operation is similar to <see cref="Rollback()" />,
1165  /// except that method can only rollback what's been done
1166  /// with the current instance of IndexWriter since its last
1167  /// commit, whereas this method can rollback to an
1168  /// arbitrary commit point from the past, assuming the
1169  /// <see cref="IndexDeletionPolicy" /> has preserved past
1170  /// commits.
1171  ///
1172  /// </summary>
1173  /// <param name="d">the index directory
1174  /// </param>
1175  /// <param name="a">the analyzer to use
1176  /// </param>
1177  /// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a>
1178  /// </param>
1179  /// <param name="mfl">whether or not to limit field lengths, value is in number of terms/tokens. See <see cref="Lucene.Net.Index.IndexWriter.MaxFieldLength" />.
1180  /// </param>
1181  /// <param name="commit">which commit to open
1182  /// </param>
1183  /// <throws> CorruptIndexException if the index is corrupt </throws>
1184  /// <throws> LockObtainFailedException if another writer </throws>
1185  /// <summary> has this index open (<c>write.lock</c> could not
1186  /// be obtained)
1187  /// </summary>
1188  /// <throws> IOException if the directory cannot be read/written to, or </throws>
1189  /// <summary> if it does not exist and <c>create</c> is
1190  /// <c>false</c> or if there is any other low-level
1191  /// IO error
1192  /// </summary>
1193  public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexCommit commit)
1194  {
1195  InitBlock();
1196  Init(d, a, false, deletionPolicy, mfl.Limit, null, commit);
1197  }
1198 
1199  private void Init(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, int maxFieldLength, IndexingChain indexingChain, IndexCommit commit)
1200  {
1201  if (IndexReader.IndexExists(d))
1202  {
1203  Init(d, a, false, deletionPolicy, maxFieldLength, indexingChain, commit);
1204  }
1205  else
1206  {
1207  Init(d, a, true, deletionPolicy, maxFieldLength, indexingChain, commit);
1208  }
1209  }
1210 
1211  private void Init(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, int maxFieldLength, IndexingChain indexingChain, IndexCommit commit)
1212  {
1213  directory = d;
1214  analyzer = a;
1215  SetMessageID(defaultInfoStream);
1216  this.maxFieldLength = maxFieldLength;
1217 
1218  if (indexingChain == null)
1219  indexingChain = DocumentsWriter.DefaultIndexingChain;
1220 
1221  if (create)
1222  {
1223  // Clear the write lock in case it's leftover:
1224  directory.ClearLock(WRITE_LOCK_NAME);
1225  }
1226 
1227  Lock writeLock = directory.MakeLock(WRITE_LOCK_NAME);
1228  if (!writeLock.Obtain(writeLockTimeout))
1229  // obtain write lock
1230  {
1231  throw new LockObtainFailedException("Index locked for write: " + writeLock);
1232  }
1233  this.writeLock = writeLock; // save it
1234 
1235  bool success = false;
1236  try
1237  {
1238  if (create)
1239  {
1240  // Try to read first. This is to allow create
1241  // against an index that's currently open for
1242  // searching. In this case we write the next
1243  // segments_N file with no segments:
1244  bool doCommit;
1245  try
1246  {
1247  segmentInfos.Read(directory);
1248  segmentInfos.Clear();
1249  doCommit = false;
1250  }
1251  catch (System.IO.IOException)
1252  {
1253  // Likely this means it's a fresh directory
1254  doCommit = true;
1255  }
1256 
1257  if (doCommit)
1258  {
1259  // Only commit if there is no segments file
1260  // in this dir already.
1261  segmentInfos.Commit(directory);
1262  synced.UnionWith(segmentInfos.Files(directory, true));
1263  }
1264  else
1265  {
1266  // Record that we have a change (zero out all
1267  // segments) pending:
1268  changeCount++;
1269  }
1270  }
1271  else
1272  {
1273  segmentInfos.Read(directory);
1274 
1275  if (commit != null)
1276  {
1277  // Swap out all segments, but, keep metadata in
1278  // SegmentInfos, like version & generation, to
1279  // preserve write-once. This is important if
1280  // readers are open against the future commit
1281  // points.
1282  if (commit.Directory != directory)
1283  throw new System.ArgumentException("IndexCommit's directory doesn't match my directory");
1284  SegmentInfos oldInfos = new SegmentInfos();
1285  oldInfos.Read(directory, commit.SegmentsFileName);
1286  segmentInfos.Replace(oldInfos);
1287  changeCount++;
1288  if (infoStream != null)
1289  Message("init: loaded commit \"" + commit.SegmentsFileName + "\"");
1290  }
1291 
1292  // We assume that this segments_N was previously
1293  // properly sync'd:
1294  synced.UnionWith(segmentInfos.Files(directory, true));
1295  }
1296 
1297  SetRollbackSegmentInfos(segmentInfos);
1298 
1299  docWriter = new DocumentsWriter(directory, this, indexingChain);
1300  docWriter.SetInfoStream(infoStream);
1301  docWriter.SetMaxFieldLength(maxFieldLength);
1302 
1303  // Default deleter (for backwards compatibility) is
1304  // KeepOnlyLastCommitDeleter:
1305  deleter = new IndexFileDeleter(directory, deletionPolicy == null?new KeepOnlyLastCommitDeletionPolicy():deletionPolicy, segmentInfos, infoStream, docWriter, synced);
1306 
1307  if (deleter.startingCommitDeleted)
1308  // Deletion policy deleted the "head" commit point.
1309  // We have to mark ourself as changed so that if we
1310  // are closed w/o any further changes we write a new
1311  // segments_N file.
1312  changeCount++;
1313 
1314  PushMaxBufferedDocs();
1315 
1316  if (infoStream != null)
1317  {
1318  Message("init: create=" + create);
1319  MessageState();
1320  }
1321 
1322  success = true;
1323  }
1324  finally
1325  {
1326  if (!success)
1327  {
1328  if (infoStream != null)
1329  {
1330  Message("init: hit exception on init; releasing write lock");
1331  }
1332  try
1333  {
1334  writeLock.Release();
1335  }
1336  catch (Exception)
1337  {
1338  // don't mask the original exception
1339  }
1340  writeLock = null;
1341  }
1342  }
1343  }
1344 
1345  private void SetRollbackSegmentInfos(SegmentInfos infos)
1346  {
1347  lock (this)
1348  {
1349  rollbackSegmentInfos = (SegmentInfos) infos.Clone();
1350  System.Diagnostics.Debug.Assert(!rollbackSegmentInfos.HasExternalSegments(directory));
1351  rollbackSegments = new HashMap<SegmentInfo, int?>();
1352  int size = rollbackSegmentInfos.Count;
1353  for (int i = 0; i < size; i++)
1354  rollbackSegments[rollbackSegmentInfos.Info(i)] = i;
1355  }
1356  }
1357 
1358  /// <summary> Expert: set the merge policy used by this writer.</summary>
1359  public virtual void SetMergePolicy(MergePolicy mp)
1360  {
1361  EnsureOpen();
1362  if (mp == null)
1363  throw new System.NullReferenceException("MergePolicy must be non-null");
1364 
1365  if (mergePolicy != mp)
1366  mergePolicy.Close();
1367  mergePolicy = mp;
1368  PushMaxBufferedDocs();
1369  if (infoStream != null)
1370  {
1371  Message("setMergePolicy " + mp);
1372  }
1373  }
1374 
1375  /// <summary> Expert: returns the current MergePolicy in use by this writer.</summary>
1376  /// <seealso cref="SetMergePolicy">
1377  /// </seealso>
1378  public virtual MergePolicy MergePolicy
1379  {
1380  get
1381  {
1382  EnsureOpen();
1383  return mergePolicy;
1384  }
1385  }
1386 
1387  /// <summary> Expert: set the merge scheduler used by this writer.</summary>
1388  public virtual void SetMergeScheduler(MergeScheduler mergeScheduler)
1389  {
1390  lock (this)
1391  {
1392  EnsureOpen();
1393  if (mergeScheduler == null)
1394  throw new System.NullReferenceException("MergeScheduler must be non-null");
1395 
1396  if (this.mergeScheduler != mergeScheduler)
1397  {
1398  FinishMerges(true);
1399  this.mergeScheduler.Close();
1400  }
1401  this.mergeScheduler = mergeScheduler;
1402  if (infoStream != null)
1403  {
1404  Message("setMergeScheduler " + mergeScheduler);
1405  }
1406  }
1407  }
1408 
1409  /// <summary> Expert: returns the current MergeScheduler in use by this
1410  /// writer.
1411  /// </summary>
1412  /// <seealso cref="SetMergeScheduler">
1413  /// </seealso>
1414  public virtual MergeScheduler MergeScheduler
1415  {
1416  get
1417  {
1418  EnsureOpen();
1419  return mergeScheduler;
1420  }
1421  }
1422 
1423  /// <summary> <p/>Gets or sets the largest segment (measured by document
1424  /// count) that may be merged with other segments.
1425  /// <p/>
1426  /// Small values (e.g., less than 10,000) are best for
1427  /// interactive indexing, as this limits the length of
1428  /// pauses while indexing to a few seconds. Larger values
1429  /// are best for batched indexing and speedier
1430  /// searches.
1431  /// <p/>
1432  /// The default value is <see cref="int.MaxValue" />.
1433  /// <p/>
1434  /// Note that this property is a convenience: it
1435  /// just calls mergePolicy.MaxMergeDocs as long as
1436  /// mergePolicy is an instance of <see cref="LogMergePolicy" />.
1437  /// Otherwise an ArgumentException is thrown.<p/>
1438  ///
1439  /// The default merge policy (<see cref="LogByteSizeMergePolicy" />)
1440  /// also allows you to set this
1441  /// limit by net size (in MB) of the segment, using
1442  /// <see cref="LogByteSizeMergePolicy.MaxMergeMB" />.<p/>
1443  /// </summary>
1444  /// <seealso cref="MaxMergeDocs">
1445  /// </seealso>
1446  public virtual int MaxMergeDocs
1447  {
1448  get { return LogMergePolicy.MaxMergeDocs; }
1449  set { LogMergePolicy.MaxMergeDocs = value; }
1450  }
1451 
1452  /// <summary> The maximum number of terms that will be indexed for a single field in a
1453  /// document. This limits the amount of memory required for indexing, so that
1454  /// collections with very large files will not crash the indexing process by
1455  /// running out of memory. This setting refers to the number of running terms,
1456  /// not to the number of different terms.<p/>
1457  /// <strong>Note:</strong> this silently truncates large documents, excluding from the
1458  /// index all terms that occur further in the document. If you know your source
1459  /// documents are large, be sure to set this value high enough to accommodate
1460  /// the expected size. If you set it to int.MaxValue, then the only limit
1461  /// is your memory, but you should anticipate an OutOfMemoryException.<p/>
1462  /// By default, no more than <see cref="DEFAULT_MAX_FIELD_LENGTH" /> terms
1463  /// will be indexed for a field.
1464  /// </summary>
1465  public virtual void SetMaxFieldLength(int maxFieldLength)
1466  {
1467  EnsureOpen();
1468  this.maxFieldLength = maxFieldLength;
1469  docWriter.SetMaxFieldLength(maxFieldLength);
1470  if (infoStream != null)
1471  Message("setMaxFieldLength " + maxFieldLength);
1472  }
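 // Illustrative sketch (added for this listing; not part of the original source): raising the
 // per-field truncation limit for unusually large documents. MaxFieldLength.UNLIMITED
 // (int.MaxValue) removes the cap entirely at the risk of very high memory use while indexing.
 //
 //   writer.SetMaxFieldLength(100000);   // index up to 100,000 terms per field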
1473 
1474  /// <summary> Returns the maximum number of terms that will be
1475  /// indexed for a single field in a document.
1476  /// </summary>
1477  /// <seealso cref="SetMaxFieldLength">
1478  /// </seealso>
1479  [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
1480  public virtual int GetMaxFieldLength()
1481  {
1482  EnsureOpen();
1483  return maxFieldLength;
1484  }
1485 
1486  /// <summary>Gets or sets the termsIndexDivisor passed to any readers that
1487  /// IndexWriter opens, for example when applying deletes
1488  /// or creating a near-real-time reader in
1489  /// <see cref="GetReader()"/>. Default value is
1490  /// <see cref="IndexReader.DEFAULT_TERMS_INDEX_DIVISOR"/>.</summary>
1491  public int ReaderTermsIndexDivisor
1492  {
1493  get
1494  {
1495  EnsureOpen();
1496  return readerTermsIndexDivisor;
1497  }
1498  set
1499  {
1500  EnsureOpen();
1501  if (value <= 0)
1502  {
1503  throw new ArgumentException("divisor must be >= 1 (got " + value + ")");
1504  }
1505  readerTermsIndexDivisor = value;
1506  if (infoStream != null)
1507  {
1508  Message("setReaderTermsIndexDivisor " + readerTermsIndexDivisor);
1509  }
1510  }
1511  }
1512 
1513  /// <summary>Determines the minimal number of documents required
1514  /// before the buffered in-memory documents are flushed as
1515  /// a new Segment. Large values generally give faster
1516  /// indexing.
1517  ///
1518  /// <p/>When this is set, the writer will flush every
1519  /// maxBufferedDocs added documents. Pass in <see cref="DISABLE_AUTO_FLUSH" />
1520  /// to prevent triggering a flush due
1521  /// to number of buffered documents. Note that if flushing
1522  /// by RAM usage is also enabled, then the flush will be
1523  /// triggered by whichever comes first.<p/>
1524  ///
1525  /// <p/>Disabled by default (writer flushes by RAM usage).<p/>
1526  ///
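 /// <p/>For example, to flush every 1,000 added documents instead of by RAM usage
 /// (a sketch; assumes an already-open <c>writer</c>, and 1,000 is illustrative only):<p/>
 /// <code>
 /// writer.SetMaxBufferedDocs(1000);
 /// writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); // flush by doc count only
 /// </code>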
1527  /// </summary>
1528  /// <throws> IllegalArgumentException if maxBufferedDocs is enabled but
1529  /// smaller than 2, or if it would disable maxBufferedDocs while
1530  /// ramBufferSize is already disabled
1531  /// </throws>
1532  /// <seealso cref="SetRAMBufferSizeMB">
1533  /// </seealso>
1534  public virtual void SetMaxBufferedDocs(int maxBufferedDocs)
1535  {
1536  EnsureOpen();
1537  if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2)
1538  throw new ArgumentException("maxBufferedDocs must at least be 2 when enabled");
1539 
1540  if (maxBufferedDocs == DISABLE_AUTO_FLUSH && (int)GetRAMBufferSizeMB() == DISABLE_AUTO_FLUSH)
1541  throw new ArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled");
1542 
1543  docWriter.MaxBufferedDocs = maxBufferedDocs;
1544  PushMaxBufferedDocs();
1545  if (infoStream != null)
1546  Message("setMaxBufferedDocs " + maxBufferedDocs);
1547  }
1548 
1549  /// <summary> If we are flushing by doc count (not by RAM usage), and
1550  /// using LogDocMergePolicy then push maxBufferedDocs down
1551  /// as its minMergeDocs, to keep backwards compatibility.
1552  /// </summary>
1553  private void PushMaxBufferedDocs()
1554  {
1555  if (docWriter.MaxBufferedDocs != DISABLE_AUTO_FLUSH)
1556  {
1557  MergePolicy mp = mergePolicy;
1558  if (mp is LogDocMergePolicy)
1559  {
1560  LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
1561  int maxBufferedDocs = docWriter.MaxBufferedDocs;
1562  if (lmp.MinMergeDocs != maxBufferedDocs)
1563  {
1564  if (infoStream != null)
1565  Message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy");
1566  lmp.MinMergeDocs = maxBufferedDocs;
1567  }
1568  }
1569  }
1570  }
1571 
1572  /// <summary> Returns the number of buffered added documents that will
1573  /// trigger a flush if enabled.
1574  /// </summary>
1575  /// <seealso cref="SetMaxBufferedDocs">
1576  /// </seealso>
1577  [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
1578  public virtual int GetMaxBufferedDocs()
1579  {
1580  EnsureOpen();
1581  return docWriter.MaxBufferedDocs;
1582  }
1583 
1584  /// <summary>Determines the amount of RAM that may be used for
1585  /// buffering added documents and deletions before they are
1586  /// flushed to the Directory. Generally for faster
1587  /// indexing performance it's best to flush by RAM usage
1588  /// instead of document count and use as large a RAM buffer
1589  /// as you can.
1590  ///
1591  /// <p/>When this is set, the writer will flush whenever
1592  /// buffered documents and deletions use this much RAM.
1593  /// Pass in <see cref="DISABLE_AUTO_FLUSH" /> to prevent
1594  /// triggering a flush due to RAM usage. Note that if
1595  /// flushing by document count is also enabled, then the
1596  /// flush will be triggered by whichever comes first.<p/>
1597  ///
1598  /// <p/> <b>NOTE</b>: the accounting of RAM usage for pending
1599  /// deletions is only approximate. Specifically, if you
1600  /// delete by Query, Lucene currently has no way to measure
1601  /// the RAM usage of individual Queries, so the accounting
1602  /// will under-estimate and you should compensate by either
1603  /// calling commit() periodically yourself, or by using
1604  /// <see cref="SetMaxBufferedDeleteTerms" /> to flush by count
1605  /// instead of RAM usage (each buffered delete Query counts
1606  /// as one).
1607  ///
1608  /// <p/>
1609  /// <b>NOTE</b>: because IndexWriter uses <c>int</c>s when managing its
1610  /// internal storage, the absolute maximum value for this setting is somewhat
1611  /// less than 2048 MB. The precise limit depends on various factors, such as
1612  /// how large your documents are, how many fields have norms, etc., so it's
1613  /// best to set this value comfortably under 2048.
1614  /// <p/>
1615  ///
1616  /// <p/> The default value is <see cref="DEFAULT_RAM_BUFFER_SIZE_MB" />.<p/>
1617  ///
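 /// <p/>A typical bulk-indexing configuration (a sketch; assumes an already-open
 /// <c>writer</c>, and 64 MB is an illustrative value, not a recommendation):<p/>
 /// <code>
 /// // Flush whenever buffered documents and deletions reach roughly 64 MB of RAM.
 /// writer.SetRAMBufferSizeMB(64.0);
 /// </code>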
1618  /// </summary>
1619  /// <throws> IllegalArgumentException if ramBufferSize is enabled but
1620  /// non-positive, or if it would disable ramBufferSize while
1621  /// maxBufferedDocs is already disabled
1622  /// </throws>
1623  public virtual void SetRAMBufferSizeMB(double mb)
1624  {
1625  if (mb > 2048.0)
1626  {
1627  throw new System.ArgumentException("ramBufferSize " + mb + " is too large; should be comfortably less than 2048");
1628  }
1629  if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0)
1630  throw new System.ArgumentException("ramBufferSize should be > 0.0 MB when enabled");
1631  if (mb == DISABLE_AUTO_FLUSH && GetMaxBufferedDocs() == DISABLE_AUTO_FLUSH)
1632  throw new System.ArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled");
1633  docWriter.SetRAMBufferSizeMB(mb);
1634  if (infoStream != null)
1635  Message("setRAMBufferSizeMB " + mb);
1636  }
1637 
1638  /// <summary> Returns the value set by <see cref="SetRAMBufferSizeMB" /> if enabled.</summary>
1639  [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
1640  public virtual double GetRAMBufferSizeMB()
1641  {
1642  return docWriter.GetRAMBufferSizeMB();
1643  }
1644 
1645  /// <summary> <p/>Determines the minimal number of delete terms required before the buffered
1646  /// in-memory delete terms are applied and flushed. If there are documents
1647  /// buffered in memory at the time, they are merged and a new segment is
1648  /// created.<p/>
1649  /// <p/>Disabled by default (writer flushes by RAM usage).<p/>
1650  ///
1651  /// </summary>
1652  /// <throws> IllegalArgumentException if maxBufferedDeleteTerms is
1653  /// enabled but smaller than 1
1654  /// </throws>
1655  /// <seealso cref="SetRAMBufferSizeMB">
1656  /// </seealso>
1657  public virtual void SetMaxBufferedDeleteTerms(int maxBufferedDeleteTerms)
1658  {
1659  EnsureOpen();
1660  if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH && maxBufferedDeleteTerms < 1)
1661  throw new System.ArgumentException("maxBufferedDeleteTerms must at least be 1 when enabled");
1662  docWriter.MaxBufferedDeleteTerms = maxBufferedDeleteTerms;
1663  if (infoStream != null)
1664  Message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms);
1665  }
1666 
1667  /// <summary> Returns the number of buffered deleted terms that will
1668  /// trigger a flush if enabled.
1669  /// </summary>
1670  /// <seealso cref="SetMaxBufferedDeleteTerms">
1671  /// </seealso>
1672  [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
1673  public virtual int GetMaxBufferedDeleteTerms()
1674  {
1675  EnsureOpen();
1676  return docWriter.MaxBufferedDeleteTerms;
1677  }
1678 
1679  /// <summary>Gets or sets the number of segments that are merged at
1680  /// once and also controls the total number of segments
1681  /// allowed to accumulate in the index.
1682  /// <p/>Determines how often segment indices are merged by addDocument(). With
1683  /// smaller values, less RAM is used while indexing, and searches on
1684  /// unoptimized indices are faster, but indexing speed is slower. With larger
1685  /// values, more RAM is used during indexing, and while searches on unoptimized
1686  /// indices are slower, indexing is faster. Thus larger values (> 10) are best
1687  /// for batch index creation, and smaller values (&lt; 10) for indices that are
1688  /// interactively maintained.
1689  ///
1690  /// <p/>Note that this method is a convenience method: it
1691  /// just calls mergePolicy.setMergeFactor as long as
1692  /// mergePolicy is an instance of <see cref="LogMergePolicy" />.
1693  /// Otherwise an IllegalArgumentException is thrown.<p/>
1694  ///
1695  /// <p/>This must never be less than 2. The default value is 10.
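 /// <p/>For example, a batch-indexing run might raise the merge factor, while an
 /// interactively maintained index might lower it (a sketch with illustrative values;
 /// assumes the default <see cref="LogMergePolicy" /> is in use):<p/>
 /// <code>
 /// writer.MergeFactor = 30;   // batch indexing: fewer, larger merges
 /// // writer.MergeFactor = 5; // interactive updates: keep the segment count low
 /// </code>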
1696  /// </summary>
1697  public virtual int MergeFactor
1698  {
1699  set { LogMergePolicy.MergeFactor = value; }
1700  get { return LogMergePolicy.MergeFactor; }
1701  }
1702 
1703  /// <summary>Gets or sets the default info stream.
1704  /// If non-null, this will be the default infoStream used
1705  /// by a newly instantiated IndexWriter.
1706  /// </summary>
1707  /// <seealso cref="SetInfoStream">
1708  /// </seealso>
1709  public static StreamWriter DefaultInfoStream
1710  {
1711  set { IndexWriter.defaultInfoStream = value; }
1712  get { return IndexWriter.defaultInfoStream; }
1713  }
1714 
1715  /// <summary>If non-null, information about merges, deletes and a
1716  /// message when maxFieldLength is reached will be printed
1717  /// to this.
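 /// <p/>For example, to capture diagnostics in a log file (a sketch; the file name
 /// is illustrative, and the StreamWriter should be disposed when indexing is done):<p/>
 /// <code>
 /// var log = new System.IO.StreamWriter("indexwriter.log") { AutoFlush = true };
 /// writer.SetInfoStream(log);
 /// </code>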
1718  /// </summary>
1719  public virtual void SetInfoStream(System.IO.StreamWriter infoStream)
1720  {
1721  EnsureOpen();
1722  SetMessageID(infoStream);
1723  docWriter.SetInfoStream(infoStream);
1724  deleter.SetInfoStream(infoStream);
1725  if (infoStream != null)
1726  MessageState();
1727  }
1728 
1729  private void MessageState()
1730  {
1731  Message("setInfoStream: dir=" + directory +
1732  " mergePolicy=" + mergePolicy +
1733  " mergeScheduler=" + mergeScheduler +
1734  " ramBufferSizeMB=" + docWriter.GetRAMBufferSizeMB() +
1735  " maxBufferedDocs=" + docWriter.MaxBufferedDocs +
1736  " maxBuffereDeleteTerms=" + docWriter.MaxBufferedDeleteTerms +
1737  " maxFieldLength=" + maxFieldLength +
1738  " index=" + SegString());
1739  }
1740 
1741  /// <summary> Returns the current infoStream in use by this writer.</summary>
1742  /// <seealso cref="SetInfoStream">
1743  /// </seealso>
1744  public virtual StreamWriter InfoStream
1745  {
1746  get
1747  {
1748  EnsureOpen();
1749  return infoStream;
1750  }
1751  }
1752 
1753  /// <summary>Returns true if verbose output is enabled (i.e., infoStream != null). </summary>
1754  public virtual bool Verbose
1755  {
1756  get { return infoStream != null; }
1757  }
1758 
1759  /// <summary>Gets or sets allowed timeout when acquiring the write lock.</summary>
1760  public virtual long WriteLockTimeout
1761  {
1762  get
1763  {
1764  EnsureOpen();
1765  return writeLockTimeout;
1766  }
1767  set
1768  {
1769  EnsureOpen();
1770  this.writeLockTimeout = value;
1771  }
1772  }
1773 
1774  /// <summary> Gets or sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in
1775  /// milliseconds).
1776  /// </summary>
1777  public static long DefaultWriteLockTimeout
1778  {
1779  set { IndexWriter.WRITE_LOCK_TIMEOUT = value; }
1780  get { return IndexWriter.WRITE_LOCK_TIMEOUT; }
1781  }
1782 
1783  /// <summary> Commits all changes to an index and closes all
1784  /// associated files. Note that this may be a costly
1785  /// operation, so, try to re-use a single writer instead of
1786  /// closing and opening a new one. See <see cref="Commit()" /> for
1787  /// caveats about write caching done by some IO devices.
1788  ///
1789  /// <p/> If an Exception is hit during close, eg due to disk
1790  /// full or some other reason, then both the on-disk index
1791  /// and the internal state of the IndexWriter instance will
1792  /// be consistent. However, the close will not be complete
1793  /// even though part of it (flushing buffered documents)
1794  /// may have succeeded, so the write lock will still be
1795  /// held.<p/>
1796  ///
1797  /// <p/> If you can correct the underlying cause (eg free up
1798  /// some disk space) then you can call close() again.
1799  /// Failing that, if you want to force the write lock to be
1800  /// released (dangerous, because you may then lose buffered
1801  /// docs in the IndexWriter instance) then you can do
1802  /// something like this:<p/>
1803  ///
1804  /// <code>
1805  /// try {
1806  /// writer.Close();
1807  /// } finally {
1808  /// if (IndexWriter.IsLocked(directory)) {
1809  /// IndexWriter.Unlock(directory);
1810  /// }
1811  /// }
1812  /// </code>
1813  ///
1814  /// after which, you must be certain not to use the writer
1815  /// instance anymore.<p/>
1816  ///
1817  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
1818  /// you should immediately close the writer, again. See <a
1819  /// href="#OOME">above</a> for details.<p/>
1820  ///
1821  /// </summary>
1822  /// <throws> CorruptIndexException if the index is corrupt </throws>
1823  /// <throws> IOException if there is a low-level IO error </throws>
1824  [Obsolete("Use Dispose() instead")]
1825  public void Close()
1826  {
1827  Dispose(true);
1828  }
1829 
1830  /// <summary> Commits all changes to an index and closes all
1831  /// associated files. Note that this may be a costly
1832  /// operation, so, try to re-use a single writer instead of
1833  /// closing and opening a new one. See <see cref="Commit()" /> for
1834  /// caveats about write caching done by some IO devices.
1835  ///
1836  /// <p/> If an Exception is hit during close, eg due to disk
1837  /// full or some other reason, then both the on-disk index
1838  /// and the internal state of the IndexWriter instance will
1839  /// be consistent. However, the close will not be complete
1840  /// even though part of it (flushing buffered documents)
1841  /// may have succeeded, so the write lock will still be
1842  /// held.<p/>
1843  ///
1844  /// <p/> If you can correct the underlying cause (eg free up
1845  /// some disk space) then you can call Dispose() again.
1846  /// Failing that, if you want to force the write lock to be
1847  /// released (dangerous, because you may then lose buffered
1848  /// docs in the IndexWriter instance) then you can do
1849  /// something like this:<p/>
1850  ///
1851  /// <code>
1852  /// try {
1853  /// writer.Dispose();
1854  /// } finally {
1855  /// if (IndexWriter.IsLocked(directory)) {
1856  /// IndexWriter.Unlock(directory);
1857  /// }
1858  /// }
1859  /// </code>
1860  ///
1861  /// after which, you must be certain not to use the writer
1862  /// instance anymore.<p/>
1863  ///
1864  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
1865  /// you should immediately close the writer, again. See <a
1866  /// href="#OOME">above</a> for details.<p/>
1867  ///
1868  /// </summary>
1869  /// <throws> CorruptIndexException if the index is corrupt </throws>
1870  /// <throws> IOException if there is a low-level IO error </throws>
1871  public virtual void Dispose()
1872  {
1873  Dispose(true);
1874  }
1875 
1876  /// <summary> Closes the index with or without waiting for currently
1877  /// running merges to finish. This is only meaningful when
1878  /// using a MergeScheduler that runs merges in background
1879  /// threads.
1880  ///
1881  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
1882  /// you should immediately close the writer, again. See <a
1883  /// href="#OOME">above</a> for details.<p/>
1884  ///
1885  /// <p/><b>NOTE</b>: it is dangerous to always call
1886  /// Dispose(false), especially when IndexWriter is not open
1887  /// for very long, because this can result in "merge
1888  /// starvation" whereby long merges will never have a
1889  /// chance to finish. This will cause too many segments in
1890  /// your index over time.<p/>
1891  ///
1892  /// </summary>
1893  /// <param name="waitForMerges">if true, this call will block
1894  /// until all merges complete; else, it will ask all
1895  /// running merges to abort, wait until those merges have
1896  /// finished (which should be at most a few seconds), and
1897  /// then return.
1898  /// </param>
1899  public virtual void Dispose(bool waitForMerges)
1900  {
1901  Dispose(true, waitForMerges);
1902  }
1903 
1904  protected virtual void Dispose(bool disposing, bool waitForMerges)
1905  {
1906  if (disposing)
1907  {
1908  // Ensure that only one thread actually gets to do the closing:
1909  if (ShouldClose())
1910  {
1911  // If any methods have hit OutOfMemoryError, then abort
1912  // on close, in case the internal state of IndexWriter
1913  // or DocumentsWriter is corrupt
1914  if (hitOOM)
1915  RollbackInternal();
1916  else
1917  CloseInternal(waitForMerges);
1918  }
1919  }
1920  }
1921 
1922  /// <summary> Closes the index with or without waiting for currently
1923  /// running merges to finish. This is only meaningful when
1924  /// using a MergeScheduler that runs merges in background
1925  /// threads.
1926  ///
1927  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
1928  /// you should immediately close the writer, again. See <a
1929  /// href="#OOME">above</a> for details.<p/>
1930  ///
1931  /// <p/><b>NOTE</b>: it is dangerous to always call
1932  /// close(false), especially when IndexWriter is not open
1933  /// for very long, because this can result in "merge
1934  /// starvation" whereby long merges will never have a
1935  /// chance to finish. This will cause too many segments in
1936  /// your index over time.<p/>
1937  ///
1938  /// </summary>
1939  /// <param name="waitForMerges">if true, this call will block
1940  /// until all merges complete; else, it will ask all
1941  /// running merges to abort, wait until those merges have
1942  /// finished (which should be at most a few seconds), and
1943  /// then return.
1944  /// </param>
1945  [Obsolete("Use Dispose(bool) instead")]
1946  public virtual void Close(bool waitForMerges)
1947  {
1948  Dispose(waitForMerges);
1949  }
1950 
1951  // Returns true if this thread should attempt to close, or
1952  // false if IndexWriter is now closed; else, waits until
1953  // another thread finishes closing
1954  private bool ShouldClose()
1955  {
1956  lock (this)
1957  {
1958  while (true)
1959  {
1960  if (!closed)
1961  {
1962  if (!closing)
1963  {
1964  closing = true;
1965  return true;
1966  }
1967  else
1968  {
1969  // Another thread is presently trying to close;
1970  // wait until it finishes one way (closes
1971  // successfully) or another (fails to close)
1972  DoWait();
1973  }
1974  }
1975  else
1976  return false;
1977  }
1978  }
1979  }
1980 
1981  private void CloseInternal(bool waitForMerges)
1982  {
1983 
1984  docWriter.PauseAllThreads();
1985 
1986  try
1987  {
1988  if (infoStream != null)
1989  Message("now flush at close");
1990 
1991  docWriter.Dispose();
1992 
1993  // Only allow a new merge to be triggered if we are
1994  // going to wait for merges:
1995  if (!hitOOM)
1996  {
1997  Flush(waitForMerges, true, true);
1998  }
1999 
2000  if (waitForMerges)
2001  // Give merge scheduler last chance to run, in case
2002  // any pending merges are waiting:
2003  mergeScheduler.Merge(this);
2004 
2005  mergePolicy.Close();
2006 
2007  FinishMerges(waitForMerges);
2008  stopMerges = true;
2009 
2010  mergeScheduler.Close();
2011 
2012  if (infoStream != null)
2013  Message("now call final commit()");
2014 
2015  if (!hitOOM)
2016  {
2017  Commit(0);
2018  }
2019 
2020  if (infoStream != null)
2021  Message("at close: " + SegString());
2022 
2023  lock (this)
2024  {
2025  readerPool.Dispose();
2026  docWriter = null;
2027  deleter.Dispose();
2028  }
2029 
2030  if (writeLock != null)
2031  {
2032  writeLock.Release(); // release write lock
2033  writeLock = null;
2034  }
2035  lock (this)
2036  {
2037  closed = true;
2038  }
2039  }
2040  catch (System.OutOfMemoryException oom)
2041  {
2042  HandleOOM(oom, "closeInternal");
2043  }
2044  finally
2045  {
2046  lock (this)
2047  {
2048  closing = false;
2049  System.Threading.Monitor.PulseAll(this);
2050  if (!closed)
2051  {
2052  if (docWriter != null)
2053  docWriter.ResumeAllThreads();
2054  if (infoStream != null)
2055  Message("hit exception while closing");
2056  }
2057  }
2058  }
2059  }
2060 
2061  /// <summary>Tells the docWriter to close its currently open shared
2062  /// doc stores (stored fields &amp; vectors files).
2063  /// The return value specifies whether new doc store files are compound or not.
2064  /// </summary>
2065  private bool FlushDocStores()
2066  {
2067  lock (this)
2068  {
2069  if (infoStream != null)
2070  {
2071  Message("flushDocStores segment=" + docWriter.DocStoreSegment);
2072  }
2073 
2074  bool useCompoundDocStore = false;
2075  if (infoStream != null)
2076  {
2077  Message("closeDocStores segment=" + docWriter.DocStoreSegment);
2078  }
2079 
2080  System.String docStoreSegment;
2081 
2082  bool success = false;
2083  try
2084  {
2085  docStoreSegment = docWriter.CloseDocStore();
2086  success = true;
2087  }
2088  finally
2089  {
2090  if (!success && infoStream != null)
2091  {
2092  Message("hit exception closing doc store segment");
2093  }
2094  }
2095 
2096  if (infoStream != null)
2097  {
2098  Message("flushDocStores files=" + docWriter.ClosedFiles());
2099  }
2100 
2101  useCompoundDocStore = mergePolicy.UseCompoundDocStore(segmentInfos);
2102 
2103  if (useCompoundDocStore && docStoreSegment != null && docWriter.ClosedFiles().Count != 0)
2104  {
2105  // Now build compound doc store file
2106 
2107  if (infoStream != null)
2108  {
2109  Message("create compound file " + docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION);
2110  }
2111 
2112  success = false;
2113 
2114  int numSegments = segmentInfos.Count;
2115  System.String compoundFileName = docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION;
2116 
2117  try
2118  {
2119  CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, compoundFileName);
2120  foreach(string file in docWriter.closedFiles)
2121  {
2122  cfsWriter.AddFile(file);
2123  }
2124 
2125  // Perform the merge
2126  cfsWriter.Close();
2127  success = true;
2128  }
2129  finally
2130  {
2131  if (!success)
2132  {
2133  if (infoStream != null)
2134  Message("hit exception building compound file doc store for segment " + docStoreSegment);
2135  deleter.DeleteFile(compoundFileName);
2136  docWriter.Abort();
2137  }
2138  }
2139 
2140  for (int i = 0; i < numSegments; i++)
2141  {
2142  SegmentInfo si = segmentInfos.Info(i);
2143  if (si.DocStoreOffset != - 1 && si.DocStoreSegment.Equals(docStoreSegment))
2144  si.DocStoreIsCompoundFile = true;
2145  }
2146 
2147  Checkpoint();
2148 
2149  // In case the files we just merged into a CFS were
2150  // not previously checkpointed:
2151  deleter.DeleteNewFiles(docWriter.ClosedFiles());
2152  }
2153 
2154  return useCompoundDocStore;
2155  }
2156  }
2157 
2158  /// <summary>Returns the Directory used by this index. </summary>
2159  public virtual Directory Directory
2160  {
2161  get
2162  {
2163  // Pass false because the flush during closing calls getDirectory
2164  EnsureOpen(false);
2165  return directory;
2166  }
2167  }
2168 
2169  /// <summary>Returns the analyzer used by this index. </summary>
2170  public virtual Analyzer Analyzer
2171  {
2172  get
2173  {
2174  EnsureOpen();
2175  return analyzer;
2176  }
2177  }
2178 
2179  /// <summary>Returns total number of docs in this index, including
2180  /// docs not yet flushed (still in the RAM buffer),
2181  /// without taking deletions into account (deleted docs are still counted).
2182  /// </summary>
2183  /// <seealso cref="NumDocs">
2184  /// </seealso>
2185  public virtual int MaxDoc()
2186  {
2187  lock (this)
2188  {
2189  int count;
2190  if (docWriter != null)
2191  count = docWriter.NumDocsInRAM;
2192  else
2193  count = 0;
2194 
2195  for (int i = 0; i < segmentInfos.Count; i++)
2196  count += segmentInfos.Info(i).docCount;
2197  return count;
2198  }
2199  }
2200 
2201  /// <summary>Returns total number of docs in this index, including
2202  /// docs not yet flushed (still in the RAM buffer), and
2203  /// taking deletions into account (deleted docs are not counted). <b>NOTE:</b> buffered deletions
2204  /// are not counted. If you really need these to be
2205  /// counted you should call <see cref="Commit()" /> first.
2206  /// </summary>
2207  /// <seealso cref="NumDocs">
2208  /// </seealso>
2209  public virtual int NumDocs()
2210  {
2211  lock (this)
2212  {
2213  int count;
2214  if (docWriter != null)
2215  count = docWriter.NumDocsInRAM;
2216  else
2217  count = 0;
2218 
2219  for (int i = 0; i < segmentInfos.Count; i++)
2220  {
2221  SegmentInfo info = segmentInfos.Info(i);
2222  count += info.docCount - info.GetDelCount();
2223  }
2224  return count;
2225  }
2226  }
2227 
2228  public virtual bool HasDeletions()
2229  {
2230  lock (this)
2231  {
2232  EnsureOpen();
2233  if (docWriter.HasDeletes())
2234  return true;
2235  for (int i = 0; i < segmentInfos.Count; i++)
2236  if (segmentInfos.Info(i).HasDeletions())
2237  return true;
2238  return false;
2239  }
2240  }
2241 
2242  /// <summary> The maximum number of terms that will be indexed for a single field in a
2243  /// document. This limits the amount of memory required for indexing, so that
2244  /// collections with very large files will not crash the indexing process by
2245  /// running out of memory.<p/>
2246  /// Note that this effectively truncates large documents, excluding from the
2247  /// index terms that occur further in the document. If you know your source
2248  /// documents are large, be sure to set this value high enough to accommodate
2249  /// the expected size. If you set it to <c>int.MaxValue</c>, then the only limit
2250  /// is your memory, but you should anticipate an OutOfMemoryError.<p/>
2251  /// By default, no more than 10,000 terms will be indexed for a field.
2252  ///
2253  /// </summary>
2254  /// <seealso cref="MaxFieldLength">
2255  /// </seealso>
2256  private int maxFieldLength;
2257 
2258  /// <summary> Adds a document to this index. If the document contains more than
2259  /// <see cref="SetMaxFieldLength(int)" /> terms for a given field, the remainder are
2260  /// discarded.
2261  ///
2262  /// <p/> Note that if an Exception is hit (for example disk full)
2263  /// then the index will be consistent, but this document
2264  /// may not have been added. Furthermore, it's possible
2265  /// the index will have one segment in non-compound format
2266  /// even when using compound files (when a merge has
2267  /// partially succeeded).<p/>
2268  ///
2269  /// <p/> This method periodically flushes pending documents
2270  /// to the Directory (see <a href="#flush">above</a>), and
2271  /// also periodically triggers segment merges in the index
2272  /// according to the <see cref="MergePolicy" /> in use.<p/>
2273  ///
2274  /// <p/>Merges temporarily consume space in the
2275  /// directory. The amount of space required is up to 1X the
2276  /// size of all segments being merged, when no
2277  /// readers/searchers are open against the index, and up to
2278  /// 2X the size of all segments being merged when
2279  /// readers/searchers are open against the index (see
2280  /// <see cref="Optimize()" /> for details). The sequence of
2281  /// primitive merge operations performed is governed by the
2282  /// merge policy.
2283  ///
2284  /// <p/>Note that each term in the document can be no longer
2285  /// than 16383 characters, otherwise an
2286  /// IllegalArgumentException will be thrown.<p/>
2287  ///
2288  /// <p/>Note that it's possible to create an invalid Unicode
2289  /// string if a UTF-16 surrogate pair is malformed.
2290  /// In this case, the invalid characters are silently
2291  /// replaced with the Unicode replacement character
2292  /// U+FFFD.<p/>
2293  ///
2294  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
2295  /// you should immediately close the writer. See <a
2296  /// href="#OOME">above</a> for details.<p/>
2297  ///
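 /// <p/>A minimal sketch of adding one document (assumes <c>using Lucene.Net.Documents;</c>
 /// and an already-open <c>writer</c>; field names and values are illustrative only):<p/>
 /// <code>
 /// Document doc = new Document();
 /// doc.Add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
 /// doc.Add(new Field("body", "some searchable text", Field.Store.NO, Field.Index.ANALYZED));
 /// writer.AddDocument(doc);
 /// writer.Commit(); // make the change visible to newly opened readers
 /// </code>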
2298  /// </summary>
2299  /// <throws> CorruptIndexException if the index is corrupt </throws>
2300  /// <throws> IOException if there is a low-level IO error </throws>
2301  public virtual void AddDocument(Document doc)
2302  {
2303  AddDocument(doc, analyzer);
2304  }
2305 
2306  /// <summary> Adds a document to this index, using the provided analyzer instead of the
2307  /// value of <see cref="Analyzer" />. If the document contains more than
2308  /// <see cref="SetMaxFieldLength(int)" /> terms for a given field, the remainder are
2309  /// discarded.
2310  ///
2311  /// <p/>See <see cref="AddDocument(Document)" /> for details on
2312  /// index and IndexWriter state after an Exception, and
2313  /// flushing/merging temporary free space requirements.<p/>
2314  ///
2315  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
2316  /// you should immediately close the writer. See <a
2317  /// href="#OOME">above</a> for details.<p/>
2318  ///
2319  /// </summary>
2320  /// <throws> CorruptIndexException if the index is corrupt </throws>
2321  /// <throws> IOException if there is a low-level IO error </throws>
2322  public virtual void AddDocument(Document doc, Analyzer analyzer)
2323  {
2324  EnsureOpen();
2325  bool doFlush = false;
2326  bool success = false;
2327  try
2328  {
2329  try
2330  {
2331  doFlush = docWriter.AddDocument(doc, analyzer);
2332  success = true;
2333  }
2334  finally
2335  {
2336  if (!success)
2337  {
2338 
2339  if (infoStream != null)
2340  Message("hit exception adding document");
2341 
2342  lock (this)
2343  {
2344  // If docWriter has some aborted files that were
2345  // never incref'd, then we clean them up here
2346  if (docWriter != null)
2347  {
2348  ICollection<string> files = docWriter.AbortedFiles();
2349  if (files != null)
2350  deleter.DeleteNewFiles(files);
2351  }
2352  }
2353  }
2354  }
2355  if (doFlush)
2356  Flush(true, false, false);
2357  }
2358  catch (System.OutOfMemoryException oom)
2359  {
2360  HandleOOM(oom, "addDocument");
2361  }
2362  }
2363 
2364  /// <summary> Deletes the document(s) containing <c>term</c>.
2365  ///
2366  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
2367  /// you should immediately close the writer. See <a
2368  /// href="#OOME">above</a> for details.<p/>
2369  ///
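 /// <p/>For example, to delete every document whose <c>id</c> field equals <c>"42"</c>
 /// (a sketch; the field name and value are illustrative only):<p/>
 /// <code>
 /// writer.DeleteDocuments(new Term("id", "42"));
 /// writer.Commit(); // apply and publish the buffered delete
 /// </code>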
2370  /// </summary>
2371  /// <param name="term">the term to identify the documents to be deleted
2372  /// </param>
2373  /// <throws> CorruptIndexException if the index is corrupt </throws>
2374  /// <throws> IOException if there is a low-level IO error </throws>
2375  public virtual void DeleteDocuments(Term term)
2376  {
2377  EnsureOpen();
2378  try
2379  {
2380  bool doFlush = docWriter.BufferDeleteTerm(term);
2381  if (doFlush)
2382  Flush(true, false, false);
2383  }
2384  catch (System.OutOfMemoryException oom)
2385  {
2386  HandleOOM(oom, "deleteDocuments(Term)");
2387  }
2388  }
2389 
2390  /// <summary> Deletes the document(s) containing any of the
2391  /// terms. All deletes are flushed at the same time.
2392  ///
2393  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
2394  /// you should immediately close the writer. See <a
2395  /// href="#OOME">above</a> for details.<p/>
2396  ///
2397  /// </summary>
2398  /// <param name="terms">array of terms to identify the documents
2399  /// to be deleted
2400  /// </param>
2401  /// <throws> CorruptIndexException if the index is corrupt </throws>
2402  /// <throws> IOException if there is a low-level IO error </throws>
2403  public virtual void DeleteDocuments(params Term[] terms)
2404  {
2405  EnsureOpen();
2406  try
2407  {
2408  bool doFlush = docWriter.BufferDeleteTerms(terms);
2409  if (doFlush)
2410  Flush(true, false, false);
2411  }
2412  catch (System.OutOfMemoryException oom)
2413  {
2414  HandleOOM(oom, "deleteDocuments(params Term[])");
2415  }
2416  }
2417 
2418  /// <summary> Deletes the document(s) matching the provided query.
2419  ///
2420  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
2421  /// you should immediately close the writer. See <a
2422  /// href="#OOME">above</a> for details.<p/>
2423  ///
2424  /// </summary>
2425  /// <param name="query">the query to identify the documents to be deleted
2426  /// </param>
2427  /// <throws> CorruptIndexException if the index is corrupt </throws>
2428  /// <throws> IOException if there is a low-level IO error </throws>
2429  public virtual void DeleteDocuments(Query query)
2430  {
2431  EnsureOpen();
2432  bool doFlush = docWriter.BufferDeleteQuery(query);
2433  if (doFlush)
2434  Flush(true, false, false);
2435  }
2436 
2437  /// <summary> Deletes the document(s) matching any of the provided queries.
2438  /// All deletes are flushed at the same time.
2439  ///
2440  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
2441  /// you should immediately close the writer. See <a
2442  /// href="#OOME">above</a> for details.<p/>
2443  ///
2444  /// </summary>
2445  /// <param name="queries">array of queries to identify the documents
2446  /// to be deleted
2447  /// </param>
2448  /// <throws> CorruptIndexException if the index is corrupt </throws>
2449  /// <throws> IOException if there is a low-level IO error </throws>
2450  public virtual void DeleteDocuments(params Query[] queries)
2451  {
2452  EnsureOpen();
2453  bool doFlush = docWriter.BufferDeleteQueries(queries);
2454  if (doFlush)
2455  Flush(true, false, false);
2456  }
2457 
2458  /// <summary> Updates a document by first deleting the document(s)
2459  /// containing <c>term</c> and then adding the new
2460  /// document. The delete and then add are atomic as seen
2461  /// by a reader on the same index (flush may happen only after
2462  /// the add).
2463  ///
2464  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
2465  /// you should immediately close the writer. See <a
2466  /// href="#OOME">above</a> for details.<p/>
2467  ///
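 /// <p/>A sketch of replacing a document by a unique key (assumes an already-open
 /// <c>writer</c>; the <c>id</c> field and values are illustrative only):<p/>
 /// <code>
 /// Document newDoc = new Document();
 /// newDoc.Add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
 /// newDoc.Add(new Field("body", "revised text", Field.Store.NO, Field.Index.ANALYZED));
 /// writer.UpdateDocument(new Term("id", "42"), newDoc);
 /// </code>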
2468  /// </summary>
2469  /// <param name="term">the term to identify the document(s) to be
2470  /// deleted
2471  /// </param>
2472  /// <param name="doc">the document to be added
2473  /// </param>
2474  /// <throws> CorruptIndexException if the index is corrupt </throws>
2475  /// <throws> IOException if there is a low-level IO error </throws>
2476  public virtual void UpdateDocument(Term term, Document doc)
2477  {
2478  EnsureOpen();
2479  UpdateDocument(term, doc, Analyzer);
2480  }
2481 
2482  /// <summary> Updates a document by first deleting the document(s)
2483  /// containing <c>term</c> and then adding the new
2484  /// document. The delete and then add are atomic as seen
2485  /// by a reader on the same index (flush may happen only after
2486  /// the add).
2487  ///
2488  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
2489  /// you should immediately close the writer. See <a
2490  /// href="#OOME">above</a> for details.<p/>
2491  ///
2492  /// </summary>
2493  /// <param name="term">the term to identify the document(s) to be
2494  /// deleted
2495  /// </param>
2496  /// <param name="doc">the document to be added
2497  /// </param>
2498  /// <param name="analyzer">the analyzer to use when analyzing the document
2499  /// </param>
2500  /// <throws> CorruptIndexException if the index is corrupt </throws>
2501  /// <throws> IOException if there is a low-level IO error </throws>
2502  public virtual void UpdateDocument(Term term, Document doc, Analyzer analyzer)
2503  {
2504  EnsureOpen();
2505  try
2506  {
2507  bool doFlush = false;
2508  bool success = false;
2509  try
2510  {
2511  doFlush = docWriter.UpdateDocument(term, doc, analyzer);
2512  success = true;
2513  }
2514  finally
2515  {
2516  if (!success)
2517  {
2518 
2519  if (infoStream != null)
2520  Message("hit exception updating document");
2521 
2522  lock (this)
2523  {
2524  // If docWriter has some aborted files that were
2525  // never incref'd, then we clean them up here
2526  ICollection<string> files = docWriter.AbortedFiles();
2527  if (files != null)
2528  deleter.DeleteNewFiles(files);
2529  }
2530  }
2531  }
2532  if (doFlush)
2533  Flush(true, false, false);
2534  }
2535  catch (System.OutOfMemoryException oom)
2536  {
2537  HandleOOM(oom, "updateDocument");
2538  }
2539  }
2540 
2541  // for test purpose
2542  internal int GetSegmentCount()
2543  {
2544  lock (this)
2545  {
2546  return segmentInfos.Count;
2547  }
2548  }
2549 
2550  // for test purpose
2551  internal int GetNumBufferedDocuments()
2552  {
2553  lock (this)
2554  {
2555  return docWriter.NumDocsInRAM;
2556  }
2557  }
2558 
2559  // for test purpose
2560  public /*internal*/ int GetDocCount(int i)
2561  {
2562  lock (this)
2563  {
2564  if (i >= 0 && i < segmentInfos.Count)
2565  {
2566  return segmentInfos.Info(i).docCount;
2567  }
2568  else
2569  {
2570  return - 1;
2571  }
2572  }
2573  }
2574 
2575  // for test purpose
2576  internal int GetFlushCount()
2577  {
2578  lock (this)
2579  {
2580  return flushCount;
2581  }
2582  }
2583 
2584  // for test purpose
2585  internal int GetFlushDeletesCount()
2586  {
2587  lock (this)
2588  {
2589  return flushDeletesCount;
2590  }
2591  }
2592 
2593  internal System.String NewSegmentName()
2594  {
2595  // Cannot synchronize on IndexWriter because that causes
2596  // deadlock
2597  lock (segmentInfos)
2598  {
2599  // Important to increment changeCount so that the
2600  // segmentInfos is written on close. Otherwise we
2601  // could close, re-open and re-return the same segment
2602  // name that was previously returned which can cause
2603  // problems at least with ConcurrentMergeScheduler.
2604  changeCount++;
2605  return "_" + Number.ToString(segmentInfos.counter++);
2606  }
2607  }
2608 
2609  /// <summary>If non-null, information about merges will be printed to this.</summary>
2610  private System.IO.StreamWriter infoStream = null;
2611  private static System.IO.StreamWriter defaultInfoStream = null;
2612 
2613  /// <summary> Requests an "optimize" operation on an index, priming the index
2614  /// for the fastest available search. Traditionally this has meant
2615  /// merging all segments into a single segment as is done in the
2616  /// default merge policy, but individual merge policies may implement
2617  /// optimize in different ways.
2618  ///
2619  /// <p/>It is recommended that this method be called upon completion of indexing. In
2620  /// environments with frequent updates, optimize is best done during low volume times, if at all.
2621  ///
2622  /// <p/>
2623  /// <p/>See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion. <p/>
2624  ///
2625  /// <p/>Note that optimize requires 2X the index size free
2626  /// space in your Directory (3X if you're using compound
2627  /// file format). For example, if your index
2628  /// size is 10 MB then you need 20 MB free for optimize to
2629  /// complete (30 MB if you're using compound file format).<p/>
2630  ///
2631  /// <p/>If some but not all readers re-open while an
2632  /// optimize is underway, this will cause > 2X temporary
2633  /// space to be consumed as those new readers will then
2634  /// hold open the partially optimized segments at that
2635  /// time. It is best not to re-open readers while optimize
2636  /// is running.<p/>
2637  ///
2638  /// <p/>The actual temporary usage could be much less than
2639  /// these figures (it depends on many factors).<p/>
2640  ///
2641  /// <p/>In general, once the optimize completes, the total size of the
2642  /// index will be less than the size of the starting index.
2643  /// It could be quite a bit smaller (if there were many
2644  /// pending deletes) or just slightly smaller.<p/>
2645  ///
2646  /// <p/>If an Exception is hit during optimize(), for example
2647  /// due to disk full, the index will not be corrupt and no
2648  /// documents will have been lost. However, it may have
2649  /// been partially optimized (some segments were merged but
2650  /// not all), and it's possible that one of the segments in
2651  /// the index will be in non-compound format even when
2652  /// using compound file format. This will occur when the
2653  /// Exception is hit during conversion of the segment into
2654  /// compound format.<p/>
2655  ///
2656  /// <p/>This call will optimize those segments present in
2657  /// the index when the call started. If other threads are
2658  /// still adding documents and flushing segments, those
2659  /// newly created segments will not be optimized unless you
2660  /// call optimize again.<p/>
2661  ///
2662  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
2663  /// you should immediately close the writer. See <a
2664  /// href="#OOME">above</a> for details.<p/>
2665  ///
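 /// <p/>A typical pattern is to optimize once, after bulk indexing has finished
 /// (a sketch; assumes an already-open <c>writer</c>):<p/>
 /// <code>
 /// // ... add or update many documents ...
 /// writer.Optimize(); // merge down to a single segment
 /// writer.Commit();   // or Dispose() the writer, which also commits
 /// </code>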
2666  /// </summary>
2667  /// <throws> CorruptIndexException if the index is corrupt </throws>
2668  /// <throws> IOException if there is a low-level IO error </throws>
2669  /// <seealso cref="Index.LogMergePolicy.FindMergesForOptimize">
2670  /// </seealso>
2671  public virtual void Optimize()
2672  {
2673  Optimize(true);
2674  }
2675 
2676  /// <summary> Optimize the index down to &lt;= maxNumSegments. If
2677  /// maxNumSegments==1 then this is the same as <see cref="Optimize()" />.
2678  ///
2679  ///
2680  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
2681  /// you should immediately close the writer. See <a
2682  /// href="#OOME">above</a> for details.<p/>
2683  ///
2684  /// </summary>
2685  /// <param name="maxNumSegments">maximum number of segments left
2686  /// in the index after optimization finishes
2687  /// </param>
2688  public virtual void Optimize(int maxNumSegments)
2689  {
2690  Optimize(maxNumSegments, true);
2691  }
2692 
2693  /// <summary>Just like <see cref="Optimize()" />, except you can specify
2694  /// whether the call should block until the optimize
2695  /// completes. This is only meaningful with a
2696  /// <see cref="MergeScheduler" /> that is able to run merges in
2697  /// background threads.
2698  ///
2699  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
2700  /// you should immediately close the writer. See <a
2701  /// href="#OOME">above</a> for details.<p/>
2702  /// </summary>
2703  public virtual void Optimize(bool doWait)
2704  {
2705  Optimize(1, doWait);
2706  }
2707 
2708  /// <summary>Just like <see cref="Optimize(int)" />, except you can
2709  /// specify whether the call should block until the
2710  /// optimize completes. This is only meaningful with a
2711  /// <see cref="MergeScheduler" /> that is able to run merges in
2712  /// background threads.
2713  ///
2714  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
2715  /// you should immediately close the writer. See <a
2716  /// href="#OOME">above</a> for details.<p/>
2717  /// </summary>
2718  public virtual void Optimize(int maxNumSegments, bool doWait)
2719  {
2720  EnsureOpen();
2721 
2722  if (maxNumSegments < 1)
2723  throw new System.ArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments);
2724 
2725  if (infoStream != null)
2726  Message("optimize: index now " + SegString());
2727 
2728  Flush(true, false, true);
2729 
2730  lock (this)
2731  {
2732  ResetMergeExceptions();
2733  segmentsToOptimize = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<SegmentInfo>();
2734  optimizeMaxNumSegments = maxNumSegments;
2735  int numSegments = segmentInfos.Count;
2736  for (int i = 0; i < numSegments; i++)
2737  segmentsToOptimize.Add(segmentInfos.Info(i));
2738 
2739  // Now mark all pending & running merges as optimize
2740  // merge:
2741  foreach(MergePolicy.OneMerge merge in pendingMerges)
2742  {
2743  merge.optimize = true;
2744  merge.maxNumSegmentsOptimize = maxNumSegments;
2745  }
2746 
2747  foreach(MergePolicy.OneMerge merge in runningMerges)
2748  {
2749  merge.optimize = true;
2750  merge.maxNumSegmentsOptimize = maxNumSegments;
2751  }
2752  }
2753 
2754  MaybeMerge(maxNumSegments, true);
2755 
2756  if (doWait)
2757  {
2758  lock (this)
2759  {
2760  while (true)
2761  {
2762 
2763  if (hitOOM)
2764  {
2765  throw new System.SystemException("this writer hit an OutOfMemoryError; cannot complete optimize");
2766  }
2767 
2768  if (mergeExceptions.Count > 0)
2769  {
2770  // Forward any exceptions in background merge
2771  // threads to the current thread:
2772  int size = mergeExceptions.Count;
2773  for (int i = 0; i < size; i++)
2774  {
2775  MergePolicy.OneMerge merge = mergeExceptions[i];
2776  if (merge.optimize)
2777  {
2778  System.IO.IOException err;
2779  System.Exception t = merge.GetException();
2780  if (t != null)
2781  err = new System.IO.IOException("background merge hit exception: " + merge.SegString(directory), t);
2782  else
2783  err = new System.IO.IOException("background merge hit exception: " + merge.SegString(directory));
2784  throw err;
2785  }
2786  }
2787  }
2788 
2789  if (OptimizeMergesPending())
2790  DoWait();
2791  else
2792  break;
2793  }
2794  }
2795 
2796  // If close is called while we are still
2797  // running, throw an exception so the calling
2798  // thread will know the optimize did not
2799  // complete
2800  EnsureOpen();
2801  }
2802 
2803  // NOTE: in the ConcurrentMergeScheduler case, when
2804  // doWait is false, we can return immediately while
2805  // background threads accomplish the optimization
2806  }
2807 
2808  /// <summary>Returns true if any merges in pendingMerges or
2809  /// runningMerges are optimization merges.
2810  /// </summary>
2811  private bool OptimizeMergesPending()
2812  {
2813  lock (this)
2814  {
2815  foreach (MergePolicy.OneMerge merge in pendingMerges)
2816  {
2817  if (merge.optimize) return true;
2818  }
2819 
2820  foreach(MergePolicy.OneMerge merge in runningMerges)
2821  {
2822  if (merge.optimize) return true;
2823  }
2824 
2825  return false;
2826  }
2827  }
2828 
2829  /// <summary>Just like <see cref="ExpungeDeletes()" />, except you can
2830  /// specify whether the call should block until the
2831  /// operation completes. This is only meaningful with a
2832  /// <see cref="MergeScheduler" /> that is able to run merges in
2833  /// background threads.
2834  ///
2835  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
2836  /// you should immediately close the writer. See <a
2837  /// href="#OOME">above</a> for details.<p/>
2838  /// </summary>
2839  public virtual void ExpungeDeletes(bool doWait)
2840  {
2841  EnsureOpen();
2842 
2843  if (infoStream != null)
2844  Message("expungeDeletes: index now " + SegString());
2845 
2846  MergePolicy.MergeSpecification spec;
2847 
2848  lock (this)
2849  {
2850  spec = mergePolicy.FindMergesToExpungeDeletes(segmentInfos);
2851  if (spec != null)
2852  {
2853  int numMerges = spec.merges.Count;
2854  for (int i = 0; i < numMerges; i++)
2855  RegisterMerge(spec.merges[i]);
2856  }
2857  }
2858 
2859  mergeScheduler.Merge(this);
2860 
2861  if (spec != null && doWait)
2862  {
2863  int numMerges = spec.merges.Count;
2864  lock (this)
2865  {
2866  bool running = true;
2867  while (running)
2868  {
2869 
2870  if (hitOOM)
2871  {
2872  throw new System.SystemException("this writer hit an OutOfMemoryError; cannot complete expungeDeletes");
2873  }
2874 
2875  // Check each merge that MergePolicy asked us to
2876  // do, to see if any of them are still running and
2877  // if any of them have hit an exception.
2878  running = false;
2879  for (int i = 0; i < numMerges; i++)
2880  {
2881  MergePolicy.OneMerge merge = spec.merges[i];
2882  if (pendingMerges.Contains(merge) || runningMerges.Contains(merge))
2883  running = true;
2884  System.Exception t = merge.GetException();
2885  if (t != null)
2886  {
2887  System.IO.IOException ioe = new System.IO.IOException("background merge hit exception: " + merge.SegString(directory), t);
2888  throw ioe;
2889  }
2890  }
2891 
2892  // If any of our merges are still running, wait:
2893  if (running)
2894  DoWait();
2895  }
2896  }
2897  }
2898 
2899  // NOTE: in the ConcurrentMergeScheduler case, when
2900  // doWait is false, we can return immediately while
2901  // background threads accomplish the optimization
2902  }
2903 
2904 
2905  /// <summary>Expunges all deletes from the index. When an index
2906  /// has many document deletions (or updates to existing
2907  /// documents), it's best to either call optimize or
2908  /// expungeDeletes to remove all unused data in the index
2909  /// associated with the deleted documents. To see how
2910  /// many deletions you have pending in your index, call
2911  /// <see cref="IndexReader.NumDeletedDocs" />
2912  /// This saves disk space and memory usage while
2913  /// searching. expungeDeletes should be somewhat faster
2914  /// than optimize since it does not insist on reducing the
2915  /// index to a single segment (though, this depends on the
2916  /// <see cref="MergePolicy" />; see <see cref="Index.MergePolicy.FindMergesToExpungeDeletes" />.). Note that
2917  /// this call does not first commit any buffered
2918  /// documents, so you must do so yourself if necessary.
2919  /// See also <seealso cref="ExpungeDeletes(bool)" />
2920  ///
2921  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
2922  /// you should immediately close the writer. See <a
2923  /// href="#OOME">above</a> for details.<p/>
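 /// <p/>For example, after a large batch of deletions (a sketch; assumes an
 /// already-open <c>writer</c>, and the term shown is illustrative only):<p/>
 /// <code>
 /// writer.DeleteDocuments(new Term("status", "expired"));
 /// writer.Commit();         // apply the buffered deletes first
 /// writer.ExpungeDeletes(); // reclaim the space held by deleted documents
 /// </code>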
2924  /// </summary>
2925  public virtual void ExpungeDeletes()
2926  {
2927  ExpungeDeletes(true);
2928  }
2929 
2930  /// <summary> Expert: asks the mergePolicy whether any merges are
2931  /// necessary now and if so, runs the requested merges and
2932  /// then iterates (testing again whether merges are needed) until no
2933  /// more merges are returned by the mergePolicy.
2934  ///
2935  /// Explicit calls to maybeMerge() are usually not
2936  /// necessary. The most common case is when merge policy
2937  /// parameters have changed.
2938  ///
2939  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
2940  /// you should immediately close the writer. See <a
2941  /// href="#OOME">above</a> for details.<p/>
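 /// <p/>For example, after changing merge parameters on a long-lived writer
 /// (a sketch; the value shown is illustrative only):<p/>
 /// <code>
 /// writer.MergeFactor = 5; // tighter merging from now on
 /// writer.MaybeMerge();    // let the merge policy act on the new settings
 /// </code>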
2942  /// </summary>
2943  public void MaybeMerge()
2944  {
2945  MaybeMerge(false);
2946  }
2947 
2948  private void MaybeMerge(bool optimize)
2949  {
2950  MaybeMerge(1, optimize);
2951  }
2952 
2953  private void MaybeMerge(int maxNumSegmentsOptimize, bool optimize)
2954  {
2955  UpdatePendingMerges(maxNumSegmentsOptimize, optimize);
2956  mergeScheduler.Merge(this);
2957  }
2958 
2959  private void UpdatePendingMerges(int maxNumSegmentsOptimize, bool optimize)
2960  {
2961  lock (this)
2962  {
2963  System.Diagnostics.Debug.Assert(!optimize || maxNumSegmentsOptimize > 0);
2964 
2965  if (stopMerges)
2966  {
2967  return;
2968  }
2969 
2970  // Do not start new merges if we've hit OOME
2971  if (hitOOM)
2972  {
2973  return ;
2974  }
2975 
2976  MergePolicy.MergeSpecification spec;
2977  if (optimize)
2978  {
2979  spec = mergePolicy.FindMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, segmentsToOptimize);
2980 
2981  if (spec != null)
2982  {
2983  int numMerges = spec.merges.Count;
2984  for (int i = 0; i < numMerges; i++)
2985  {
2986  MergePolicy.OneMerge merge = spec.merges[i];
2987  merge.optimize = true;
2988  merge.maxNumSegmentsOptimize = maxNumSegmentsOptimize;
2989  }
2990  }
2991  }
2992  else
2993  {
2994  spec = mergePolicy.FindMerges(segmentInfos);
2995  }
2996 
2997  if (spec != null)
2998  {
2999  int numMerges = spec.merges.Count;
3000  for (int i = 0; i < numMerges; i++)
3001  RegisterMerge(spec.merges[i]);
3002  }
3003  }
3004  }
3005 
3006  /// <summary>Expert: the <see cref="MergeScheduler" /> calls this method
3007  /// to retrieve the next merge requested by the
3008  /// MergePolicy
3009  /// </summary>
3010  internal virtual MergePolicy.OneMerge GetNextMerge()
3011  {
3012  lock (this)
3013  {
3014  if (pendingMerges.Count == 0)
3015  return null;
3016  else
3017  {
3018  // Advance the merge from pending to running
3019  MergePolicy.OneMerge merge = pendingMerges.First.Value;
3020  pendingMerges.RemoveFirst();
3021  runningMerges.Add(merge);
3022  return merge;
3023  }
3024  }
3025  }
3026 
3027  /// <summary>Like getNextMerge() except only returns a merge if it's
3028  /// external.
3029  /// </summary>
3030  private MergePolicy.OneMerge GetNextExternalMerge()
3031  {
3032  lock (this)
3033  {
3034  if (pendingMerges.Count == 0)
3035  return null;
3036  else
3037  {
3038  var it = pendingMerges.GetEnumerator();
3039  while (it.MoveNext())
3040  {
3041  MergePolicy.OneMerge merge = it.Current;
3042  if (merge.isExternal)
3043  {
3044  // Advance the merge from pending to running
3045  pendingMerges.Remove(merge); // {{Aroush-2.9}} From Mike Garski: this is an O(n) op... is that an issue?
3046  runningMerges.Add(merge);
3047  return merge;
3048  }
3049  }
3050 
3051  // All existing merges do not involve external segments
3052  return null;
3053  }
3054  }
3055  }
3056 
3057  /*
3058  * Begin a transaction. During a transaction, any segment
3059  * merges that happen (or ram segments flushed) will not
3060  * write a new segments file and will not remove any files
3061  * that were present at the start of the transaction. You
3062  * must make a matched (try/finally) call to
3063  * commitTransaction() or rollbackTransaction() to finish
3064  * the transaction.
3065  *
3066  * Note that buffered documents and delete terms are not handled
3067  * within the transactions, so they must be flushed before the
3068  * transaction is started.
3069  */
3070  private void StartTransaction(bool haveReadLock)
3071  {
3072  lock (this)
3073  {
3074 
3075  bool success = false;
3076  try
3077  {
3078  if (infoStream != null)
3079  Message("now start transaction");
3080 
3081  System.Diagnostics.Debug.Assert(docWriter.GetNumBufferedDeleteTerms() == 0 ,
3082  "calling startTransaction with buffered delete terms not supported: numBufferedDeleteTerms=" + docWriter.GetNumBufferedDeleteTerms());
3083  System.Diagnostics.Debug.Assert(docWriter.NumDocsInRAM == 0 ,
3084  "calling startTransaction with buffered documents not supported: numDocsInRAM=" + docWriter.NumDocsInRAM);
3085 
3086  EnsureOpen();
3087 
3088  // If a transaction is trying to roll back (because
3089  // addIndexes hit an exception) then wait here until
3090  // that's done:
3091  lock (this)
3092  {
3093  while (stopMerges)
3094  DoWait();
3095  }
3096  success = true;
3097  }
3098  finally
3099  {
3100  // Release the write lock if our caller held it, on
3101  // hitting an exception
3102  if (!success && haveReadLock)
3103  ReleaseRead();
3104  }
3105 
3106  if (haveReadLock)
3107  {
3108  UpgradeReadToWrite();
3109  }
3110  else
3111  {
3112  AcquireWrite();
3113  }
3114 
3115  success = false;
3116  try
3117  {
3118  localRollbackSegmentInfos = (SegmentInfos) segmentInfos.Clone();
3119 
3120  System.Diagnostics.Debug.Assert(!HasExternalSegments());
3121 
3122  localFlushedDocCount = docWriter.GetFlushedDocCount();
3123 
3124  // Protect these files from deletion in case we need to roll back:
3125  deleter.IncRef(segmentInfos, false);
3126 
3127  success = true;
3128  }
3129  finally
3130  {
3131  if (!success)
3132  FinishAddIndexes();
3133  }
3134  }
3135  }
3136 
3137  /*
3138  * Rolls back the transaction and restores state to where
3139  * we were at the start.
3140  */
3141  private void RollbackTransaction()
3142  {
3143  lock (this)
3144  {
3145 
3146  if (infoStream != null)
3147  Message("now rollback transaction");
3148 
3149  if (docWriter != null)
3150  {
3151  docWriter.SetFlushedDocCount(localFlushedDocCount);
3152  }
3153 
3154  // Must finish merges before rolling back segmentInfos
3155  // so merges don't hit exceptions on trying to commit
3156  // themselves, don't get files deleted out from under
3157  // them, etc:
3158  FinishMerges(false);
3159 
3160  // Keep the same segmentInfos instance but replace all
3161  // of its SegmentInfo instances. This is so the next
3162  // attempt to commit using this instance of IndexWriter
3163  // will always write to a new generation ("write once").
3164  segmentInfos.Clear();
3165  segmentInfos.AddRange(localRollbackSegmentInfos);
3166  localRollbackSegmentInfos = null;
3167 
3168  // This must come after we rollback segmentInfos, so
3169  // that if a commit() kicks off it does not see the
3170  // segmentInfos with external segments
3171  FinishAddIndexes();
3172 
3173  // Ask deleter to locate unreferenced files we had
3174  // created & remove them:
3175  deleter.Checkpoint(segmentInfos, false);
3176 
3177  // Remove the incRef we did in startTransaction:
3178  deleter.DecRef(segmentInfos);
3179 
3180  // Also ask deleter to remove any newly created files
3181  // that were never incref'd; this "garbage" is created
3182  // when a merge kicks off but aborts part way through
3183  // before it had a chance to incRef the files it had
3184  // partially created
3185  deleter.Refresh();
3186 
3187  System.Threading.Monitor.PulseAll(this);
3188 
3189  System.Diagnostics.Debug.Assert(!HasExternalSegments());
3190  }
3191  }
3192 
3193  /*
3194  * Commits the transaction. This will write the new
3195  * segments file and remove any pending deletions we have
3196  * accumulated during the transaction
3197  */
3198  private void CommitTransaction()
3199  {
3200  lock (this)
3201  {
3202 
3203  if (infoStream != null)
3204  Message("now commit transaction");
3205 
3206  // Give deleter a chance to remove files now:
3207  Checkpoint();
3208 
3209  // Remove the incRef we did in startTransaction.
3210  deleter.DecRef(localRollbackSegmentInfos);
3211 
3212  localRollbackSegmentInfos = null;
3213 
3214  System.Diagnostics.Debug.Assert(!HasExternalSegments());
3215 
3216  FinishAddIndexes();
3217  }
3218  }
3219 
3220  /// <summary> Close the <c>IndexWriter</c> without committing
3221  /// any changes that have occurred since the last commit
3222  /// (or since it was opened, if commit hasn't been called).
3223  /// This removes any temporary files that had been created,
3224  /// after which the state of the index will be the same as
3225  /// it was when commit() was last called or when this
3226  /// writer was first opened. This also clears a previous
3227  /// call to <see cref="PrepareCommit()" />.
3228  /// </summary>
3229  /// <throws> IOException if there is a low-level IO error </throws>
3230  public virtual void Rollback()
3231  {
3232  EnsureOpen();
3233 
3234  // Ensure that only one thread actually gets to do the closing:
3235  if (ShouldClose())
3236  RollbackInternal();
3237  }
3238 
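  /* A minimal usage sketch of Rollback(): changes buffered since the last
   * commit are discarded and the writer is closed. The directory path,
   * analyzer and "id" field below are illustrative assumptions only.
   *
   *   Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("index"));
   *   IndexWriter writer = new IndexWriter(dir,
   *       new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30),
   *       IndexWriter.MaxFieldLength.UNLIMITED);
   *   Document doc = new Document();
   *   doc.Add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
   *   writer.AddDocument(doc);   // buffered, not yet committed
   *   writer.Rollback();         // discards the add and closes the writer;
   *                              // the on-disk index is unchanged
   */
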
3239  private void RollbackInternal()
3240  {
3241 
3242  bool success = false;
3243 
3244  if (infoStream != null)
3245  {
3246  Message("rollback");
3247  }
3248 
3249  docWriter.PauseAllThreads();
3250 
3251  try
3252  {
3253  FinishMerges(false);
3254 
3255  // Must pre-close these two, in case they increment
3256  // changeCount so that we can then set it to false
3257  // before calling closeInternal
3258  mergePolicy.Close();
3259  mergeScheduler.Close();
3260 
3261  lock (this)
3262  {
3263 
3264  if (pendingCommit != null)
3265  {
3266  pendingCommit.RollbackCommit(directory);
3267  deleter.DecRef(pendingCommit);
3268  pendingCommit = null;
3269  System.Threading.Monitor.PulseAll(this);
3270  }
3271 
3272  // Keep the same segmentInfos instance but replace all
3273  // of its SegmentInfo instances. This is so the next
3274  // attempt to commit using this instance of IndexWriter
3275  // will always write to a new generation ("write
3276  // once").
3277  segmentInfos.Clear();
3278  segmentInfos.AddRange(rollbackSegmentInfos);
3279 
3280  System.Diagnostics.Debug.Assert(!HasExternalSegments());
3281 
3282  docWriter.Abort();
3283 
3284  System.Diagnostics.Debug.Assert(TestPoint("rollback before checkpoint"));
3285 
3286  // Ask deleter to locate unreferenced files & remove
3287  // them:
3288  deleter.Checkpoint(segmentInfos, false);
3289  deleter.Refresh();
3290  }
3291 
3292  // Don't bother saving any changes in our segmentInfos
3293  readerPool.Clear(null);
3294 
3295  lastCommitChangeCount = changeCount;
3296 
3297  success = true;
3298  }
3299  catch (System.OutOfMemoryException oom)
3300  {
3301  HandleOOM(oom, "rollbackInternal");
3302  }
3303  finally
3304  {
3305  lock (this)
3306  {
3307  if (!success)
3308  {
3309  docWriter.ResumeAllThreads();
3310  closing = false;
3311  System.Threading.Monitor.PulseAll(this);
3312  if (infoStream != null)
3313  Message("hit exception during rollback");
3314  }
3315  }
3316  }
3317 
3318  CloseInternal(false);
3319  }
3320 
3321  /// <summary> Delete all documents in the index.
3322  ///
3323  /// <p/>This method will drop all buffered documents and will
3324  /// remove all segments from the index. This change will not be
3325  /// visible until a <see cref="Commit()" /> has been called. This method
3326  /// can be rolled back using <see cref="Rollback()" />.<p/>
3327  ///
3328  /// <p/>NOTE: this method is much faster than using deleteDocuments( new MatchAllDocsQuery() ).<p/>
3329  ///
3330  /// <p/>NOTE: this method will forcefully abort all merges
3331  /// in progress. If other threads are running <see cref="Optimize()" />
3332  /// or any of the addIndexes methods, they
3333  /// will receive <see cref="Index.MergePolicy.MergeAbortedException" />s.
3334  /// </summary>
3335  public virtual void DeleteAll()
3336  {
3337  lock (this)
3338  {
3339  docWriter.PauseAllThreads();
3340  try
3341  {
3342 
3343  // Abort any running merges
3344  FinishMerges(false);
3345 
3346  // Remove any buffered docs
3347  docWriter.Abort();
3348  docWriter.SetFlushedDocCount(0);
3349 
3350  // Remove all segments
3351  segmentInfos.Clear();
3352 
3353  // Ask deleter to locate unreferenced files & remove them:
3354  deleter.Checkpoint(segmentInfos, false);
3355  deleter.Refresh();
3356 
3357  // Don't bother saving any changes in our segmentInfos
3358  readerPool.Clear(null);
3359 
3360  // Mark that the index has changed
3361  ++changeCount;
3362  }
3363  catch (System.OutOfMemoryException oom)
3364  {
3365  HandleOOM(oom, "deleteAll");
3366  }
3367  finally
3368  {
3369  docWriter.ResumeAllThreads();
3370  if (infoStream != null)
3371  {
3372  Message("hit exception during deleteAll");
3373  }
3374  }
3375  }
3376  }
3377 
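  /* A minimal sketch of DeleteAll(): the wipe is buffered like any other
   * change, so it only becomes visible after Commit() and can be undone with
   * Rollback(). Assumes an already-open writer named "writer".
   *
   *   writer.DeleteAll();     // drops buffered docs and all segments
   *   // ... optionally add fresh documents here ...
   *   writer.Commit();        // make the empty (or rebuilt) index visible
   *   // or: writer.Rollback(); to restore the index to its last commit
   */
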
3378  private void FinishMerges(bool waitForMerges)
3379  {
3380  lock (this)
3381  {
3382  if (!waitForMerges)
3383  {
3384 
3385  stopMerges = true;
3386 
3387  // Abort all pending & running merges:
3388  foreach(MergePolicy.OneMerge merge in pendingMerges)
3389  {
3390  if (infoStream != null)
3391  Message("now abort pending merge " + merge.SegString(directory));
3392  merge.Abort();
3393  MergeFinish(merge);
3394  }
3395  pendingMerges.Clear();
3396 
3397  foreach(MergePolicy.OneMerge merge in runningMerges)
3398  {
3399  if (infoStream != null)
3400  Message("now abort running merge " + merge.SegString(directory));
3401  merge.Abort();
3402  }
3403 
3404  // Ensure any running addIndexes finishes. It's fine
3405  // if a new one attempts to start because its merges
3406  // will quickly see that stopMerges == true and abort.
3407  AcquireRead();
3408  ReleaseRead();
3409 
3410  // These merges periodically check whether they have
3411  // been aborted, and stop if so. We wait here to make
3412  // sure they all stop. It should not take very long
3413  // because the merge threads periodically check if
3414  // they are aborted.
3415  while (runningMerges.Count > 0)
3416  {
3417  if (infoStream != null)
3418  Message("now wait for " + runningMerges.Count + " running merge to abort");
3419  DoWait();
3420  }
3421 
3422  stopMerges = false;
3423  System.Threading.Monitor.PulseAll(this);
3424 
3425  System.Diagnostics.Debug.Assert(0 == mergingSegments.Count);
3426 
3427  if (infoStream != null)
3428  Message("all running merges have aborted");
3429  }
3430  else
3431  {
3432  // waitForMerges() will ensure any running addIndexes finishes.
3433  // It's fine if a new one attempts to start, because (from
3434  // our caller above) it will see that we are in the
3435  // process of closing, and will throw an
3436  // AlreadyClosedException.
3437  WaitForMerges();
3438  }
3439  }
3440  }
3441 
3442  /// <summary> Wait for any currently outstanding merges to finish.
3443  ///
3444  /// <p/>It is guaranteed that any merges started prior to calling this method
3445  /// will have completed once this method completes.<p/>
3446  /// </summary>
3447  public virtual void WaitForMerges()
3448  {
3449  lock (this)
3450  {
3451  // Ensure any running addIndexes finishes.
3452  AcquireRead();
3453  ReleaseRead();
3454 
3455  while (pendingMerges.Count > 0 || runningMerges.Count > 0)
3456  {
3457  DoWait();
3458  }
3459 
3460  // sanity check
3461  System.Diagnostics.Debug.Assert(0 == mergingSegments.Count);
3462  }
3463  }
3464 
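  /* A small sketch of how WaitForMerges() is typically used: kick off work
   * that may schedule background merges, then block until every merge that
   * was pending or running at the time of the call has finished. The
   * Optimize(false) call is just one illustrative way to start merges.
   *
   *   writer.Optimize(false);   // request optimize but do not wait
   *   // ... do other work ...
   *   writer.WaitForMerges();   // all merges started above have completed
   */
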
3465  /*
3466  * Called whenever the SegmentInfos has been updated and
3467  * the index files referenced exist (correctly) in the
3468  * index directory.
3469  */
3470  private void Checkpoint()
3471  {
3472  lock (this)
3473  {
3474  changeCount++;
3475  deleter.Checkpoint(segmentInfos, false);
3476  }
3477  }
3478 
3479  private void FinishAddIndexes()
3480  {
3481  ReleaseWrite();
3482  }
3483 
3484  private void BlockAddIndexes(bool includePendingClose)
3485  {
3486 
3487  AcquireRead();
3488 
3489  bool success = false;
3490  try
3491  {
3492 
3493  // Make sure we are still open since we could have
3494  // waited quite a while for last addIndexes to finish
3495  EnsureOpen(includePendingClose);
3496  success = true;
3497  }
3498  finally
3499  {
3500  if (!success)
3501  ReleaseRead();
3502  }
3503  }
3504 
3505  private void ResumeAddIndexes()
3506  {
3507  ReleaseRead();
3508  }
3509 
3510  private void ResetMergeExceptions()
3511  {
3512  lock (this)
3513  {
3514  mergeExceptions = new List<MergePolicy.OneMerge>();
3515  mergeGen++;
3516  }
3517  }
3518 
3519  private void NoDupDirs(Directory[] dirs)
3520  {
3521  HashSet<Directory> dups = new HashSet<Directory>();
3522  for (int i = 0; i < dirs.Length; i++)
3523  {
3524  if (dups.Contains(dirs[i]))
3525  {
3526  throw new System.ArgumentException("Directory " + dirs[i] + " appears more than once");
3527  }
3528  if (dirs[i] == directory)
3529  throw new System.ArgumentException("Cannot add directory to itself");
3530  dups.Add(dirs[i]);
3531  }
3532  }
3533 
3534  /// <summary> Merges all segments from an array of indexes into this
3535  /// index.
3536  ///
3537  /// <p/>This may be used to parallelize batch indexing. A large document
3538  /// collection can be broken into sub-collections. Each sub-collection can be
3539  /// indexed in parallel, on a different thread, process or machine. The
3540  /// complete index can then be created by merging sub-collection indexes
3541  /// with this method.
3542  ///
3543  /// <p/><b>NOTE:</b> the index in each Directory must not be
3544  /// changed (opened by a writer) while this method is
3545  /// running. This method does not acquire a write lock in
3546  /// each input Directory, so it is up to the caller to
3547  /// enforce this.
3548  ///
3549  /// <p/><b>NOTE:</b> while this is running, any attempts to
3550  /// add or delete documents (with another thread) will be
3551  /// paused until this method completes.
3552  ///
3553  /// <p/>This method is transactional in how Exceptions are
3554  /// handled: it does not commit a new segments_N file until
3555  /// all indexes are added. This means if an Exception
3556  /// occurs (for example disk full), then either no indexes
3557  /// will have been added or they all will have been.<p/>
3558  ///
3559  /// <p/>Note that this requires temporary free space in the
3560  /// Directory up to 2X the sum of all input indexes
3561  /// (including the starting index). If readers/searchers
3562  /// are open against the starting index, then temporary
3563  /// free space required will be higher by the size of the
3564  /// starting index (see <see cref="Optimize()" /> for details).
3565  /// <p/>
3566  ///
3567  /// <p/>Once this completes, the final size of the index
3568  /// will be less than the sum of all input index sizes
3569  /// (including the starting index). It could be quite a
3570  /// bit smaller (if there were many pending deletes) or
3571  /// just slightly smaller.<p/>
3572  ///
3573  /// <p/>
3574  /// This requires this index not be among those to be added.
3575  ///
3576  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
3577  /// you should immediately close the writer. See <a
3578  /// href="#OOME">above</a> for details.<p/>
3579  ///
3580  /// </summary>
3581  /// <throws> CorruptIndexException if the index is corrupt </throws>
3582  /// <throws> IOException if there is a low-level IO error </throws>
3583  public virtual void AddIndexesNoOptimize(params Directory[] dirs)
3584  {
3585 
3586  EnsureOpen();
3587 
3588  NoDupDirs(dirs);
3589 
3590  // Do not allow add docs or deletes while we are running:
3591  docWriter.PauseAllThreads();
3592 
3593  try
3594  {
3595  if (infoStream != null)
3596  Message("flush at addIndexesNoOptimize");
3597  Flush(true, false, true);
3598 
3599  bool success = false;
3600 
3601  StartTransaction(false);
3602 
3603  try
3604  {
3605 
3606  int docCount = 0;
3607  lock (this)
3608  {
3609  EnsureOpen();
3610 
3611  for (int i = 0; i < dirs.Length; i++)
3612  {
3613  if (directory == dirs[i])
3614  {
3615  // cannot add this index: segments may be deleted in merge before added
3616  throw new System.ArgumentException("Cannot add this index to itself");
3617  }
3618 
3619  SegmentInfos sis = new SegmentInfos(); // read infos from dir
3620  sis.Read(dirs[i]);
3621  for (int j = 0; j < sis.Count; j++)
3622  {
3623  SegmentInfo info = sis.Info(j);
3624  System.Diagnostics.Debug.Assert(!segmentInfos.Contains(info), "dup info dir=" + info.dir + " name=" + info.name);
3625  docCount += info.docCount;
3626  segmentInfos.Add(info); // add each info
3627  }
3628  }
3629  }
3630 
3631  // Notify DocumentsWriter that the flushed count just increased
3632  docWriter.UpdateFlushedDocCount(docCount);
3633 
3634  MaybeMerge();
3635 
3636  EnsureOpen();
3637 
3638  // If after merging there remain segments in the index
3639  // that are in a different directory, just copy these
3640  // over into our index. This is necessary (before
3641  // finishing the transaction) to avoid leaving the
3642  // index in an unusable (inconsistent) state.
3643  ResolveExternalSegments();
3644 
3645  EnsureOpen();
3646 
3647  success = true;
3648  }
3649  finally
3650  {
3651  if (success)
3652  {
3653  CommitTransaction();
3654  }
3655  else
3656  {
3657  RollbackTransaction();
3658  }
3659  }
3660  }
3661  catch (System.OutOfMemoryException oom)
3662  {
3663  HandleOOM(oom, "addIndexesNoOptimize");
3664  }
3665  finally
3666  {
3667  if (docWriter != null)
3668  {
3669  docWriter.ResumeAllThreads();
3670  }
3671  }
3672  }
3673 
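  /* A sketch of the parallelized batch-indexing pattern described above:
   * each sub-collection is indexed into its own Directory (possibly on
   * another machine), and the partial indexes are then merged into this
   * writer. The "part0"/"part1" paths are illustrative assumptions.
   *
   *   Directory[] partDirs = new Directory[]
   *   {
   *       FSDirectory.Open(new System.IO.DirectoryInfo("part0")),
   *       FSDirectory.Open(new System.IO.DirectoryInfo("part1"))
   *   };
   *   // no IndexWriter may be open on partDirs while this runs:
   *   writer.AddIndexesNoOptimize(partDirs);
   *   writer.Commit();   // transactional: either all parts are added or none
   */
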
3674  private bool HasExternalSegments()
3675  {
3676  return segmentInfos.HasExternalSegments(directory);
3677  }
3678 
3679  /* If any of our segments are using a directory != ours
3680  * then we have to either copy them over one by one, merge
3681  * them (if merge policy has chosen to) or wait until
3682  * currently running merges (in the background) complete.
3683  * We don't return until the SegmentInfos has no more
3684  * external segments. Currently this is only used by
3685  * addIndexesNoOptimize(). */
3686  private void ResolveExternalSegments()
3687  {
3688 
3689  bool any = false;
3690 
3691  bool done = false;
3692 
3693  while (!done)
3694  {
3695  SegmentInfo info = null;
3696  MergePolicy.OneMerge merge = null;
3697  lock (this)
3698  {
3699 
3700  if (stopMerges)
3701  throw new MergePolicy.MergeAbortedException("rollback() was called or addIndexes* hit an unhandled exception");
3702 
3703  int numSegments = segmentInfos.Count;
3704 
3705  done = true;
3706  for (int i = 0; i < numSegments; i++)
3707  {
3708  info = segmentInfos.Info(i);
3709  if (info.dir != directory)
3710  {
3711  done = false;
3712  MergePolicy.OneMerge newMerge = new MergePolicy.OneMerge(segmentInfos.Range(i, 1 + i), mergePolicy is LogMergePolicy && UseCompoundFile);
3713 
3714  // Returns true if no running merge conflicts
3715  // with this one (and, records this merge as
3716  // pending), ie, this segment is not currently
3717  // being merged:
3718  if (RegisterMerge(newMerge))
3719  {
3720  merge = newMerge;
3721 
3722  // If this segment is not currently being
3723  // merged, then advance it to running & run
3724  // the merge ourself (below):
3725  pendingMerges.Remove(merge); // {{Aroush-2.9}} From Mike Garski: this is an O(n) op... is that an issue?
3726  runningMerges.Add(merge);
3727  break;
3728  }
3729  }
3730  }
3731 
3732  if (!done && merge == null)
3733  // We are not yet done (external segments still
3734  // exist in segmentInfos), yet, all such segments
3735  // are currently "covered" by a pending or running
3736  // merge. We now try to grab any pending merge
3737  // that involves external segments:
3738  merge = GetNextExternalMerge();
3739 
3740  if (!done && merge == null)
3741  // We are not yet done, and, all external segments
3742  // fall under merges that the merge scheduler is
3743  // currently running. So, we now wait and check
3744  // back to see if the merge has completed.
3745  DoWait();
3746  }
3747 
3748  if (merge != null)
3749  {
3750  any = true;
3751  Merge(merge);
3752  }
3753  }
3754 
3755  if (any)
3756  // Sometimes, on copying an external segment over,
3757  // more merges may become necessary:
3758  mergeScheduler.Merge(this);
3759  }
3760 
3761  /// <summary>Merges the provided indexes into this index.
3762  /// <p/>After this completes, the index is optimized. <p/>
3763  /// <p/>The provided IndexReaders are not closed.<p/>
3764  ///
3765  /// <p/><b>NOTE:</b> while this is running, any attempts to
3766  /// add or delete documents (with another thread) will be
3767  /// paused until this method completes.
3768  ///
3769  /// <p/>See <see cref="AddIndexesNoOptimize(Directory[])" /> for
3770  /// details on transactional semantics, temporary free
3771  /// space required in the Directory, and non-CFS segments
3772  /// on an Exception.<p/>
3773  ///
3774  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
3775  /// you should immediately close the writer. See <a
3776  /// href="#OOME">above</a> for details.<p/>
3777  ///
3778  /// </summary>
3779  /// <throws> CorruptIndexException if the index is corrupt </throws>
3780  /// <throws> IOException if there is a low-level IO error </throws>
3781  public virtual void AddIndexes(params IndexReader[] readers)
3782  {
3783 
3784  EnsureOpen();
3785 
3786  // Do not allow add docs or deletes while we are running:
3787  docWriter.PauseAllThreads();
3788 
3789  // We must pre-acquire a read lock here (and upgrade to
3790  // write lock in startTransaction below) so that no
3791  // other addIndexes is allowed to start up after we have
3792  // flushed & optimized but before we then start our
3793  // transaction. This is because the merging below
3794  // requires that only one segment is present in the
3795  // index:
3796  AcquireRead();
3797 
3798  try
3799  {
3800 
3801  SegmentInfo info = null;
3802  System.String mergedName = null;
3803  SegmentMerger merger = null;
3804 
3805  bool success = false;
3806 
3807  try
3808  {
3809  Flush(true, false, true);
3810  Optimize(); // start with zero or 1 seg
3811  success = true;
3812  }
3813  finally
3814  {
3815  // Take care to release the read lock if we hit an
3816  // exception before starting the transaction
3817  if (!success)
3818  ReleaseRead();
3819  }
3820 
3821  // true means we already have a read lock; if this
3822  // call hits an exception it will release the write
3823  // lock:
3824  StartTransaction(true);
3825 
3826  try
3827  {
3828  mergedName = NewSegmentName();
3829  merger = new SegmentMerger(this, mergedName, null);
3830 
3831  SegmentReader sReader = null;
3832  lock (this)
3833  {
3834  if (segmentInfos.Count == 1)
3835  {
3836  // add existing index, if any
3837  sReader = readerPool.Get(segmentInfos.Info(0), true, BufferedIndexInput.BUFFER_SIZE, - 1);
3838  }
3839  }
3840 
3841  success = false;
3842 
3843  try
3844  {
3845  if (sReader != null)
3846  merger.Add(sReader);
3847 
3848  for (int i = 0; i < readers.Length; i++)
3849  // add new indexes
3850  merger.Add(readers[i]);
3851 
3852  int docCount = merger.Merge(); // merge 'em
3853 
3854  lock (this)
3855  {
3856  segmentInfos.Clear(); // pop old infos & add new
3857  info = new SegmentInfo(mergedName, docCount, directory, false, true, - 1, null, false, merger.HasProx());
3858  SetDiagnostics(info, "addIndexes(params IndexReader[])");
3859  segmentInfos.Add(info);
3860  }
3861 
3862  // Notify DocumentsWriter that the flushed count just increased
3863  docWriter.UpdateFlushedDocCount(docCount);
3864 
3865  success = true;
3866  }
3867  finally
3868  {
3869  if (sReader != null)
3870  {
3871  readerPool.Release(sReader);
3872  }
3873  }
3874  }
3875  finally
3876  {
3877  if (!success)
3878  {
3879  if (infoStream != null)
3880  Message("hit exception in addIndexes during merge");
3881  RollbackTransaction();
3882  }
3883  else
3884  {
3885  CommitTransaction();
3886  }
3887  }
3888 
3889  if (mergePolicy is LogMergePolicy && UseCompoundFile)
3890  {
3891 
3892  IList<string> files = null;
3893 
3894  lock (this)
3895  {
3896  // Must incRef our files so that if another thread
3897  // is running merge/optimize, it doesn't delete our
3898  // segment's files before we have a chance to
3899  // finish making the compound file.
3900  if (segmentInfos.Contains(info))
3901  {
3902  files = info.Files();
3903  deleter.IncRef(files);
3904  }
3905  }
3906 
3907  if (files != null)
3908  {
3909 
3910  success = false;
3911 
3912  StartTransaction(false);
3913 
3914  try
3915  {
3916  merger.CreateCompoundFile(mergedName + ".cfs");
3917  lock (this)
3918  {
3919  info.SetUseCompoundFile(true);
3920  }
3921 
3922  success = true;
3923  }
3924  finally
3925  {
3926  lock (this)
3927  {
3928  deleter.DecRef(files);
3929  }
3930 
3931  if (!success)
3932  {
3933  if (infoStream != null)
3934  Message("hit exception building compound file in addIndexes during merge");
3935 
3936  RollbackTransaction();
3937  }
3938  else
3939  {
3940  CommitTransaction();
3941  }
3942  }
3943  }
3944  }
3945  }
3946  catch (System.OutOfMemoryException oom)
3947  {
3948  HandleOOM(oom, "addIndexes(params IndexReader[])");
3949  }
3950  finally
3951  {
3952  if (docWriter != null)
3953  {
3954  docWriter.ResumeAllThreads();
3955  }
3956  }
3957  }
3958 
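  /* A sketch of AddIndexes(params IndexReader[]): the source indexes are
   * merged in through readers, which are left open for the caller to
   * dispose, and the resulting index ends up optimized to a single segment.
   * "otherDir" is an illustrative assumption.
   *
   *   IndexReader reader = IndexReader.Open(otherDir, true);   // read-only
   *   try
   *   {
   *       writer.AddIndexes(reader);
   *       writer.Commit();
   *   }
   *   finally
   *   {
   *       reader.Dispose();   // AddIndexes does not close the readers
   *   }
   */
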
3959  ///<summary>
3960  /// A hook for extending classes to execute operations after pending added and
3961  /// deleted documents have been flushed to the Directory but before the change
3962  /// is committed (new segments_N file written).
3963  ///</summary>
3964  protected virtual void DoAfterFlush()
3965  {
3966  }
3967 
3968  ///<summary>
3969  /// A hook for extending classes to execute operations before pending added and
3970  /// deleted documents are flushed to the Directory.
3971  ///</summary>
3972  protected virtual void DoBeforeFlush()
3973  {
3974  }
3975 
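  /* A sketch of how an extending class might use the two hooks above; the
   * subclass name and the logging calls are illustrative assumptions only.
   *
   *   public class AuditingIndexWriter : IndexWriter
   *   {
   *       public AuditingIndexWriter(Directory d, Analyzer a, MaxFieldLength mfl)
   *           : base(d, a, mfl)
   *       {
   *       }
   *
   *       protected override void DoBeforeFlush()
   *       {
   *           System.Console.WriteLine("about to flush buffered docs/deletes");
   *       }
   *
   *       protected override void DoAfterFlush()
   *       {
   *           System.Console.WriteLine("flushed to the Directory, not yet committed");
   *       }
   *   }
   */
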
3976  /// <summary>Expert: prepare for commit.
3977  ///
3978  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
3979  /// you should immediately close the writer. See <a
3980  /// href="#OOME">above</a> for details.<p/>
3981  ///
3982  /// </summary>
3983  /// <seealso cref="PrepareCommit(IDictionary{string,string})">
3984  /// </seealso>
3985  public void PrepareCommit()
3986  {
3987  EnsureOpen();
3988  PrepareCommit(null);
3989  }
3990 
3991  /// <summary><p/>Expert: prepare for commit, specifying
3992  /// commitUserData Map (String -> String). This does the
3993  /// first phase of 2-phase commit. This method does all steps
3994  /// necessary to commit changes since this writer was
3995  /// opened: flushes pending added and deleted docs, syncs
3996  /// the index files, writes most of next segments_N file.
3997  /// After calling this you must call either <see cref="Commit()" />
3998  /// to finish the commit, or <see cref="Rollback()" />
3999  /// to revert the commit and undo all changes
4000  /// done since the writer was opened.<p/>
4001  ///
4002  /// You can also just call <see cref="Commit(IDictionary{string,string})" /> directly
4003  /// without prepareCommit first in which case that method
4004  /// will internally call prepareCommit.
4005  ///
4006  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
4007  /// you should immediately close the writer. See <a
4008  /// href="#OOME">above</a> for details.<p/>
4009  ///
4010  /// </summary>
4011  /// <param name="commitUserData">Opaque Map (String->String)
4012  /// that's recorded into the segments file in the index,
4013  /// and retrievable by <see cref="IndexReader.GetCommitUserData" />.
4014  /// Note that when IndexWriter commits itself, during <see cref="Close()" />, the
4015  /// commitUserData is unchanged (just carried over from
4016  /// the prior commit). If this is null then the previous
4017  /// commitUserData is kept. Also, the commitUserData will
4018  /// only "stick" if there are actually changes in the
4019  /// index to commit.
4020  /// </param>
4021  private void PrepareCommit(IDictionary<string, string> commitUserData)
4022  {
4023  if (hitOOM)
4024  {
4025  throw new System.SystemException("this writer hit an OutOfMemoryError; cannot commit");
4026  }
4027 
4028  if (pendingCommit != null)
4029  throw new System.SystemException("prepareCommit was already called with no corresponding call to commit");
4030 
4031  if (infoStream != null)
4032  Message("prepareCommit: flush");
4033 
4034  Flush(true, true, true);
4035 
4036  StartCommit(0, commitUserData);
4037  }
4038 
4039  // Used only by commit, below; lock order is commitLock -> IW
4040  private Object commitLock = new Object();
4041 
4042  private void Commit(long sizeInBytes)
4043  {
4044  lock(commitLock) {
4045  StartCommit(sizeInBytes, null);
4046  FinishCommit();
4047  }
4048  }
4049 
4050  /// <summary> <p/>Commits all pending changes (added &amp; deleted
4051  /// documents, optimizations, segment merges, added
4052  /// indexes, etc.) to the index, and syncs all referenced
4053  /// index files, such that a reader will see the changes
4054  /// and the index updates will survive an OS or machine
4055  /// crash or power loss. Note that this does not wait for
4056  /// any running background merges to finish. This may be a
4057  /// costly operation, so you should test the cost in your
4058  /// application and do it only when really necessary.<p/>
4059  ///
4060  /// <p/> Note that this operation calls Directory.sync on
4061  /// the index files. That call should not return until the
4062  /// file contents &amp; metadata are on stable storage. For
4063  /// FSDirectory, this calls the OS's fsync. But, beware:
4064  /// some hardware devices may in fact cache writes even
4065  /// during fsync, and return before the bits are actually
4066  /// on stable storage, to give the appearance of faster
4067  /// performance. If you have such a device, and it does
4068  /// not have a battery backup (for example) then on power
4069  /// loss it may still lose data. Lucene cannot guarantee
4070  /// consistency on such devices. <p/>
4071  ///
4072  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
4073  /// you should immediately close the writer. See <a
4074  /// href="#OOME">above</a> for details.<p/>
4075  ///
4076  /// </summary>
4077  /// <seealso cref="PrepareCommit()">
4078  /// </seealso>
4079  /// <seealso cref="Commit(IDictionary{string,string})">
4080  /// </seealso>
4081  public void Commit()
4082  {
4083  Commit(null);
4084  }
4085 
4086  /// <summary>Commits all changes to the index, specifying a
4087  /// commitUserData Map (String -> String). This just
4088  /// calls <see cref="PrepareCommit(IDictionary{string, string})" /> (if you didn't
4089  /// already call it) and then <see cref="FinishCommit" />.
4090  ///
4091  /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
4092  /// you should immediately close the writer. See <a
4093  /// href="#OOME">above</a> for details.<p/>
4094  /// </summary>
4095  public void Commit(IDictionary<string, string> commitUserData)
4096  {
4097  EnsureOpen();
4098 
4099  if (infoStream != null)
4100  {
4101  Message("commit: start");
4102  }
4103 
4104  lock (commitLock)
4105  {
4106  if (infoStream != null)
4107  {
4108  Message("commit: enter lock");
4109  }
4110  if (pendingCommit == null)
4111  {
4112  if (infoStream != null)
4113  {
4114  Message("commit: now prepare");
4115  }
4116  PrepareCommit(commitUserData);
4117  }
4118  else if (infoStream != null)
4119  {
4120  Message("commit: already prepared");
4121  }
4122 
4123  FinishCommit();
4124  }
4125  }
4126 
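  /* A sketch of the two commit styles described above. The commitUserData
   * map and its "txn" key are illustrative assumptions only.
   *
   *   // one-step commit, recording opaque metadata in the segments file:
   *   writer.Commit(new Dictionary<string, string> { { "txn", "1234" } });
   *
   *   // two-phase commit: PrepareCommit() does the expensive work up front,
   *   // Commit() then finishes (or Rollback() abandons) the prepared commit:
   *   writer.PrepareCommit();
   *   try
   *   {
   *       writer.Commit();
   *   }
   *   catch (Exception)
   *   {
   *       writer.Rollback();
   *       throw;
   *   }
   */
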
4127  private void FinishCommit()
4128  {
4129  lock (this)
4130  {
4131 
4132  if (pendingCommit != null)
4133  {
4134  try
4135  {
4136  if (infoStream != null)
4137  Message("commit: pendingCommit != null");
4138  pendingCommit.FinishCommit(directory);
4139  if (infoStream != null)
4140  Message("commit: wrote segments file \"" + pendingCommit.GetCurrentSegmentFileName() + "\"");
4141  lastCommitChangeCount = pendingCommitChangeCount;
4142  segmentInfos.UpdateGeneration(pendingCommit);
4143  segmentInfos.UserData = pendingCommit.UserData;
4144  SetRollbackSegmentInfos(pendingCommit);
4145  deleter.Checkpoint(pendingCommit, true);
4146  }
4147  finally
4148  {
4149  deleter.DecRef(pendingCommit);
4150  pendingCommit = null;
4151  System.Threading.Monitor.PulseAll(this);
4152  }
4153  }
4154  else if (infoStream != null)
4155  {
4156  Message("commit: pendingCommit == null; skip");
4157  }
4158 
4159  if (infoStream != null)
4160  {
4161  Message("commit: done");
4162  }
4163  }
4164  }
4165 
4166  /// <summary> Flush all in-memory buffered updates (adds and deletes)
4167  /// to the Directory.
4168  /// </summary>
4169  /// <param name="triggerMerge">if true, we may merge segments (if
4170  /// deletes or docs were flushed) if necessary
4171  /// </param>
4172  /// <param name="flushDocStores">if false we are allowed to keep
4173  /// doc stores open to share with the next segment
4174  /// </param>
4175  /// <param name="flushDeletes">whether pending deletes should also
4176  /// be flushed
4177  /// </param>
4178  public /*protected internal*/ void Flush(bool triggerMerge, bool flushDocStores, bool flushDeletes)
4179  {
4180  // We can be called during close, when closing==true, so we must pass false to ensureOpen:
4181  EnsureOpen(false);
4182  if (DoFlush(flushDocStores, flushDeletes) && triggerMerge)
4183  MaybeMerge();
4184  }
4185 
4186  // TODO: this method should not have to be entirely
4187  // synchronized, ie, merges should be allowed to commit
4188  // even while a flush is happening
4189  private bool DoFlush(bool flushDocStores, bool flushDeletes)
4190  {
4191  lock (this)
4192  {
4193  try
4194  {
4195  try
4196  {
4197  return DoFlushInternal(flushDocStores, flushDeletes);
4198  }
4199  finally
4200  {
4201  if (docWriter.DoBalanceRAM())
4202  {
4203  docWriter.BalanceRAM();
4204  }
4205  }
4206  }
4207  finally
4208  {
4209  docWriter.ClearFlushPending();
4210  }
4211  }
4212  }
4213 
4214  // TODO: this method should not have to be entirely
4215  // synchronized, ie, merges should be allowed to commit
4216  // even while a flush is happening
4217  private bool DoFlushInternal(bool flushDocStores, bool flushDeletes)
4218  {
4219  lock (this)
4220  {
4221  if (hitOOM)
4222  {
4223  throw new System.SystemException("this writer hit an OutOfMemoryError; cannot flush");
4224  }
4225 
4226  EnsureOpen(false);
4227 
4228  System.Diagnostics.Debug.Assert(TestPoint("startDoFlush"));
4229 
4230  DoBeforeFlush();
4231 
4232  flushCount++;
4233 
4234  // If we are flushing because too many deletes
4235  // accumulated, then we should apply the deletes to free
4236  // RAM:
4237  flushDeletes |= docWriter.DoApplyDeletes();
4238 
4239  // Make sure no threads are actively adding a document.
4240  // Returns true if docWriter is currently aborting, in
4241  // which case we skip flushing this segment
4242  if (infoStream != null)
4243  {
4244  Message("flush: now pause all indexing threads");
4245  }
4246  if (docWriter.PauseAllThreads())
4247  {
4248  docWriter.ResumeAllThreads();
4249  return false;
4250  }
4251 
4252  try
4253  {
4254 
4255  SegmentInfo newSegment = null;
4256 
4257  int numDocs = docWriter.NumDocsInRAM;
4258 
4259  // Always flush docs if there are any
4260  bool flushDocs = numDocs > 0;
4261 
4262  System.String docStoreSegment = docWriter.DocStoreSegment;
4263 
4264  System.Diagnostics.Debug.Assert(docStoreSegment != null || numDocs == 0, "dss=" + docStoreSegment + " numDocs=" + numDocs);
4265 
4266  if (docStoreSegment == null)
4267  flushDocStores = false;
4268 
4269  int docStoreOffset = docWriter.DocStoreOffset;
4270 
4271  bool docStoreIsCompoundFile = false;
4272 
4273  if (infoStream != null)
4274  {
4275  Message(" flush: segment=" + docWriter.Segment + " docStoreSegment=" + docWriter.DocStoreSegment + " docStoreOffset=" + docStoreOffset + " flushDocs=" + flushDocs + " flushDeletes=" + flushDeletes + " flushDocStores=" + flushDocStores + " numDocs=" + numDocs + " numBufDelTerms=" + docWriter.GetNumBufferedDeleteTerms());
4276  Message(" index before flush " + SegString());
4277  }
4278 
4279  // Check if the doc stores must be separately flushed
4280  // because other segments, besides the one we are about
4281  // to flush, reference it
4282  if (flushDocStores && (!flushDocs || !docWriter.Segment.Equals(docWriter.DocStoreSegment)))
4283  {
4284  // We must separately flush the doc store
4285  if (infoStream != null)
4286  Message(" flush shared docStore segment " + docStoreSegment);
4287 
4288  docStoreIsCompoundFile = FlushDocStores();
4289  flushDocStores = false;
4290  }
4291 
4292  System.String segment = docWriter.Segment;
4293 
4294  // If we are flushing docs, segment must not be null:
4295  System.Diagnostics.Debug.Assert(segment != null || !flushDocs);
4296 
4297  if (flushDocs)
4298  {
4299 
4300  bool success = false;
4301  int flushedDocCount;
4302 
4303  try
4304  {
4305  flushedDocCount = docWriter.Flush(flushDocStores);
4306  if (infoStream != null)
4307  {
4308  Message("flushedFiles=" + docWriter.GetFlushedFiles());
4309  }
4310  success = true;
4311  }
4312  finally
4313  {
4314  if (!success)
4315  {
4316  if (infoStream != null)
4317  Message("hit exception flushing segment " + segment);
4318  deleter.Refresh(segment);
4319  }
4320  }
4321 
4322  if (0 == docStoreOffset && flushDocStores)
4323  {
4324  // This means we are flushing private doc stores
4325  // with this segment, so it will not be shared
4326  // with other segments
4327  System.Diagnostics.Debug.Assert(docStoreSegment != null);
4328  System.Diagnostics.Debug.Assert(docStoreSegment.Equals(segment));
4329  docStoreOffset = - 1;
4330  docStoreIsCompoundFile = false;
4331  docStoreSegment = null;
4332  }
4333 
4334  // Create new SegmentInfo, but do not add to our
4335  // segmentInfos until deletes are flushed
4336  // successfully.
4337  newSegment = new SegmentInfo(segment, flushedDocCount, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, docWriter.HasProx());
4338  SetDiagnostics(newSegment, "flush");
4339  }
4340 
4341  docWriter.PushDeletes();
4342 
4343  if (flushDocs)
4344  {
4345  segmentInfos.Add(newSegment);
4346  Checkpoint();
4347  }
4348 
4349  if (flushDocs && mergePolicy.UseCompoundFile(segmentInfos, newSegment))
4350  {
4351  // Now build compound file
4352  bool success = false;
4353  try
4354  {
4355  docWriter.CreateCompoundFile(segment);
4356  success = true;
4357  }
4358  finally
4359  {
4360  if (!success)
4361  {
4362  if (infoStream != null)
4363  Message("hit exception creating compound file for newly flushed segment " + segment);
4364  deleter.DeleteFile(segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
4365  }
4366  }
4367 
4368  newSegment.SetUseCompoundFile(true);
4369  Checkpoint();
4370  }
4371 
4372  if (flushDeletes)
4373  {
4374  ApplyDeletes();
4375  }
4376 
4377  if (flushDocs)
4378  Checkpoint();
4379 
4380  DoAfterFlush();
4381 
4382  return flushDocs;
4383  }
4384  catch (System.OutOfMemoryException oom)
4385  {
4386  HandleOOM(oom, "doFlush");
4387  // never hit
4388  return false;
4389  }
4390  finally
4391  {
4392  docWriter.ResumeAllThreads();
4393  }
4394  }
4395  }
4396 
4397  /// <summary>Expert: Return the total size of all index files currently cached in memory.
4398  /// Useful for deciding when to flush based on RAM usage.
4399  /// </summary>
4400  public long RamSizeInBytes()
4401  {
4402  EnsureOpen();
4403  return docWriter.GetRAMUsed();
4404  }
4405 
4406  /// <summary>Expert: Return the number of documents currently
4407  /// buffered in RAM.
4408  /// </summary>
4409  public int NumRamDocs()
4410  {
4411  lock (this)
4412  {
4413  EnsureOpen();
4414  return docWriter.NumDocsInRAM;
4415  }
4416  }
4417 
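  /* A small sketch combining the two expert accessors above to watch the
   * in-memory buffer while adding documents; the 64 MB threshold and the
   * manual Commit() are illustrative assumptions (normally SetRAMBufferSizeMB
   * lets the writer flush automatically).
   *
   *   writer.AddDocument(doc);
   *   long bytes = writer.RamSizeInBytes();
   *   int bufferedDocs = writer.NumRamDocs();
   *   if (bytes > 64L * 1024 * 1024)
   *   {
   *       writer.Commit();   // flush and commit the buffered docs
   *   }
   */
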
4418  private int EnsureContiguousMerge(MergePolicy.OneMerge merge)
4419  {
4420 
4421  int first = segmentInfos.IndexOf(merge.segments.Info(0));
4422  if (first == - 1)
4423  throw new MergePolicy.MergeException("could not find segment " + merge.segments.Info(0).name + " in current index " + SegString(), directory);
4424 
4425  int numSegments = segmentInfos.Count;
4426 
4427  int numSegmentsToMerge = merge.segments.Count;
4428  for (int i = 0; i < numSegmentsToMerge; i++)
4429  {
4430  SegmentInfo info = merge.segments.Info(i);
4431 
4432  if (first + i >= numSegments || !segmentInfos.Info(first + i).Equals(info))
4433  {
4434  if (segmentInfos.IndexOf(info) == - 1)
4435  throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + SegString(), directory);
4436  else
4437  throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge.SegString(directory) + " vs " + SegString() + "), which IndexWriter (currently) cannot handle", directory);
4438  }
4439  }
4440 
4441  return first;
4442  }
4443 
4444  /// <summary>Carefully merges deletes for the segments we just
4445  /// merged. This is tricky because, although merging will
4446  /// clear all deletes (compacts the documents), new
4447  /// deletes may have been flushed to the segments since
4448  /// the merge was started. This method "carries over"
4449  /// such new deletes onto the newly merged segment, and
4450  /// saves the resulting deletes file (incrementing the
4451  /// delete generation for merge.info). If no deletes were
4452  /// flushed, no new deletes file is saved.
4453  /// </summary>
4454  private void CommitMergedDeletes(MergePolicy.OneMerge merge, SegmentReader mergeReader)
4455  {
4456  lock (this)
4457  {
4458 
4459  System.Diagnostics.Debug.Assert(TestPoint("startCommitMergeDeletes"));
4460 
4461  SegmentInfos sourceSegments = merge.segments;
4462 
4463  if (infoStream != null)
4464  Message("commitMergeDeletes " + merge.SegString(directory));
4465 
4466  // Carefully merge deletes that occurred after we
4467  // started merging:
4468  int docUpto = 0;
4469  int delCount = 0;
4470 
4471  for (int i = 0; i < sourceSegments.Count; i++)
4472  {
4473  SegmentInfo info = sourceSegments.Info(i);
4474  int docCount = info.docCount;
4475  SegmentReader previousReader = merge.readersClone[i];
4476  SegmentReader currentReader = merge.readers[i];
4477  if (previousReader.HasDeletions)
4478  {
4479 
4480  // There were deletes on this segment when the merge
4481  // started. The merge has collapsed away those
4482  // deletes, but, if new deletes were flushed since
4483  // the merge started, we must now carefully keep any
4484  // newly flushed deletes but mapping them to the new
4485  // docIDs.
4486 
4487  if (currentReader.NumDeletedDocs > previousReader.NumDeletedDocs)
4488  {
4489  // This means this segment has had new deletes
4490  // committed since we started the merge, so we
4491  // must merge them:
4492  for (int j = 0; j < docCount; j++)
4493  {
4494  if (previousReader.IsDeleted(j))
4495  {
4496  System.Diagnostics.Debug.Assert(currentReader.IsDeleted(j));
4497  }
4498  else
4499  {
4500  if (currentReader.IsDeleted(j))
4501  {
4502  mergeReader.DoDelete(docUpto);
4503  delCount++;
4504  }
4505  docUpto++;
4506  }
4507  }
4508  }
4509  else
4510  {
4511  docUpto += docCount - previousReader.NumDeletedDocs;
4512  }
4513  }
4514  else if (currentReader.HasDeletions)
4515  {
4516  // This segment had no deletes before but now it
4517  // does:
4518  for (int j = 0; j < docCount; j++)
4519  {
4520  if (currentReader.IsDeleted(j))
4521  {
4522  mergeReader.DoDelete(docUpto);
4523  delCount++;
4524  }
4525  docUpto++;
4526  }
4527  }
4528  // No deletes before or after
4529  else
4530  docUpto += info.docCount;
4531  }
4532 
4533  System.Diagnostics.Debug.Assert(mergeReader.NumDeletedDocs == delCount);
4534 
4535  mergeReader.hasChanges = delCount > 0;
4536  }
4537  }
4538 
4539  /* FIXME if we want to support non-contiguous segment merges */
4540  private bool CommitMerge(MergePolicy.OneMerge merge, SegmentMerger merger, int mergedDocCount, SegmentReader mergedReader)
4541  {
4542  lock (this)
4543  {
4544 
4545  System.Diagnostics.Debug.Assert(TestPoint("startCommitMerge"));
4546 
4547  if (hitOOM)
4548  {
4549  throw new System.SystemException("this writer hit an OutOfMemoryError; cannot complete merge");
4550  }
4551 
4552  if (infoStream != null)
4553  Message("commitMerge: " + merge.SegString(directory) + " index=" + SegString());
4554 
4555  System.Diagnostics.Debug.Assert(merge.registerDone);
4556 
4557  // If merge was explicitly aborted, or, if rollback() or
4558  // rollbackTransaction() had been called since our merge
4559  // started (which results in an unqualified
4560  // deleter.refresh() call that will remove any index
4561  // file that current segments does not reference), we
4562  // abort this merge
4563  if (merge.IsAborted())
4564  {
4565  if (infoStream != null)
4566  Message("commitMerge: skipping merge " + merge.SegString(directory) + ": it was aborted");
4567 
4568  return false;
4569  }
4570 
4571  int start = EnsureContiguousMerge(merge);
4572 
4573  CommitMergedDeletes(merge, mergedReader);
4574  docWriter.RemapDeletes(segmentInfos, merger.GetDocMaps(), merger.GetDelCounts(), merge, mergedDocCount);
4575 
4576  // If the doc store we are using has been closed and
4577  // is in now compound format (but wasn't when we
4578  // started), then we will switch to the compound
4579  // format as well:
4580  SetMergeDocStoreIsCompoundFile(merge);
4581 
4582  merge.info.HasProx = merger.HasProx();
4583 
4584  segmentInfos.RemoveRange(start, start + merge.segments.Count - start);
4585  System.Diagnostics.Debug.Assert(!segmentInfos.Contains(merge.info));
4586  segmentInfos.Insert(start, merge.info);
4587 
4588  CloseMergeReaders(merge, false);
4589 
4590  // Must note the change to segmentInfos so any commits
4591  // in-flight don't lose it:
4592  Checkpoint();
4593 
4594  // If the merged segments had pending changes, clear
4595  // them so that they don't bother writing them to
4596  // disk, updating SegmentInfo, etc.:
4597  readerPool.Clear(merge.segments);
4598 
4599  if (merge.optimize)
4600  {
4601  // cascade the optimize:
4602  segmentsToOptimize.Add(merge.info);
4603  }
4604  return true;
4605  }
4606  }
4607 
4608  private void HandleMergeException(System.Exception t, MergePolicy.OneMerge merge)
4609  {
4610 
4611  if (infoStream != null)
4612  {
4613  Message("handleMergeException: merge=" + merge.SegString(directory) + " exc=" + t);
4614  }
4615 
4616  // Set the exception on the merge, so if
4617  // optimize() is waiting on us it sees the root
4618  // cause exception:
4619  merge.SetException(t);
4620  AddMergeException(merge);
4621 
4622  if (t is MergePolicy.MergeAbortedException)
4623  {
4624  // We can ignore this exception (it happens when
4625  // close(false) or rollback is called), unless the
4626  // merge involves segments from external directories,
4627  // in which case we must throw it so, for example, the
4628  // rollbackTransaction code in addIndexes* is
4629  // executed.
4630  if (merge.isExternal)
4631  throw t;
4632  }
4633  else if (t is System.IO.IOException || t is System.SystemException || t is System.ApplicationException)
4634  {
4635  throw t;
4636  }
4637  else
4638  {
4639  // Should not get here
4640  System.Diagnostics.Debug.Fail("Exception is not expected type!");
4641  throw new System.SystemException(null, t);
4642  }
4643  }
4644 
4645  public void Merge_ForNUnit(MergePolicy.OneMerge merge)
4646  {
4647  Merge(merge);
4648  }
4649  /// <summary> Merges the indicated segments, replacing them in the stack with a
4650  /// single segment.
4651  /// </summary>
4652  internal void Merge(MergePolicy.OneMerge merge)
4653  {
4654 
4655  bool success = false;
4656 
4657  try
4658  {
4659  try
4660  {
4661  try
4662  {
4663  MergeInit(merge);
4664 
4665  if (infoStream != null)
4666  {
4667  Message("now merge\n merge=" + merge.SegString(directory) + "\n merge=" + merge + "\n index=" + SegString());
4668  }
4669 
4670  MergeMiddle(merge);
4671  MergeSuccess(merge);
4672  success = true;
4673  }
4674  catch (System.Exception t)
4675  {
4676  HandleMergeException(t, merge);
4677  }
4678  }
4679  finally
4680  {
4681  lock (this)
4682  {
4683  MergeFinish(merge);
4684 
4685  if (!success)
4686  {
4687  if (infoStream != null)
4688  Message("hit exception during merge");
4689  if (merge.info != null && !segmentInfos.Contains(merge.info))
4690  deleter.Refresh(merge.info.name);
4691  }
4692 
4693  // This merge (and, generally, any change to the
4694  // segments) may now enable new merges, so we call
4695  // merge policy & update pending merges.
4696  if (success && !merge.IsAborted() && !closed && !closing)
4697  UpdatePendingMerges(merge.maxNumSegmentsOptimize, merge.optimize);
4698  }
4699  }
4700  }
4701  catch (System.OutOfMemoryException oom)
4702  {
4703  HandleOOM(oom, "merge");
4704  }
4705  }
4706 
4707  /// <summary>Hook that's called when the specified merge is complete. </summary>
4708  internal virtual void MergeSuccess(MergePolicy.OneMerge merge)
4709  {
4710  }
4711 
4712  /// <summary>Checks whether this merge involves any segments
4713  /// already participating in a merge. If not, this merge
4714  /// is "registered", meaning we record that its segments
4715  /// are now participating in a merge, and true is
4716  /// returned. Else (the merge conflicts) false is
4717  /// returned.
4718  /// </summary>
4719  internal bool RegisterMerge(MergePolicy.OneMerge merge)
4720  {
4721  lock (this)
4722  {
4723 
4724  if (merge.registerDone)
4725  return true;
4726 
4727  if (stopMerges)
4728  {
4729  merge.Abort();
4730  throw new MergePolicy.MergeAbortedException("merge is aborted: " + merge.SegString(directory));
4731  }
4732 
4733  int count = merge.segments.Count;
4734  bool isExternal = false;
4735  for (int i = 0; i < count; i++)
4736  {
4737  SegmentInfo info = merge.segments.Info(i);
4738  if (mergingSegments.Contains(info))
4739  {
4740  return false;
4741  }
4742  if (segmentInfos.IndexOf(info) == -1)
4743  {
4744  return false;
4745  }
4746  if (info.dir != directory)
4747  {
4748  isExternal = true;
4749  }
4750  if (segmentsToOptimize.Contains(info))
4751  {
4752  merge.optimize = true;
4753  merge.maxNumSegmentsOptimize = optimizeMaxNumSegments;
4754  }
4755  }
4756 
4757  EnsureContiguousMerge(merge);
4758 
4759  pendingMerges.AddLast(merge);
4760 
4761  if (infoStream != null)
4762  Message("add merge to pendingMerges: " + merge.SegString(directory) + " [total " + pendingMerges.Count + " pending]");
4763 
4764  merge.mergeGen = mergeGen;
4765  merge.isExternal = isExternal;
4766 
4767  // OK it does not conflict; now record that this merge
4768  // is running (while synchronized) to avoid race
4769  // condition where two conflicting merges from different
4770  // threads start at the same time
4771  for (int i = 0; i < count; i++)
4772  {
4773  SegmentInfo si = merge.segments.Info(i);
4774  mergingSegments.Add(si);
4775  }
4776 
4777  // Merge is now registered
4778  merge.registerDone = true;
4779  return true;
4780  }
4781  }
4782 
4783  /// <summary>Does initial setup for a merge, which is fast but holds
4784  /// the synchronized lock on the IndexWriter instance.
4785  /// </summary>
4786  internal void MergeInit(MergePolicy.OneMerge merge)
4787  {
4788  lock (this)
4789  {
4790  bool success = false;
4791  try
4792  {
4793  _MergeInit(merge);
4794  success = true;
4795  }
4796  finally
4797  {
4798  if (!success)
4799  {
4800  MergeFinish(merge);
4801  }
4802  }
4803  }
4804  }
4805 
4806  private void _MergeInit(MergePolicy.OneMerge merge)
4807  {
4808  lock (this)
4809  {
4810 
4811  System.Diagnostics.Debug.Assert(TestPoint("startMergeInit"));
4812 
4813  System.Diagnostics.Debug.Assert(merge.registerDone);
4814  System.Diagnostics.Debug.Assert(!merge.optimize || merge.maxNumSegmentsOptimize > 0);
4815 
4816  if (hitOOM)
4817  {
4818  throw new System.SystemException("this writer hit an OutOfMemoryError; cannot merge");
4819  }
4820 
4821  if (merge.info != null)
4822  // mergeInit already done
4823  return ;
4824 
4825  if (merge.IsAborted())
4826  return ;
4827 
4828  ApplyDeletes();
4829 
4830  SegmentInfos sourceSegments = merge.segments;
4831  int end = sourceSegments.Count;
4832 
4833  // Check whether this merge will allow us to skip
4834  // merging the doc stores (stored field & vectors).
4835  // This is a very substantial optimization (saves tons
4836  // of IO).
4837 
4838  Directory lastDir = directory;
4839  System.String lastDocStoreSegment = null;
4840  int next = - 1;
4841 
4842  bool mergeDocStores = false;
4843  bool doFlushDocStore = false;
4844  System.String currentDocStoreSegment = docWriter.DocStoreSegment;
4845 
4846  // Test each segment to be merged: check if we need to
4847  // flush/merge doc stores
4848  for (int i = 0; i < end; i++)
4849  {
4850  SegmentInfo si = sourceSegments.Info(i);
4851 
4852  // If it has deletions we must merge the doc stores
4853  if (si.HasDeletions())
4854  mergeDocStores = true;
4855 
4856  // If it has its own (private) doc stores we must
4857  // merge the doc stores
4858  if (- 1 == si.DocStoreOffset)
4859  mergeDocStores = true;
4860 
4861  // If it has a different doc store segment than
4862  // previous segments, we must merge the doc stores
4863  System.String docStoreSegment = si.DocStoreSegment;
4864  if (docStoreSegment == null)
4865  mergeDocStores = true;
4866  else if (lastDocStoreSegment == null)
4867  lastDocStoreSegment = docStoreSegment;
4868  else if (!lastDocStoreSegment.Equals(docStoreSegment))
4869  mergeDocStores = true;
4870 
4871  // Segments' docStoreOffsets must be in-order,
4872  // contiguous. For the default merge policy now
4873  // this will always be the case but for an arbitrary
4874  // merge policy this may not be the case
4875  if (- 1 == next)
4876  next = si.DocStoreOffset + si.docCount;
4877  else if (next != si.DocStoreOffset)
4878  mergeDocStores = true;
4879  else
4880  next = si.DocStoreOffset + si.docCount;
4881 
4882  // If the segment comes from a different directory
4883  // we must merge
4884  if (lastDir != si.dir)
4885  mergeDocStores = true;
4886 
4887  // If the segment is referencing the current "live"
4888  // doc store outputs then we must merge
4889  if (si.DocStoreOffset != - 1 && currentDocStoreSegment != null && si.DocStoreSegment.Equals(currentDocStoreSegment))
4890  {
4891  doFlushDocStore = true;
4892  }
4893  }
4894 
4895  // if a mergedSegmentWarmer is installed, we must merge
4896  // the doc stores because we will open a full
4897  // SegmentReader on the merged segment:
4898  if (!mergeDocStores && mergedSegmentWarmer != null && currentDocStoreSegment != null && lastDocStoreSegment != null && lastDocStoreSegment.Equals(currentDocStoreSegment))
4899  {
4900  mergeDocStores = true;
4901  }
4902 
4903  int docStoreOffset;
4904  System.String docStoreSegment2;
4905  bool docStoreIsCompoundFile;
4906 
4907  if (mergeDocStores)
4908  {
4909  docStoreOffset = - 1;
4910  docStoreSegment2 = null;
4911  docStoreIsCompoundFile = false;
4912  }
4913  else
4914  {
4915  SegmentInfo si = sourceSegments.Info(0);
4916  docStoreOffset = si.DocStoreOffset;
4917  docStoreSegment2 = si.DocStoreSegment;
4918  docStoreIsCompoundFile = si.DocStoreIsCompoundFile;
4919  }
4920 
4921  if (mergeDocStores && doFlushDocStore)
4922  {
4923  // SegmentMerger intends to merge the doc stores
4924  // (stored fields, vectors), and at least one of the
4925  // segments to be merged refers to the currently
4926  // live doc stores.
4927 
4928  // TODO: if we know we are about to merge away these
4929  // newly flushed doc store files then we should not
4930  // make a compound file out of them...
4931  if (infoStream != null)
4932  Message("now flush at merge");
4933  DoFlush(true, false);
4934  }
4935 
4936  merge.mergeDocStores = mergeDocStores;
4937 
4938  // Bind a new segment name here so even with
4939  // ConcurrentMergePolicy we keep deterministic segment
4940  // names.
4941  merge.info = new SegmentInfo(NewSegmentName(), 0, directory, false, true, docStoreOffset, docStoreSegment2, docStoreIsCompoundFile, false);
4942 
4943 
4944  IDictionary<string, string> details = new Dictionary<string, string>();
4945  details["optimize"] = merge.optimize + "";
4946  details["mergeFactor"] = end + "";
4947  details["mergeDocStores"] = mergeDocStores + "";
4948  SetDiagnostics(merge.info, "merge", details);
4949 
4950  // Also enroll the merged segment into mergingSegments;
4951  // this prevents it from getting selected for a merge
4952  // after our merge is done but while we are building the
4953  // CFS:
4954  mergingSegments.Add(merge.info);
4955  }
4956  }
4957 
4958  private void SetDiagnostics(SegmentInfo info, System.String source)
4959  {
4960  SetDiagnostics(info, source, null);
4961  }
4962 
4963  private void SetDiagnostics(SegmentInfo info, System.String source, IDictionary<string, string> details)
4964  {
4965  IDictionary<string, string> diagnostics = new Dictionary<string,string>();
4966  diagnostics["source"] = source;
4967  diagnostics["lucene.version"] = Constants.LUCENE_VERSION;
4968  diagnostics["os"] = Constants.OS_NAME + "";
4969  diagnostics["os.arch"] = Constants.OS_ARCH + "";
4970  diagnostics["os.version"] = Constants.OS_VERSION + "";
4971  diagnostics["java.version"] = Constants.JAVA_VERSION + "";
4972  diagnostics["java.vendor"] = Constants.JAVA_VENDOR + "";
4973  if (details != null)
4974  {
4975  //System.Collections.ArrayList keys = new System.Collections.ArrayList(details.Keys);
4976  //System.Collections.ArrayList values = new System.Collections.ArrayList(details.Values);
4977  foreach (string key in details.Keys)
4978  {
4979  diagnostics[key] = details[key];
4980  }
4981  }
4982  info.Diagnostics = diagnostics;
4983  }
4984 
4985  /// <summary>Does the finishing work for a merge, which is fast but holds
4986  /// the synchronized lock on the IndexWriter instance.
4987  /// </summary>
4988  internal void MergeFinish(MergePolicy.OneMerge merge)
4989  {
4990  lock (this)
4991  {
4992 
4993  // Optimize, addIndexes or finishMerges may be waiting
4994  // on merges to finish.
4995  System.Threading.Monitor.PulseAll(this);
4996 
4997  // It's possible we are called twice, eg if there was an
4998  // exception inside mergeInit
4999  if (merge.registerDone)
5000  {
5001  SegmentInfos sourceSegments = merge.segments;
5002  int end = sourceSegments.Count;
5003  for (int i = 0; i < end; i++)
5004  mergingSegments.Remove(sourceSegments.Info(i));
5005  if(merge.info != null)
5006  mergingSegments.Remove(merge.info);
5007  merge.registerDone = false;
5008  }
5009 
5010  runningMerges.Remove(merge);
5011  }
5012  }
5013 
5014  private void SetMergeDocStoreIsCompoundFile(MergePolicy.OneMerge merge)
5015  {
5016  lock (this)
5017  {
5018  string mergeDocStoreSegment = merge.info.DocStoreSegment;
5019  if (mergeDocStoreSegment != null && !merge.info.DocStoreIsCompoundFile)
5020  {
5021  int size = segmentInfos.Count;
5022  for (int i = 0; i < size; i++)
5023  {
5024  SegmentInfo info = segmentInfos.Info(i);
5025  string docStoreSegment = info.DocStoreSegment;
5026  if (docStoreSegment != null &&
5027  docStoreSegment.Equals(mergeDocStoreSegment) &&
5028  info.DocStoreIsCompoundFile)
5029  {
5030  merge.info.DocStoreIsCompoundFile = true;
5031  break;
5032  }
5033  }
5034  }
5035  }
5036  }
5037 
5038  private void CloseMergeReaders(MergePolicy.OneMerge merge, bool suppressExceptions)
5039  {
5040  lock (this)
5041  {
5042  int numSegments = merge.segments.Count;
5043  if (suppressExceptions)
5044  {
5045  // Suppress any new exceptions so we throw the
5046  // original cause
5047  for (int i = 0; i < numSegments; i++)
5048  {
5049  if (merge.readers[i] != null)
5050  {
5051  try
5052  {
5053  readerPool.Release(merge.readers[i], false);
5054  }
5055  catch (Exception)
5056  {
5057  }
5058  merge.readers[i] = null;
5059  }
5060 
5061  if (merge.readersClone[i] != null)
5062  {
5063  try
5064  {
5065  merge.readersClone[i].Close();
5066  }
5067  catch (Exception)
5068  {
5069  }
5070  // This was a private clone and we had the
5071  // only reference
5072  System.Diagnostics.Debug.Assert(merge.readersClone[i].RefCount == 0); //: "refCount should be 0 but is " + merge.readersClone[i].getRefCount();
5073  merge.readersClone[i] = null;
5074  }
5075  }
5076  }
5077  else
5078  {
5079  for (int i = 0; i < numSegments; i++)
5080  {
5081  if (merge.readers[i] != null)
5082  {
5083  readerPool.Release(merge.readers[i], true);
5084  merge.readers[i] = null;
5085  }
5086 
5087  if (merge.readersClone[i] != null)
5088  {
5089  merge.readersClone[i].Close();
5090  // This was a private clone and we had the only reference
5091  System.Diagnostics.Debug.Assert(merge.readersClone[i].RefCount == 0);
5092  merge.readersClone[i] = null;
5093  }
5094  }
5095  }
5096  }
5097  }
5098 
5099 
5100  /// <summary>Does the actual (time-consuming) work of the merge,
5101  /// but without holding the synchronized lock on the IndexWriter
5102  /// instance.
5103  /// </summary>
5104  private int MergeMiddle(MergePolicy.OneMerge merge)
5105  {
5106 
5107  merge.CheckAborted(directory);
5108 
5109  System.String mergedName = merge.info.name;
5110 
5111  SegmentMerger merger = null;
5112 
5113  int mergedDocCount = 0;
5114 
5115  SegmentInfos sourceSegments = merge.segments;
5116  int numSegments = sourceSegments.Count;
5117 
5118  if (infoStream != null)
5119  Message("merging " + merge.SegString(directory));
5120 
5121  merger = new SegmentMerger(this, mergedName, merge);
5122 
5123  merge.readers = new SegmentReader[numSegments];
5124  merge.readersClone = new SegmentReader[numSegments];
5125 
5126  bool mergeDocStores = false;
5127 
5128  String currentDocStoreSegment;
5129  lock(this) {
5130  currentDocStoreSegment = docWriter.DocStoreSegment;
5131  }
5132  bool currentDSSMerged = false;
5133 
5134  // This is try/finally to make sure merger's readers are
5135  // closed:
5136  bool success = false;
5137  try
5138  {
5139  int totDocCount = 0;
5140 
5141  for (int i = 0; i < numSegments; i++)
5142  {
5143 
5144  SegmentInfo info = sourceSegments.Info(i);
5145 
5146  // Hold onto the "live" reader; we will use this to
5147  // commit merged deletes
5148  SegmentReader reader = merge.readers[i] = readerPool.Get(info, merge.mergeDocStores, MERGE_READ_BUFFER_SIZE, -1);
5149 
5150  // We clone the segment readers because other
5151  // deletes may come in while we're merging so we
5152  // need readers that will not change
5153  SegmentReader clone = merge.readersClone[i] = (SegmentReader)reader.Clone(true);
5154  merger.Add(clone);
5155 
5156  if (clone.HasDeletions)
5157  {
5158  mergeDocStores = true;
5159  }
5160 
5161  if (info.DocStoreOffset != -1 && currentDocStoreSegment != null)
5162  {
5163  currentDSSMerged |= currentDocStoreSegment.Equals(info.DocStoreSegment);
5164  }
5165 
5166  totDocCount += clone.NumDocs();
5167  }
5168 
5169  if (infoStream != null)
5170  {
5171  Message("merge: total " + totDocCount + " docs");
5172  }
5173 
5174  merge.CheckAborted(directory);
5175 
5176  // If deletions have arrived and it has now become
5177  // necessary to merge doc stores, go and open them:
5178  if (mergeDocStores && !merge.mergeDocStores)
5179  {
5180  merge.mergeDocStores = true;
5181  lock (this)
5182  {
5183  if (currentDSSMerged)
5184  {
5185  if (infoStream != null)
5186  {
5187  Message("now flush at mergeMiddle");
5188  }
5189  DoFlush(true, false);
5190  }
5191  }
5192 
5193  for (int i = 0; i < numSegments; i++)
5194  {
5195  merge.readersClone[i].OpenDocStores();
5196  }
5197 
5198  // Clear DSS
5199  merge.info.SetDocStore(-1, null, false);
5200 
5201  }
5202 
5203  // This is where all the work happens:
5204  mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores);
5205 
5206  System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount);
5207 
5208  if (merge.useCompoundFile)
5209  {
5210 
5211  success = false;
5212  string compoundFileName = IndexFileNames.SegmentFileName(mergedName, IndexFileNames.COMPOUND_FILE_EXTENSION);
5213 
5214  try
5215  {
5216  if (infoStream != null)
5217  {
5218  Message("create compound file " + compoundFileName);
5219  }
5220  merger.CreateCompoundFile(compoundFileName);
5221  success = true;
5222  }
5223  catch (System.IO.IOException ioe)
5224  {
5225  lock (this)
5226  {
5227  if (merge.IsAborted())
5228  {
5229  // This can happen if rollback or close(false)
5230  // is called -- fall through to logic below to
5231  // remove the partially created CFS:
5232  }
5233  else
5234  {
5235  HandleMergeException(ioe, merge);
5236  }
5237  }
5238  }
5239  catch (Exception t)
5240  {
5241  HandleMergeException(t, merge);
5242  }
5243  finally
5244  {
5245  if (!success)
5246  {
5247  if (infoStream != null)
5248  {
5249  Message("hit exception creating compound file during merge");
5250  }
5251 
5252  lock (this)
5253  {
5254  deleter.DeleteFile(compoundFileName);
5255  deleter.DeleteNewFiles(merger.GetMergedFiles());
5256  }
5257  }
5258  }
5259 
5260  success = false;
5261 
5262  lock (this)
5263  {
5264 
5265  // delete new non cfs files directly: they were never
5266  // registered with IFD
5267  deleter.DeleteNewFiles(merger.GetMergedFiles());
5268 
5269  if (merge.IsAborted())
5270  {
5271  if (infoStream != null)
5272  {
5273  Message("abort merge after building CFS");
5274  }
5275  deleter.DeleteFile(compoundFileName);
5276  return 0;
5277  }
5278  }
5279 
5280  merge.info.SetUseCompoundFile(true);
5281  }
5282 
5283  int termsIndexDivisor;
5284  bool loadDocStores;
5285 
5286  // If the merged segment warmer was not installed when
5287  // this merge started (so we did not force the doc stores
5288  // to close), we cannot warm the merged reader now
5289  bool canWarm = merge.info.DocStoreSegment == null || currentDocStoreSegment == null || !merge.info.DocStoreSegment.Equals(currentDocStoreSegment);
5290 
5291  if (poolReaders && mergedSegmentWarmer != null && canWarm)
5292  {
5293  // Load terms index & doc stores so the segment
5294  // warmer can run searches, load documents/term
5295  // vectors
5296  termsIndexDivisor = readerTermsIndexDivisor;
5297  loadDocStores = true;
5298  }
5299  else
5300  {
5301  termsIndexDivisor = -1;
5302  loadDocStores = false;
5303  }
5304 
5305  // TODO: in the non-realtime case, we may want to only
5306  // keep deletes (it's costly to open entire reader
5307  // when we just need deletes)
5308 
5309  SegmentReader mergedReader = readerPool.Get(merge.info, loadDocStores, BufferedIndexInput.BUFFER_SIZE, termsIndexDivisor);
5310  try
5311  {
5312  if (poolReaders && mergedSegmentWarmer != null)
5313  {
5314  mergedSegmentWarmer.Warm(mergedReader);
5315  }
5316  if (!CommitMerge(merge, merger, mergedDocCount, mergedReader))
5317  {
5318  // commitMerge will return false if this merge was aborted
5319  return 0;
5320  }
5321  }
5322  finally
5323  {
5324  lock (this)
5325  {
5326  readerPool.Release(mergedReader);
5327  }
5328  }
5329 
5330  success = true;
5331  }
5332  finally
5333  {
5334  // Readers are already closed in commitMerge if we didn't hit
5335  // an exc:
5336  if (!success)
5337  {
5338  CloseMergeReaders(merge, true);
5339  }
5340  }
5341 
5342  return mergedDocCount;
5343  }
5344 
5345  internal virtual void AddMergeException(MergePolicy.OneMerge merge)
5346  {
5347  lock (this)
5348  {
5349  System.Diagnostics.Debug.Assert(merge.GetException() != null);
5350  if (!mergeExceptions.Contains(merge) && mergeGen == merge.mergeGen)
5351  mergeExceptions.Add(merge);
5352  }
5353  }
5354 
5355  // Apply buffered deletes to all segments.
5356  private bool ApplyDeletes()
5357  {
5358  lock (this)
5359  {
5360  System.Diagnostics.Debug.Assert(TestPoint("startApplyDeletes"));
5361  flushDeletesCount++;
5362 
5363  bool success = false;
5364  bool changed;
5365  try
5366  {
5367  changed = docWriter.ApplyDeletes(segmentInfos);
5368  success = true;
5369  }
5370  finally
5371  {
5372  if (!success && infoStream != null)
5373  {
5374  Message("hit exception flushing deletes");
5375  }
5376  }
5377 
5378  if (changed)
5379  Checkpoint();
5380  return changed;
5381  }
5382  }
5383 
5384  // For test purposes.
5385  internal int GetBufferedDeleteTermsSize()
5386  {
5387  lock (this)
5388  {
5389  return docWriter.GetBufferedDeleteTerms().Count;
5390  }
5391  }
5392 
5393  // For test purposes.
5394  internal int GetNumBufferedDeleteTerms()
5395  {
5396  lock (this)
5397  {
5398  return docWriter.GetNumBufferedDeleteTerms();
5399  }
5400  }
5401 
5402  // utility routines for tests
5403  public /*internal*/ virtual SegmentInfo NewestSegment()
5404  {
5405  return segmentInfos.Count > 0 ? segmentInfos.Info(segmentInfos.Count - 1) : null;
5406  }
5407 
5408  public virtual System.String SegString()
5409  {
5410  lock (this)
5411  {
5412  return SegString(segmentInfos);
5413  }
5414  }
5415 
5416  private System.String SegString(SegmentInfos infos)
5417  {
5418  lock (this)
5419  {
5420  System.Text.StringBuilder buffer = new System.Text.StringBuilder();
5421  int count = infos.Count;
5422  for (int i = 0; i < count; i++)
5423  {
5424  if (i > 0)
5425  {
5426  buffer.Append(' ');
5427  }
5428  SegmentInfo info = infos.Info(i);
5429  buffer.Append(info.SegString(directory));
5430  if (info.dir != directory)
5431  buffer.Append("**");
5432  }
5433  return buffer.ToString();
5434  }
5435  }
5436 
5437  // Files that have been sync'd already
5438  private HashSet<string> synced = new HashSet<string>();
5439 
5440  // Files that are now being sync'd
5441  private HashSet<string> syncing = new HashSet<string>();
5442 
5443  private bool StartSync(System.String fileName, ICollection<string> pending)
5444  {
5445  lock (synced)
5446  {
5447  if (!synced.Contains(fileName))
5448  {
5449  if (!syncing.Contains(fileName))
5450  {
5451  syncing.Add(fileName);
5452  return true;
5453  }
5454  else
5455  {
5456  pending.Add(fileName);
5457  return false;
5458  }
5459  }
5460  else
5461  return false;
5462  }
5463  }
5464 
5465  private void FinishSync(System.String fileName, bool success)
5466  {
5467  lock (synced)
5468  {
5469  System.Diagnostics.Debug.Assert(syncing.Contains(fileName));
5470  syncing.Remove(fileName);
5471  if (success)
5472  synced.Add(fileName);
5473  System.Threading.Monitor.PulseAll(synced);
5474  }
5475  }
5476 
5477  /// <summary>Blocks until all files in syncing are sync'd </summary>
5478  private bool WaitForAllSynced(ICollection<System.String> syncing)
5479  {
5480  lock (synced)
5481  {
5482  IEnumerator<string> it = syncing.GetEnumerator();
5483  while (it.MoveNext())
5484  {
5485  System.String fileName = it.Current;
5486  while (!synced.Contains(fileName))
5487  {
5488  if (!syncing.Contains(fileName))
5489  // There was an error because a file that was
5490  // previously syncing failed to appear in synced
5491  return false;
5492  else
5493  System.Threading.Monitor.Wait(synced);
5494 
5495  }
5496  }
5497  return true;
5498  }
5499  }
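  // Together, StartSync/FinishSync/WaitForAllSynced form a small protocol
  // over the "synced" and "syncing" sets above: the first thread to claim a
  // file syncs it, later threads queue that file name in their pending list,
  // and WaitForAllSynced blocks until each pending file either shows up in
  // "synced" or drops out of "syncing" (which signals a failure and a retry).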
5500 
5501  private void DoWait()
5502  {
5503  lock (this)
5504  {
5505  // NOTE: the callers of this method should in theory
5506  // be able to simply wait(), but, as a defense
5507  // against thread timing hazards where notifyAll()
5508  // fails to be called, we wait for at most 1 second
5509  // and then return so the caller can check whether the wait
5510  // conditions are satisfied:
5511  System.Threading.Monitor.Wait(this, TimeSpan.FromMilliseconds(1000));
5512 
5513  }
5514  }
5515 
5516  /// <summary>Walk through all files referenced by the current
5517  /// segmentInfos and ask the Directory to sync each file,
5518  /// if it wasn't already. If that succeeds, then we
5519  /// prepare a new segments_N file but do not fully commit
5520  /// it.
5521  /// </summary>
5522  private void StartCommit(long sizeInBytes, IDictionary<string, string> commitUserData)
5523  {
5524 
5525  System.Diagnostics.Debug.Assert(TestPoint("startStartCommit"));
5526 
5527  // TODO: as of LUCENE-2095, we can simplify this method,
5528  // since only 1 thread can be in here at once
5529 
5530  if (hitOOM)
5531  {
5532  throw new System.SystemException("this writer hit an OutOfMemoryError; cannot commit");
5533  }
5534 
5535  try
5536  {
5537 
5538  if (infoStream != null)
5539  Message("startCommit(): start sizeInBytes=" + sizeInBytes);
5540 
5541  SegmentInfos toSync = null;
5542  long myChangeCount;
5543 
5544  lock (this)
5545  {
5546  // Wait for any running addIndexes to complete
5547  // first, then block any from running until we've
5548  // copied the segmentInfos we intend to sync:
5549  BlockAddIndexes(false);
5550 
5551  // On commit the segmentInfos must never
5552  // reference a segment in another directory:
5553  System.Diagnostics.Debug.Assert(!HasExternalSegments());
5554 
5555  try
5556  {
5557 
5558  System.Diagnostics.Debug.Assert(lastCommitChangeCount <= changeCount);
5559  myChangeCount = changeCount;
5560 
5561  if (changeCount == lastCommitChangeCount)
5562  {
5563  if (infoStream != null)
5564  Message(" skip startCommit(): no changes pending");
5565  return ;
5566  }
5567 
5568  // First, we clone & incref the segmentInfos we intend
5569  // to sync, then, without locking, we sync() each file
5570  // referenced by toSync, in the background. Multiple
5571  // threads can be doing this at once, if say a large
5572  // merge and a small merge finish at the same time:
5573 
5574  if (infoStream != null)
5575  Message("startCommit index=" + SegString(segmentInfos) + " changeCount=" + changeCount);
5576 
5577  readerPool.Commit();
5578 
5579  // It's possible another flush (that did not close
5580  // the open doc stores) snuck in after the flush we
5581  // just did, so we remove any tail segments
5582  // referencing the open doc store from the
5583  // SegmentInfos we are about to sync (the main
5584  // SegmentInfos will keep them):
5585  toSync = (SegmentInfos) segmentInfos.Clone();
5586  string dss = docWriter.DocStoreSegment;
5587  if (dss != null)
5588  {
5589  while (true)
5590  {
5591  String dss2 = toSync.Info(toSync.Count - 1).DocStoreSegment;
5592  if (dss2 == null || !dss2.Equals(dss))
5593  {
5594  break;
5595  }
5596  toSync.RemoveAt(toSync.Count - 1);
5597  changeCount++;
5598  }
5599  }
5600 
5601  if (commitUserData != null)
5602  toSync.UserData = commitUserData;
5603 
5604  deleter.IncRef(toSync, false);
5605 
5606  ICollection<string> files = toSync.Files(directory, false);
5607  foreach(string fileName in files)
5608  {
5609  System.Diagnostics.Debug.Assert(directory.FileExists(fileName), "file " + fileName + " does not exist");
5610  // If this trips it means we are missing a call to
5611  // .checkpoint somewhere, because by the time we
5612  // are called, deleter should know about every
5613  // file referenced by the current head
5614  // segmentInfos:
5615  System.Diagnostics.Debug.Assert(deleter.Exists(fileName));
5616  }
5617  }
5618  finally
5619  {
5620  ResumeAddIndexes();
5621  }
5622  }
5623 
5624  System.Diagnostics.Debug.Assert(TestPoint("midStartCommit"));
5625 
5626  bool setPending = false;
5627 
5628  try
5629  {
5630  // Loop until all files toSync references are sync'd:
5631  while (true)
5632  {
5633  ICollection<string> pending = new List<string>();
5634 
5635  IEnumerator<string> it = toSync.Files(directory, false).GetEnumerator();
5636  while (it.MoveNext())
5637  {
5638  string fileName = it.Current;
5639  if (StartSync(fileName, pending))
5640  {
5641  bool success = false;
5642  try
5643  {
5644  // Because we incRef'd this commit point, above,
5645  // the file had better exist:
5646  System.Diagnostics.Debug.Assert(directory.FileExists(fileName), "file '" + fileName + "' does not exist dir=" + directory);
5647  if (infoStream != null)
5648  Message("now sync " + fileName);
5649  directory.Sync(fileName);
5650  success = true;
5651  }
5652  finally
5653  {
5654  FinishSync(fileName, success);
5655  }
5656  }
5657  }
5658 
5659  // All files that I require are either synced or being
5660  // synced by other threads. If they are being synced,
5661  // we must at this point block until they are done.
5662  // If this returns false, that means an error in
5663  // another thread resulted in failing to actually
5664  // sync one of our files, so we repeat:
5665  if (WaitForAllSynced(pending))
5666  break;
5667  }
5668 
5669  System.Diagnostics.Debug.Assert(TestPoint("midStartCommit2"));
5670 
5671  lock (this)
5672  {
5673  // If someone saved a newer version of segments file
5674  // since I first started syncing my version, I can
5675  // safely skip saving myself since I've been
5676  // superseded:
5677 
5678  while (true)
5679  {
5680  if (myChangeCount <= lastCommitChangeCount)
5681  {
5682  if (infoStream != null)
5683  {
5684  Message("sync superseded by newer infos");
5685  }
5686  break;
5687  }
5688  else if (pendingCommit == null)
5689  {
5690  // My turn to commit
5691 
5692  if (segmentInfos.Generation > toSync.Generation)
5693  toSync.UpdateGeneration(segmentInfos);
5694 
5695  bool success = false;
5696  try
5697  {
5698 
5699  // Exception here means nothing is prepared
5700  // (this method unwinds everything it did on
5701  // an exception)
5702  try
5703  {
5704  toSync.PrepareCommit(directory);
5705  }
5706  finally
5707  {
5708  // Have our master segmentInfos record the
5709  // generations we just prepared. We do this
5710  // on error or success so we don't
5711  // double-write a segments_N file.
5712  segmentInfos.UpdateGeneration(toSync);
5713  }
5714 
5715  System.Diagnostics.Debug.Assert(pendingCommit == null);
5716  setPending = true;
5717  pendingCommit = toSync;
5718  pendingCommitChangeCount = (uint) myChangeCount;
5719  success = true;
5720  }
5721  finally
5722  {
5723  if (!success && infoStream != null)
5724  Message("hit exception committing segments file");
5725  }
5726  break;
5727  }
5728  else
5729  {
5730  // Must wait for other commit to complete
5731  DoWait();
5732  }
5733  }
5734  }
5735 
5736  if (infoStream != null)
5737  Message("done all syncs");
5738 
5739  System.Diagnostics.Debug.Assert(TestPoint("midStartCommitSuccess"));
5740  }
5741  finally
5742  {
5743  lock (this)
5744  {
5745  if (!setPending)
5746  deleter.DecRef(toSync);
5747  }
5748  }
5749  }
5750  catch (System.OutOfMemoryException oom)
5751  {
5752  HandleOOM(oom, "startCommit");
5753  }
5754  System.Diagnostics.Debug.Assert(TestPoint("finishStartCommit"));
5755  }
5756 
5757  /// <summary> Returns <c>true</c> iff the index in the named directory is
5758  /// currently locked.
5759  /// </summary>
5760  /// <param name="directory">the directory to check for a lock
5761  /// </param>
5762  /// <throws> IOException if there is a low-level IO error </throws>
5763  public static bool IsLocked(Directory directory)
5764  {
5765  return directory.MakeLock(WRITE_LOCK_NAME).IsLocked();
5766  }
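  // Illustrative usage sketch (not part of this file): checking whether a
  // writer currently holds the write lock. FSDirectory is assumed from
  // Lucene.Net.Store.
  //
  //   Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("/path/to/index"));
  //   if (IndexWriter.IsLocked(dir))
  //   {
  //       // another IndexWriter (possibly in another process) has the index open
  //   }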
5767 
5768  /// <summary> Forcibly unlocks the index in the named directory.
5769  /// <p/>
5770  /// Caution: this should only be used by failure recovery code,
5771  /// when it is known that no other process or thread is in fact
5772  /// currently accessing this index.
5773  /// </summary>
5774  public static void Unlock(Directory directory)
5775  {
5776  directory.MakeLock(IndexWriter.WRITE_LOCK_NAME).Release();
5777  }
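  // Illustrative recovery sketch (not part of this file): clearing a stale
  // write lock left behind by a crashed process, per the caution above;
  // use this only when nothing else can be writing to the index.
  //
  //   if (IndexWriter.IsLocked(dir))
  //   {
  //       IndexWriter.Unlock(dir);
  //   }
  //   // a new IndexWriter can now be opened on dir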
5778 
5779  /// <summary> Specifies maximum field length (in number of tokens/terms) in <see cref="IndexWriter" /> constructors.
5780  /// <see cref="SetMaxFieldLength(int)" /> overrides the value set by
5781  /// the constructor.
5782  /// </summary>
5783  public sealed class MaxFieldLength
5784  {
5785 
5786  private int limit;
5787  private System.String name;
5788 
5789  /// <summary> Private type-safe-enum-pattern constructor.
5790  ///
5791  /// </summary>
5792  /// <param name="name">instance name
5793  /// </param>
5794  /// <param name="limit">maximum field length
5795  /// </param>
5796  internal MaxFieldLength(System.String name, int limit)
5797  {
5798  this.name = name;
5799  this.limit = limit;
5800  }
5801 
5802  /// <summary> Public constructor to allow users to specify the maximum field size limit.
5803  ///
5804  /// </summary>
5805  /// <param name="limit">The maximum field length
5806  /// </param>
5807  public MaxFieldLength(int limit):this("User-specified", limit)
5808  {
5809  }
5810 
5811  public int Limit
5812  {
5813  get { return limit; }
5814  }
5815 
5816  public override System.String ToString()
5817  {
5818  return name + ":" + limit;
5819  }
5820 
5821  /// <summary>Sets the maximum field length to <see cref="int.MaxValue" />. </summary>
5822  public static readonly MaxFieldLength UNLIMITED = new MaxFieldLength("UNLIMITED", System.Int32.MaxValue);
5823 
5824  /// <summary> Sets the maximum field length to
5825  /// <see cref="DEFAULT_MAX_FIELD_LENGTH" />
5826  ///
5827  /// </summary>
5828  public static readonly MaxFieldLength LIMITED;
5829  static MaxFieldLength()
5830  {
5831  LIMITED = new MaxFieldLength("LIMITED", Lucene.Net.Index.IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
5832  }
5833  }
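  // Illustrative usage sketch (not part of this file): choosing a field
  // length limit at construction time. StandardAnalyzer and Version.LUCENE_30
  // are assumed from Lucene.Net.Analysis.Standard and Lucene.Net.Util, and
  // dir is any Lucene.Net.Store.Directory.
  //
  //   var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
  //   // index every token of every field:
  //   var unlimited = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
  //   // or, alternatively, cap each field at 5000 tokens:
  //   var capped = new IndexWriter(dir, analyzer, true, new IndexWriter.MaxFieldLength(5000));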
5834 
5835  /// <summary>If <see cref="GetReader()" /> has been called (ie, this writer
5836  /// is in near real-time mode), then after a merge
5837  /// completes, this class can be invoked to warm the
5838  /// reader on the newly merged segment, before the merge
5839  /// commits. This is not required for near real-time
5840  /// search, but will reduce search latency on opening a
5841  /// new near real-time reader after a merge completes.
5842  ///
5843  /// <p/><b>NOTE:</b> This API is experimental and might
5844  /// change in incompatible ways in the next release.<p/>
5845  ///
5846  /// <p/><b>NOTE</b>: warm is called before any deletes have
5847  /// been carried over to the merged segment.
5848  /// </summary>
5849  public abstract class IndexReaderWarmer
5850  {
5851  public abstract void Warm(IndexReader reader);
5852  }
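  // Minimal sketch (not part of this file) of a warmer that loads the newly
  // merged segment's reader before the merge commits; the MatchAllDocsQuery
  // search here is only illustrative.
  //
  //   public class FirstSearchWarmer : IndexWriter.IndexReaderWarmer
  //   {
  //       public override void Warm(IndexReader reader)
  //       {
  //           // run a cheap query so the segment's files are read before a real search hits them
  //           new IndexSearcher(reader).Search(new MatchAllDocsQuery(), 1);
  //       }
  //   }
  //
  // Assigned through the MergedSegmentWarmer property below; MergeMiddle
  // invokes it on the merged segment's reader when the writer is pooling
  // readers.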
5853 
5854  private IndexReaderWarmer mergedSegmentWarmer;
5855 
5856  /// <summary>Gets or sets the merged segment warmer. See
5857  /// <see cref="IndexReaderWarmer" />.
5858  /// </summary>
5859  public virtual IndexReaderWarmer MergedSegmentWarmer
5860  {
5861  set { mergedSegmentWarmer = value; }
5862  get { return mergedSegmentWarmer; }
5863  }
5864 
5865  private void HandleOOM(System.OutOfMemoryException oom, System.String location)
5866  {
5867  if (infoStream != null)
5868  {
5869  Message("hit OutOfMemoryError inside " + location);
5870  }
5871  hitOOM = true;
5872  throw oom;
5873  }
5874 
5875  // Used only by asserts, for testing. Current test points:
5876  // startDoFlush
5877  // startCommitMerge
5878  // startStartCommit
5879  // midStartCommit
5880  // midStartCommit2
5881  // midStartCommitSuccess
5882  // finishStartCommit
5883  // startCommitMergeDeletes
5884  // startMergeInit
5885  // startApplyDeletes
5886  // DocumentsWriter.ThreadState.init start
5887  public /*internal*/ virtual bool TestPoint(System.String name)
5888  {
5889  return true;
5890  }
5891 
5892  internal virtual bool NrtIsCurrent(SegmentInfos infos)
5893  {
5894  lock (this)
5895  {
5896  if (!infos.Equals(segmentInfos))
5897  {
5898  // if any structural changes (new segments), we are
5899  // stale
5900  return false;
5901  }
5902  else if (infos.Generation != segmentInfos.Generation)
5903  {
5904  // if any commit took place since we were opened, we
5905  // are stale
5906  return false;
5907  }
5908  else
5909  {
5910  return !docWriter.AnyChanges;
5911  }
5912  }
5913  }
5914 
5915  internal virtual bool IsClosed()
5916  {
5917  lock (this)
5918  {
5919  return closed;
5920  }
5921  }
5922 
5923  static IndexWriter()
5924  {
5925  MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH;
5926  }
5927  }
5928 }