Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
IndexFileDeleter.cs
Go to the documentation of this file.
1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 
18 using System;
19 using System.Collections.Generic;
20 using Lucene.Net.Support;
21 using Directory = Lucene.Net.Store.Directory;
22 
23 namespace Lucene.Net.Index
24 {
25 
26  /// <summary>
27  /// <para>This class keeps track of each SegmentInfos instance that
28  /// is still "live", either because it corresponds to a
29  /// segments_N file in the Directory (a "commit", i.e. a
30  /// committed SegmentInfos) or because it's an in-memory
31  /// SegmentInfos that a writer is actively updating but has
32  /// not yet committed. This class uses simple reference
33  /// counting to map the live SegmentInfos instances to
34  /// individual files in the Directory.</para>
35  ///
36  /// <para>The same directory file may be referenced by more than
37  /// one IndexCommit, i.e. more than one SegmentInfos.
38  /// Therefore we count how many commits reference each file.
39  /// When all the commits referencing a certain file have been
40  /// deleted, the refcount for that file becomes zero, and the
41  /// file is deleted.</para>
42  ///
43  /// <para>A separate deletion policy interface
44  /// (IndexDeletionPolicy) is consulted on creation (onInit)
45  /// and once per commit (onCommit), to decide when a commit
46  /// should be removed.</para>
47  ///
48  /// <para>It is the business of the IndexDeletionPolicy to choose
49  /// when to delete commit points. The actual mechanics of
50  /// file deletion, retrying, etc, derived from the deletion
51  /// of commit points is the business of the IndexFileDeleter.</para>
52  ///
53  /// <para>The current default deletion policy is
54  /// <see cref="KeepOnlyLastCommitDeletionPolicy"/>, which removes all
55  /// prior commits when a new commit has completed. This
56  /// matches the behavior before 2.2.</para>
57  ///
58  /// <para>Note that you must hold the write.lock before
59  /// instantiating this class. It opens segments_N file(s)
60  /// directly with no retry logic.</para>
61  /// </summary>
62 
63  public sealed class IndexFileDeleter : IDisposable
64  {
65 
/// <summary>Files that we tried to delete but failed (likely
/// because they are open and we are running on Windows),
/// so we will retry them again later. Stays null until the
/// first failed delete is queued.</summary>
private IList<string> deletable;

/// <summary>Reference count for all files in the index.
/// Counts how many existing commits reference a file.
/// Maps the file name to its RefCount (class below) instance.</summary>
private IDictionary<string, RefCount> refCounts = new HashMap<string, RefCount>();

/// <summary>Holds all commits (segments_N) currently in the index.
/// This will have just 1 commit if you are using the
/// default delete policy (KeepOnlyLastCommitDeletionPolicy).
/// Other policies may leave commit points live for longer,
/// in which case this list would be longer than 1.</summary>
private List<CommitPoint> commits = new List<CommitPoint>();

/// <summary>Holds files we had incref'd from the previous
/// non-commit checkpoint.</summary>
private List<ICollection<string>> lastFiles = new List<ICollection<string>>();

/// <summary>Commits that the IndexDeletionPolicy has decided to delete.</summary>
private List<CommitPoint> commitsToDelete = new List<CommitPoint>();

// Optional diagnostics sink; every caller null-checks it before logging.
private System.IO.StreamWriter infoStream;
private Directory directory;
private IndexDeletionPolicy policy;
private DocumentsWriter docWriter;

// Set by the constructor from the IsDeleted flag of the commit point
// that matches the SegmentInfos passed in, after the policy's OnInit ran.
internal bool startingCommitDeleted;

// Highest-generation commit loaded while scanning the directory.
private SegmentInfos lastSegmentInfos;

// File names are removed from this set (under a lock on the set itself)
// when their reference count drops to zero.
private HashSet<string> synced;

/// <summary>Change to true to see details of reference counts when
/// infoStream != null
/// </summary>
public static bool VERBOSE_REF_COUNTS = false;
104 
/// <summary>Replaces the diagnostics stream. When the new stream is
/// non-null, a line naming the current deletion policy is written to it.</summary>
/// <param name="infoStream">the new stream, or null to turn logging off</param>
internal void SetInfoStream(System.IO.StreamWriter infoStream)
{
    this.infoStream = infoStream;
    if (infoStream == null)
    {
        return;
    }

    Message("setInfoStream deletionPolicy=" + policy);
}
113 
/// <summary>Writes one diagnostic line to infoStream, prefixed with "IFD",
/// the current local time and the name of the calling thread.
/// Callers must check that infoStream is non-null first.</summary>
/// <param name="message">the text to log</param>
private void Message(System.String message)
{
    // DateTime.Now, not new DateTime(): the parameterless constructor yields
    // DateTime.MinValue (0001-01-01), so every line carried the same bogus stamp.
    infoStream.WriteLine("IFD [" + DateTime.Now.ToString() + "; " + ThreadClass.Current().Name + "]: " + message);
}
118 
/// <summary> Initialize the deleter: find all previous commits in
/// the Directory, incref the files they reference, call
/// the policy to let it delete commits. This will remove
/// any files not referenced by any of the commits.
/// </summary>
/// <param name="directory">the directory whose files are listed, reference counted and deleted</param>
/// <param name="policy">the policy consulted via OnInit here and OnCommit on later commits</param>
/// <param name="segmentInfos">the SegmentInfos to protect; its generation identifies the current commit</param>
/// <param name="infoStream">optional diagnostics stream; may be null</param>
/// <param name="docWriter">optional writer whose open files are protected at non-commit checkpoints; may be null</param>
/// <param name="synced">optional set from which file names are removed once their files are deleted; may be null</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, System.IO.StreamWriter infoStream, DocumentsWriter docWriter, HashSet<string> synced)
{
    this.docWriter = docWriter;
    this.infoStream = infoStream;
    this.synced = synced;

    if (infoStream != null)
    {
        Message("init: current segments file is \"" + segmentInfos.GetCurrentSegmentFileName() + "\"; deletionPolicy=" + policy);
    }

    this.policy = policy;
    this.directory = directory;

    // First pass: walk the files and initialize our ref
    // counts:
    long currentGen = segmentInfos.Generation;

    // The loop below needs a file-name filter; its declaration was missing
    // from this method. NOTE(review): IndexFileNameFilter.Filter is assumed
    // to be the shared-instance accessor - confirm against IndexFileNameFilter.
    IndexFileNameFilter filter = IndexFileNameFilter.Filter;

    System.String[] files = directory.ListAll();

    CommitPoint currentCommitPoint = null;

    for (int i = 0; i < files.Length; i++)
    {
        System.String fileName = files[i];

        if (filter.Accept(null, fileName) && !fileName.Equals(IndexFileNames.SEGMENTS_GEN))
        {
            // Add this file to refCounts with initial count 0:
            GetRefCount(fileName);

            // File names are identifiers, so compare ordinally rather than
            // with the current culture's linguistic rules.
            if (fileName.StartsWith(IndexFileNames.SEGMENTS, StringComparison.Ordinal))
            {
                // This is a commit (segments or segments_N), and
                // it's valid (<= the max gen). Load it, then
                // incref all files it refers to:
                if (infoStream != null)
                {
                    Message("init: load commit \"" + fileName + "\"");
                }
                SegmentInfos sis = new SegmentInfos();
                try
                {
                    sis.Read(directory, fileName);
                }
                catch (System.IO.FileNotFoundException)
                {
                    // LUCENE-948: on NFS (and maybe others), if
                    // you have writers switching back and forth
                    // between machines, it's very likely that the
                    // dir listing will be stale and will claim a
                    // file segments_X exists when in fact it
                    // doesn't. So, we catch this and handle it
                    // as if the file does not exist
                    if (infoStream != null)
                    {
                        Message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
                    }
                    sis = null;
                }
                catch (System.IO.IOException)
                {
                    if (SegmentInfos.GenerationFromSegmentsFileName(fileName) <= currentGen)
                    {
                        throw;
                    }
                    else
                    {
                        // Most likely we are opening an index that
                        // has an aborted "future" commit, so suppress
                        // exc in this case
                        sis = null;
                    }
                }
                if (sis != null)
                {
                    CommitPoint commitPoint = new CommitPoint(this, commitsToDelete, directory, sis);
                    if (sis.Generation == segmentInfos.Generation)
                    {
                        currentCommitPoint = commitPoint;
                    }
                    commits.Add(commitPoint);
                    IncRef(sis, true);

                    // Remember the newest commit we managed to load.
                    if (lastSegmentInfos == null || sis.Generation > lastSegmentInfos.Generation)
                    {
                        lastSegmentInfos = sis;
                    }
                }
            }
        }
    }

    if (currentCommitPoint == null)
    {
        // We did not in fact see the segments_N file
        // corresponding to the segmentInfos that was passed
        // in. Yet, it must exist, because our caller holds
        // the write lock. This can happen when the directory
        // listing was stale (eg when index accessed via NFS
        // client with stale directory listing cache). So we
        // try now to explicitly open this commit point:
        SegmentInfos sis = new SegmentInfos();
        try
        {
            sis.Read(directory, segmentInfos.GetCurrentSegmentFileName());
        }
        catch (System.IO.IOException)
        {
            throw new CorruptIndexException("failed to locate current segments_N file");
        }
        if (infoStream != null)
        {
            Message("forced open of current segments file " + segmentInfos.GetCurrentSegmentFileName());
        }
        currentCommitPoint = new CommitPoint(this, commitsToDelete, directory, sis);
        commits.Add(currentCommitPoint);
        IncRef(sis, true);
    }

    // We keep commits list in sorted order (oldest to newest):
    commits.Sort();

    // Now delete anything with ref count at 0. These are
    // presumably abandoned files eg due to crash of
    // IndexWriter. DeleteFile does not touch refCounts, so
    // enumerating the dictionary here is safe.
    foreach (KeyValuePair<string, RefCount> entry in refCounts)
    {
        string fileName = entry.Key;
        RefCount rc = entry.Value;
        if (0 == rc.count)
        {
            if (infoStream != null)
            {
                Message("init: removing unreferenced file \"" + fileName + "\"");
            }
            DeleteFile(fileName);
        }
    }

    // Finally, give policy a chance to remove things on
    // startup:
    policy.OnInit(commits);

    // Always protect the incoming segmentInfos since
    // sometime it may not be the most recent commit
    Checkpoint(segmentInfos, false);

    startingCommitDeleted = currentCommitPoint.IsDeleted;

    DeleteCommits();
}
281 
/// <summary>Gets the highest-generation SegmentInfos that the constructor
/// loaded while scanning the directory; null if that scan loaded none.</summary>
public SegmentInfos LastSegmentInfos
{
    get
    {
        return lastSegmentInfos;
    }
}
286 
/// <summary> Remove the CommitPoints in the commitsToDelete List by
/// DecRef'ing all files from each SegmentInfos, then drop the
/// deleted entries from the commits list while keeping its order.
/// </summary>
private void DeleteCommits()
{
    if (commitsToDelete.Count == 0)
    {
        // Nothing was marked for deletion.
        return;
    }

    // First release every file the now-deleted commits were holding:
    foreach (CommitPoint doomed in commitsToDelete)
    {
        if (infoStream != null)
        {
            Message("deleteCommits: now decRef commit \"" + doomed.SegmentsFileName + "\"");
        }
        foreach (string file in doomed.files)
        {
            DecRef(file);
        }
    }
    commitsToDelete.Clear();

    // Then slide the surviving commits towards the front (preserving the sort)...
    int total = commits.Count;
    int kept = 0;
    for (int scan = 0; scan < total; scan++)
    {
        CommitPoint candidate = commits[scan];
        if (candidate.deleted)
        {
            continue;
        }
        if (kept != scan)
        {
            commits[kept] = candidate;
        }
        kept++;
    }

    // ...and trim the leftover tail, last element first.
    for (int last = total - 1; last >= kept; last--)
    {
        commits.RemoveAt(last);
    }
}
339 
/// <summary> Writer calls this when it has hit an error and had to
/// roll back, to tell us that there may now be
/// unreferenced files in the filesystem. So we re-list
/// the filesystem and delete such files. If segmentName
/// is non-null, we will only delete files corresponding to
/// that segment.
/// </summary>
/// <param name="segmentName">segment whose files ("name." or "name_" prefix) to consider, or null for all files</param>
public void Refresh(System.String segmentName)
{
    System.String[] files = directory.ListAll();

    // The loop below needs a file-name filter; its declaration was missing
    // from this method. NOTE(review): IndexFileNameFilter.Filter is assumed
    // to be the shared-instance accessor - confirm against IndexFileNameFilter.
    IndexFileNameFilter filter = IndexFileNameFilter.Filter;

    System.String segmentPrefix1;
    System.String segmentPrefix2;
    if (segmentName != null)
    {
        segmentPrefix1 = segmentName + ".";
        segmentPrefix2 = segmentName + "_";
    }
    else
    {
        segmentPrefix1 = null;
        segmentPrefix2 = null;
    }

    for (int i = 0; i < files.Length; i++)
    {
        System.String fileName = files[i];

        // File names are identifiers, so the prefix checks are ordinal
        // rather than dependent on the current culture.
        bool inScope = segmentName == null
            || fileName.StartsWith(segmentPrefix1, StringComparison.Ordinal)
            || fileName.StartsWith(segmentPrefix2, StringComparison.Ordinal);

        if (filter.Accept(null, fileName) && inScope && !refCounts.ContainsKey(fileName) && !fileName.Equals(IndexFileNames.SEGMENTS_GEN))
        {
            // Unreferenced file, so remove it
            if (infoStream != null)
            {
                Message("refresh [prefix=" + segmentName + "]: removing newly created unreferenced file \"" + fileName + "\"");
            }
            DeleteFile(fileName);
        }
    }
}
378 
/// <summary>Re-lists the directory and deletes unreferenced index files
/// for every segment; same as calling Refresh with a null segment name.</summary>
public void Refresh()
{
    Refresh((System.String) null);
}
383 
/// <summary>Releases the references taken at the last non-commit
/// checkpoint and retries any file deletions that are still pending.</summary>
public void Dispose()
{
    // Move to protected method if class becomes unsealed

    // DecRef old files from the last checkpoint, if any:
    if (lastFiles.Count > 0)
    {
        foreach (ICollection<string> previous in lastFiles)
        {
            DecRef(previous);
        }
        lastFiles.Clear();
    }

    DeletePendingFiles();
}
398 
/// <summary>Retries deletion of every file queued in deletable.
/// The queue is detached first, so files that fail again are
/// collected into a fresh list by DeleteFile.</summary>
private void DeletePendingFiles()
{
    IList<string> pending = deletable;
    if (pending == null)
    {
        return;
    }

    deletable = null;
    foreach (string pendingName in pending)
    {
        if (infoStream != null)
        {
            Message("delete pending file " + pendingName);
        }
        DeleteFile(pendingName);
    }
}
416 
/// <summary> For definition of "check point" see IndexWriter comments:
/// "Clarification: Check Points (and commits)".
///
/// Writer calls this when it has made a "consistent
/// change" to the index, meaning new files are written to
/// the index and the in-memory SegmentInfos have been
/// modified to point to those files.
///
/// This may or may not be a commit (segments_N may or may
/// not have been written).
///
/// We simply incref the files referenced by the new
/// SegmentInfos and decref the files we had previously
/// seen (if any).
///
/// If this is a commit, we also call the policy to give it
/// a chance to remove other commits. If any commits are
/// removed, we decref their files as well.
/// </summary>
/// <param name="segmentInfos">the SegmentInfos whose files are to be protected</param>
/// <param name="isCommit">true when this checkpoint is a commit</param>
public void Checkpoint(SegmentInfos segmentInfos, bool isCommit)
{
    if (infoStream != null)
    {
        Message("now checkpoint \"" + segmentInfos.GetCurrentSegmentFileName() + "\" [" + segmentInfos.Count + " segments ; isCommit = " + isCommit + "]");
    }

    // Try again now to delete any previously un-deletable
    // files (because they were in use, on Windows):
    DeletePendingFiles();

    // Incref the files:
    IncRef(segmentInfos, isCommit);

    if (isCommit)
    {
        // Record the new commit, let the policy prune, then release
        // the files of whatever commits the policy deleted.
        commits.Add(new CommitPoint(this, commitsToDelete, directory, segmentInfos));
        policy.OnCommit(commits);
        DeleteCommits();
        return;
    }

    IList<string> docWriterFiles = null;
    if (docWriter != null)
    {
        docWriterFiles = docWriter.OpenFiles();
        if (docWriterFiles != null)
        {
            // We must incRef these files before decRef'ing
            // last files to make sure we don't accidentally
            // delete them:
            IncRef(docWriterFiles);
        }
    }

    // DecRef old files from the last checkpoint, if any:
    if (lastFiles.Count > 0)
    {
        foreach (ICollection<string> previous in lastFiles)
        {
            DecRef(previous);
        }
        lastFiles.Clear();
    }

    // Save files so we can decr on next checkpoint/commit:
    lastFiles.Add(segmentInfos.Files(directory, false));

    if (docWriterFiles != null)
    {
        lastFiles.Add(docWriterFiles);
    }
}
496 
/// <summary>Increments the reference count of every file returned by
/// segmentInfos.Files(directory, isCommit).</summary>
/// <param name="segmentInfos">the SegmentInfos whose files are incref'd</param>
/// <param name="isCommit">passed through to Files; for a commit point the segments_N file is incref'd too</param>
internal void IncRef(SegmentInfos segmentInfos, bool isCommit)
{
    ICollection<string> referenced = segmentInfos.Files(directory, isCommit);
    foreach (string referencedName in referenced)
    {
        IncRef(referencedName);
    }
}
506 
/// <summary>Increments the reference count of each named file.</summary>
/// <param name="files">the file names to incref</param>
internal void IncRef(ICollection<string> files)
{
    foreach (string name in files)
    {
        IncRef(name);
    }
}
514 
/// <summary>Increments the reference count of one file, creating its
/// RefCount entry first if the file is not tracked yet.</summary>
/// <param name="fileName">the file to incref</param>
internal void IncRef(string fileName)
{
    RefCount counter = GetRefCount(fileName);

    bool trace = infoStream != null && VERBOSE_REF_COUNTS;
    if (trace)
    {
        Message(" IncRef \"" + fileName + "\": pre-incr count is " + counter.count);
    }

    counter.IncRef();
}
524 
/// <summary>Decrements the reference count of each named file.</summary>
/// <param name="files">the file names to decref</param>
internal void DecRef(ICollection<string> files)
{
    foreach (string name in files)
    {
        DecRef(name);
    }
}
532 
/// <summary>Decrements the reference count of one file. When the count
/// reaches zero the file is deleted, its entry is dropped from refCounts
/// and, if a synced set was supplied, its name is removed from that set.</summary>
/// <param name="fileName">the file to decref</param>
internal void DecRef(System.String fileName)
{
    RefCount counter = GetRefCount(fileName);
    if (infoStream != null && VERBOSE_REF_COUNTS)
    {
        Message(" DecRef \"" + fileName + "\": pre-decr count is " + counter.count);
    }

    if (counter.DecRef() != 0)
    {
        // Still referenced elsewhere.
        return;
    }

    // This file is no longer referenced by any past
    // commit points nor by the in-memory SegmentInfos:
    DeleteFile(fileName);
    refCounts.Remove(fileName);

    if (synced == null)
    {
        return;
    }

    lock (synced)
    {
        synced.Remove(fileName);
    }
}
555 
/// <summary>Decrements the reference count of every file returned by
/// segmentInfos.Files(directory, false).</summary>
/// <param name="segmentInfos">the SegmentInfos whose files are decref'd</param>
internal void DecRef(SegmentInfos segmentInfos)
{
    ICollection<string> referenced = segmentInfos.Files(directory, false);
    foreach (string referencedName in referenced)
    {
        DecRef(referencedName);
    }
}
563 
/// <summary>Tells whether the file is tracked in refCounts with a
/// reference count greater than zero.</summary>
/// <param name="fileName">the file name to look up</param>
/// <returns>true only if the file is tracked and its count is positive</returns>
public bool Exists(String fileName)
{
    return refCounts.ContainsKey(fileName) && GetRefCount(fileName).count > 0;
}
575 
/// <summary>Returns the RefCount registered for the file, registering a
/// new one (count 0) first when the file is not tracked yet.</summary>
/// <param name="fileName">the file name to look up</param>
/// <returns>the RefCount stored in refCounts for that name</returns>
private RefCount GetRefCount(System.String fileName)
{
    RefCount existing;
    if (refCounts.TryGetValue(fileName, out existing))
    {
        return existing;
    }

    RefCount created = new RefCount(fileName);
    refCounts[fileName] = created;
    return created;
}
590 
/// <summary>Calls DeleteFile on each named file, in list order.</summary>
/// <param name="files">the file names to delete</param>
internal void DeleteFiles(System.Collections.Generic.IList<string> files)
{
    foreach (string name in files)
    {
        DeleteFile(name);
    }
}
596 
/// <summary>Deletes the specified files, but only if they are new
/// (have not yet been incref'd).
/// </summary>
/// <param name="files">candidate file names; any name already present in refCounts is skipped</param>
internal void DeleteNewFiles(System.Collections.Generic.ICollection<string> files)
{
    foreach (string candidate in files)
    {
        if (refCounts.ContainsKey(candidate))
        {
            // Already tracked, so leave it alone.
            continue;
        }

        if (infoStream != null)
        {
            Message("delete new file \"" + candidate + "\"");
        }
        DeleteFile(candidate);
    }
}
614 
/// <summary>Asks the directory to delete the file. If that fails with an
/// IOException and the file still exists, the name is queued in
/// deletable so the delete can be retried later.</summary>
/// <param name="fileName">the file to delete</param>
internal void DeleteFile(System.String fileName)
{
    try
    {
        if (infoStream != null)
        {
            Message("delete \"" + fileName + "\"");
        }
        directory.DeleteFile(fileName);
    }
    catch (System.IO.IOException e)
    {
        // The delete failed; if the file is already gone there is nothing to retry.
        if (!directory.FileExists(fileName))
        {
            return;
        }

        // Some operating systems (e.g. Windows) don't
        // permit a file to be deleted while it is opened
        // for read (e.g. by another process or thread). So
        // we assume that when a delete fails it is because
        // the file is open in another process, and queue
        // the file for subsequent deletion.
        if (infoStream != null)
        {
            Message("IndexFileDeleter: unable to remove file \"" + fileName + "\": " + e.ToString() + "; Will re-try later.");
        }

        if (deletable == null)
        {
            deletable = new List<string>();
        }
        deletable.Add(fileName);
    }
}
650 
/// <summary> Tracks the reference count for a single index file:</summary>
private sealed class RefCount
{
    // fileName used only for better assert error messages
    internal System.String fileName;

    // Becomes true on the first IncRef; later increments assert count > 0.
    internal bool initDone;

    internal int count;

    internal RefCount(System.String fileName)
    {
        this.fileName = fileName;
    }

    /// <summary>Adds one reference and returns the new count.</summary>
    public int IncRef()
    {
        if (initDone)
        {
            System.Diagnostics.Debug.Assert(count > 0, "RefCount is 0 pre-increment for file " + fileName);
        }
        initDone = true;

        count = count + 1;
        return count;
    }

    /// <summary>Removes one reference and returns the new count.</summary>
    public int DecRef()
    {
        System.Diagnostics.Debug.Assert(count > 0, "RefCount is 0 pre-decrement for file " + fileName);

        count = count - 1;
        return count;
    }
}
684 
/// <summary> Holds details for each commit point. This class is
/// also passed to the deletion policy. Note: this class
/// has a natural ordering that is inconsistent with
/// equals.
/// </summary>
private sealed class CommitPoint : IndexCommit, System.IComparable<CommitPoint>
{
    private IndexFileDeleter enclosingInstance;

    internal long gen;
    internal ICollection<string> files;
    internal string segmentsFileName;
    internal bool deleted;
    internal Directory directory;
    internal ICollection<CommitPoint> commitsToDelete;
    internal long version;
    internal long generation;
    internal bool isOptimized;
    internal IDictionary<string, string> userData;

    /// <summary>Captures the segments file name, version, generation, user data
    /// and file list of the given SegmentInfos at construction time.</summary>
    public CommitPoint(IndexFileDeleter enclosingInstance, ICollection<CommitPoint> commitsToDelete, Directory directory, SegmentInfos segmentInfos)
    {
        this.enclosingInstance = enclosingInstance;
        this.directory = directory;
        this.commitsToDelete = commitsToDelete;

        userData = segmentInfos.UserData;
        segmentsFileName = segmentInfos.GetCurrentSegmentFileName();
        version = segmentInfos.Version;
        generation = segmentInfos.Generation;
        files = segmentInfos.Files(directory, true);
        gen = segmentInfos.Generation;

        // Optimized means exactly one segment and that segment has no deletions.
        isOptimized = segmentInfos.Count == 1 && !segmentInfos.Info(0).HasDeletions();

        System.Diagnostics.Debug.Assert(!segmentInfos.HasExternalSegments(directory));
    }

    /// <summary>The IndexFileDeleter that created this commit point.</summary>
    public IndexFileDeleter Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    public override string ToString()
    {
        return "IndexFileDeleter.CommitPoint(" + segmentsFileName + ")";
    }

    public override bool IsOptimized
    {
        get { return isOptimized; }
    }

    public override string SegmentsFileName
    {
        get { return segmentsFileName; }
    }

    public override ICollection<string> FileNames
    {
        get { return files; }
    }

    public override Directory Directory
    {
        get { return directory; }
    }

    public override long Version
    {
        get { return version; }
    }

    public override long Generation
    {
        get { return generation; }
    }

    public override IDictionary<string, string> UserData
    {
        get { return userData; }
    }

    public override bool IsDeleted
    {
        get { return deleted; }
    }

    /// <summary> Called only by the deletion policy, to remove this
    /// commit point from the index. The first call marks the commit as
    /// deleted and queues it on the deleter's commitsToDelete list;
    /// further calls do nothing.
    /// </summary>
    public override void Delete()
    {
        if (deleted)
        {
            return;
        }

        deleted = true;
        Enclosing_Instance.commitsToDelete.Add(this);
    }

    /// <summary>Orders commit points by generation, lowest first.</summary>
    /// <returns>-1, 0 or 1</returns>
    public int CompareTo(CommitPoint commit)
    {
        if (gen < commit.gen)
        {
            return -1;
        }
        return gen > commit.gen ? 1 : 0;
    }
}
807  }
808 }