Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
CheckIndex.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using System.IO;
21 using Lucene.Net.Support;
22 using AbstractField = Lucene.Net.Documents.AbstractField;
23 using Document = Lucene.Net.Documents.Document;
24 using Directory = Lucene.Net.Store.Directory;
25 using FSDirectory = Lucene.Net.Store.FSDirectory;
26 using IndexInput = Lucene.Net.Store.IndexInput;
27 
28 namespace Lucene.Net.Index
29 {
30 
31  /// <summary> Basic tool and API to check the health of an index and
32  /// write a new segments file that removes reference to
33  /// problematic segments.
34  ///
35  /// <p/>As this tool checks every byte in the index, on a large
36  /// index it can take quite a long time to run.
37  ///
38  /// <p/><b>WARNING</b>: this tool and API is new and
39  /// experimental and is subject to suddenly change in the
40  /// next release. Please make a complete backup of your
41  /// index before using this to fix your index!
42  /// </summary>
43  public class CheckIndex
44  {
45  private StreamWriter infoStream;
46  private readonly Directory dir;
47 
/// <summary> Returned from <see cref="CheckIndex.CheckIndex_Renamed_Method()" /> detailing the health and status of the index.
///
/// <p/><b>WARNING</b>: this API is new and experimental and is
/// subject to sudden change in the next release.
/// </summary>
public class Status
{
    /// <summary>True if no problems were found with the index. </summary>
    public bool clean;

    /// <summary>True if we were unable to locate and load the segments_N file. </summary>
    public bool missingSegments;

    /// <summary>True if we were unable to open the segments_N file. </summary>
    public bool cantOpenSegments;

    /// <summary>True if we were unable to read the version number from segments_N file. </summary>
    public bool missingSegmentVersion;

    /// <summary>Name of latest segments_N file in the index. </summary>
    public System.String segmentsFileName;

    /// <summary>Number of segments in the index. </summary>
    public int numSegments;

    /// <summary>String description of the version of the index. </summary>
    public System.String segmentFormat;

    /// <summary>Empty unless a specific list of segments to check was passed to the check method.</summary>
    /// <seealso cref="CheckIndex.CheckIndex_Renamed_Method(List{string})" />
    public List<string> segmentsChecked = new List<string>();

    /// <summary>True if the index was created with a newer version of Lucene than the CheckIndex tool. </summary>
    public bool toolOutOfDate;

    /// <summary>List of <see cref="SegmentInfoStatus" /> instances, detailing status of each segment. </summary>
    public IList<SegmentInfoStatus> segmentInfos = new List<SegmentInfoStatus>();

    /// <summary>Directory index is in. </summary>
    public Directory dir;

    /// <summary> SegmentInfos instance containing only segments that
    /// had no problems (this is used with the <see cref="CheckIndex.FixIndex" />
    /// method to repair the index).
    /// </summary>
    internal SegmentInfos newSegments;

    /// <summary>How many documents will be lost to bad segments. </summary>
    public int totLoseDocCount;

    /// <summary>How many bad segments were found. </summary>
    public int numBadSegments;

    /// <summary>True if we checked only specific segments
    /// (<see cref="CheckIndex.CheckIndex_Renamed_Method(List{string})" />
    /// was called with a non-null argument).
    /// </summary>
    public bool partial;

    /// <summary>Holds the userData of the last commit in the index </summary>
    public IDictionary<string, string> userData;

    /// <summary>Holds the status of each segment in the index.
    /// See <see cref="segmentInfos" />.
    ///
    /// <p/><b>WARNING</b>: this API is new and experimental and is
    /// subject to sudden change in the next release.
    /// </summary>
    public class SegmentInfoStatus
    {
        /// <summary>Name of the segment. </summary>
        public System.String name;

        /// <summary>Document count (does not take deletions into account). </summary>
        public int docCount;

        /// <summary>True if segment is compound file format. </summary>
        public bool compound;

        /// <summary>Number of files referenced by this segment. </summary>
        public int numFiles;

        /// <summary>Net size (MB) of the files referenced by this
        /// segment.
        /// </summary>
        public double sizeMB;

        /// <summary>Doc store offset, if this segment shares the doc
        /// store files (stored fields and term vectors) with
        /// other segments. This is -1 if it does not share.
        /// </summary>
        public int docStoreOffset = -1;

        /// <summary>String of the shared doc store segment, or null if
        /// this segment does not share the doc store files.
        /// </summary>
        public System.String docStoreSegment;

        /// <summary>True if the shared doc store files are compound file
        /// format.
        /// </summary>
        public bool docStoreCompoundFile;

        /// <summary>True if this segment has pending deletions. </summary>
        public bool hasDeletions;

        /// <summary>Name of the current deletions file. </summary>
        public System.String deletionsFileName;

        /// <summary>Number of deleted documents. </summary>
        public int numDeleted;

        /// <summary>True if we were able to open a SegmentReader on this
        /// segment.
        /// </summary>
        public bool openReaderPassed;

        /// <summary>Number of fields in this segment. </summary>
        internal int numFields;

        /// <summary>True if at least one of the fields in this segment
        /// does not omitTermFreqAndPositions.
        /// </summary>
        /// <seealso cref="AbstractField.OmitTermFreqAndPositions">
        /// </seealso>
        public bool hasProx;

        /// <summary>Map&lt;String, String&gt; that includes certain
        /// debugging details that IndexWriter records into
        /// each segment it creates
        /// </summary>
        public IDictionary<string, string> diagnostics;

        /// <summary>Status for testing of field norms (null if field norms could not be tested). </summary>
        public FieldNormStatus fieldNormStatus;

        /// <summary>Status for testing of indexed terms (null if indexed terms could not be tested). </summary>
        public TermIndexStatus termIndexStatus;

        /// <summary>Status for testing of stored fields (null if stored fields could not be tested). </summary>
        public StoredFieldStatus storedFieldStatus;

        /// <summary>Status for testing of term vectors (null if term vectors could not be tested). </summary>
        public TermVectorStatus termVectorStatus;
    }

    /// <summary> Status from testing field norms.</summary>
    public sealed class FieldNormStatus
    {
        /// <summary>Number of fields successfully tested </summary>
        public long totFields = 0L;

        /// <summary>Exception thrown during field norms test (null on success) </summary>
        public System.Exception error = null;
    }

    /// <summary> Status from testing term index.</summary>
    public sealed class TermIndexStatus
    {
        /// <summary>Total term count </summary>
        public long termCount = 0L;

        /// <summary>Total frequency across all terms. </summary>
        public long totFreq = 0L;

        /// <summary>Total number of positions. </summary>
        public long totPos = 0L;

        /// <summary>Exception thrown during term index test (null on success) </summary>
        public System.Exception error = null;
    }

    /// <summary> Status from testing stored fields.</summary>
    public sealed class StoredFieldStatus
    {
        /// <summary>Number of documents tested. </summary>
        public int docCount = 0;

        /// <summary>Total number of stored fields tested. </summary>
        public long totFields = 0;

        /// <summary>Exception thrown during stored fields test (null on success) </summary>
        public System.Exception error = null;
    }

    /// <summary> Status from testing term vectors.</summary>
    public sealed class TermVectorStatus
    {
        /// <summary>Number of documents tested. </summary>
        public int docCount = 0;

        /// <summary>Total number of term vectors tested. </summary>
        public long totVectors = 0;

        /// <summary>Exception thrown during term vector test (null on success) </summary>
        public System.Exception error = null;
    }
}
253 
/// <summary>Creates a checker bound to the given index directory.
/// No info stream is attached initially, so messages are discarded
/// until <c>SetInfoStream</c> is called.
/// </summary>
/// <param name="dir">Directory holding the index to check.</param>
public CheckIndex(Directory dir)
{
    // Start silent: Msg() only writes when an info stream has been set.
    infoStream = null;
    this.dir = dir;
}
260 
/// <summary>Chooses the writer that receives progress and diagnostic
/// messages. Passing null turns message output off.
/// </summary>
/// <param name="out">Destination writer, or null for no output.</param>
public virtual void SetInfoStream(StreamWriter @out)
{
    this.infoStream = @out;
}
268 
/// <summary>Writes one line to the info stream; does nothing when no
/// info stream has been set.
/// </summary>
/// <param name="msg">Text of the line to write.</param>
private void Msg(System.String msg)
{
    if (infoStream == null)
    {
        return;
    }

    infoStream.WriteLine(msg);
}
274 
/// <summary>SegmentTermDocs variant that tallies how many times
/// <c>SkippingDoc</c> was invoked since the most recent <c>Seek</c>.
/// The term index test uses that tally as the number of deleted
/// documents for a term.
/// </summary>
private class MySegmentTermDocs : SegmentTermDocs
{
    /// <summary>Number of SkippingDoc calls since the last Seek.</summary>
    internal int delCount;

    internal MySegmentTermDocs(SegmentReader p)
        : base(p)
    {
    }

    /// <summary>Counts one skipped document.</summary>
    protected internal override void SkippingDoc()
    {
        delCount += 1;
    }

    /// <summary>Positions on <paramref name="term"/> and restarts the tally.</summary>
    public override void Seek(Term term)
    {
        base.Seek(term);
        delCount = 0;
    }
}
295 
/// <summary>Checks every segment of the index and returns a
/// <see cref="Status" /> instance describing what was found.
///
/// <p/>Because every byte in the index is examined, this can take
/// quite a long time on a large index.
///
/// <p/><b>WARNING</b>: only call this while the index is not opened
/// by any writer.
/// </summary>
/// <returns>The status produced by the segment-list overload when it is given a null list.</returns>
public virtual Status CheckIndex_Renamed_Method()
{
    // A null list means "no restriction": every segment is checked.
    List<string> everySegment = null;
    return CheckIndex_Renamed_Method(everySegment);
}
310 
/// <summary>Returns a <see cref="Status" /> instance detailing
/// the state of the index.
///
/// <p/>The segments file is read first; if it cannot be located, opened,
/// or its format version cannot be read, the returned status has
/// <c>missingSegments</c>, <c>cantOpenSegments</c> or
/// <c>missingSegmentVersion</c> set and no segment is examined. If the
/// format is newer than this tool understands, <c>toolOutOfDate</c> is set
/// and no segment is examined either. Otherwise each selected segment is
/// opened and its field norms, term index, stored fields and term vectors
/// are tested; segments that pass are collected for use by
/// <c>FixIndex</c>, and failing segments are counted in
/// <c>numBadSegments</c> / <c>totLoseDocCount</c>.
///
/// <p/>As this method checks every byte in the specified
/// segments, on a large index it can take quite a long
/// time to run.
///
/// <p/><b>WARNING</b>: make sure
/// you only call this when the index is not opened by any
/// writer.
/// </summary>
/// <param name="onlySegments">list of specific segment names to check,
/// or null to check every segment. A non-null list marks the returned
/// status as <c>partial</c>.
/// </param>
/// <returns>Status describing the index and each checked segment.</returns>
public virtual Status CheckIndex_Renamed_Method(List<string> onlySegments)
{
    System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
    SegmentInfos sis = new SegmentInfos();
    Status result = new Status();
    result.dir = dir;
    try
    {
        sis.Read(dir);
    }
    catch (System.Exception t)
    {
        Msg("ERROR: could not read any segments file in directory");
        result.missingSegments = true;
        // ToString() carries the exception type and message as well as the stack trace.
        if (infoStream != null)
            infoStream.WriteLine(t.ToString());
        return result;
    }

    int numSegments = sis.Count;
    var segmentsFileName = sis.GetCurrentSegmentFileName();
    IndexInput input = null;
    try
    {
        input = dir.OpenInput(segmentsFileName);
    }
    catch (System.Exception t)
    {
        Msg("ERROR: could not open segments file in directory");
        if (infoStream != null)
            infoStream.WriteLine(t.ToString());
        result.cantOpenSegments = true;
        return result;
    }
    int format = 0;
    try
    {
        format = input.ReadInt();
    }
    catch (System.Exception t)
    {
        Msg("ERROR: could not read segment file version in directory");
        if (infoStream != null)
            infoStream.WriteLine(t.ToString());
        result.missingSegmentVersion = true;
        return result;
    }
    finally
    {
        if (input != null)
            input.Close();
    }

    bool skip;
    System.String sFormat = DescribeSegmentsFormat(format, out skip);

    result.segmentsFileName = segmentsFileName;
    result.numSegments = numSegments;
    result.segmentFormat = sFormat;
    result.userData = sis.UserData;
    System.String userDataString;
    if (sis.UserData.Count > 0)
    {
        userDataString = " userData=" + CollectionsHelper.CollectionToString(sis.UserData);
    }
    else
    {
        userDataString = "";
    }

    Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

    if (onlySegments != null)
    {
        result.partial = true;
        if (infoStream != null)
            infoStream.Write("\nChecking only these segments:");
        foreach (string s in onlySegments)
        {
            if (infoStream != null)
            {
                infoStream.Write(" " + s);
            }
        }
        result.segmentsChecked.AddRange(onlySegments);
        Msg(":");
    }

    if (skip)
    {
        Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
        result.toolOutOfDate = true;
        return result;
    }

    // Start from a copy of the current SegmentInfos with the segment list emptied;
    // segments that pass every test are added back below.
    result.newSegments = (SegmentInfos) sis.Clone();
    result.newSegments.Clear();

    for (int i = 0; i < numSegments; i++)
    {
        SegmentInfo info = sis.Info(i);
        if (onlySegments != null && !onlySegments.Contains(info.name))
            continue;
        var segInfoStat = new Status.SegmentInfoStatus();
        result.segmentInfos.Add(segInfoStat);
        Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
        segInfoStat.name = info.name;
        segInfoStat.docCount = info.docCount;

        // Until a reader is opened, assume the whole segment would be lost.
        int toLoseDocCount = info.docCount;

        SegmentReader reader = null;

        try
        {
            bool compound = info.GetUseCompoundFile();
            Msg(" compound=" + compound);
            segInfoStat.compound = compound;
            Msg(" hasProx=" + info.HasProx);
            segInfoStat.hasProx = info.HasProx;
            int numFiles = info.Files().Count;
            Msg(" numFiles=" + numFiles);
            segInfoStat.numFiles = numFiles;
            double sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
            Msg(System.String.Format(nf, " size (MB)={0:f}", new System.Object[] { sizeMB }));
            segInfoStat.sizeMB = sizeMB;
            IDictionary<string, string> diagnostics = info.Diagnostics;
            segInfoStat.diagnostics = diagnostics;
            if (diagnostics.Count > 0)
            {
                Msg(" diagnostics = " + CollectionsHelper.CollectionToString(diagnostics));
            }

            int docStoreOffset = info.DocStoreOffset;
            if (docStoreOffset != -1)
            {
                Msg(" docStoreOffset=" + docStoreOffset);
                segInfoStat.docStoreOffset = docStoreOffset;
                Msg(" docStoreSegment=" + info.DocStoreSegment);
                segInfoStat.docStoreSegment = info.DocStoreSegment;
                Msg(" docStoreIsCompoundFile=" + info.DocStoreIsCompoundFile);
                segInfoStat.docStoreCompoundFile = info.DocStoreIsCompoundFile;
            }
            System.String delFileName = info.GetDelFileName();
            if (delFileName == null)
            {
                Msg(" no deletions");
                segInfoStat.hasDeletions = false;
            }
            else
            {
                Msg(" has deletions [delFileName=" + delFileName + "]");
                segInfoStat.hasDeletions = true;
                segInfoStat.deletionsFileName = delFileName;
            }
            if (infoStream != null)
                infoStream.Write(" test: open reader.........");
            reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);

            segInfoStat.openReaderPassed = true;

            // With a reader open, only the live (undeleted) documents would be lost.
            int numDocs = reader.NumDocs();
            toLoseDocCount = numDocs;
            if (reader.HasDeletions)
            {
                if (reader.deletedDocs.Count() != info.GetDelCount())
                {
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                }
                if (reader.deletedDocs.Count() > reader.MaxDoc)
                {
                    throw new System.SystemException("too many deleted docs: MaxDoc=" + reader.MaxDoc + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                }
                if (info.docCount - numDocs != info.GetDelCount())
                {
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                }
                segInfoStat.numDeleted = info.docCount - numDocs;
                Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
            }
            else
            {
                if (info.GetDelCount() != 0)
                {
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                }
                Msg("OK");
            }
            if (reader.MaxDoc != info.docCount)
                throw new System.SystemException("SegmentReader.MaxDoc " + reader.MaxDoc + " != SegmentInfos.docCount " + info.docCount);

            // Test getFieldNames()
            if (infoStream != null)
            {
                infoStream.Write(" test: fields..............");
            }
            ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
            Msg("OK [" + fieldNames.Count + " fields]");
            segInfoStat.numFields = fieldNames.Count;

            // Test Field Norms
            segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);

            // Test the Term Index
            segInfoStat.termIndexStatus = TestTermIndex(info, reader);

            // Test Stored Fields
            segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);

            // Test Term Vectors
            segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);

            // Rethrow the first exception we encountered
            // This will cause stats for failed segments to be incremented properly
            if (segInfoStat.fieldNormStatus.error != null)
            {
                throw new SystemException("Field Norm test failed");
            }
            else if (segInfoStat.termIndexStatus.error != null)
            {
                throw new SystemException("Term Index test failed");
            }
            else if (segInfoStat.storedFieldStatus.error != null)
            {
                throw new SystemException("Stored Field test failed");
            }
            else if (segInfoStat.termVectorStatus.error != null)
            {
                throw new System.SystemException("Term Vector test failed");
            }

            Msg("");
        }
        catch (System.Exception t)
        {
            Msg("FAILED");
            const string comment = "fixIndex() would remove reference to this segment";
            Msg(" WARNING: " + comment + "; full exception:");
            if (infoStream != null)
                infoStream.WriteLine(t.ToString());
            Msg("");
            result.totLoseDocCount += toLoseDocCount;
            result.numBadSegments++;
            continue;
        }
        finally
        {
            if (reader != null)
                reader.Close();
        }

        // Keeper
        result.newSegments.Add((SegmentInfo) info.Clone());
    }

    if (0 == result.numBadSegments)
    {
        result.clean = true;
        Msg("No problems were detected with this index.\n");
    }
    else
        Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");

    return result;
}

/// <summary>Maps the format number read from the segments file to a
/// human-readable description. The comparisons form a single chain so that
/// exactly one description is chosen for any format value.
/// </summary>
/// <param name="format">Format number read from the segments file.</param>
/// <param name="newerThanTool">Set to true when the format is not one of the
/// known constants and is less than <c>SegmentInfos.CURRENT_FORMAT</c>, which
/// this tool reports as an index written by a newer Lucene version.</param>
/// <returns>Description of the format.</returns>
private static System.String DescribeSegmentsFormat(int format, out bool newerThanTool)
{
    newerThanTool = false;

    if (format == SegmentInfos.FORMAT)
        return "FORMAT [Lucene Pre-2.1]";
    if (format == SegmentInfos.FORMAT_LOCKLESS)
        return "FORMAT_LOCKLESS [Lucene 2.1]";
    if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
        return "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
    if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
        return "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
    if (format == SegmentInfos.FORMAT_CHECKSUM)
        return "FORMAT_CHECKSUM [Lucene 2.4]";
    if (format == SegmentInfos.FORMAT_DEL_COUNT)
        return "FORMAT_DEL_COUNT [Lucene 2.4]";
    if (format == SegmentInfos.FORMAT_HAS_PROX)
        return "FORMAT_HAS_PROX [Lucene 2.4]";
    if (format == SegmentInfos.FORMAT_USER_DATA)
        return "FORMAT_USER_DATA [Lucene 2.9]";
    if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
        return "FORMAT_DIAGNOSTICS [Lucene 2.9]";

    if (format < SegmentInfos.CURRENT_FORMAT)
    {
        newerThanTool = true;
        return "int=" + format + " [newer version of Lucene than this tool]";
    }

    return format + " [Lucene 1.3 or prior]";
}
619 
/// <summary>Reads the norms of every field that has them and reports
/// how many fields were read. Any exception is recorded in the returned
/// status rather than propagated.
/// </summary>
/// <param name="fieldNames">Names of the fields to probe.</param>
/// <param name="reader">Reader opened on the segment being checked.</param>
/// <returns>Status with the tested-field count and, on failure, the exception.</returns>
private Status.FieldNormStatus TestFieldNorms(IEnumerable<string> fieldNames, SegmentReader reader)
{
    var normStatus = new Status.FieldNormStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write(" test: field norms.........");
        }

        // One scratch buffer, sized for the segment, reused for every field.
        var normBuffer = new byte[reader.MaxDoc];
        foreach (string field in fieldNames)
        {
            if (!reader.HasNorms(field))
            {
                continue;
            }

            reader.Norms(field, normBuffer, 0);
            normStatus.totFields += 1;
        }

        Msg("OK [" + normStatus.totFields + " fields]");
    }
    catch (System.Exception failure)
    {
        Msg("ERROR [" + failure.Message + "]");
        normStatus.error = failure;
        if (infoStream != null)
        {
            infoStream.WriteLine(failure.StackTrace);
        }
    }

    return normStatus;
}
657 
/// <summary>Walks every term of the segment and validates its postings:
/// document ids must strictly increase and stay below MaxDoc, frequencies
/// must be positive, positions must be at least -1 and non-decreasing, and
/// the documents seen plus the deleted documents must equal the term's
/// docFreq. Any exception is recorded in the returned status rather than
/// propagated.
/// </summary>
/// <param name="info">Segment being checked (not read by this method).</param>
/// <param name="reader">Reader opened on the segment being checked.</param>
/// <returns>Status with term, frequency and position totals and, on failure, the exception.</returns>
private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
{
    var termStatus = new Status.TermIndexStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write(" test: terms, freq, prox...");
        }

        TermEnum terms = reader.Terms();
        TermPositions postings = reader.TermPositions();

        // Used only to count up # deleted docs for this term
        var deletedCounter = new MySegmentTermDocs(reader);

        int docLimit = reader.MaxDoc;

        while (terms.Next())
        {
            termStatus.termCount++;
            Term term = terms.Term;
            int docFreq = terms.DocFreq();
            postings.Seek(term);
            termStatus.totFreq += docFreq;

            int previousDoc = -1;
            int docsSeen = 0;
            while (postings.Next())
            {
                docsSeen++;
                int doc = postings.Doc;
                int freq = postings.Freq;
                if (doc <= previousDoc)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + previousDoc);
                }
                if (doc >= docLimit)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + docLimit);
                }

                previousDoc = doc;
                if (freq <= 0)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                }

                termStatus.totPos += freq;
                int previousPos = -1;
                int remaining = freq;
                while (remaining > 0)
                {
                    remaining--;
                    int pos = postings.NextPosition();
                    if (pos < -1)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                    }
                    if (pos < previousPos)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + previousPos);
                    }
                    previousPos = pos;
                }
            }

            // Now count how many deleted docs occurred in this term;
            // the counter is only consulted when the segment has deletions.
            int deletedForTerm = 0;
            if (reader.HasDeletions)
            {
                deletedCounter.Seek(term);
                while (deletedCounter.Next())
                {
                }
                deletedForTerm = deletedCounter.delCount;
            }

            if (docsSeen + deletedForTerm != docFreq)
            {
                throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + docsSeen + " + num docs deleted " + deletedForTerm);
            }
        }

        Msg("OK [" + termStatus.termCount + " terms; " + termStatus.totFreq + " terms/docs pairs; " + termStatus.totPos + " tokens]");
    }
    catch (System.Exception failure)
    {
        Msg("ERROR [" + failure.Message + "]");
        termStatus.error = failure;
        if (infoStream != null)
        {
            infoStream.WriteLine(failure.StackTrace);
        }
    }

    return termStatus;
}
760 
/// <summary> Test stored fields for a segment: loads the stored document
/// for every undeleted doc id below <c>info.docCount</c>, totals the stored
/// fields, and verifies that the number of undeleted documents seen matches
/// <c>reader.NumDocs()</c>. Any exception is recorded in the returned
/// status rather than propagated.
/// </summary>
/// <param name="info">Segment being checked; supplies the document count to scan.</param>
/// <param name="reader">Reader opened on the segment being checked.</param>
/// <param name="format">Number format used when printing the summary line.</param>
/// <returns>Status with document and field totals and, on failure, the exception.</returns>
private Status.StoredFieldStatus TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
{
    var status = new Status.StoredFieldStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write(" test: stored fields.......");
        }

        // Scan stored fields for all documents
        for (int j = 0; j < info.docCount; ++j)
        {
            if (!reader.IsDeleted(j))
            {
                status.docCount++;
                Document doc = reader.Document(j);
                status.totFields += doc.GetFields().Count;
            }
        }

        // Validate docCount: report the reader's count against the count actually seen
        // (previously the same value was printed on both sides of the message).
        int expectedDocs = reader.NumDocs();
        if (status.docCount != expectedDocs)
        {
            throw new System.SystemException("docCount=" + expectedDocs + " but saw " + status.docCount + " undeleted docs");
        }

        Msg(string.Format(format, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { status.totFields, (((float) status.totFields) / status.docCount) }));
    }
    catch (System.Exception e)
    {
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }

    return status;
}
804 
/// <summary>Loads the term frequency vectors of every undeleted document
/// below <c>info.docCount</c> and totals how many vectors were found. Any
/// exception is recorded in the returned status rather than propagated.
/// </summary>
/// <param name="info">Segment being checked; supplies the document count to scan.</param>
/// <param name="reader">Reader opened on the segment being checked.</param>
/// <param name="format">Number format used when printing the summary line.</param>
/// <returns>Status with document and vector totals and, on failure, the exception.</returns>
private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
{
    var vectorStatus = new Status.TermVectorStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write(" test: term vectors........");
        }

        for (int docId = 0; docId < info.docCount; ++docId)
        {
            if (reader.IsDeleted(docId))
            {
                continue;
            }

            vectorStatus.docCount++;
            ITermFreqVector[] vectors = reader.GetTermFreqVectors(docId);
            if (vectors == null)
            {
                // No vectors were returned for this document.
                continue;
            }

            vectorStatus.totVectors += vectors.Length;
        }

        float averagePerDoc = ((float) vectorStatus.totVectors) / vectorStatus.docCount;
        Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { vectorStatus.totVectors, averagePerDoc }));
    }
    catch (System.Exception failure)
    {
        Msg("ERROR [" + failure.Message + "]");
        vectorStatus.error = failure;
        if (infoStream != null)
        {
            infoStream.WriteLine(failure.StackTrace);
        }
    }

    return vectorStatus;
}
844 
/// <summary>Repairs the index using previously returned result
/// from <see cref="CheckIndex_Renamed_Method()" />. Note that this does not
/// remove any of the unreferenced files after it's done;
/// you must separately open an <see cref="IndexWriter" />, which
/// deletes unreferenced files when it's created.
///
/// <p/><b>WARNING</b>: this writes a
/// new segments file into the index, effectively removing
/// all documents in broken segments from the index.
/// BE CAREFUL.
///
/// <p/><b>WARNING</b>: Make sure you only call this when the
/// index is not opened by any writer.
/// </summary>
/// <param name="result">Status returned by a full (non-partial) check.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="result"/> is null.</exception>
/// <exception cref="System.ArgumentException">The status comes from a partial check,
/// or from a check that stopped before collecting any segments (the segments
/// file was missing or unreadable, or the index is newer than this tool).</exception>
public virtual void FixIndex(Status result)
{
    if (result == null)
        throw new System.ArgumentNullException("result");
    if (result.partial)
        throw new System.ArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
    // newSegments is only populated once the check gets past reading the segments file;
    // fail with a clear message instead of a NullReferenceException.
    if (result.newSegments == null)
        throw new System.ArgumentException("cannot fix an index whose check did not complete (this status holds no segments to write)");
    result.newSegments.Commit(result.dir);
}
865 
/// <summary>Set to true once an assertion expression has actually been evaluated.</summary>
private static bool assertsEnabled;

/// <summary>Records that assertions are being evaluated. Always returns
/// true so that it can serve as the asserted condition itself.
/// </summary>
private static bool MarkAssertsEnabled()
{
    assertsEnabled = true;
    return true;
}

/// <summary>Reports whether assertions are active: the flag is only set
/// as a side effect of evaluating the argument to Debug.Assert.
/// </summary>
/// <returns>The flag value after the Debug.Assert statement.</returns>
private static bool AssertsOn()
{
    System.Diagnostics.Debug.Assert(MarkAssertsEnabled());
    return assertsEnabled;
}
879 
/// <summary>Command-line interface to check and fix an index.
/// <p/>
/// Arguments:
/// <code>
/// pathToIndex [-fix] [-segment X] [-segment Y]
/// </code>
/// <list type="bullet">
/// <item><c>-fix</c>: actually write a new segments_N file, removing any problematic segments</item>
/// <item><c>-segment X</c>: only check the specified
/// segment(s). This can be specified multiple times,
/// to check more than one segment, eg <c>-segment _2
/// -segment _a</c>. You can't use this with the -fix
/// option.</item>
/// </list>
/// <p/><b>WARNING</b>: <c>-fix</c> should only be used on an emergency basis as it will cause
/// documents (perhaps many) to be permanently removed from the index. Always make
/// a backup copy of your index before running this! Do not run this tool on an index
/// that is actively being written to. You have been warned!
/// <p/> Run without -fix, this tool will open the index, report version information
/// and report any exceptions it hits and what action it would take if -fix were
/// specified. With -fix, this tool will remove any segments that have issues and
/// write a new segments_N file. This means all documents contained in the affected
/// segments will be removed.
/// <p/>
/// This tool exits with exit code 1 if the index cannot be opened or has any
/// corruption, else 0.
/// </summary>
/// <param name="args">Command-line arguments as described above.</param>
[STAThread]
public static void Main(System.String[] args)
{
    bool doFix = false;
    var onlySegments = new List<string>();
    System.String indexPath = null;
    int i = 0;
    while (i < args.Length)
    {
        if (args[i].Equals("-fix"))
        {
            doFix = true;
            i++;
        }
        else if (args[i].Equals("-segment"))
        {
            if (i == args.Length - 1)
            {
                System.Console.Out.WriteLine("ERROR: missing name for -segment option");
                System.Environment.Exit(1);
            }
            onlySegments.Add(args[i + 1]);
            i += 2;
        }
        else
        {
            // Anything that is not an option is the index path; only one is allowed.
            if (indexPath != null)
            {
                System.Console.Out.WriteLine("ERROR: unexpected extra argument '" + args[i] + "'");
                System.Environment.Exit(1);
            }
            indexPath = args[i];
            i++;
        }
    }

    if (indexPath == null)
    {
        System.Console.Out.WriteLine("\nERROR: index path not specified");
        System.Console.Out.WriteLine(
            "\nUsage: java Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n"
            + "\n"
            + " -fix: actually write a new segments_N file, removing any problematic segments\n"
            + " -segment X: only check the specified segments. This can be specified multiple\n"
            + " times, to check more than one segment, eg '-segment _2 -segment _a'.\n"
            + " You can't use this with the -fix option\n"
            + "\n"
            + "**WARNING**: -fix should only be used on an emergency basis as it will cause\n"
            + "documents (perhaps many) to be permanently removed from the index. Always make\n"
            + "a backup copy of your index before running this! Do not run this tool on an index\n"
            + "that is actively being written to. You have been warned!\n"
            + "\n"
            + "Run without -fix, this tool will open the index, report version information\n"
            + "and report any exceptions it hits and what action it would take if -fix were\n"
            + "specified. With -fix, this tool will remove any segments that have issues and\n"
            + "write a new segments_N file. This means all documents contained in the affected\n"
            + "segments will be removed.\n"
            + "\n"
            + "This tool exits with exit code 1 if the index cannot be opened or has any\n"
            + "corruption, else 0.\n");
        System.Environment.Exit(1);
    }

    if (!AssertsOn())
        System.Console.Out.WriteLine("\nNOTE: testing will be more thorough if you run java with '-ea:Lucene.Net...', so assertions are enabled");

    if (onlySegments.Count == 0)
        onlySegments = null;
    else if (doFix)
    {
        System.Console.Out.WriteLine("ERROR: cannot specify both -fix and -segment");
        System.Environment.Exit(1);
    }

    System.Console.Out.WriteLine("\nOpening index @ " + indexPath + "\n");
    Directory dir = null;
    try
    {
        dir = FSDirectory.Open(new System.IO.DirectoryInfo(indexPath));
    }
    catch (Exception t)
    {
        Console.Out.WriteLine("ERROR: could not open directory \"" + indexPath + "\"; exiting");
        // ToString() carries the exception type and message as well as the stack trace.
        Console.Out.WriteLine(t.ToString());
        Environment.Exit(1);
    }

    var checker = new CheckIndex(dir);
    var tempWriter = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding)
        { AutoFlush = true };
    checker.SetInfoStream(tempWriter);

    Status result = checker.CheckIndex_Renamed_Method(onlySegments);

    // Each of these flags means the check stopped before examining any segment, so
    // result.newSegments was never populated. There is nothing to fix and nothing
    // meaningful to report about lost documents; the error was already printed.
    if (result.missingSegments || result.cantOpenSegments || result.missingSegmentVersion || result.toolOutOfDate)
    {
        System.Environment.Exit(1);
    }

    if (!result.clean)
    {
        if (!doFix)
        {
            System.Console.Out.WriteLine("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
        }
        else
        {
            Console.Out.WriteLine("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
            Console.Out.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
            for (var s = 0; s < 5; s++)
            {
                // One-second pause per countdown step.
                System.Threading.Thread.Sleep(1000);
                System.Console.Out.WriteLine(" " + (5 - s) + "...");
            }
            Console.Out.WriteLine("Writing...");
            checker.FixIndex(result);
            Console.Out.WriteLine("OK");
            Console.Out.WriteLine("Wrote new segments file \"" + result.newSegments.GetCurrentSegmentFileName() + "\"");
        }
    }
    System.Console.Out.WriteLine("");

    // Exit code 0 only when no problems were found.
    int exitCode = result.clean ? 0 : 1;
    System.Environment.Exit(exitCode);
}
1016  }
1017 }