19 using System.Collections.Generic;
21 using Lucene.Net.Support;
23 using Document = Lucene.Net.Documents.Document;
28 namespace Lucene.Net.Index
45 private StreamWriter infoStream;
83 public List<string> segmentsChecked =
new List<string>();
89 public IList<SegmentInfoStatus> segmentInfos =
new List<SegmentInfoStatus>();
144 public int docStoreOffset = - 1;
171 internal int numFields;
203 public long totFields = 0L;
206 public System.Exception error = null;
213 public long termCount = 0L;
216 public long totFreq = 0L;
219 public long totPos = 0L;
222 public System.Exception error = null;
230 public int docCount = 0;
233 public long totFields = 0;
236 public System.Exception error = null;
244 public int docCount = 0;
247 public long totVectors = 0;
250 public System.Exception error = null;
264 public virtual void SetInfoStream(StreamWriter @out)
// Writes msg as one line to infoStream. The null check makes this a no-op
// when no info stream has been set.
269 private void Msg(System.String msg)
271 if (infoStream != null)
272 infoStream.WriteLine(msg);
// SegmentTermDocs subclass used by the term-index test: TestTermIndex seeks it to a
// term, iterates it, and then reads its delCount field.
// NOTE(review): the bodies of the constructor, Seek and SkippingDoc are not visible in
// this excerpt. Presumably SkippingDoc counts skipped (deleted) docs in delCount and
// Seek resets it -- confirm against the full file.
275 private class MySegmentTermDocs:SegmentTermDocs
278 internal int delCount;
280 internal MySegmentTermDocs(SegmentReader p):base(p)
284 public override void Seek(
Term term)
290 protected internal override void SkippingDoc()
// Convenience overload: runs the check with no segment filter by passing null
// for onlySegments, and returns that call's Status.
306 public virtual Status CheckIndex_Renamed_Method()
308 return CheckIndex_Renamed_Method(null);
// Checks the index segment by segment, reporting progress and problems through
// Msg / infoStream.
//   onlySegments: when non-null, a segment is only considered if its name is in this
//                 list (see the Contains test in the per-segment loop); the names are
//                 also echoed after "Checking only these segments:".
// NOTE(review): this excerpt omits many lines of the method (braces, try blocks,
// returns, bookkeeping), so the return value and the statements between the visible
// ones are intentionally not described here.
325 public virtual Status CheckIndex_Renamed_Method(List<string> onlySegments)
// Numbers in the report (e.g. segment size) are formatted with the current culture.
327 System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
// Each early failure below (segments file unreadable, cannot be opened, version
// unreadable) is reported via Msg and followed by the stack trace when a stream is set.
335 catch (System.Exception t)
337 Msg(
"ERROR: could not read any segments file in directory");
339 if (infoStream != null)
340 infoStream.WriteLine(t.StackTrace);
344 int numSegments = sis.Count;
349 input = dir.OpenInput(segmentsFileName);
351 catch (System.Exception t)
353 Msg(
"ERROR: could not open segments file in directory");
354 if (infoStream != null)
355 infoStream.WriteLine(t.StackTrace);
362 format = input.ReadInt();
364 catch (System.Exception t)
366 Msg(
"ERROR: could not read segment file version in directory");
367 if (infoStream != null)
368 infoStream.WriteLine(t.StackTrace);
// sFormat is the human-readable label for the segments-file format that is printed in
// the "Segments file=..." summary line; the conditions selecting each label are not
// visible in this excerpt.
378 System.String sFormat =
"";
382 sFormat =
"FORMAT [Lucene Pre-2.1]";
384 sFormat =
"FORMAT_LOCKLESS [Lucene 2.1]";
386 sFormat =
"FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
388 sFormat =
"FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
392 sFormat =
"FORMAT_CHECKSUM [Lucene 2.4]";
394 sFormat =
"FORMAT_DEL_COUNT [Lucene 2.4]";
396 sFormat =
"FORMAT_HAS_PROX [Lucene 2.4]";
398 sFormat =
"FORMAT_USER_DATA [Lucene 2.9]";
400 sFormat =
"FORMAT_DIAGNOSTICS [Lucene 2.9]";
403 sFormat =
"int=" + format +
" [newer version of Lucene than this tool]";
408 sFormat = format +
" [Lucene 1.3 or prior]";
416 System.String userDataString;
426 Msg(
"Segments file=" + segmentsFileName +
" numSegments=" + numSegments +
" version=" + sFormat + userDataString);
428 if (onlySegments != null)
431 if (infoStream != null)
432 infoStream.Write(
"\nChecking only these segments:");
433 foreach(
string s
in onlySegments)
435 if (infoStream != null)
437 infoStream.Write(
" " + s);
446 Msg(
"\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
453 result.newSegments.Clear();
// Per-segment loop: basic segment facts are both printed and recorded on segInfoStat.
455 for (
int i = 0; i < numSegments; i++)
458 if (onlySegments != null && !onlySegments.Contains(info.
name))
460 var segInfoStat =
new Status.SegmentInfoStatus();
462 Msg(
" " + (1 + i) +
" of " + numSegments +
": name=" + info.
name +
" docCount=" + info.
docCount);
463 segInfoStat.name = info.
name;
464 segInfoStat.docCount = info.
docCount;
474 Msg(
" hasProx=" + info.
HasProx);
475 segInfoStat.hasProx = info.
HasProx;
476 Msg(
" numFiles=" + info.
Files().Count);
477 segInfoStat.numFiles = info.
Files().Count;
// Size is reported in megabytes (bytes / 1024^2).
478 Msg(System.String.Format(nf,
" size (MB)={0:f}",
new System.Object[] { (info.
SizeInBytes() / (1024.0 * 1024.0)) }));
479 segInfoStat.sizeMB = info.
SizeInBytes() / (1024.0 * 1024.0);
480 IDictionary<string, string> diagnostics = info.
Diagnostics;
481 segInfoStat.diagnostics = diagnostics;
482 if (diagnostics.Count > 0)
// -1 is treated as "no doc store offset": only other values are printed and recorded.
488 if (docStoreOffset != - 1)
490 Msg(
" docStoreOffset=" + docStoreOffset);
491 segInfoStat.docStoreOffset = docStoreOffset;
// A null deletions file name is reported as "no deletions".
498 if (delFileName == null)
500 Msg(
" no deletions");
501 segInfoStat.hasDeletions =
false;
505 Msg(
" has deletions [delFileName=" + delFileName +
"]");
506 segInfoStat.hasDeletions =
true;
507 segInfoStat.deletionsFileName = delFileName;
509 if (infoStream != null)
510 infoStream.Write(
" test: open reader.........");
513 segInfoStat.openReaderPassed =
true;
515 int numDocs = reader.
NumDocs();
516 toLoseDocCount = numDocs;
// Cross-check the reader's deleted-docs count against the delete count recorded in the
// segment info, and against MaxDoc; a disagreement is raised as SystemException.
519 if (reader.deletedDocs.Count() != info.
GetDelCount())
521 throw new System.SystemException(
"delete count mismatch: info=" + info.
GetDelCount() +
" vs deletedDocs.count()=" + reader.deletedDocs.Count());
523 if (reader.deletedDocs.Count() > reader.
MaxDoc)
525 throw new System.SystemException(
"too many deleted docs: MaxDoc=" + reader.
MaxDoc +
" vs deletedDocs.count()=" + reader.deletedDocs.Count());
529 throw new System.SystemException(
"delete count mismatch: info=" + info.
GetDelCount() +
" vs reader=" + (info.
docCount - numDocs));
// The number of deleted docs is derived as docCount minus the reader's live doc count.
531 segInfoStat.numDeleted = info.
docCount - numDocs;
532 Msg(
"OK [" + (segInfoStat.numDeleted) +
" deleted docs]");
538 throw new System.SystemException(
"delete count mismatch: info=" + info.
GetDelCount() +
" vs reader=" + (info.
docCount - numDocs));
543 throw new System.SystemException(
"SegmentReader.MaxDoc " + reader.
MaxDoc +
" != SegmentInfos.docCount " + info.
docCount);
546 if (infoStream != null)
548 infoStream.Write(
" test: fields..............");
551 Msg(
"OK [" + fieldNames.Count +
" fields]");
552 segInfoStat.numFields = fieldNames.Count;
// Run the four sub-tests; each returns a status object carrying an error field.
555 segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);
558 segInfoStat.termIndexStatus = TestTermIndex(info, reader);
561 segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);
564 segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);
// The first sub-test (in this order) that recorded an error fails the segment.
568 if (segInfoStat.fieldNormStatus.error != null)
570 throw new SystemException(
"Field Norm test failed");
572 else if (segInfoStat.termIndexStatus.error != null)
574 throw new SystemException(
"Term Index test failed");
576 else if (segInfoStat.storedFieldStatus.error != null)
578 throw new SystemException(
"Stored Field test failed");
580 else if (segInfoStat.termVectorStatus.error != null)
582 throw new System.SystemException(
"Term Vector test failed");
// A failure while checking a segment is logged as a warning with its stack trace; the
// message states that fixIndex() would remove the reference to the segment.
587 catch (System.Exception t)
590 const string comment =
"fixIndex() would remove reference to this segment";
591 Msg(
" WARNING: " + comment +
"; full exception:");
592 if (infoStream != null)
593 infoStream.WriteLine(t.StackTrace);
612 Msg(
"No problems were detected with this index.\n");
// Field-norms sub-test for one segment.
//   fieldNames: names of the fields whose norms are read.
//   reader:     segment reader the norms are read from.
// For every field name, reader.Norms is asked to fill a byte buffer of reader.MaxDoc
// entries starting at offset 0. On success "OK [<status.totFields> fields]" is printed;
// an exception is reported as "ERROR [<message>]" followed by its stack trace.
// NOTE(review): lines of this method are missing from the excerpt (counter updates,
// status.error assignment, return) and are therefore not described.
621 private Status.FieldNormStatus TestFieldNorms(IEnumerable<string> fieldNames,
SegmentReader reader)
623 var status =
new Status.FieldNormStatus();
628 if (infoStream != null)
630 infoStream.Write(
" test: field norms.........");
// One buffer sized to MaxDoc is reused for every field.
633 var b =
new byte[reader.
MaxDoc];
634 foreach(
string fieldName
in fieldNames)
638 reader.
Norms(fieldName, b, 0);
643 Msg(
"OK [" + status.totFields +
" fields]");
645 catch (System.Exception e)
647 Msg(
"ERROR [" + System.Convert.ToString(e.Message) +
"]");
649 if (infoStream != null)
651 infoStream.WriteLine(e.StackTrace);
// Terms / frequencies / positions sub-test for one segment.
// For each term produced by termEnum the method seeks termPositions to it, adds the
// term's DocFreq() to status.totFreq, and walks its postings: for each posting it reads
// the doc id and freq, adds freq to status.totPos, and reads freq positions.
// The visible throws report, as SystemException: a doc id <= lastDoc, a doc id >= maxDoc,
// an out-of-bounds freq, an out-of-bounds position, and a position < lastPos.
// When the reader has deletions, the term is iterated again through MySegmentTermDocs to
// obtain delCount, and freq0 + delCount must equal docFreq.
// Success prints the term / pair / token totals; an exception is reported as
// "ERROR [<message>]" followed by its stack trace.
// NOTE(review): the guarding conditions of the throws, the updates of lastDoc, lastPos,
// freq0 and status.termCount, and the return are in lines omitted from this excerpt.
659 private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
661 var status =
new Status.TermIndexStatus();
665 if (infoStream != null)
667 infoStream.Write(
" test: terms, freq, prox...");
674 var myTermDocs =
new MySegmentTermDocs(reader);
676 int maxDoc = reader.MaxDoc;
678 while (termEnum.Next())
681 Term term = termEnum.Term;
682 int docFreq = termEnum.DocFreq();
683 termPositions.Seek(term);
686 status.totFreq += docFreq;
687 while (termPositions.Next())
690 int doc = termPositions.Doc;
691 int freq = termPositions.Freq;
694 throw new System.SystemException(
"term " + term +
": doc " + doc +
" <= lastDoc " + lastDoc);
698 throw new System.SystemException(
"term " + term +
": doc " + doc +
" >= maxDoc " + maxDoc);
704 throw new System.SystemException(
"term " + term +
": doc " + doc +
": freq " + freq +
" is out of bounds");
708 status.totPos += freq;
709 for (
int j = 0; j < freq; j++)
711 int pos = termPositions.NextPosition();
714 throw new System.SystemException(
"term " + term +
": doc " + doc +
": pos " + pos +
" is out of bounds");
718 throw new System.SystemException(
"term " + term +
": doc " + doc +
": pos " + pos +
" < lastPos " + lastPos);
// With deletions present, the deleted-doc count for this term comes from a second pass
// over the term using MySegmentTermDocs.
727 if (reader.HasDeletions)
729 myTermDocs.Seek(term);
730 while (myTermDocs.Next())
733 delCount = myTermDocs.delCount;
// Docs seen plus docs deleted must account for the term's recorded docFreq.
740 if (freq0 + delCount != docFreq)
742 throw new System.SystemException(
"term " + term +
" docFreq=" + docFreq +
" != num docs seen " + freq0 +
" + num docs deleted " + delCount);
746 Msg(
"OK [" + status.termCount +
" terms; " + status.totFreq +
" terms/docs pairs; " + status.totPos +
" tokens]");
748 catch (System.Exception e)
750 Msg(
"ERROR [" + System.Convert.ToString(e.Message) +
"]");
752 if (infoStream != null)
754 infoStream.WriteLine(e.StackTrace);
// Stored-fields sub-test for one segment.
//   info:   segment info; info.docCount bounds the document loop.
//   reader: segment reader used to skip deleted docs and to obtain the live doc count.
//   format: number format used for the summary line.
// For every doc id below info.docCount that the reader does not report as deleted, the
// document's field count is added to status.totFields. Afterwards the number of documents
// seen (status.docCount) must equal reader.NumDocs(); otherwise a SystemException is
// thrown. Success prints the total and the average fields per doc; an exception is
// reported as "ERROR [<message>]" followed by its stack trace.
// NOTE(review): loading of doc, the status.docCount update, status.error assignment and
// the return are in lines omitted from this excerpt.
762 private Status.StoredFieldStatus TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
764 var status =
new Status.StoredFieldStatus();
768 if (infoStream != null)
770 infoStream.Write(
" test: stored fields.......");
774 for (
int j = 0; j < info.docCount; ++j)
776 if (!reader.IsDeleted(j))
780 status.totFields += doc.GetFields().Count;
// The mismatch message must show both sides of the comparison: the reader's live doc
// count and the number of docs actually seen. (It previously printed status.docCount
// twice, so the two numbers in the error were always identical.)
785 if (status.docCount != reader.NumDocs())
787 throw new System.SystemException(
"docCount=" + reader.NumDocs() +
" but saw " + status.docCount +
" undeleted docs");
// Average is computed in float; with zero docs seen this is 0/0 (NaN), not an exception.
790 Msg(
string.Format(format,
"OK [{0:d} total field count; avg {1:f} fields per doc]",
new object[] { status.totFields, (((float) status.totFields) / status.docCount) }));
792 catch (System.Exception e)
794 Msg(
"ERROR [" + System.Convert.ToString(e.Message) +
"]");
796 if (infoStream != null)
798 infoStream.WriteLine(e.StackTrace);
// Term-vectors sub-test for one segment.
//   info:   segment info; info.docCount bounds the document loop.
//   reader: segment reader used to skip deleted docs and fetch term vectors.
//   format: number format used for the summary line.
// For every doc id below info.docCount that the reader does not report as deleted, the
// doc's term-frequency vectors are fetched and their count (tfv.Length) is added to
// status.totVectors. Success prints the total and the per-doc average (float division by
// status.docCount); an exception is reported as "ERROR [<message>]" plus its stack trace.
// NOTE(review): any null check on tfv, the status.docCount update, status.error
// assignment and the return are in lines omitted from this excerpt.
806 private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
808 var status =
new Status.TermVectorStatus();
812 if (infoStream != null)
814 infoStream.Write(
" test: term vectors........");
817 for (
int j = 0; j < info.docCount; ++j)
819 if (!reader.IsDeleted(j))
822 ITermFreqVector[] tfv = reader.GetTermFreqVectors(j);
825 status.totVectors += tfv.Length;
830 Msg(System.String.Format(format,
"OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]",
new object[] { status.totVectors, (((
float) status.totVectors) / status.docCount) }));
832 catch (System.Exception e)
834 Msg(
"ERROR [" + System.Convert.ToString(e.Message) +
"]");
836 if (infoStream != null)
838 infoStream.WriteLine(e.StackTrace);
// Repairs the index using a Status produced by a previous check.
// The visible statement throws ArgumentException whose message says only a fully
// checked index (not a subset of segments) can be fixed.
// NOTE(review): the condition guarding the throw and the rest of the body are not
// visible in this excerpt -- presumably the guard tests whether result came from a
// partial check; confirm against the full file.
859 public virtual void FixIndex(
Status result)
862 throw new System.ArgumentException(
"can only fix an index that was fully checked (this status checked a subset of segments)");
// Assertion-detection helpers. AssertsOn passes TestAsserts() as the argument of
// Debug.Assert; Debug.Assert is a conditional (DEBUG) method, so the call -- including the
// evaluation of its argument -- is compiled out when DEBUG is not defined.
// NOTE(review): the body of TestAsserts and the return of AssertsOn are not visible;
// presumably TestAsserts sets assertsOn = true and AssertsOn returns that flag -- confirm.
866 private static bool assertsOn;
868 private static bool TestAsserts()
874 private static bool AssertsOn()
876 System.Diagnostics.Debug.Assert(TestAsserts());
// Command-line entry point.
// Arguments: the index path, an optional "-fix", and any number of "-segment <name>"
// pairs. A missing name after -segment, a second path argument, a missing path, or the
// combination of -fix with -segment each print an error and exit with code 1.
// The index directory is then opened, a checker is given a StreamWriter over standard
// output as its info stream, and the check is run. Depending on the outcome the tool
// either warns what -fix would do, or (with -fix) counts down five seconds and calls
// FixIndex. The process finally exits with exitCode.
// NOTE(review): many lines are omitted from this excerpt (declaration of i / doFix /
// checker / exitCode, the branch conditions around the fix logic); comments below only
// describe visible statements.
908 public static void Main(System.String[] args)
912 var onlySegments =
new List<string>();
913 System.String indexPath = null;
915 while (i < args.Length)
917 if (args[i].Equals(
"-fix"))
922 else if (args[i].Equals(
"-segment"))
// -segment must be followed by a segment name.
924 if (i == args.Length - 1)
926 System.Console.Out.WriteLine(
"ERROR: missing name for -segment option");
927 System.Environment.Exit(1);
929 onlySegments.Add(args[i + 1]);
// Only one positional argument (the index path) is accepted.
934 if (indexPath != null)
936 System.Console.Out.WriteLine(
"ERROR: unexpected extra argument '" + args[i] +
"'");
937 System.Environment.Exit(1);
944 if (indexPath == null)
946 System.Console.Out.WriteLine(
"\nERROR: index path not specified");
// NOTE(review): the usage text below still says "java ..." although this is a .NET
// program -- looks like a leftover from the Java original; confirm the intended wording.
947 System.Console.Out.WriteLine(
"\nUsage: java Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" +
"\n" +
" -fix: actually write a new segments_N file, removing any problematic segments\n" +
" -segment X: only check the specified segments. This can be specified multiple\n" +
" times, to check more than one segment, eg '-segment _2 -segment _a'.\n" +
" You can't use this with the -fix option\n" +
"\n" +
"**WARNING**: -fix should only be used on an emergency basis as it will cause\n" +
"documents (perhaps many) to be permanently removed from the index. Always make\n" +
"a backup copy of your index before running this! Do not run this tool on an index\n" +
"that is actively being written to. You have been warned!\n" +
"\n" +
"Run without -fix, this tool will open the index, report version information\n" +
"and report any exceptions it hits and what action it would take if -fix were\n" +
"specified. With -fix, this tool will remove any segments that have issues and\n" +
"write a new segments_N file. This means all documents contained in the affected\n" +
"segments will be removed.\n" +
"\n" +
"This tool exits with exit code 1 if the index cannot be opened or has any\n" +
"corruption, else 0.\n");
948 System.Environment.Exit(1);
// NOTE(review): this hint also refers to the JVM "-ea" switch; confirm the wording.
952 System.Console.Out.WriteLine(
"\nNOTE: testing will be more thorough if you run java with '-ea:Lucene.Net...', so assertions are enabled");
954 if (onlySegments.Count == 0)
958 System.Console.Out.WriteLine(
"ERROR: cannot specify both -fix and -segment");
959 System.Environment.Exit(1);
962 System.Console.Out.WriteLine(
"\nOpening index @ " + indexPath +
"\n");
966 dir =
FSDirectory.Open(
new System.IO.DirectoryInfo(indexPath));
970 Console.Out.WriteLine(
"ERROR: could not open directory \"" + indexPath +
"\"; exiting");
971 Console.Out.WriteLine(t.StackTrace);
// The checker reports through a writer over the raw standard-output stream, using the
// console's output encoding.
976 var tempWriter =
new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding)
978 checker.SetInfoStream(tempWriter);
980 Status result = checker.CheckIndex_Renamed_Method(onlySegments);
983 System.Environment.Exit(1);
990 System.Console.Out.WriteLine(
"WARNING: would write new segments file, and " + result.
totLoseDocCount +
" documents would be lost, if -fix were specified\n");
994 Console.Out.WriteLine(
"WARNING: " + result.
totLoseDocCount +
" documents will be lost\n");
995 Console.Out.WriteLine(
"NOTE: will write new segments file in 5 seconds; this will remove " + result.
totLoseDocCount +
" docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
// Five-step countdown; each step sleeps 10000 * 1000 ticks (100 ns each) = one second.
996 for (var s = 0; s < 5; s++)
998 System.Threading.Thread.Sleep(
new System.TimeSpan((System.Int64) 10000 * 1000));
999 System.Console.Out.WriteLine(
" " + (5 - s) +
"...");
1001 Console.Out.WriteLine(
"Writing...");
1002 checker.FixIndex(result);
1003 Console.Out.WriteLine(
"OK");
1007 System.Console.Out.WriteLine(
"");
// The exit code is chosen based on whether a result exists and is marked clean (the
// assignment itself is in lines not shown here).
1010 if (result != null && result.
clean ==
true)
1014 System.Environment.Exit(exitCode);