19 using System.Collections.Generic;
20 using Document = Lucene.Net.Documents.Document;
29 namespace Lucene.Net.Index
/// <summary>No-op <c>CheckAbort</c> used on the constructor path that has no
/// running merge, so there is never anything to abort.
/// NOTE(review): reconstructed from a fragmentary listing — the property
/// wrapper and the empty Work body were not visible; verify against the
/// original file.</summary>
private class AnonymousClassCheckAbort : CheckAbort
{
    private void InitBlock(SegmentMerger enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;
    }

    private SegmentMerger enclosingInstance;

    public SegmentMerger Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    internal AnonymousClassCheckAbort(SegmentMerger enclosingInstance, Lucene.Net.Index.MergePolicy.OneMerge Param1, Lucene.Net.Store.Directory Param2)
        : base(Param1, Param2)
    {
        InitBlock(enclosingInstance);
    }

    /// <summary>Intentionally does nothing: no merge is running on this path.</summary>
    public override void Work(double units)
    {
    }
}
/// <summary>No-op <c>CheckAbort</c> used when a SegmentMerger is driven by an
/// IndexWriter but no <c>OneMerge</c> is supplied, so abort checks are disabled.
/// NOTE(review): reconstructed from a fragmentary listing — the property
/// wrapper and the empty Work body were not visible; verify against the
/// original file.</summary>
private class AnonymousClassCheckAbort1 : CheckAbort
{
    private void InitBlock(SegmentMerger enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;
    }

    private SegmentMerger enclosingInstance;

    public SegmentMerger Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    internal AnonymousClassCheckAbort1(SegmentMerger enclosingInstance, Lucene.Net.Index.MergePolicy.OneMerge Param1, Lucene.Net.Store.Directory Param2)
        : base(Param1, Param2)
    {
        InitBlock(enclosingInstance);
    }

    /// <summary>Intentionally does nothing: there is no merge to abort.</summary>
    public override void Work(double units)
    {
    }
}
// Generated initializer shared by the constructors.
// NOTE(review): the body is not visible in this fragmentary listing — in the
// upstream source it assigns termIndexInterval its default value; confirm.
95 private void InitBlock()
/// <summary>Header written at the start of a norms (.nrm) file: the ASCII
/// letters 'N', 'R', 'M' followed by the format version byte -1 (0xFF).</summary>
internal static readonly byte[] NORMS_HEADER = new byte[] { (byte) 'N', (byte) 'R', (byte) 'M', unchecked((byte) (-1)) };
// --- Merger state (fragmentary listing; original line numbers retained) ---
// Name of the target segment being produced by this merge.
104 private System.String segment;
// Interval used when writing the term index.
105 private int termIndexInterval;
// Source readers whose segments are merged into 'segment'.
107 private IList<IndexReader> readers =
new List<IndexReader>();
// Total document count of the merged segment; set by MergeFields().
110 private int mergedDocs;
// Abort hook: merge work is metered through checkAbort.Work(...).
112 private CheckAbort checkAbort;
// Whether stored fields and term vectors are merged along with postings.
118 private bool mergeDocStores;
// Maximum number of contiguous documents bulk-copied in one raw-copy step
// when merging stored fields / term vectors.
123 private const int MAX_RAW_MERGE_DOCS = 4192;
// NOTE(review): these three assignments belong to different constructors whose
// signatures are missing from this fragmentary listing.
// Directory/name constructor path: install a no-op abort checker.
137 checkAbort =
new AnonymousClassCheckAbort(
this, null, null);
// IndexWriter path with a live merge: real abort checks against the merge.
147 checkAbort =
new CheckAbort(merge, directory);
// IndexWriter path without a merge: no-op abort checker again.
151 checkAbort =
new AnonymousClassCheckAbort1(
this, null, null);
/// <summary>True if any merged field stores term positions/payloads (prox
/// data), i.e. a .prx file will be part of the merged segment.</summary>
internal bool HasProx()
{
    return fieldInfos.HasProx();
}
// Merges all added readers into the target segment and returns the merged
// document count. NOTE(review): fragmentary listing — braces, the calls that
// merge terms/norms/vectors, and the return statement are not visible here.
199 internal int Merge(
bool mergeDocStores)
202 this.mergeDocStores = mergeDocStores;
// Stored fields are merged first; mergedDocs is the resulting doc count.
211 mergedDocs = MergeFields();
// Term vectors are only merged when doc stores are merged and vectors exist.
215 if (mergeDocStores && fieldInfos.HasVectors())
// Closes all source readers. NOTE(review): body not visible in this listing.
225 internal void CloseReaders()
/// <summary>Returns the names of all files that make up the merged segment.
/// Doc-store files (stored fields + fields index) are included only when
/// <c>mergeDocStores</c> is true; the .prx file only when some field stores
/// prox data; the .nrm file only when some indexed field keeps norms; vector
/// files only when vectors exist and doc stores are merged.
/// NOTE(review): reconstructed from a fragmentary listing (braces and the
/// return statement were not visible); verify against the original file.</summary>
internal ICollection<string> GetMergedFiles()
{
    ISet<string> fileSet = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<string>();

    // Basic segment files.
    for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.Length; i++)
    {
        System.String ext = IndexFileNames.COMPOUND_EXTENSIONS[i];

        if (ext.Equals(IndexFileNames.PROX_EXTENSION) && !HasProx())
            continue; // no positions were written, so no .prx file exists

        if (mergeDocStores || (!ext.Equals(IndexFileNames.FIELDS_EXTENSION) && !ext.Equals(IndexFileNames.FIELDS_INDEX_EXTENSION)))
            fileSet.Add(segment + "." + ext);
    }

    // Norms file: present as soon as one indexed field keeps norms.
    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        if (fi.isIndexed && !fi.omitNorms)
        {
            fileSet.Add(segment + "." + IndexFileNames.NORMS_EXTENSION);
            break; // one shared .nrm file covers all fields
        }
    }

    // Term-vector files, only when doc stores are merged.
    if (fieldInfos.HasVectors() && mergeDocStores)
    {
        for (int i = 0; i < IndexFileNames.VECTOR_EXTENSIONS.Length; i++)
        {
            fileSet.Add(segment + "." + IndexFileNames.VECTOR_EXTENSIONS[i]);
        }
    }

    return fileSet;
}
// Bundles the merged segment's files into a compound (.cfs) file named
// 'fileName' and returns the list of files that were bundled.
// NOTE(review): fragmentary listing — the construction of cfsWriter, the
// writer close, and the return statement are not visible here.
272 public ICollection<string> CreateCompoundFile(System.String fileName)
274 ICollection<string> files = GetMergedFiles();
// Queue every merged file into the compound-file writer.
278 foreach(var file
in files)
280 cfsWriter.AddFile(file);
/// <summary>Registers each field name in <paramref name="names"/> with
/// <paramref name="fInfos"/> as indexed, carrying the given term-vector,
/// payload, and omit-TF flags. Norms are marked omitted for a field when the
/// reader reports no norms for it.</summary>
private void AddIndexed(IndexReader reader, FieldInfos fInfos, ICollection<string> names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads, bool omitTFAndPositions)
{
    foreach (var field in names)
    {
        fInfos.Add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.HasNorms(field), storePayloads, omitTFAndPositions);
    }
}
// matchingSegmentReaders[i] is non-null when reader i is a SegmentReader whose
// fieldName -> number mapping matches the merged FieldInfos (enables bulk copy).
298 private SegmentReader[] matchingSegmentReaders;
// Scratch arrays of per-document raw lengths used during bulk copying.
299 private int[] rawDocLengths;
300 private int[] rawDocLengths2;
/// <summary>Determines, for every source reader, whether its stored fields can
/// be bulk-copied: a reader qualifies when it is a SegmentReader whose
/// fieldName -> fieldNumber mapping matches the merged FieldInfos
/// position-by-position. Qualifying readers are recorded in
/// matchingSegmentReaders, and the scratch arrays for raw copying are allocated.
/// NOTE(review): reconstructed from a fragmentary listing (the reader lookup
/// and the 'same' flag declaration were not visible); verify against the
/// original file.</summary>
private void SetMatchingSegmentReaders()
{
    int numReaders = readers.Count;
    matchingSegmentReaders = new SegmentReader[numReaders];

    for (int i = 0; i < numReaders; i++)
    {
        IndexReader reader = readers[i];
        if (reader is SegmentReader)
        {
            SegmentReader segmentReader = (SegmentReader) reader;
            bool same = true;
            FieldInfos segmentFieldInfos = segmentReader.FieldInfos();
            int numFieldInfos = segmentFieldInfos.Size();
            // Field numbering must agree index-by-index for a raw copy to be safe.
            for (int j = 0; same && j < numFieldInfos; j++)
            {
                same = fieldInfos.FieldName(j).Equals(segmentFieldInfos.FieldName(j));
            }
            if (same)
            {
                matchingSegmentReaders[i] = segmentReader;
            }
        }
    }

    // Scratch buffers for bulk-reading raw stored-field / vector doc lengths.
    rawDocLengths = new int[MAX_RAW_MERGE_DOCS];
    rawDocLengths2 = new int[MAX_RAW_MERGE_DOCS];
}
// Builds the merged FieldInfos, writes the .fnm file, and (when doc stores
// are merged) copies all stored fields into the new segment, returning the
// merged document count. NOTE(review): fragmentary listing — braces, the
// loops over 'readers', the docCount declaration, the try/finally around
// fieldsWriter, and the !mergeDocStores branch are not visible here.
344 private int MergeFields()
// Fast path: clone the FieldInfos of the last reader when it is a SegmentReader.
353 SegmentReader sr = (SegmentReader) readers[readers.Count - 1];
354 fieldInfos = (FieldInfos) sr.core.fieldInfos.
Clone();
// Slow path: build the merged FieldInfos from scratch.
358 fieldInfos =
new FieldInfos();
// SegmentReaders expose their FieldInfos directly: copy each entry over.
363 if (reader is SegmentReader)
365 SegmentReader segmentReader = (SegmentReader) reader;
366 FieldInfos readerFieldInfos = segmentReader.FieldInfos();
367 int numReaderFieldInfos = readerFieldInfos.Size();
368 for (
int j = 0; j < numReaderFieldInfos; j++)
370 FieldInfo fi = readerFieldInfos.FieldInfo(j);
371 fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions);
// Other readers: classify their fields by FieldOption, most specific first,
// so later Add calls only widen flags already registered.
376 AddIndexed(reader, fieldInfos, reader.GetFieldNames(
FieldOption.TERMVECTOR_WITH_POSITION_OFFSET),
true,
true,
true,
false,
false);
377 AddIndexed(reader, fieldInfos, reader.GetFieldNames(
FieldOption.TERMVECTOR_WITH_POSITION),
true,
true,
false,
false,
false);
378 AddIndexed(reader, fieldInfos, reader.GetFieldNames(
FieldOption.TERMVECTOR_WITH_OFFSET),
true,
false,
true,
false,
false);
379 AddIndexed(reader, fieldInfos, reader.GetFieldNames(
FieldOption.TERMVECTOR),
true,
false,
false,
false,
false);
380 AddIndexed(reader, fieldInfos, reader.GetFieldNames(
FieldOption.OMIT_TERM_FREQ_AND_POSITIONS),
false,
false,
false,
false,
true);
381 AddIndexed(reader, fieldInfos, reader.GetFieldNames(
FieldOption.STORES_PAYLOADS),
false,
false,
false,
true,
false);
382 AddIndexed(reader, fieldInfos, reader.GetFieldNames(
FieldOption.INDEXED),
false,
false,
false,
false,
false);
// Unindexed fields are registered last, with indexed=false.
383 fieldInfos.Add(reader.GetFieldNames(
FieldOption.UNINDEXED),
false);
// Persist the merged field infos as the .fnm file.
386 fieldInfos.Write(directory, segment +
".fnm");
390 SetMatchingSegmentReaders();
// Merge stored fields, bulk-copying raw docs where readers match.
395 FieldsWriter fieldsWriter =
new FieldsWriter(directory, segment, fieldInfos);
402 SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
403 FieldsReader matchingFieldsReader = null;
// Bulk copy is only used when the matching reader can serve raw docs.
404 if (matchingSegmentReader != null)
406 FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
407 if (fieldsReader != null && fieldsReader.CanReadRawDocs())
409 matchingFieldsReader = fieldsReader;
// Deleted docs must be skipped one-by-one; otherwise a straight copy works.
412 if (reader.HasDeletions)
414 docCount += CopyFieldsWithDeletions(fieldsWriter, reader, matchingFieldsReader);
418 docCount += CopyFieldsNoDeletions(fieldsWriter, reader, matchingFieldsReader);
424 fieldsWriter.Dispose();
// Sanity check: a well-formed .fdx file holds exactly 4 + 8*docCount bytes.
427 System.String fileName = segment +
"." + IndexFileNames.FIELDS_INDEX_EXTENSION;
428 long fdxFileLength = directory.FileLength(fileName);
430 if (4 + ((
long) docCount) * 8 != fdxFileLength)
436 throw new System.SystemException(
"mergeFields produced an invalid result: docCount is " + docCount +
" but fdx file size is " + fdxFileLength +
" file=" + fileName +
" file exists?=" + directory.FileExists(fileName) +
"; now aborting this merge to prevent index corruption");
// When doc stores are not merged, just sum the readers' live doc counts.
445 docCount += reader.NumDocs();
// Copies stored fields from a reader that has deletions, skipping deleted
// docs, and returns the number of live docs copied. When a matching raw
// reader is available, contiguous runs of live docs (up to
// MAX_RAW_MERGE_DOCS) are bulk-copied. NOTE(review): fragmentary listing —
// the do-while body that advances j/numDocs, the docCount accumulation, the
// Document fetch, and the return statement are not visible here.
452 private int CopyFieldsWithDeletions(FieldsWriter fieldsWriter,
IndexReader reader, FieldsReader matchingFieldsReader)
455 int maxDoc = reader.MaxDoc;
// Bulk path: scan for runs of undeleted docs and raw-copy each run.
456 if (matchingFieldsReader != null)
459 for (
int j = 0; j < maxDoc; )
461 if (reader.IsDeleted(j))
469 int start = j, numDocs = 0;
476 if (reader.IsDeleted(j))
482 while (numDocs < MAX_RAW_MERGE_DOCS);
484 IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
485 fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
// 300 units of work per copied doc, metered for abort checks.
487 checkAbort.Work(300 * numDocs);
// Fallback path: re-serialize each live document individually.
492 for (
int j = 0; j < maxDoc; j++)
494 if (reader.IsDeleted(j))
502 fieldsWriter.AddDocument(doc);
504 checkAbort.Work(300);
/// <summary>Copies all stored fields from a reader with no deletions into the
/// merged segment and returns the number of documents copied (== maxDoc).
/// Uses raw bulk copies of up to MAX_RAW_MERGE_DOCS docs when a matching
/// fields reader is available, otherwise re-serializes each document.
/// NOTE(review): reconstructed from a fragmentary listing (the docCount
/// declaration/increment and the return statement were not visible); verify
/// against the original file.</summary>
private int CopyFieldsNoDeletions(FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
{
    int maxDoc = reader.MaxDoc;
    int docCount = 0;
    if (matchingFieldsReader != null)
    {
        // Bulk path: the field numbering is congruent, so raw bytes can be copied.
        while (docCount < maxDoc)
        {
            int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
            IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
            fieldsWriter.AddRawDocuments(stream, rawDocLengths, len);
            docCount += len;
            // 300 units of work per copied doc, metered for abort checks.
            checkAbort.Work(300 * len);
        }
    }
    else
    {
        // Fallback path: re-serialize each document through the writer.
        for (; docCount < maxDoc; docCount++)
        {
            Document doc = reader.Document(docCount);
            fieldsWriter.AddDocument(doc);
            checkAbort.Work(300);
        }
    }
    return docCount;
}
// Merges term vectors from all readers into the new segment.
// NOTE(review): fragmentary listing — braces, the loop over 'readers', and
// the try/finally around termVectorsWriter are not visible here.
542 private void MergeVectors()
544 TermVectorsWriter termVectorsWriter =
new TermVectorsWriter(directory, segment, fieldInfos);
551 SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
552 TermVectorsReader matchingVectorsReader = null;
// Bulk copy requires the reader's original vectors reader to support raw docs.
553 if (matchingSegmentReader != null)
555 TermVectorsReader vectorsReader = matchingSegmentReader.GetTermVectorsReaderOrig();
558 if (vectorsReader != null && vectorsReader.CanReadRawDocs())
560 matchingVectorsReader = vectorsReader;
// Deleted docs must be skipped; otherwise a straight copy works.
563 if (reader.HasDeletions)
565 CopyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader);
569 CopyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader);
575 termVectorsWriter.Dispose();
// Sanity check: a well-formed .tvx file holds exactly 4 + 16*mergedDocs bytes.
578 System.String fileName = segment +
"." + IndexFileNames.VECTORS_INDEX_EXTENSION;
579 long tvxSize = directory.FileLength(fileName);
581 if (4 + ((
long) mergedDocs) * 16 != tvxSize)
587 throw new System.SystemException(
"mergeVectors produced an invalid result: mergedDocs is " + mergedDocs +
" but tvx size is " + tvxSize +
" file=" + fileName +
" file exists?=" + directory.FileExists(fileName) +
"; now aborting this merge to prevent index corruption");
// Copies term vectors from a reader that has deletions, skipping deleted
// docs; contiguous runs of live docs are bulk-copied when a matching raw
// vectors reader is available. NOTE(review): fragmentary listing — the
// do-while body that advances docNum/numDocs and the vector fetch for the
// fallback path are not fully visible here.
590 private void CopyVectorsWithDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader,
IndexReader reader)
592 int maxDoc = reader.MaxDoc;
// Bulk path: scan for runs of undeleted docs and raw-copy each run.
593 if (matchingVectorsReader != null)
596 for (
int docNum = 0; docNum < maxDoc; )
598 if (reader.IsDeleted(docNum))
606 int start = docNum, numDocs = 0;
611 if (docNum >= maxDoc)
613 if (reader.IsDeleted(docNum))
619 while (numDocs < MAX_RAW_MERGE_DOCS);
621 matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
622 termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
// 300 units of work per copied doc, metered for abort checks.
623 checkAbort.Work(300 * numDocs);
// Fallback path: re-serialize each live document's vectors individually.
628 for (
int docNum = 0; docNum < maxDoc; docNum++)
630 if (reader.IsDeleted(docNum))
638 ITermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
639 termVectorsWriter.AddAllDocVectors(vectors);
640 checkAbort.Work(300);
/// <summary>Copies all term vectors from a reader with no deletions into the
/// merged segment. Uses raw bulk copies of up to MAX_RAW_MERGE_DOCS docs when
/// a matching vectors reader is available, otherwise re-serializes each
/// document's vectors. NOTE(review): reconstructed from a fragmentary listing
/// (the docCount declaration/increment was not visible); verify against the
/// original file.</summary>
private void CopyVectorsNoDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
{
    int maxDoc = reader.MaxDoc;
    if (matchingVectorsReader != null)
    {
        // Bulk path: the field numbering is congruent, so raw bytes can be copied.
        int docCount = 0;
        while (docCount < maxDoc)
        {
            int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
            matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, docCount, len);
            termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
            docCount += len;
            // 300 units of work per copied doc, metered for abort checks.
            checkAbort.Work(300 * len);
        }
    }
    else
    {
        // Fallback path: fetch and rewrite each document's vectors.
        for (int docNum = 0; docNum < maxDoc; docNum++)
        {
            ITermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
            termVectorsWriter.AddAllDocVectors(vectors);
            checkAbort.Work(300);
        }
    }
}
// Priority queue of SegmentMergeInfo ordered by current term; created in
// MergeTerms() and drained by MergeTermInfos().
674 private SegmentMergeQueue queue = null;
// Merges the term dictionaries and postings of all readers through the
// FormatPostings writer chain. NOTE(review): fragmentary listing — braces
// and the try/finally that finishes the consumer and closes the queue are
// not visible here.
676 private void MergeTerms()
679 SegmentWriteState state =
new SegmentWriteState(null, directory, segment, null, mergedDocs, 0, termIndexInterval);
681 FormatPostingsFieldsConsumer consumer =
new FormatPostingsFieldsWriter(state, fieldInfos);
// Queue capacity = one slot per source reader.
685 queue =
new SegmentMergeQueue(readers.Count);
687 MergeTermInfos(consumer);
// Copied from the current field's FieldInfo while merging terms; when true,
// AppendPostings skips per-position (and payload) data for that field.
697 internal bool omitTermFreqAndPositions;
// Drives the k-way merge of terms: seeds the queue with one SegmentMergeInfo
// per reader, then repeatedly pops all entries sharing the smallest term,
// appends their merged postings, and re-inserts advanced entries.
// NOTE(review): fragmentary listing — braces, the termEnum creation, the
// docMaps[i] assignment, queue re-insertion, and the matchSize declaration
// are not visible here.
699 private void MergeTermInfos(FormatPostingsFieldsConsumer consumer)
// base_Renamed = doc-id offset of each reader within the merged segment.
701 int base_Renamed = 0;
702 int readerCount = readers.Count;
703 for (
int i = 0; i < readerCount; i++)
707 SegmentMergeInfo smi =
new SegmentMergeInfo(base_Renamed, termEnum, reader);
708 int[] docMap = smi.GetDocMap();
// Lazily allocate the per-reader docMap / deletion-count arrays.
713 docMaps =
new int[readerCount][];
714 delCounts =
new int[readerCount];
717 delCounts[i] = smi.reader.MaxDoc - smi.reader.NumDocs();
720 base_Renamed += reader.NumDocs();
722 System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc - smi.delCount);
// 'match' collects every queue entry currently positioned on the same term.
731 SegmentMergeInfo[] match =
new SegmentMergeInfo[readers.Count];
733 System.String currentField = null;
734 FormatPostingsTermsConsumer termsConsumer = null;
736 while (queue.Size() > 0)
739 match[matchSize++] = queue.Pop();
740 Term term = match[0].term;
741 SegmentMergeInfo top = queue.Top();
// Pull every other reader positioned on an equal term.
743 while (top != null && term.CompareTo(top.term) == 0)
745 match[matchSize++] = queue.Pop();
// Field changed: finish the previous field's terms and start a new one.
// (Reference comparison is safe here because field names are interned.)
749 if ((System.Object) currentField != (System.Object) term.Field)
751 currentField = term.Field;
752 if (termsConsumer != null)
753 termsConsumer.Finish();
754 FieldInfo fieldInfo = fieldInfos.FieldInfo(currentField);
755 termsConsumer = consumer.AddField(fieldInfo);
756 omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
// Append the merged postings for this term; df = resulting doc freq.
759 int df = AppendPostings(termsConsumer, match, matchSize);
761 checkAbort.Work(df / 3.0);
// Advance each matched reader; exhausted ones are closed, others re-queued.
763 while (matchSize > 0)
765 SegmentMergeInfo smi = match[--matchSize];
// Reusable scratch buffer for copying payload bytes in AppendPostings.
775 private byte[] payloadBuffer;
// Per-reader maps from old doc id to merged doc id (a null entry means the
// reader had no deletions, so no remapping is needed).
776 private int[][] docMaps;
// NOTE(review): accessor bodies are not visible in this fragmentary listing.
777 internal int[][] GetDocMaps()
// Per-reader deleted-document counts captured during MergeTermInfos.
781 private int[] delCounts;
782 internal int[] GetDelCounts()
// Appends the merged postings of one term, drawn from the n matched readers
// in 'smis', to the consumer chain; returns the term's merged doc frequency.
// NOTE(review): fragmentary listing — braces, the postings declaration, the
// deleted-doc skip / docMap remapping of 'doc', the df accumulation, and the
// return statement are not visible here.
800 private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis,
int n)
803 FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(smis[0].term.Text);
805 for (
int i = 0; i < n; i++)
807 SegmentMergeInfo smi = smis[i];
809 System.Diagnostics.Debug.Assert(postings != null);
// Offset of this reader's docs within the merged segment.
810 int base_Renamed = smi.base_Renamed;
811 int[] docMap = smi.GetDocMap();
812 postings.Seek(smi.termEnum);
814 while (postings.Next())
817 int doc = postings.Doc;
822 int freq = postings.Freq;
823 FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(doc, freq);
// Positions/payloads are only written for fields that keep them.
825 if (!omitTermFreqAndPositions)
827 for (
int j = 0; j < freq; j++)
829 int position = postings.NextPosition();
830 int payloadLength = postings.PayloadLength;
831 if (payloadLength > 0)
// Grow the shared payload buffer on demand; it is reused across terms.
833 if (payloadBuffer == null || payloadBuffer.Length < payloadLength)
834 payloadBuffer =
new byte[payloadLength];
835 postings.GetPayload(payloadBuffer, 0);
837 posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength);
839 posConsumer.Finish();
843 docConsumer.Finish();
// Merges norms for every indexed field that keeps them into a single .nrm
// file, writing NORMS_HEADER first and skipping deleted documents.
// NOTE(review): fragmentary listing — braces, the IndexOutput declaration,
// the loop over 'readers', and the try/finally that closes the output are
// not visible here.
848 private void MergeNorms()
// Shared buffer, grown to the largest reader's maxDoc and reused per reader.
850 byte[] normBuffer = null;
854 int numFieldInfos = fieldInfos.Size();
855 for (
int i = 0; i < numFieldInfos; i++)
857 FieldInfo fi = fieldInfos.FieldInfo(i);
858 if (fi.isIndexed && !fi.omitNorms)
// The output is opened lazily, on the first field that has norms.
862 output = directory.CreateOutput(segment +
"." + IndexFileNames.NORMS_EXTENSION);
863 output.WriteBytes(NORMS_HEADER, NORMS_HEADER.Length);
867 int maxDoc = reader.MaxDoc;
868 if (normBuffer == null || normBuffer.Length < maxDoc)
871 normBuffer =
new byte[maxDoc];
873 reader.Norms(fi.name, normBuffer, 0);
// No deletions: the whole norms array can be written in one call.
874 if (!reader.HasDeletions)
877 output.WriteBytes(normBuffer, maxDoc);
// With deletions, write one byte at a time, skipping deleted docs so the
// merged norms stay aligned with the merged doc ids.
883 for (
int k = 0; k < maxDoc; k++)
885 if (!reader.IsDeleted(k))
887 output.WriteByte(normBuffer[k]);
891 checkAbort.Work(maxDoc);
/// <summary>Meters merge work and periodically checks whether the running
/// merge has been aborted. Work(units) accumulates units; once roughly 10000
/// units have accumulated it calls MergePolicy.OneMerge.CheckAborted (which
/// throws when the merge was aborted) and resets the counter.
/// NOTE(review): reconstructed from a fragmentary listing — the dir field,
/// the constructor body, and the counter update/reset in Work were not
/// visible; verify against the original file.</summary>
internal class CheckAbort
{
    private double workCount;
    private MergePolicy.OneMerge merge;
    private Directory dir;

    public CheckAbort(MergePolicy.OneMerge merge, Directory dir)
    {
        this.merge = merge;
        this.dir = dir;
    }

    /// <summary>Records that roughly <paramref name="units"/> units of work
    /// were done since the last call; every ~10000 units the merge's abort
    /// flag is consulted.</summary>
    public virtual void Work(double units)
    {
        workCount += units;
        if (workCount >= 10000.0)
        {
            merge.CheckAborted(dir);
            workCount = 0;
        }
    }
}