24 namespace Lucene.Net.Index
// Term-vector format versions. The int that CheckValidFormat reads from each file is
// compared against these values throughout the reader to pick the matching layout.
31 internal const int FORMAT_VERSION = 2;
// From this version on, SeekTvx addresses tvx with 16 bytes per document instead of 8.
34 internal const int FORMAT_VERSION2 = 3;
// ReadTermVector derives its preUTF8 flag from this value, and CanReadRawDocs
// requires at least this format.
37 internal const int FORMAT_UTF8_LENGTH_IN_BYTES = 4;
// Highest accepted format: CheckValidFormat throws for any value greater than this.
40 internal static readonly
int FORMAT_CURRENT = FORMAT_UTF8_LENGTH_IN_BYTES;
// Added to every tvx seek offset and subtracted from the tvx length in the size asserts;
// presumably the byte length of the leading format int -- TODO confirm.
43 internal const int FORMAT_SIZE = 4;
// Bit masks applied to the flags byte that ReadTermVector reads from tvf.
45 internal const byte STORE_POSITIONS_WITH_TERMVECTOR = (byte) (0x1);
46 internal const byte STORE_OFFSET_WITH_TERMVECTOR = (byte) (0x2);
// Document count derived from the length of the tvx file during initialization.
54 private int numTotalDocs;
// Added to every document number before it is turned into a tvx offset.
58 private int docStoreOffset;
// Dispose(bool) returns immediately when this is true.
61 private bool isDisposed;
// Initialization fragment (the enclosing member's header is not part of this excerpt).
// Read the format value from each of the three inputs; the asserts expect all three to agree.
80 format = CheckValidFormat(tvx);
82 int tvdFormat = CheckValidFormat(tvd);
84 int tvfFormat = CheckValidFormat(tvf);
86 System.Diagnostics.Debug.Assert(format == tvdFormat);
87 System.Diagnostics.Debug.Assert(format == tvfFormat);
// Derive the document count from the tvx length: 16-byte entries (>> 4) from
// FORMAT_VERSION2 on, 8-byte entries (>> 3) otherwise. The FORMAT_SIZE (4) leading bytes
// are smaller than one entry, so the shift discards them.
89 if (format >= FORMAT_VERSION2)
91 System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 16 == 0);
92 numTotalDocs = (int)(tvx.Length() >> 4);
// (presumably the else branch for older formats; the else keyword is not in this excerpt)
96 System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 8 == 0);
97 numTotalDocs = (int)(tvx.Length() >> 3);
// A docStoreOffset of -1 makes the reader start at entry 0 and span every document in tvx.
100 if (-1 == docStoreOffset)
102 this.docStoreOffset = 0;
103 this.size = numTotalDocs;
// NOTE(review): the unqualified 'size' here may be a parameter rather than this.size;
// the declaration is not visible -- confirm.
104 System.Diagnostics.Debug.Assert(size == 0 || numTotalDocs == size);
// (presumably the branch for an explicit offset; the else keyword is not in this excerpt)
108 this.docStoreOffset = docStoreOffset;
// The requested window [docStoreOffset, docStoreOffset + size) must fit inside tvx.
112 System.Diagnostics.Debug.Assert(numTotalDocs >= size + docStoreOffset,
"numTotalDocs=" + numTotalDocs +
" size=" + size +
" docStoreOffset=" + docStoreOffset);
125 this.fieldInfos = fieldInfos;
// Moves the tvx input to the entry for docNum. The entry index is docNum + docStoreOffset;
// entries are 8 bytes wide before FORMAT_VERSION2 and 16 bytes wide otherwise, and
// FORMAT_SIZE bytes are skipped at the start of the file. The L suffix makes the
// multiplication 64-bit.
// NOTE(review): docNum + docStoreOffset is still added as int before the widening multiply.
154 private void SeekTvx(
int docNum)
156 if (format < FORMAT_VERSION2)
157 tvx.Seek((docNum + docStoreOffset) * 8L + FORMAT_SIZE);
// (presumably the else branch; the else keyword is not in this excerpt)
159 tvx.Seek((docNum + docStoreOffset) * 16L + FORMAT_SIZE);
// Returns true when the format read from the files is at least FORMAT_UTF8_LENGTH_IN_BYTES.
162 internal virtual bool CanReadRawDocs()
164 return format >= FORMAT_UTF8_LENGTH_IN_BYTES;
// Fills tvdLengths[i] and tvfLengths[i] with the number of bytes document
// (startDocID + i) occupies in the tvd and tvf files, for i in [0, numDocs), and leaves
// tvd and tvf positioned at the start of the first document's data.
// Throws System.SystemException when the format is older than FORMAT_VERSION2.
174 internal void RawDocs(
int[] tvdLengths,
int[] tvfLengths,
int startDocID,
int numDocs)
// The bodies of the next two loops and the condition guarding them are not part of this excerpt.
179 for (
int i = 0; i < tvdLengths.Length; i++)
183 for (
int i = 0; i < tvfLengths.Length; i++)
// NOTE(review): this guard accepts format FORMAT_VERSION2 (3), while CanReadRawDocs requires
// FORMAT_UTF8_LENGTH_IN_BYTES (4), so the guard is looser than that check.
// NOTE(review): System.SystemException is thrown directly; a more specific exception type
// would be more idiomatic, but callers may match on this type -- confirm before changing.
192 if (format < FORMAT_VERSION2)
193 throw new System.SystemException(
"cannot read raw docs with older term vector formats");
// Two consecutive longs are read from tvx: the first document's start position in tvd,
// then its start position in tvf. (tvx is presumably positioned on the entry for
// startDocID by a seek that is not part of this excerpt.)
197 long tvdPosition = tvx.ReadLong();
198 tvd.Seek(tvdPosition);
200 long tvfPosition = tvx.ReadLong();
201 tvf.Seek(tvfPosition);
203 long lastTvdPosition = tvdPosition;
204 long lastTvfPosition = tvfPosition;
// Each document's length is the distance from its start to the next document's start
// (the declaration and increment of 'count' are not part of this excerpt).
207 while (count < numDocs)
// docID is the index of the NEXT document, i.e. the one whose start ends the current document.
209 int docID = docStoreOffset + startDocID + count + 1;
210 System.Diagnostics.Debug.Assert(docID <= numTotalDocs);
211 if (docID < numTotalDocs)
213 tvdPosition = tvx.ReadLong();
214 tvfPosition = tvx.ReadLong();
// No next document (presumably the else branch): the current document runs to the end
// of each file, which is only valid for the last requested document.
218 tvdPosition = tvd.Length();
219 tvfPosition = tvf.Length();
220 System.Diagnostics.Debug.Assert(count == numDocs - 1);
222 tvdLengths[count] = (int) (tvdPosition - lastTvdPosition);
223 tvfLengths[count] = (int) (tvfPosition - lastTvfPosition);
225 lastTvdPosition = tvdPosition;
226 lastTvfPosition = tvfPosition;
// Reads one int from in_Renamed as the format value and throws CorruptIndexException
// when it is greater than FORMAT_CURRENT. Reading advances the input by that int.
// (The return statement is not part of this excerpt; callers assign the result to a format variable.)
230 private int CheckValidFormat(
IndexInput in_Renamed)
232 int format = in_Renamed.ReadInt();
233 if (format > FORMAT_CURRENT)
235 throw new CorruptIndexException(
"Incompatible format version: " + format +
" expected " + FORMAT_CURRENT +
" or less");
// Public parameterless Dispose; its body is not part of this excerpt.
240 public void Dispose()
// Disposal worker. Returns immediately when isDisposed is already set. Three handlers
// catch System.IO.IOException (the try blocks and handler bodies are not part of this
// excerpt; presumably each remembers its exception in 'keep'), and an IOException is
// thrown at the end based on 'keep'.
245 protected virtual void Dispose(
bool disposing)
247 if (isDisposed)
return;
253 System.IO.IOException keep = null;
259 catch (System.IO.IOException e)
269 catch (System.IO.IOException e)
279 catch (System.IO.IOException e)
// Report the failure with the original message and keep the original exception (and
// its stack trace) reachable through InnerException. Previously the stack-trace text
// was passed as the message and the original exception was discarded.
286 throw new System.IO.IOException(keep.Message, keep);
// Returns an int; the body is not part of this excerpt.
296 internal virtual int Size()
// Fragment of a per-field lookup (the enclosing member's header is not part of this excerpt).
// The long read from tvx is the document's position in tvd, where the field count is stored first.
312 long tvdPosition = tvx.ReadLong();
314 tvd.Seek(tvdPosition);
315 int fieldCount = tvd.ReadVInt();
// Scan the document's field numbers for fieldNumber.
322 for (
int i = 0; i < fieldCount; i++)
// From FORMAT_VERSION on each VInt is assigned as the field number directly; the second
// statement (presumably the else branch) accumulates the VInt onto the previous number.
324 if (format >= FORMAT_VERSION)
325 number = tvd.ReadVInt();
327 number += tvd.ReadVInt();
329 if (number == fieldNumber)
// Starting tvf position: read as a long from tvx from FORMAT_VERSION2 on, otherwise
// (presumably the else branch) as a VLong from tvd.
339 if (format >= FORMAT_VERSION2)
340 position = tvx.ReadLong();
342 position = tvd.ReadVLong();
// Add 'found' further VLong deltas from tvd to reach the matched field's position
// ('found' is assigned outside this excerpt; presumably the index of the matching field).
343 for (
int i = 1; i <= found; i++)
344 position += tvd.ReadVLong();
347 ReadTermVector(field, position, mapper);
// Delegates to the Get overload that takes a mapper (the enclosing member is not part of this excerpt).
374 Get(docNum, field, mapper);
// Reads fieldCount field numbers from tvd at its current position and resolves each to a
// name through fieldInfos.FieldName. (The return statement is not part of this excerpt.)
381 private System.String[] ReadFields(
int fieldCount)
384 System.String[] fields =
new System.String[fieldCount];
386 for (
int i = 0; i < fieldCount; i++)
// From FORMAT_VERSION on each VInt is assigned as the field number directly; the second
// statement (presumably the else branch) accumulates the VInt onto the previous number.
388 if (format >= FORMAT_VERSION)
389 number = tvd.ReadVInt();
391 number += tvd.ReadVInt();
393 fields[i] = fieldInfos.
FieldName(number);
// Builds the tvf position of each of the document's fieldCount fields. The first position
// is read outright; every later one is the previous position plus a VLong delta from tvd.
// (The return statement is not part of this excerpt.)
// NOTE(review): tvfPointers[0] is written unconditionally, so fieldCount == 0 would be out
// of range here; callers presumably guard against that -- confirm.
401 private long[] ReadTvfPointers(
int fieldCount)
// First position: a long from tvx from FORMAT_VERSION2 on, otherwise (presumably the else
// branch) a VLong from tvd.
405 if (format >= FORMAT_VERSION2)
406 position = tvx.ReadLong();
408 position = tvd.ReadVLong();
410 long[] tvfPointers =
new long[fieldCount];
411 tvfPointers[0] = position;
413 for (
int i = 1; i < fieldCount; i++)
415 position += tvd.ReadVLong();
416 tvfPointers[i] = position;
// Fragment of a whole-document lookup (the enclosing member's header is not part of this excerpt).
// Seek tvd to the position stored in tvx, read the field count, then the field names and
// tvf positions, and read the vectors into 'result'.
437 long tvdPosition = tvx.ReadLong();
439 tvd.Seek(tvdPosition);
440 int fieldCount = tvd.ReadVInt();
445 System.String[] fields = ReadFields(fieldCount);
446 long[] tvfPointers = ReadTvfPointers(fieldCount);
447 result = ReadTermVectors(docNum, fields, tvfPointers);
// Fragment of a whole-document lookup that passes each field's vector to a caller-supplied
// mapper (the enclosing member's header is not part of this excerpt).
465 long tvdPosition = tvx.ReadLong();
467 tvd.Seek(tvdPosition);
468 int fieldCount = tvd.ReadVInt();
473 System.String[] fields = ReadFields(fieldCount);
474 long[] tvfPointers = ReadTvfPointers(fieldCount);
476 ReadTermVectors(fields, tvfPointers, mapper);
// For each entry of fields, reads that field's term vector from tvf at tvfPointers[i]
// through a mapper that is first given docNum. The creation of 'mapper', the result array
// and the return statement are not part of this excerpt.
486 private SegmentTermVector[] ReadTermVectors(
int docNum, System.String[] fields,
long[] tvfPointers)
489 for (
int i = 0; i < fields.Length; i++)
492 mapper.SetDocumentNumber(docNum);
493 ReadTermVector(fields[i], tvfPointers[i], mapper);
// Calls ReadTermVector once per field, pairing fields[i] with tvfPointers[i] and passing
// the same mapper each time.
499 private void ReadTermVectors(System.String[] fields,
long[] tvfPointers, TermVectorMapper mapper)
501 for (
int i = 0; i < fields.Length; i++)
503 ReadTermVector(fields[i], tvfPointers[i], mapper);
// Reads one field's term vector from tvf starting at tvfPointer and reports it to mapper:
// SetExpectations once, then Map once per term with its frequency and, when read, its
// positions and offsets. Several declarations, branch keywords and loop bodies are not
// part of this excerpt.
516 private void ReadTermVector(System.String field,
long tvfPointer, TermVectorMapper mapper)
521 tvf.Seek(tvfPointer);
523 int numTerms = tvf.ReadVInt();
// From FORMAT_VERSION on, a flags byte says whether positions and/or offsets are stored.
532 if (format >= FORMAT_VERSION)
534 byte bits = tvf.ReadByte();
535 storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
536 storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
// (presumably the else branch for older formats: both flags are cleared)
541 storePositions =
false;
542 storeOffsets =
false;
544 mapper.SetExpectations(field, numTerms, storeOffsets, storePositions);
// preUTF8 is true for formats older than FORMAT_UTF8_LENGTH_IN_BYTES; presumably it selects
// between the char buffer and the byte buffer below (the branch keywords are not in this excerpt).
550 bool preUTF8 = format < FORMAT_UTF8_LENGTH_IN_BYTES;
555 charBuffer =
new char[10];
561 byteBuffer =
new byte[20];
564 for (
int i = 0; i < numTerms; i++)
// Terms share a prefix with their predecessor: 'start' units are reused from the buffer
// as left by the previous term, and 'deltaLength' new units are read after them.
566 start = tvf.ReadVInt();
567 deltaLength = tvf.ReadVInt();
568 totalLength = start + deltaLength;
// Char path: grow the buffer to 1.5x the needed length when too small, keeping the
// first 'start' chars, then read the new chars in after them.
575 if (charBuffer.Length < totalLength)
577 char[] newCharBuffer =
new char[(int) (1.5 * totalLength)];
578 Array.Copy(charBuffer, 0, newCharBuffer, 0, start);
579 charBuffer = newCharBuffer;
581 tvf.ReadChars(charBuffer, start, deltaLength);
582 term =
new System.String(charBuffer, 0, totalLength);
// Byte path: same growth scheme on a byte buffer; the term is decoded from UTF-8.
587 if (byteBuffer.Length < totalLength)
589 byte[] newByteBuffer =
new byte[(int) (1.5 * totalLength)];
590 Array.Copy(byteBuffer, 0, newByteBuffer, 0, start);
591 byteBuffer = newByteBuffer;
593 tvf.ReadBytes(byteBuffer, start, deltaLength);
594 term = System.Text.Encoding.UTF8.GetString(byteBuffer, 0, totalLength);
596 int freq = tvf.ReadVInt();
597 int[] positions = null;
// Positions are materialized only when the mapper does not ignore them. Each stored VInt
// is a delta added to the previous position.
602 if (mapper.IsIgnoringPositions ==
false)
604 positions =
new int[freq];
605 int prevPosition = 0;
606 for (
int j = 0; j < freq; j++)
608 positions[j] = prevPosition + tvf.ReadVInt();
609 prevPosition = positions[j];
// Loop body not part of this excerpt; presumably it reads past the stored positions
// without keeping them -- confirm.
616 for (
int j = 0; j < freq; j++)
622 TermVectorOffsetInfo[] offsets = null;
// Offsets are materialized only when the mapper does not ignore them. Each start offset is
// a delta from the previous end offset, and each end offset a delta from its own start.
// (The declaration of prevOffset is not part of this excerpt.)
626 if (mapper.IsIgnoringOffsets ==
false)
628 offsets =
new TermVectorOffsetInfo[freq];
630 for (
int j = 0; j < freq; j++)
632 int startOffset = prevOffset + tvf.ReadVInt();
633 int endOffset = startOffset + tvf.ReadVInt();
634 offsets[j] =
new TermVectorOffsetInfo(startOffset, endOffset);
635 prevOffset = endOffset;
// Loop body not part of this excerpt; presumably it reads past the stored offsets
// without keeping them -- confirm.
640 for (
int j = 0; j < freq; j++)
647 mapper.Map(term, freq, offsets, positions);
// Returns a copy of this reader as System.Object. The visible condition requires tvx,
// tvd and tvf all to be non-null; what it guards is not part of this excerpt.
651 public virtual System.Object Clone()
658 if (tvx != null && tvd != null && tvf != null)
// State of a mapper that collects one field's vector into parallel arrays (the enclosing
// type's header is not part of this excerpt). terms, termFreqs and positions are written
// at the same index, currentPosition.
674 private System.String[] terms;
675 private int[] termFreqs;
676 private int[][] positions;
678 private int currentPosition;
// Copied from the storeOffsets / storePositions arguments of SetExpectations.
679 private bool storingOffsets;
680 private bool storingPositions;
681 private System.String field;
// Sizes the per-term arrays for numTerms entries and records whether offsets and positions
// are being stored. Some statements and any conditions around the allocations are not
// part of this excerpt.
683 public override void SetExpectations(System.String field,
int numTerms,
bool storeOffsets,
bool storePositions)
686 terms =
new System.String[numTerms];
687 termFreqs =
new int[numTerms];
688 this.storingOffsets = storeOffsets;
689 this.storingPositions = storePositions;
691 this.positions =
new int[numTerms][];
// Fragment that stores one term's data at index currentPosition (the enclosing member's
// header, the condition around the offsets store, and any increment of currentPosition
// are not part of this excerpt).
698 terms[currentPosition] = term;
699 termFreqs[currentPosition] = frequency;
702 this.offsets[currentPosition] = offsets;
// Positions are kept only when SetExpectations announced them.
704 if (storingPositions)
706 this.positions[currentPosition] = positions;
// Fragment (the enclosing member's header and the guarded statements are not part of this
// excerpt): proceeds only when both field and terms are set, then branches on whether
// positions or offsets were stored.
717 if (field != null && terms != null)
719 if (storingPositions || storingOffsets)