Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
FormatPostingsDocsWriter.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using IndexOutput = Lucene.Net.Store.IndexOutput;
20 using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;
21 
22 namespace Lucene.Net.Index
23 {
24  /// <summary>Consumes doc and freq, writing them using the current
25  /// index file format
26  /// </summary>
28  {
29 
30  internal IndexOutput out_Renamed;
31  internal FormatPostingsTermsWriter parent;
32  internal FormatPostingsPositionsWriter posWriter;
33  internal DefaultSkipListWriter skipListWriter;
34  internal int skipInterval;
35  internal int totalNumDocs;
36 
37  internal bool omitTermFreqAndPositions;
38  internal bool storePayloads;
39  internal long freqStart;
40  internal FieldInfo fieldInfo;
41 
43  {
44  this.parent = parent;
45  System.String fileName = IndexFileNames.SegmentFileName(parent.parent.segment, IndexFileNames.FREQ_EXTENSION);
46  state.flushedFiles.Add(fileName);
47  out_Renamed = parent.parent.dir.CreateOutput(fileName);
48  totalNumDocs = parent.parent.totalNumDocs;
49 
50  // TODO: abstraction violation
51  skipInterval = parent.parent.termsOut.skipInterval;
52  skipListWriter = parent.parent.skipListWriter;
53  skipListWriter.SetFreqOutput(out_Renamed);
54 
55  posWriter = new FormatPostingsPositionsWriter(state, this);
56  }
57 
/// <summary>
/// Records the given field on this writer, copies its
/// omitTermFreqAndPositions and storePayloads flags into the matching
/// local fields, and forwards the field to the positions writer.
/// </summary>
/// <param name="fieldInfo">The field to record and pass on to the positions writer.</param>
internal void SetField(FieldInfo fieldInfo)
{
    this.fieldInfo = fieldInfo;

    // Cache the per-field flags so AddDoc can consult them directly.
    storePayloads = fieldInfo.storePayloads;
    omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;

    // The positions writer tracks the same field.
    posWriter.SetField(fieldInfo);
}
65 
66  internal int lastDocID;
67  internal int df;
68 
/// <summary>
/// Adds a new doc in this term and writes its entry to the freq output.
/// Under the consumer contract a null return means positions/payloads are
/// simply not consumed; this implementation returns the posWriter field.
/// </summary>
/// <param name="docID">
/// Id of the doc being added. Must not be negative and, once at least one
/// doc has been added for the current term, must be greater than the
/// previously added doc id.
/// </param>
/// <param name="termDocFreq">
/// Frequency value written for this doc. It is ignored when
/// omitTermFreqAndPositions is set; a value of 1 is folded into the low
/// bit of the doc delta instead of being written separately.
/// </param>
/// <returns>The positions writer owned by this instance.</returns>
/// <exception cref="CorruptIndexException">
/// Thrown when <paramref name="docID"/> is negative or does not advance
/// past the previous doc id of the term.
/// </exception>
internal override FormatPostingsPositionsConsumer AddDoc(int docID, int termDocFreq)
{
    int docDelta = docID - lastDocID;

    bool outOfOrder = docID < 0 || (df > 0 && docDelta <= 0);
    if (outOfOrder)
    {
        throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )");
    }

    // Count this doc; every skipInterval-th doc buffers a skip entry. The
    // skip data is recorded with lastDocID as it was before this doc.
    df++;
    if (df % skipInterval == 0)
    {
        // TODO: abstraction violation
        skipListWriter.SetSkipData(lastDocID, storePayloads, posWriter.lastPayloadLength);
        skipListWriter.BufferSkip(df);
    }

    System.Diagnostics.Debug.Assert(docID < totalNumDocs, "docID=" + docID + " totalNumDocs=" + totalNumDocs);

    lastDocID = docID;

    if (omitTermFreqAndPositions)
    {
        // Only the plain doc delta is written when freqs are omitted.
        out_Renamed.WriteVInt(docDelta);
        return posWriter;
    }

    // The delta is shifted left by one; the low bit set means "freq is 1",
    // otherwise the freq follows as its own VInt.
    int shiftedDelta = docDelta << 1;
    if (termDocFreq == 1)
    {
        out_Renamed.WriteVInt(shiftedDelta | 1);
    }
    else
    {
        out_Renamed.WriteVInt(shiftedDelta);
        out_Renamed.WriteVInt(termDocFreq);
    }

    return posWriter;
}
102 
103  private TermInfo termInfo = new TermInfo(); // minimize consing
104  internal UnicodeUtil.UTF8Result utf8 = new UnicodeUtil.UTF8Result();
105 
/// <summary>
/// Called when we are done adding docs to this term. Writes the buffered
/// skip data to the freq output, hands the term to the parent's terms
/// output when at least one doc was added, and resets the per-term
/// counters (lastDocID and df) to zero.
/// </summary>
internal override void Finish()
{
    long skipPointer = skipListWriter.WriteSkip(out_Renamed);

    // TODO: this is abstraction violation -- we should not
    // peek up into parents terms encoding format
    // The skip position is stored as an offset from the parent's freqStart.
    int skipOffset = (int) (skipPointer - parent.freqStart);
    termInfo.Set(df, parent.freqStart, parent.proxStart, skipOffset);

    // TODO: we could do this incrementally
    UnicodeUtil.UTF16toUTF8(parent.currentTerm, parent.currentTermStart, utf8);

    bool termHasDocs = df > 0;
    if (termHasDocs)
    {
        parent.termsOut.Add(fieldInfo.number, utf8.result, utf8.length, termInfo);
    }

    // Reset the per-term state.
    df = 0;
    lastDocID = 0;
}
126 
/// <summary>
/// Disposes the freq output and then the positions writer. The positions
/// writer is disposed even when disposing the freq output throws; the
/// exception still propagates to the caller.
/// </summary>
public void Dispose()
{
    // Move to protected method if class becomes unsealed
    try
    {
        out_Renamed.Dispose();
    }
    finally
    {
        // Runs on the failure path too, so a throwing freq output cannot
        // leave the positions writer undisposed.
        posWriter.Dispose();
    }
}
133  }
134 }