Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
TermVectorsTermsWriter.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using IndexOutput = Lucene.Net.Store.IndexOutput;
21 using RAMOutputStream = Lucene.Net.Store.RAMOutputStream;
22 using ArrayUtil = Lucene.Net.Util.ArrayUtil;
23 
24 namespace Lucene.Net.Index
25 {
27  {
/// <summary>
/// Allocates the initial one-slot free list that recycled <see cref="PerDoc"/>
/// instances are parked in.
/// </summary>
private void InitBlock()
{
    const int initialFreeListLength = 1;
    docFreeList = new PerDoc[initialFreeListLength];
}
32 
/// <summary>Owning documents writer; supplies the doc-store segment name, offset and directory.</summary>
internal DocumentsWriter docWriter;

/// <summary>Not referenced anywhere in the visible part of this file.</summary>
internal TermVectorsWriter termVectorsWriter;

/// <summary>Pool of recycled <see cref="PerDoc"/> instances; only the first <c>freeCount</c> slots are live.</summary>
internal PerDoc[] docFreeList;

/// <summary>Number of recycled instances currently held in <c>docFreeList</c>.</summary>
internal int freeCount;

/// <summary>Output for the file with the <c>VECTORS_INDEX_EXTENSION</c> extension (two longs are written per document).</summary>
internal IndexOutput tvx;

/// <summary>Output for the file with the <c>VECTORS_DOCUMENTS_EXTENSION</c> extension.</summary>
internal IndexOutput tvd;

/// <summary>Output for the file with the <c>VECTORS_FIELDS_EXTENSION</c> extension.</summary>
internal IndexOutput tvf;

/// <summary>
/// Id (doc-store offset included) of the next document whose entry will be
/// appended to the outputs; reset to 0 whenever the outputs are opened,
/// closed or aborted.
/// </summary>
internal int lastDocID;
41 
43  {
44  InitBlock();
45  this.docWriter = docWriter;
46  }
47 
/// <summary>
/// Creates the per-thread term vectors consumer bound to this writer.
/// </summary>
/// <param name="termsHashPerThread">Per-thread terms hash the new consumer is attached to.</param>
/// <returns>A new <c>TermVectorsTermsWriterPerThread</c>.</returns>
public override TermsHashConsumerPerThread AddThread(TermsHashPerThread termsHashPerThread)
{
    TermsHashConsumerPerThread perThreadConsumer =
        new TermVectorsTermsWriterPerThread(termsHashPerThread, this);
    return perThreadConsumer;
}
52 
/// <summary>
/// Fills <paramref name="count"/> consecutive slots of <paramref name="postings"/>,
/// beginning at <paramref name="start"/>, with fresh <see cref="PostingList"/> instances.
/// </summary>
/// <param name="postings">Array receiving the new posting lists.</param>
/// <param name="start">Index of the first slot to populate.</param>
/// <param name="count">Number of slots to populate.</param>
internal override void CreatePostings(RawPostingList[] postings, int start, int count)
{
    int limit = start + count;
    int slot = start;
    while (slot < limit)
    {
        postings[slot] = new PostingList();
        slot++;
    }
}
59 
/// <summary>
/// Flushes the three term vector outputs when they have been opened, then
/// resets the per-field and per-thread term hashes of every entry in
/// <paramref name="threadsAndFields"/>. Runs while holding the lock on this instance.
/// </summary>
/// <param name="threadsAndFields">Per-thread consumers mapped to the per-field consumers they own.</param>
/// <param name="state">Segment write state; only <c>numDocsInStore</c> is read here.</param>
60  public override void Flush(IDictionary<TermsHashConsumerPerThread, ICollection<TermsHashConsumerPerField>> threadsAndFields, SegmentWriteState state)
61  {
62  lock (this)
63  {
64  // NOTE: it's possible that all documents seen in this segment
65  // hit non-aborting exceptions, in which case we will
66  // not have yet init'd the TermVectorsWriter. This is
67  // actually OK (unlike in the stored fields case)
68  // because, although FieldInfos.hasVectors() will return
69  // true, the TermVectorsReader gracefully handles
70  // non-existence of the term vectors files.
71  if (tvx != null)
72  {
73 
74  if (state.numDocsInStore > 0)
75  // In case there are some final documents that we
76  // didn't see (because they hit a non-aborting exception):
77  Fill(state.numDocsInStore - docWriter.DocStoreOffset);
78 
79  tvx.Flush();
80  tvd.Flush();
81  tvf.Flush();
82  }
83 
84  foreach(var entry in threadsAndFields)
85  {
86  foreach(var field in entry.Value)
87  {
    // NOTE(review): the declaration of perField (original line 88) is missing
    // from this listing; presumably it is a cast of 'field' - confirm against
    // the real source file.
89  perField.termsHashPerField.Reset();
90  perField.ShrinkHash();
91  }
92 
    // NOTE(review): the declaration of perThread (original line 93) is missing
    // from this listing; presumably it is a cast of 'entry.Key' - confirm
    // against the real source file.
94  perThread.termsHashPerThread.Reset(true);
95  }
96  }
97  }
98 
/// <summary>
/// Finishes the term vector files of the current doc store: pads entries for
/// any trailing documents, closes the three outputs, verifies the size of the
/// index file and registers the files as flushed. Does nothing when no output
/// was ever opened. Runs while holding the lock on this instance.
/// </summary>
/// <param name="state">
/// Segment write state supplying <c>numDocsInStore</c>, <c>docStoreSegmentName</c>,
/// <c>directory</c> and the <c>flushedFiles</c> collection that is appended to.
/// </param>
/// <exception cref="System.SystemException">
/// The index file length differs from 4 + 16 * <c>numDocsInStore</c> bytes.
/// </exception>
internal override void CloseDocStore(SegmentWriteState state)
{
    lock (this)
    {
        if (tvx == null)
        {
            // No document in this run had term vectors enabled.
            return;
        }

        Fill(state.numDocsInStore - docWriter.DocStoreOffset);
        tvx.Close();
        tvf.Close();
        tvd.Close();

        // Drop all three references only after every Close() succeeded. The
        // original cleared tvx alone, leaving tvd/tvf pointing at closed
        // streams that a later Abort() would close a second time. If a
        // Close() throws we never get here, so Abort() can still clean up.
        tvx = null;
        tvd = null;
        tvf = null;

        System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);
        System.String indexFileName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
        System.String fieldsFileName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION;
        System.String documentsFileName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION;

        // The index file holds one int written when the outputs are opened
        // (4 bytes) plus two longs per document (16 bytes).
        if (4 + ((long) state.numDocsInStore) * 16 != state.directory.FileLength(indexFileName))
        {
            throw new System.SystemException("after flush: tvx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(indexFileName) + " length in bytes of " + indexFileName + " file exists?=" + state.directory.FileExists(indexFileName));
        }

        state.flushedFiles.Add(indexFileName);
        state.flushedFiles.Add(fieldsFileName);
        state.flushedFiles.Add(documentsFileName);

        docWriter.RemoveOpenFile(indexFileName);
        docWriter.RemoveOpenFile(fieldsFileName);
        docWriter.RemoveOpenFile(documentsFileName);

        lastDocID = 0;
    }
}
129 
/// <summary>Total number of <see cref="PerDoc"/> instances created by <see cref="GetPerDoc"/>.</summary>
internal int allocCount;

/// <summary>
/// Hands out a <see cref="PerDoc"/>: a recycled one when the free list is
/// non-empty, otherwise a newly allocated one. Runs while holding the lock on
/// this instance.
/// </summary>
/// <returns>A <see cref="PerDoc"/> instance for the caller to fill in.</returns>
internal PerDoc GetPerDoc()
{
    lock (this)
    {
        if (freeCount != 0)
        {
            // Reuse the most recently freed instance.
            freeCount--;
            return docFreeList[freeCount];
        }

        allocCount++;
        if (allocCount > docFreeList.Length)
        {
            // Enlarge the free list before handing out the new instance so
            // that every outstanding PerDoc can be recycled later. The list
            // is empty at this point (freeCount == 0), so nothing is copied.
            System.Diagnostics.Debug.Assert(allocCount == 1 + docFreeList.Length);
            int grownLength = ArrayUtil.GetNextSize(allocCount);
            docFreeList = new PerDoc[grownLength];
        }

        PerDoc fresh = new PerDoc(this);
        return fresh;
    }
}
153 
/// <summary>
/// Writes "no term vectors" entries for every document between
/// <c>lastDocID</c> and <paramref name="docID"/> (exclusive), i.e. the
/// documents skipped since the last one that was written.
/// </summary>
/// <param name="docID">
/// Document id relative to the doc store; <c>docWriter.DocStoreOffset</c> is
/// added to it before comparing with <c>lastDocID</c>.
/// </param>
internal void Fill(int docID)
{
    int target = docID + docWriter.DocStoreOffset;
    if (lastDocID >= target)
    {
        // Nothing was skipped.
        return;
    }

    // Every padded entry records the same (current) position in tvf.
    long sharedTvfPointer = tvf.FilePointer;
    do
    {
        tvx.WriteLong(tvd.FilePointer);
        tvd.WriteVInt(0); // zero vector fields for this document
        tvx.WriteLong(sharedTvfPointer);
        lastDocID++;
    }
    while (lastDocID < target);
}
173 
/// <summary>
/// Opens the three term vector outputs for the current doc-store segment and
/// writes the format header to each, unless they are already open or no
/// doc-store segment is set. Runs while holding the lock on this instance.
/// </summary>
internal void InitTermVectorsWriter()
{
    lock (this)
    {
        if (tvx != null)
        {
            // Outputs are already open.
            return;
        }

        System.String segment = docWriter.DocStoreSegment;
        if (segment == null)
        {
            return;
        }

        System.Diagnostics.Debug.Assert(segment != null);

        System.String indexFile = segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
        System.String documentsFile = segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION;
        System.String fieldsFile = segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION;

        // An exception while the outputs are being created leaves the files
        // in an unknown state, so the caller has to abort this segment.
        tvx = docWriter.directory.CreateOutput(indexFile);
        tvd = docWriter.directory.CreateOutput(documentsFile);
        tvf = docWriter.directory.CreateOutput(fieldsFile);

        int format = TermVectorsReader.FORMAT_CURRENT;
        tvx.WriteInt(format);
        tvd.WriteInt(format);
        tvf.WriteInt(format);

        docWriter.AddOpenFile(indexFile);
        docWriter.AddOpenFile(fieldsFile);
        docWriter.AddOpenFile(documentsFile);

        lastDocID = 0;
    }
}
208 
/// <summary>
/// Appends the buffered term vectors of one document to the real outputs,
/// then resets <paramref name="perDoc"/> and returns it to the free list.
/// Runs while holding the lock on this instance.
/// </summary>
/// <param name="perDoc">Buffered per-document state to write out and recycle.</param>
internal void FinishDocument(PerDoc perDoc)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument start"));

        InitTermVectorsWriter();

        // Pad entries for documents skipped before this one.
        Fill(perDoc.docID);

        // Index entry for this document: where its data starts in tvd and tvf.
        tvx.WriteLong(tvd.FilePointer);
        tvx.WriteLong(tvf.FilePointer);

        int fieldCount = perDoc.numVectorFields;
        tvd.WriteVInt(fieldCount);
        if (fieldCount > 0)
        {
            int n = 0;
            while (n < fieldCount)
            {
                tvd.WriteVInt(perDoc.fieldNumbers[n]);
                n++;
            }

            // Field pointers are stored as deltas; the first one is always 0
            // and therefore not written.
            long[] pointers = perDoc.fieldPointers;
            System.Diagnostics.Debug.Assert(0 == pointers[0]);
            long previous = pointers[0];
            n = 1;
            while (n < fieldCount)
            {
                long current = pointers[n];
                tvd.WriteVLong(current - previous);
                previous = current;
                n++;
            }

            perDoc.perDocTvf.WriteTo(tvf);
            perDoc.numVectorFields = 0;
        }

        System.Diagnostics.Debug.Assert(lastDocID == perDoc.docID + docWriter.DocStoreOffset);

        lastDocID++;
        perDoc.Reset();
        Free(perDoc);
        System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument end"));
    }
}
248 
/// <summary>
/// Reports whether any persistent RAM was released. This writer keeps no
/// state beyond the current document, so there is never anything to release.
/// </summary>
/// <returns>Always <c>false</c>.</returns>
public bool FreeRAM()
{
    const bool releasedAnything = false;
    return releasedAnything;
}
255 
/// <summary>
/// Closes whichever term vector outputs are open, ignoring any exception
/// raised by a close, clears the references and resets <c>lastDocID</c>.
/// </summary>
public override void Abort()
{
    CloseQuietly(ref tvx);
    CloseQuietly(ref tvd);
    CloseQuietly(ref tvf);
    lastDocID = 0;
}

/// <summary>
/// Closes <paramref name="output"/> if it is set, swallowing any exception
/// from the close, and then sets the reference to null.
/// </summary>
private static void CloseQuietly(ref IndexOutput output)
{
    if (output == null)
    {
        return;
    }

    try
    {
        output.Close();
    }
    catch (System.Exception)
    {
        // Deliberately ignored: abort is best effort.
    }

    output = null;
}
293 
/// <summary>
/// Returns <paramref name="doc"/> to the free list so that
/// <see cref="GetPerDoc"/> can hand it out again. Runs while holding the lock
/// on this instance.
/// </summary>
/// <param name="doc">Instance to recycle.</param>
internal void Free(PerDoc doc)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length);
        int slot = freeCount++;
        docFreeList[slot] = doc;
    }
}
302 
/// <summary>
/// Buffers the term vectors of a single document in RAM until the enclosing
/// writer appends them to the real outputs. Instances are recycled through
/// the enclosing writer's free list.
/// </summary>
internal class PerDoc : DocumentsWriter.DocWriter
{
    private TermVectorsTermsWriter enclosingInstance;

    /// <summary>Buffer obtained from the documents writer; backs <c>perDocTvf</c>.</summary>
    internal DocumentsWriter.PerDocBuffer buffer;

    /// <summary>In-RAM stream holding this document's field data.</summary>
    internal RAMOutputStream perDocTvf;

    /// <summary>Number of live entries in <c>fieldNumbers</c> and <c>fieldPointers</c>.</summary>
    internal int numVectorFields;

    /// <summary>Field numbers in the order they were added.</summary>
    internal int[] fieldNumbers = new int[1];

    /// <summary>Position inside <c>perDocTvf</c> at the moment each field was added.</summary>
    internal long[] fieldPointers = new long[1];

    /// <summary>Creates an instance bound to <paramref name="enclosingInstance"/>.</summary>
    /// <param name="enclosingInstance">Writer that owns and recycles this instance.</param>
    public PerDoc(TermVectorsTermsWriter enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;
        this.buffer = enclosingInstance.docWriter.NewPerDocBuffer();
        this.perDocTvf = new RAMOutputStream(this.buffer);
    }

    /// <summary>Writer that owns this instance.</summary>
    public TermVectorsTermsWriter Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    /// <summary>Resets the RAM stream, recycles the buffer and clears the field count.</summary>
    internal void Reset()
    {
        perDocTvf.Reset();
        buffer.Recycle();
        numVectorFields = 0;
    }

    /// <summary>Discards the buffered data and returns this instance to the owner's free list.</summary>
    public override void Abort()
    {
        Reset();
        Enclosing_Instance.Free(this);
    }

    /// <summary>
    /// Records that data for <paramref name="fieldNumber"/> starts at the
    /// current position of <c>perDocTvf</c>, growing both arrays when full.
    /// </summary>
    /// <param name="fieldNumber">Number of the field being added.</param>
    internal void AddField(int fieldNumber)
    {
        int slot = numVectorFields;
        if (slot == fieldNumbers.Length)
        {
            fieldNumbers = ArrayUtil.Grow(fieldNumbers);
            fieldPointers = ArrayUtil.Grow(fieldPointers);
        }

        fieldNumbers[slot] = fieldNumber;
        fieldPointers[slot] = perDocTvf.FilePointer;
        numVectorFields = slot + 1;
    }

    /// <summary>Size reported by the underlying buffer.</summary>
    /// <returns><c>buffer.SizeInBytes</c>.</returns>
    public override long SizeInBytes()
    {
        long bytes = buffer.SizeInBytes;
        return bytes;
    }

    /// <summary>Asks the owning writer to write this document out and recycle the instance.</summary>
    public override void Finish()
    {
        Enclosing_Instance.FinishDocument(this);
    }
}
367 
/// <summary>Per-term posting state tracked while term vectors are collected.</summary>
internal sealed class PostingList : RawPostingList
{
    /// <summary>How many times this term occurred in the current doc.</summary>
    internal int freq;

    /// <summary>Last offset we saw.</summary>
    internal int lastOffset;

    /// <summary>Last position where this term occurred.</summary>
    internal int lastPosition;
}
374 
/// <summary>
/// Size in bytes accounted for one posting: the base
/// <c>RawPostingList.BYTES_SIZE</c> plus three ints (presumably the three int
/// fields declared by <see cref="PostingList"/>).
/// </summary>
/// <returns>The per-posting byte count.</returns>
internal override int BytesPerPosting()
{
    int extraIntBytes = 3 * DocumentsWriter.INT_NUM_BYTE;
    return RawPostingList.BYTES_SIZE + extraIntBytes;
}
379  }
380 }