Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
DocFieldProcessorPerThread.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using Lucene.Net.Documents;
20 using Lucene.Net.Support;
21 using Document = Lucene.Net.Documents.Document;
22 using ArrayUtil = Lucene.Net.Util.ArrayUtil;
23 
24 namespace Lucene.Net.Index
25 {
26 
27  /// <summary> Gathers all Fieldables for a document under the same
28  /// name, updates FieldInfos, and calls per-field consumers
29  /// to process field by field.
30  ///
31  /// Currently, only a single thread visits the fields,
32  /// sequentially, for processing.
33  /// </summary>
34 
36  {
/// <summary>
/// Allocates the initial, single-slot free list that
/// <c>GetPerDoc</c> / <c>FreePerDoc</c> use to recycle PerDoc instances.
/// </summary>
private void InitBlock()
{
    this.docFreeList = new PerDoc[1];
}
41 
42  internal float docBoost;
43  internal int fieldGen;
44  internal DocFieldProcessor docFieldProcessor;
45  internal FieldInfos fieldInfos;
46  internal DocFieldConsumerPerThread consumer;
47 
48  // Holds all fields seen in current doc
49  internal DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1];
50  internal int fieldCount;
51 
52  // Hash table for all fields ever seen
53  internal DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2];
54  internal int hashMask = 1;
55  internal int totalFieldCount;
56 
57  internal StoredFieldsWriterPerThread fieldsWriter;
58 
59  internal DocumentsWriter.DocState docState;
60 
62  {
63  InitBlock();
64  this.docState = threadState.docState;
65  this.docFieldProcessor = docFieldProcessor;
66  this.fieldInfos = docFieldProcessor.fieldInfos;
67  this.consumer = docFieldProcessor.consumer.AddThread(this);
68  fieldsWriter = docFieldProcessor.fieldsWriter.AddThread(docState);
69  }
70 
/// <summary>
/// Aborts every per-field entry stored in the field hash table, then
/// aborts the stored-fields writer and finally the consumer.
/// </summary>
public override void Abort()
{
    for (int bucket = 0; bucket < fieldHash.Length; bucket++)
    {
        DocFieldProcessorPerField entry = fieldHash[bucket];
        while (entry != null)
        {
            // Read the chain link before calling Abort() on the entry.
            DocFieldProcessorPerField following = entry.next;
            entry.Abort();
            entry = following;
        }
    }

    fieldsWriter.Abort();
    consumer.Abort();
}
86 
/// <summary>
/// Gathers the consumer of every per-field entry currently stored in the
/// field hash table.
/// </summary>
/// <returns>
/// A newly created set of consumers; in debug builds its size is asserted
/// to equal <c>totalFieldCount</c>.
/// </returns>
public System.Collections.Generic.ICollection<DocFieldConsumerPerField> Fields()
{
    System.Collections.Generic.ICollection<DocFieldConsumerPerField> consumers =
        new System.Collections.Generic.HashSet<DocFieldConsumerPerField>();

    for (int bucket = 0; bucket < fieldHash.Length; bucket++)
    {
        // Walk the collision chain hanging off this bucket.
        for (DocFieldProcessorPerField entry = fieldHash[bucket]; entry != null; entry = entry.next)
        {
            consumers.Add(entry.consumer);
        }
    }

    System.Diagnostics.Debug.Assert(consumers.Count == totalFieldCount);
    return consumers;
}
103 
/// <summary>
/// Removes from the hash table every field whose <c>lastGen</c> is still
/// -1 (it was seen before but not again in the last run), and resets
/// <c>lastGen</c> to -1 on every field that is kept.
/// </summary>
/// <param name="state">
/// Write state; when its <c>docWriter.infoStream</c> is non-null, one line
/// is written to it for each purged field.
/// </param>
internal void TrimFields(SegmentWriteState state)
{
    for (int bucket = 0; bucket < fieldHash.Length; bucket++)
    {
        // Last entry of this chain that survived; null while none has.
        DocFieldProcessorPerField previousKept = null;

        for (DocFieldProcessorPerField current = fieldHash[bucket]; current != null; current = current.next)
        {
            if (current.lastGen != -1)
            {
                // Seen since the previous flush: keep it and reset its marker.
                current.lastGen = -1;
                previousKept = current;
                continue;
            }

            // Not seen since the previous flush: unlink it from the chain.
            if (previousKept != null)
            {
                previousKept.next = current.next;
            }
            else
            {
                fieldHash[bucket] = current.next;
            }

            if (state.docWriter.infoStream != null)
            {
                state.docWriter.infoStream.WriteLine(" purge field=" + current.fieldInfo.name);
            }

            totalFieldCount--;
        }
    }
}
147 
/// <summary>
/// Doubles the number of buckets in the field hash table and redistributes
/// every existing entry according to the new mask.
/// </summary>
private void Rehash()
{
    int doubledSize = fieldHash.Length * 2;
    System.Diagnostics.Debug.Assert(doubledSize > fieldHash.Length);

    DocFieldProcessorPerField[] grownTable = new DocFieldProcessorPerField[doubledSize];
    int grownMask = doubledSize - 1;

    for (int bucket = 0; bucket < fieldHash.Length; bucket++)
    {
        DocFieldProcessorPerField entry = fieldHash[bucket];
        while (entry != null)
        {
            // Remember the rest of the old chain before relinking this entry.
            DocFieldProcessorPerField following = entry.next;

            // Push the entry onto the front of its new bucket's chain.
            int slot = entry.fieldInfo.name.GetHashCode() & grownMask;
            entry.next = grownTable[slot];
            grownTable[slot] = entry;

            entry = following;
        }
    }

    fieldHash = grownTable;
    hashMask = grownMask;
}
173 
/// <summary>
/// Processes the document in <c>docState.doc</c>: groups its fields by name
/// into per-field entries (creating or updating entries in the field hash
/// table), passes stored fields to <c>fieldsWriter</c>, sorts the entries
/// touched by this document with <c>QuickSort</c>, and hands each entry's
/// collected fields to its consumer.
/// </summary>
/// <returns>
/// The result of <c>consumer.FinishDocument()</c> when the stored-fields
/// writer returned null (this may itself be null); the stored-fields
/// result when the consumer returned null; otherwise a <c>PerDoc</c> that
/// wraps both.
/// </returns>
174  public override DocumentsWriter.DocWriter ProcessDocument()
175  {
176 
177  consumer.StartDocument();
178  fieldsWriter.StartDocument();
179 
180  Document doc = docState.doc;
181 
182  System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start"));
183 
184  fieldCount = 0;
185 
     // Generation stamp for this document; it is compared with fp.lastGen
     // below to detect the first occurrence of a field name in this document.
186  int thisFieldGen = fieldGen++;
187 
188  System.Collections.Generic.IList<IFieldable> docFields = doc.GetFields();
189  int numDocFields = docFields.Count;
190 
191  // Absorb any new fields first seen in this document.
192  // Also absorb any changes to fields we had already
193  // seen before (eg suddenly turning on norms or
194  // vectors, etc.):
195 
196  for (int i = 0; i < numDocFields; i++)
197  {
198  IFieldable field = docFields[i];
199  System.String fieldName = field.Name;
200 
201  // Make sure we have a PerField allocated
202  int hashPos = fieldName.GetHashCode() & hashMask;
203  DocFieldProcessorPerField fp = fieldHash[hashPos];
204  while (fp != null && !fp.fieldInfo.name.Equals(fieldName))
205  fp = fp.next;
206 
207  if (fp == null)
208  {
209 
210  // TODO FI: we need to genericize the "flags" that a
211  // field holds, and, how these flags are merged; it
212  // needs to be more "pluggable" such that if I want
213  // to have a new "thing" my Fields can do, I can
214  // easily add it
     // NOTE(review): the embedded listing numbers jump from 215 to 217 here
     // (and from 229 to 231 in the Update call below), so a continuation
     // line of each argument list may be missing from this copy; verify
     // against the FieldInfos.Add / FieldInfo.Update signatures.
215  FieldInfo fi = fieldInfos.Add(fieldName, field.IsIndexed, field.IsTermVectorStored,
217  field.OmitNorms, false, field.OmitTermFreqAndPositions);
218 
     // Link the new entry at the head of its bucket's chain.
219  fp = new DocFieldProcessorPerField(this, fi);
220  fp.next = fieldHash[hashPos];
221  fieldHash[hashPos] = fp;
222  totalFieldCount++;
223 
     // Grow the table once the entry count reaches half the bucket count.
224  if (totalFieldCount >= fieldHash.Length / 2)
225  Rehash();
226  }
227  else
228  {
229  fp.fieldInfo.Update(field.IsIndexed, field.IsTermVectorStored,
231  field.OmitNorms, false, field.OmitTermFreqAndPositions);
232  }
233 
234  if (thisFieldGen != fp.lastGen)
235  {
236 
237  // First time we're seeing this field for this doc
238  fp.fieldCount = 0;
239 
     // Double the per-document entry array when it is full.
240  if (fieldCount == fields.Length)
241  {
242  int newSize = fields.Length * 2;
243  DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
244  Array.Copy(fields, 0, newArray, 0, fieldCount);
245  fields = newArray;
246  }
247 
248  fields[fieldCount++] = fp;
249  fp.lastGen = thisFieldGen;
250  }
251 
     // Double this entry's field-instance array when it is full.
252  if (fp.fieldCount == fp.fields.Length)
253  {
254  IFieldable[] newArray = new IFieldable[fp.fields.Length * 2];
255  Array.Copy(fp.fields, 0, newArray, 0, fp.fieldCount);
256  fp.fields = newArray;
257  }
258 
259  fp.fields[fp.fieldCount++] = field;
260  if (field.IsStored)
261  {
262  fieldsWriter.AddField(field, fp.fieldInfo);
263  }
264  }
265 
266  // If we are writing vectors then we must visit
267  // fields in sorted order so they are written in
268  // sorted order. TODO: we actually only need to
269  // sort the subset of fields that have vectors
270  // enabled; we could save [small amount of] CPU
271  // here.
272  QuickSort(fields, 0, fieldCount - 1);
273 
274  for (int i = 0; i < fieldCount; i++)
275  fields[i].consumer.ProcessFields(fields[i].fields, fields[i].fieldCount);
276 
     // Report the skipped immense term once, then clear the recorded prefix.
277  if (docState.maxTermPrefix != null && docState.infoStream != null)
278  {
279  docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
280  docState.maxTermPrefix = null;
281  }
282 
283  DocumentsWriter.DocWriter one = fieldsWriter.FinishDocument();
284  DocumentsWriter.DocWriter two = consumer.FinishDocument();
285  if (one == null)
286  {
287  return two;
288  }
289  else if (two == null)
290  {
291  return one;
292  }
293  else
294  {
     // Both produced a writer: wrap them in one (possibly recycled) PerDoc.
295  PerDoc both = GetPerDoc();
296  both.docID = docState.docID;
297  System.Diagnostics.Debug.Assert(one.docID == docState.docID);
298  System.Diagnostics.Debug.Assert(two.docID == docState.docID);
299  both.one = one;
300  both.two = two;
301  return both;
302  }
303  }
304 
/// <summary>
/// Sorts <c>array[lo..hi]</c> (both bounds inclusive) in place by ordinal
/// comparison of each entry's <c>fieldInfo.name</c>, using a recursive
/// quicksort whose pivot is taken from the middle position after ordering
/// the low, middle and high elements.
/// </summary>
/// <param name="array">Entries to sort.</param>
/// <param name="lo">Index of the first element of the range.</param>
/// <param name="hi">Index of the last element of the range.</param>
internal void QuickSort(DocFieldProcessorPerField[] array, int lo, int hi)
{
    // Fewer than two elements: nothing to do.
    if (lo >= hi)
    {
        return;
    }

    // Exactly two elements: a single compare-and-swap is enough.
    if (hi == 1 + lo)
    {
        SwapIfOutOfOrder(array, lo, hi);
        return;
    }

    int mid = Number.URShift((lo + hi), 1);

    // Put the low, middle and high elements in order among themselves.
    SwapIfOutOfOrder(array, lo, mid);
    if (SwapIfOutOfOrder(array, mid, hi))
    {
        SwapIfOutOfOrder(array, lo, mid);
    }

    int left = lo + 1;
    int right = hi - 1;

    // With three elements the ordering step above has already sorted them.
    if (left >= right)
    {
        return;
    }

    DocFieldProcessorPerField pivot = array[mid];

    while (true)
    {
        while (String.CompareOrdinal(array[right].fieldInfo.name, pivot.fieldInfo.name) > 0)
        {
            --right;
        }

        while (left < right && String.CompareOrdinal(array[left].fieldInfo.name, pivot.fieldInfo.name) <= 0)
        {
            ++left;
        }

        if (left >= right)
        {
            break;
        }

        DocFieldProcessorPerField held = array[left];
        array[left] = array[right];
        array[right] = held;
        --right;
    }

    QuickSort(array, lo, left);
    QuickSort(array, left + 1, hi);
}

/// <summary>
/// Exchanges <c>array[first]</c> and <c>array[second]</c> when the name at
/// <c>first</c> compares ordinally greater than the name at <c>second</c>.
/// </summary>
/// <returns>True when the two elements were exchanged.</returns>
private static bool SwapIfOutOfOrder(DocFieldProcessorPerField[] array, int first, int second)
{
    if (String.CompareOrdinal(array[first].fieldInfo.name, array[second].fieldInfo.name) <= 0)
    {
        return false;
    }

    DocFieldProcessorPerField held = array[first];
    array[first] = array[second];
    array[second] = held;
    return true;
}
375 
376  internal PerDoc[] docFreeList;
377  internal int freeCount;
378  internal int allocCount;
379 
/// <summary>
/// Hands out a PerDoc: the most recently freed instance when the free list
/// is non-empty, otherwise a newly allocated one. Runs under a lock on
/// this instance.
/// </summary>
/// <returns>A recycled or newly created PerDoc.</returns>
internal PerDoc GetPerDoc()
{
    lock (this)
    {
        if (freeCount != 0)
        {
            // Pop the most recently freed instance.
            freeCount--;
            return docFreeList[freeCount];
        }

        allocCount++;
        if (allocCount > docFreeList.Length)
        {
            // Grow our free list up front to make sure we have
            // enough space to recycle all outstanding PerDoc
            // instances. The list is empty here (freeCount == 0),
            // so there is nothing to copy across.
            System.Diagnostics.Debug.Assert(allocCount == 1 + docFreeList.Length);
            docFreeList = new PerDoc[ArrayUtil.GetNextSize(allocCount)];
        }

        return new PerDoc(this);
    }
}
401 
/// <summary>
/// Returns a PerDoc to the free list so a later <c>GetPerDoc</c> call can
/// reuse it. Runs under a lock on this instance.
/// </summary>
/// <param name="perDoc">The instance being released.</param>
internal void FreePerDoc(PerDoc perDoc)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length);

        // Take the slot index and advance the count before storing.
        int slot = freeCount++;
        docFreeList[slot] = perDoc;
    }
}
410 
/// <summary>
/// A DocWriter that combines two other DocWriters (<c>one</c> and
/// <c>two</c>) and hands itself back to the enclosing instance's free list
/// once it has been finished or aborted.
/// </summary>
internal class PerDoc : DocumentsWriter.DocWriter
{
    private DocFieldProcessorPerThread enclosingInstance;

    internal DocumentsWriter.DocWriter one;
    internal DocumentsWriter.DocWriter two;

    /// <summary>Creates an instance tied to the given enclosing object.</summary>
    /// <param name="enclosingInstance">Object whose FreePerDoc receives this instance on Finish/Abort.</param>
    public PerDoc(DocFieldProcessorPerThread enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;
    }

    /// <summary>The enclosing object passed to the constructor.</summary>
    public DocFieldProcessorPerThread Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    /// <summary>Sum of the sizes reported by <c>one</c> and <c>two</c>.</summary>
    public override long SizeInBytes()
    {
        long firstSize = one.SizeInBytes();
        long secondSize = two.SizeInBytes();
        return firstSize + secondSize;
    }

    /// <summary>
    /// Finishes <c>one</c>, then <c>two</c> (even if the first call throws),
    /// and always releases this instance to the free list afterwards.
    /// </summary>
    public override void Finish()
    {
        try
        {
            try
            {
                one.Finish();
            }
            finally
            {
                // Runs even when one.Finish() throws.
                two.Finish();
            }
        }
        finally
        {
            enclosingInstance.FreePerDoc(this);
        }
    }

    /// <summary>
    /// Aborts <c>one</c>, then <c>two</c> (even if the first call throws),
    /// and always releases this instance to the free list afterwards.
    /// </summary>
    public override void Abort()
    {
        try
        {
            try
            {
                one.Abort();
            }
            finally
            {
                // Runs even when one.Abort() throws.
                two.Abort();
            }
        }
        finally
        {
            enclosingInstance.FreePerDoc(this);
        }
    }
}
477  }
478 }