Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
FieldInfos.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using Lucene.Net.Documents;
20 using Lucene.Net.Support;
21 using Document = Lucene.Net.Documents.Document;
22 using Directory = Lucene.Net.Store.Directory;
23 using IndexInput = Lucene.Net.Store.IndexInput;
24 using IndexOutput = Lucene.Net.Store.IndexOutput;
25 using StringHelper = Lucene.Net.Util.StringHelper;
26 
27 namespace Lucene.Net.Index
28 {
29 
30  /// <summary>Access to the Fieldable Info file that describes document fields and whether or
31  /// not they are indexed. Each segment has a separate Fieldable Info file. Objects
32  /// of this class are thread-safe for multiple readers, but only one thread can
33  /// be adding documents at a time, with no other reader or writer threads
34  /// accessing this object.
35  /// </summary>
36  public sealed class FieldInfos : ICloneable
37  {
38 
/// <summary>In-memory marker (never written to *.fnm files) for files that predate 2.9.</summary>
public const int FORMAT_PRE = -1;

/// <summary>First format header, introduced with 2.9; older files carry no format header.</summary>
public const int FORMAT_START = -2;

/// <summary>Format value emitted by <c>Write(IndexOutput)</c>.</summary>
internal static readonly int CURRENT_FORMAT = FORMAT_START;

// Bit masks combined into the single flag byte stored for each field.
internal const byte IS_INDEXED = 0x01;
internal const byte STORE_TERMVECTOR = 0x02;
internal const byte STORE_POSITIONS_WITH_TERMVECTOR = 0x04;
internal const byte STORE_OFFSET_WITH_TERMVECTOR = 0x08;
internal const byte OMIT_NORMS = 0x10;
internal const byte STORE_PAYLOADS = 0x20;
internal const byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;

// Field infos addressable by position.
private readonly System.Collections.Generic.List<FieldInfo> byNumber = new System.Collections.Generic.List<FieldInfo>();

// The same field infos addressable by field name.
private readonly HashMap<string, FieldInfo> byName = new HashMap<string, FieldInfo>();

// Assigned only by Read; keeps its default value for instances that were never read from a file.
private int format;
58 
/// <summary>Creates an empty instance that holds no field infos.</summary>
public /*internal*/ FieldInfos() { }
62 
/// <summary> Builds a FieldInfos instance from the file <c>name</c> opened in directory <c>d</c>.
/// The opened IndexInput is closed before the constructor returns or throws.
/// </summary>
/// <param name="d">The directory to open the IndexInput from
/// </param>
/// <param name="name">The name of the file to open the IndexInput from in the Directory
/// </param>
/// <throws> IOException </throws>
public /*internal*/ FieldInfos(Directory d, String name)
{
    IndexInput input = d.OpenInput(name);
    try
    {
        try
        {
            Read(input, name);
        }
        catch (System.IO.IOException)
        {
            if (format != FORMAT_PRE)
            {
                // A file with a real format header cannot be hit by
                // LUCENE-1623, so the failure is propagated unchanged.
                throw;
            }

            // LUCENE-1623: FORMAT_PRE (before there was a format) may be
            // 2.3.2 (pre-utf8) or 2.4.x (utf8) encoding; start over from
            // the beginning with the input switched to pre-utf8 strings.
            input.Seek(0);
            input.SetModifiedUTF8StringsMode();
            byNumber.Clear();
            byName.Clear();

            bool retryFailed = false;
            try
            {
                Read(input, name);
            }
            catch (Exception)
            {
                // Whatever the retry raised is dropped on purpose; the
                // original IOException is the one reported below.
                retryFailed = true;
            }

            if (retryFailed)
            {
                // Bare throw re-raises the original IOException with its stack trace.
                throw;
            }
        }
    }
    finally
    {
        input.Close();
    }
}
121 
/// <summary> Produces a deep copy: each contained FieldInfo is cloned and registered,
/// by position and by name, in a freshly created FieldInfos. The <c>format</c> field
/// is not carried over to the copy.
/// </summary>
/// <returns> the new FieldInfos instance </returns>
public Object Clone()
{
    lock (this)
    {
        var copy = new FieldInfos();
        foreach (FieldInfo source in byNumber)
        {
            var duplicate = (FieldInfo) source.Clone();
            copy.byNumber.Add(duplicate);
            copy.byName[duplicate.name] = duplicate;
        }
        return copy;
    }
}
138 
/// <summary>Adds field info for every field of a Document. </summary>
/// <param name="doc">The document whose fields are registered
/// </param>
public void Add(Document doc)
{
    lock (this)
    {
        System.Collections.Generic.IList<IFieldable> fields = doc.GetFields();
        foreach (IFieldable field in fields)
        {
            // All eight arguments are required so the call binds to the
            // eight-parameter overload. With only five arguments the
            // five-parameter overload is chosen instead, which would record
            // "false" as storePositionWithTermVector and the field's
            // OmitTermFreqAndPositions flag as storeOffsetWithTermVector.
            // storePayloads is always passed as false from here.
            Add(field.Name, field.IsIndexed, field.IsTermVectorStored,
                field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector, field.OmitNorms,
                false, field.OmitTermFreqAndPositions);
        }
    }
}
153 
/// <summary>Tells whether at least one indexed field keeps term frequencies and
/// positions, i.e. is indexed and does not set omitTermFreqAndPositions.
/// </summary>
internal bool HasProx()
{
    return byNumber.Exists(fi => fi.isIndexed && !fi.omitTermFreqAndPositions);
}
168 
/// <summary> Registers every given name as an indexed field, using the supplied
/// term vector settings for all of them.
/// </summary>
/// <param name="names">The names of the fields
/// </param>
/// <param name="storeTermVectors">Whether the fields store term vectors or not
/// </param>
/// <param name="storePositionWithTermVector">true if positions should be stored.
/// </param>
/// <param name="storeOffsetWithTermVector">true if offsets should be stored
/// </param>
public void AddIndexed(System.Collections.Generic.ICollection<string> names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
{
    lock (this)
    {
        using (System.Collections.Generic.IEnumerator<string> cursor = names.GetEnumerator())
        {
            while (cursor.MoveNext())
            {
                // isIndexed is fixed to true for every name.
                Add(cursor.Current, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
            }
        }
    }
}
190 
/// <summary> Registers every given name through <c>Add(String, bool)</c>, which
/// passes false for all term vector settings.
/// </summary>
/// <param name="names">The names of the fields
/// </param>
/// <param name="isIndexed">Whether the fields are indexed or not
/// </param>
/// <seealso cref="Add(String, bool)"/>
public void Add(System.Collections.Generic.ICollection<string> names, bool isIndexed)
{
    lock (this)
    {
        foreach (string fieldName in names)
        {
            Add(fieldName, isIndexed);
        }
    }
}
211 
/// <summary> Forwards to the 6 parameter Add with false for every term vector
/// parameter and for omitNorms.
/// </summary>
/// <param name="name">The name of the Fieldable
/// </param>
/// <param name="isIndexed">true if the field is indexed
/// </param>
/// <seealso cref="Add(String, bool, bool, bool, bool, bool)"/>
public void Add(String name, bool isIndexed)
{
    const bool off = false;
    lock (this)
    {
        // storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms
        Add(name, isIndexed, off, off, off, off);
    }
}
228 
/// <summary> Forwards to the 6 parameter Add with false for term vector
/// positions, term vector offsets and omitNorms.
/// </summary>
/// <param name="name">The name of the field
/// </param>
/// <param name="isIndexed"> true if the field is indexed
/// </param>
/// <param name="storeTermVector">true if the term vector should be stored
/// </param>
public void Add(System.String name, bool isIndexed, bool storeTermVector)
{
    const bool off = false;
    lock (this)
    {
        // storePositionWithTermVector, storeOffsetWithTermVector, omitNorms
        Add(name, isIndexed, storeTermVector, off, off, off);
    }
}
245 
/// <summary>Adds the field when it is unknown, otherwise updates the known entry
/// with the given flags. Forwards to the 6 parameter Add with omitNorms set to false.
/// </summary>
/// <param name="name">The name of the field
/// </param>
/// <param name="isIndexed">true if the field is indexed
/// </param>
/// <param name="storeTermVector">true if the term vector should be stored
/// </param>
/// <param name="storePositionWithTermVector">true if the term vector with positions should be stored
/// </param>
/// <param name="storeOffsetWithTermVector">true if the term vector with offsets should be stored
/// </param>
public void Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
{
    const bool keepNorms = false;
    lock (this)
    {
        Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, keepNorms);
    }
}
270 
/// <summary>Adds the field when it is unknown, otherwise updates the known entry
/// with the given flags. Forwards to the 8 parameter Add with storePayloads and
/// omitTermFreqAndPositions both set to false; the FieldInfo it returns is discarded.
/// </summary>
/// <param name="name">The name of the field
/// </param>
/// <param name="isIndexed">true if the field is indexed
/// </param>
/// <param name="storeTermVector">true if the term vector should be stored
/// </param>
/// <param name="storePositionWithTermVector">true if the term vector with positions should be stored
/// </param>
/// <param name="storeOffsetWithTermVector">true if the term vector with offsets should be stored
/// </param>
/// <param name="omitNorms">true if the norms for the indexed field should be omitted
/// </param>
public void Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms)
{
    const bool off = false;
    lock (this)
    {
        // storePayloads, omitTermFreqAndPositions
        Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, off, off);
    }
}
296 
/// <summary>Registers a field or refreshes an existing one. When no FieldInfo is
/// found for <c>name</c>, a new one is created through AddInternal; otherwise the
/// existing FieldInfo is handed the given flags through its Update method.
/// </summary>
/// <param name="name">The name of the field
/// </param>
/// <param name="isIndexed">true if the field is indexed
/// </param>
/// <param name="storeTermVector">true if the term vector should be stored
/// </param>
/// <param name="storePositionWithTermVector">true if the term vector with positions should be stored
/// </param>
/// <param name="storeOffsetWithTermVector">true if the term vector with offsets should be stored
/// </param>
/// <param name="omitNorms">true if the norms for the indexed field should be omitted
/// </param>
/// <param name="storePayloads">true if payloads should be stored for this field
/// </param>
/// <param name="omitTermFreqAndPositions">true if term freqs should be omitted for this field
/// </param>
/// <returns> the newly created FieldInfo, or the updated existing one </returns>
public FieldInfo Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions)
{
    lock (this)
    {
        FieldInfo known = FieldInfo(name);
        if (known != null)
        {
            known.Update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
            return known;
        }

        return AddInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
    }
}
335 
/// <summary>Creates a FieldInfo for the interned <c>name</c>, numbers it with the
/// current size of the by-number list, and records it in both lookup structures.
/// No check for an already registered name is made here.
/// </summary>
/// <returns> the FieldInfo that was created </returns>
private FieldInfo AddInternal(String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions)
{
    String internedName = StringHelper.Intern(name);
    int nextNumber = byNumber.Count;

    var created = new FieldInfo(internedName, isIndexed, nextNumber, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);

    byNumber.Add(created);
    byName[internedName] = created;
    return created;
}
344 
/// <summary> Returns the number of the field with the given name.</summary>
/// <param name="fieldName">The name of the field
/// </param>
/// <returns> the field's number, or -1 when the lookup by name yields null
/// </returns>
public int FieldNumber(System.String fieldName)
{
    FieldInfo match = FieldInfo(fieldName);
    if (match == null)
    {
        return -1;
    }
    return match.number;
}
350 
/// <summary> Looks the field up by name through the by-name map's indexer.</summary>
/// <param name="fieldName">The name of the field
/// </param>
/// <returns> whatever the map's indexer yields for the name. Callers in this class
/// test the result for null, so the indexer presumably returns null for an unknown
/// name (to be confirmed against Lucene.Net.Support.HashMap).
/// </returns>
public FieldInfo FieldInfo(System.String fieldName)
{
    FieldInfo match = byName[fieldName];
    return match;
}
355 
/// <summary> Returns the name of the field that has the given number.</summary>
/// <param name="fieldNumber">The number of the field
/// </param>
/// <returns> the field's name, or an empty string when <c>FieldInfo(int)</c>
/// yields null for the number.
/// </returns>
public System.String FieldName(int fieldNumber)
{
    FieldInfo match = FieldInfo(fieldNumber);
    if (match == null)
    {
        return "";
    }
    return match.name;
}
369 
/// <summary> Return the fieldinfo object referenced by the fieldNumber.</summary>
/// <param name="fieldNumber">The number of the field
/// </param>
/// <returns> the FieldInfo object or null when the given fieldNumber
/// doesn't exist, i.e. is negative or not smaller than the number of fields.
/// </returns>
public FieldInfo FieldInfo(int fieldNumber)
{
    // Both bounds are checked: the list indexer throws for an index at or
    // beyond Count, whereas the documented contract is to return null for
    // any number that does not exist.
    if (fieldNumber < 0 || fieldNumber >= byNumber.Count)
    {
        return null;
    }
    return byNumber[fieldNumber];
}
380 
/// <summary> Returns how many field infos are currently held.</summary>
public int Size()
{
    int count = byNumber.Count;
    return count;
}
385 
/// <summary> Tells whether at least one field has storeTermVector set.</summary>
public bool HasVectors()
{
    for (int number = 0; number < Size(); number++)
    {
        if (FieldInfo(number).storeTermVector)
        {
            return true;
        }
    }
    return false;
}
399 
/// <summary> Creates the output <c>name</c> in directory <c>d</c>, writes all field
/// infos to it and closes the output, also when writing fails.
/// </summary>
/// <param name="d">The directory to create the output in
/// </param>
/// <param name="name">The name of the output to create
/// </param>
public void Write(Directory d, System.String name)
{
    IndexOutput target = d.CreateOutput(name);
    try
    {
        Write(target);
    }
    finally
    {
        target.Close();
    }
}
412 
/// <summary> Writes the field infos to the given output: CURRENT_FORMAT and the
/// number of fields as VInts, then for each field its name followed by one byte
/// holding its flags.
/// </summary>
/// <param name="output">The output to write to; it is not closed here
/// </param>
public void Write(IndexOutput output)
{
    output.WriteVInt(CURRENT_FORMAT);
    output.WriteVInt(Size());

    for (int number = 0; number < Size(); number++)
    {
        FieldInfo info = FieldInfo(number);

        // Collect the flags in an int; every mask fits in the low seven
        // bits, so the narrowing cast below loses nothing.
        int flags = 0;
        if (info.isIndexed)
        {
            flags |= IS_INDEXED;
        }
        if (info.storeTermVector)
        {
            flags |= STORE_TERMVECTOR;
        }
        if (info.storePositionWithTermVector)
        {
            flags |= STORE_POSITIONS_WITH_TERMVECTOR;
        }
        if (info.storeOffsetWithTermVector)
        {
            flags |= STORE_OFFSET_WITH_TERMVECTOR;
        }
        if (info.omitNorms)
        {
            flags |= OMIT_NORMS;
        }
        if (info.storePayloads)
        {
            flags |= STORE_PAYLOADS;
        }
        if (info.omitTermFreqAndPositions)
        {
            flags |= OMIT_TERM_FREQ_AND_POSITIONS;
        }

        output.WriteString(info.name);
        output.WriteByte((byte) flags);
    }
}
440 
/// <summary> Reads field infos from the input and appends them through AddInternal.
/// A negative first VInt is taken as the format header and is followed by the
/// field count; a non-negative first VInt means a pre-2.9 file and is itself the
/// field count. The <c>format</c> field is set before any validation happens.
/// </summary>
/// <param name="input">The input to read from
/// </param>
/// <param name="fileName">The file name, used only in exception messages
/// </param>
/// <throws> CorruptIndexException when the format is unrecognized or the input
/// was not consumed exactly to its end </throws>
private void Read(IndexInput input, String fileName)
{
    int firstInt = input.ReadVInt();

    // Negative values are real format headers; anything else is a pre-2.9 file.
    format = (firstInt < 0) ? firstInt : FORMAT_PRE;

    if (format != FORMAT_PRE && format != FORMAT_START)
    {
        throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
    }

    // Pre-2.9 files start directly with the field count.
    int size = (format == FORMAT_PRE) ? firstInt : input.ReadVInt();

    for (int read = 0; read < size; read++)
    {
        String name = StringHelper.Intern(input.ReadString());
        byte flags = input.ReadByte();

        AddInternal(
            name,
            (flags & IS_INDEXED) != 0,
            (flags & STORE_TERMVECTOR) != 0,
            (flags & STORE_POSITIONS_WITH_TERMVECTOR) != 0,
            (flags & STORE_OFFSET_WITH_TERMVECTOR) != 0,
            (flags & OMIT_NORMS) != 0,
            (flags & STORE_PAYLOADS) != 0,
            (flags & OMIT_TERM_FREQ_AND_POSITIONS) != 0);
    }

    if (input.FilePointer == input.Length())
    {
        return;
    }

    throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer + " vs size " + input.Length());
}
490  }
491 }