Lucene.Net 3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
DocInverterPerField.cs
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Documents;
using TokenStream = Lucene.Net.Analysis.TokenStream;

namespace Lucene.Net.Index
{
    /// <summary> Holds state for inverting all occurrences of a single
    /// field in the document. This class doesn't do anything itself;
    /// instead, it forwards the tokens produced by analysis to its own
    /// consumer (InvertedDocConsumerPerField). It also interacts with an
    /// endConsumer (InvertedDocEndConsumerPerField).
    /// </summary>
    internal sealed class DocInverterPerField : DocFieldConsumerPerField
    {

        private DocInverterPerThread perThread;
        private FieldInfo fieldInfo;
        internal InvertedDocConsumerPerField consumer;
        internal InvertedDocEndConsumerPerField endConsumer;
        internal DocumentsWriter.DocState docState;
        internal FieldInvertState fieldState;

        public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo)
        {
            this.perThread = perThread;
            this.fieldInfo = fieldInfo;
            docState = perThread.docState;
            fieldState = perThread.fieldState;
            this.consumer = perThread.consumer.AddField(this, fieldInfo);
            this.endConsumer = perThread.endConsumer.AddField(this, fieldInfo);
        }

        public override void Abort()
        {
            consumer.Abort();
            endConsumer.Abort();
        }

        public override void ProcessFields(IFieldable[] fields, int count)
        {
            fieldState.Reset(docState.doc.Boost);

            int maxFieldLength = docState.maxFieldLength;

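            // Ask the consumer whether it needs to see the tokens for
            // these field instances at all; if not, the inversion work
            // below is skipped.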
            bool doInvert = consumer.Start(fields, count);

            for (int i = 0; i < count; i++)
            {
                IFieldable field = fields[i];

                // TODO FI: this should be "genericized" to querying
                // consumer if it wants to see this particular field
                // tokenized.
                if (field.IsIndexed && doInvert)
                {
                    bool anyToken;

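                    // For the second and later values of a multi-valued
                    // field, add the analyzer's position increment gap so
                    // phrase and span queries do not match across values.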
                    if (fieldState.length > 0)
                        fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name);

                    if (!field.IsTokenized)
                    {
                        // un-tokenized field
                        System.String stringValue = field.StringValue;
                        int valueLength = stringValue.Length;
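                        // The whole untokenized value is indexed as a single
                        // token, using a reused per-thread attribute source.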
                        perThread.singleToken.Reinit(stringValue, 0, valueLength);
                        fieldState.attributeSource = perThread.singleToken;
                        consumer.Start(field);

                        bool success = false;
                        try
                        {
                            consumer.Add();
                            success = true;
                        }
                        finally
                        {
                            if (!success)
                                docState.docWriter.SetAborting();
                        }
                        fieldState.offset += valueLength;
                        fieldState.length++;
                        fieldState.position++;
                        anyToken = valueLength > 0;
                    }
                    else
                    {
                        // tokenized field
                        TokenStream stream;
                        TokenStream streamValue = field.TokenStreamValue;

                        if (streamValue != null)
                            stream = streamValue;
                        else
                        {
                            // the field does not have a TokenStream,
                            // so we have to obtain one from the analyzer
                            System.IO.TextReader reader; // find or make Reader
                            System.IO.TextReader readerValue = field.ReaderValue;

                            if (readerValue != null)
                                reader = readerValue;
                            else
                            {
                                System.String stringValue = field.StringValue;
                                if (stringValue == null)
                                    throw new System.ArgumentException("field must have either TokenStream, String or Reader value");
                                perThread.stringReader.Init(stringValue);
                                reader = perThread.stringReader;
                            }

                            // Tokenize field and add to postingTable
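                            // ReusableTokenStream lets the analyzer hand back a
                            // per-thread cached stream instead of allocating a
                            // new one for every field instance.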
                            stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader);
                        }

                        // reset the TokenStream to the first token
                        stream.Reset();

                        int startLength = fieldState.length;

                        try
                        {
                            int offsetEnd = fieldState.offset - 1;

                            bool hasMoreTokens = stream.IncrementToken();

                            fieldState.attributeSource = stream;

                            IOffsetAttribute offsetAttribute = fieldState.attributeSource.AddAttribute<IOffsetAttribute>();
                            IPositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.AddAttribute<IPositionIncrementAttribute>();
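                            // AddAttribute returns the attribute instances already
                            // registered on the stream, so these are live views of
                            // whatever token IncrementToken() last produced.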

                            consumer.Start(field);

                            for (; ; )
                            {

                                // If we hit an exception in stream.next below
                                // (which is fairly common, eg if analyzer
                                // chokes on a given document), then it's
                                // non-aborting and (above) this one document
                                // will be marked as deleted, but still
                                // consume a docID

                                if (!hasMoreTokens)
                                    break;

                                int posIncr = posIncrAttribute.PositionIncrement;
                                fieldState.position += posIncr;
                                if (fieldState.position > 0)
                                {
                                    fieldState.position--;
                                }
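                                // Paired with the position++ after consumer.Add():
                                // during Add(), fieldState.position holds the 0-based
                                // position of the current token.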

                                if (posIncr == 0)
                                    fieldState.numOverlap++;

                                bool success = false;
                                try
                                {
                                    // If we hit an exception in here, we abort
                                    // all buffered documents since the last
                                    // flush, on the likelihood that the
                                    // internal state of the consumer is now
                                    // corrupt and should not be flushed to a
                                    // new segment:
                                    consumer.Add();
                                    success = true;
                                }
                                finally
                                {
                                    if (!success)
                                        docState.docWriter.SetAborting();
                                }
                                fieldState.position++;
                                offsetEnd = fieldState.offset + offsetAttribute.EndOffset;
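                                // Enforce IndexWriter's maxFieldLength: once this many
                                // tokens have been indexed for the field, the rest of
                                // the stream is silently dropped for this document.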
                                if (++fieldState.length >= maxFieldLength)
                                {
                                    if (docState.infoStream != null)
                                        docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
                                    break;
                                }

                                hasMoreTokens = stream.IncrementToken();
                            }
                            // trigger streams to perform end-of-stream operations
                            stream.End();
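                            // After End(), the offset attribute reflects the final
                            // offset of this value; it is folded into the running
                            // per-field offset below.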

                            fieldState.offset += offsetAttribute.EndOffset;
                            anyToken = fieldState.length > startLength;
                        }
                        finally
                        {
                            stream.Close();
                        }
                    }
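
                    // The analyzer-supplied offset gap keeps the character
                    // offsets of successive values of the same field from
                    // overlapping (the default returns 1 for tokenized fields).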
                    if (anyToken)
                        fieldState.offset += docState.analyzer.GetOffsetGap(field);
                    fieldState.boost *= field.Boost;
                }

                // LUCENE-2387: don't hang onto the field, so GC can
                // reclaim
                fields[i] = null;
            }

            consumer.Finish();
            endConsumer.Finish();
        }
    }
}
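
DocInverterPerField is internal indexing machinery, but the position increment gap it applies between field values is controlled through the public Analyzer API. The following is a minimal sketch assuming only the standard Lucene.Net 3.0.3 public API; the GapAnalyzer class and the gap value of 100 are illustrative, not part of this file.

using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Version = Lucene.Net.Util.Version;

// Hypothetical analyzer; the name and gap value are illustrative.
class GapAnalyzer : StandardAnalyzer
{
    public GapAnalyzer() : base(Version.LUCENE_30) { }

    // ProcessFields above adds this value to fieldState.position
    // between successive values of a multi-valued field.
    public override int GetPositionIncrementGap(string fieldName)
    {
        return 100;
    }
}

class Example
{
    static void Main()
    {
        var dir = new RAMDirectory();
        var writer = new IndexWriter(dir, new GapAnalyzer(),
                                     IndexWriter.MaxFieldLength.LIMITED);

        var doc = new Document();
        // Two values for the same "body" field: the second value is
        // inverted 100 positions after the first, so a phrase query
        // like "brown fox" cannot match across the value boundary.
        doc.Add(new Field("body", "quick brown", Field.Store.NO, Field.Index.ANALYZED));
        doc.Add(new Field("body", "fox jumps", Field.Store.NO, Field.Index.ANALYZED));

        writer.AddDocument(doc);
        writer.Close();
        dir.Close();
    }
}

A large gap makes cross-value phrase matches effectively impossible while leaving positions within each value untouched.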