Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
SegmentTermPositions.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using Lucene.Net.Support;
20 using IndexInput = Lucene.Net.Store.IndexInput;
21 
22 namespace Lucene.Net.Index
23 {
/// <summary>
/// Term positions enumerator for a single segment. Extends
/// <see cref="SegmentTermDocs"/> with access to the positions and payloads
/// stored in the segment's prox stream.
/// <para>
/// The prox stream is positioned lazily: moving to another document or term
/// only records where the stream would have to go, and the actual seek and
/// skipping happen when <see cref="NextPosition"/> is called.
/// </para>
/// </summary>
internal sealed class SegmentTermPositions : SegmentTermDocs, TermPositions
{
    // private clone of the prox stream; stays null until LazySkip() creates it
    private IndexInput proxStream;
    // positions of the current document that NextPosition() has not returned yet
    private int proxCount;
    // last position returned for the current document; deltas are added onto it
    private int position;

    // the current payload length
    private int payloadLength;
    // indicates whether the payload of the current position has
    // been read from the proxStream yet
    private bool needToLoadPayload;

    // these variables are being used to remember information
    // for a lazy skip
    // absolute prox stream pointer to seek to, or -1 when no seek is pending
    private long lazySkipPointer = -1;
    // number of positions that still have to be skipped before reading
    private int lazySkipProxCount = 0;

    /// <summary>
    /// Creates the enumerator for the given segment reader. The prox stream
    /// is not cloned here.
    /// </summary>
    /// <param name="p">Segment reader handed to the base class.</param>
    internal SegmentTermPositions(SegmentReader p) : base(p)
    {
        this.proxStream = null; // the proxStream will be cloned lazily when nextPosition() is called for the first time
    }

    /// <summary>
    /// Seeks the base enumerator and resets all position and payload state.
    /// </summary>
    /// <param name="ti">Term info for the term; may be null.</param>
    /// <param name="term">The term being sought; passed through to the base class.</param>
    internal override void Seek(TermInfo ti, Term term)
    {
        base.Seek(ti, term);
        // Only record the pointer here; the actual seek is done by LazySkip().
        // When ti is null, any previously recorded pointer is left as it is.
        if (ti != null)
        {
            lazySkipPointer = ti.proxPointer;
        }

        lazySkipProxCount = 0;
        proxCount = 0;
        payloadLength = 0;
        needToLoadPayload = false;
    }

    /// <summary>
    /// Disposes the base class and then the lazily cloned prox stream, if one
    /// was created.
    /// </summary>
    /// <param name="disposing">
    /// Passed through to the base class. The prox stream is disposed only
    /// when this is true.
    /// </param>
    protected override void Dispose(bool disposing)
    {
        base.Dispose(disposing);
        // Following the standard dispose pattern, other managed objects are
        // only touched on an explicit dispose. proxStream is still null when
        // no position was ever read from a field that stores positions.
        if (disposing && proxStream != null)
        {
            proxStream.Dispose();
        }
    }

    /// <summary>
    /// Returns the next position of the current term in the current document.
    /// </summary>
    /// <returns>
    /// The next position, or 0 when the current field omits term frequencies
    /// and positions.
    /// </returns>
    public int NextPosition()
    {
        if (currentFieldOmitTermFreqAndPositions)
        {
            // This field does not store term freq, positions, payloads
            return 0;
        }
        // perform lazy skips if necessary
        LazySkip();
        proxCount--;
        // positions are stored as deltas, so accumulate onto the previous one
        return position += ReadDeltaPosition();
    }

    /// <summary>
    /// Reads one position delta from the prox stream and, for fields that
    /// store payloads, decodes the payload-length marker packed into it.
    /// </summary>
    /// <returns>The decoded position delta.</returns>
    private int ReadDeltaPosition()
    {
        int delta = proxStream.ReadVInt();
        if (currentFieldStoresPayloads)
        {
            // if the current field stores payloads then
            // the position delta is shifted one bit to the left.
            // if the LSB is set, then we have to read the current
            // payload length
            if ((delta & 1) != 0)
            {
                payloadLength = proxStream.ReadVInt();
            }
            delta = Number.URShift(delta, 1);
            needToLoadPayload = true;
        }
        return delta;
    }

    /// <summary>
    /// Adds all positions of the skipped document to the pending lazy-skip
    /// count instead of reading past them now.
    /// </summary>
    protected internal override void SkippingDoc()
    {
        // we remember to skip a document lazily
        lazySkipProxCount += freq;
    }

    /// <summary>
    /// Moves to the next document. Unread positions of the current document
    /// are added to the pending lazy-skip count.
    /// </summary>
    /// <returns>The result of the base class's Next().</returns>
    public override bool Next()
    {
        // we remember to skip the remaining positions of the current
        // document lazily
        lazySkipProxCount += proxCount;

        if (base.Next())
        {
            // run super
            proxCount = freq; // note frequency
            position = 0; // reset position
            return true;
        }
        return false;
    }

    /// <summary>
    /// Not supported by this enumerator; always throws.
    /// </summary>
    /// <exception cref="System.NotSupportedException">Always thrown.</exception>
    public override int Read(int[] docs, int[] freqs)
    {
        throw new System.NotSupportedException("TermPositions does not support processing multiple documents in one call. Use TermDocs instead.");
    }


    /// <summary>Called by super.skipTo(). </summary>
    /// <param name="proxPointer">Prox stream pointer to seek to lazily.</param>
    /// <param name="payloadLength">Payload length to record as the current one.</param>
    protected internal override void SkipProx(long proxPointer, int payloadLength)
    {
        // we save the pointer, we might have to skip there lazily
        lazySkipPointer = proxPointer;
        lazySkipProxCount = 0;
        proxCount = 0;
        this.payloadLength = payloadLength;
        needToLoadPayload = false;
    }

    /// <summary>
    /// Reads past <paramref name="n"/> positions, including their payloads.
    /// </summary>
    /// <param name="n">Number of positions to skip; nothing is read when it is not positive.</param>
    private void SkipPositions(int n)
    {
        System.Diagnostics.Debug.Assert(!currentFieldOmitTermFreqAndPositions);
        for (int f = n; f > 0; f--)
        {
            // skip unread positions
            ReadDeltaPosition();
            SkipPayload();
        }
    }

    /// <summary>
    /// Moves the prox stream past the payload of the current position if that
    /// payload has not been read, then clears the pending-payload flag.
    /// </summary>
    private void SkipPayload()
    {
        if (needToLoadPayload && payloadLength > 0)
        {
            proxStream.Seek(proxStream.FilePointer + payloadLength);
        }
        needToLoadPayload = false;
    }

    // It is not always necessary to move the prox pointer
    // to a new document after the freq pointer has been moved.
    // Consider for example a phrase query with two terms:
    // the freq pointer for term 1 has to move to document x
    // to answer the question if the term occurs in that document. But
    // only if term 2 also matches document x, the positions have to be
    // read to figure out if term 1 and term 2 appear next
    // to each other in document x and thus satisfy the query.
    // So we move the prox pointer lazily to the document
    // as soon as positions are requested.
    private void LazySkip()
    {
        if (proxStream == null)
        {
            // clone lazily
            proxStream = (IndexInput) parent.core.proxStream.Clone();
        }

        // we might have to skip the current payload
        // if it was not read yet
        SkipPayload();

        // a pending seek is applied before the pending position skips
        if (lazySkipPointer != -1)
        {
            proxStream.Seek(lazySkipPointer);
            lazySkipPointer = -1;
        }

        if (lazySkipProxCount != 0)
        {
            SkipPositions(lazySkipProxCount);
            lazySkipProxCount = 0;
        }
    }

    /// <summary>
    /// The most recently recorded payload length.
    /// </summary>
    public int PayloadLength
    {
        get { return payloadLength; }
    }

    /// <summary>
    /// Reads the payload of the current position from the prox stream. It can
    /// be read only once per position.
    /// </summary>
    /// <param name="data">
    /// Buffer to store the payload in. A new array is allocated when this is
    /// null or has fewer than <see cref="PayloadLength"/> bytes available
    /// after <paramref name="offset"/>.
    /// </param>
    /// <param name="offset">Index in <paramref name="data"/> at which the payload starts.</param>
    /// <returns>
    /// <paramref name="data"/> when it was large enough, with the payload at
    /// <paramref name="offset"/>. Otherwise a new array of exactly
    /// <see cref="PayloadLength"/> bytes with the payload at index 0.
    /// </returns>
    /// <exception cref="System.IO.IOException">
    /// No payload is pending for the current position.
    /// </exception>
    public byte[] GetPayload(byte[] data, int offset)
    {
        if (!needToLoadPayload)
        {
            throw new System.IO.IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
        }

        // read payloads lazily
        byte[] retArray;
        int retOffset;
        if (data == null || data.Length - offset < payloadLength)
        {
            // the array is too small to store the payload data,
            // so we allocate a new one
            retArray = new byte[payloadLength];
            retOffset = 0;
        }
        else
        {
            retArray = data;
            retOffset = offset;
        }
        proxStream.ReadBytes(retArray, retOffset, payloadLength);
        needToLoadPayload = false;
        return retArray;
    }

    /// <summary>
    /// True when the current position has a non-empty payload that has not
    /// been read or skipped yet.
    /// </summary>
    public bool IsPayloadAvailable
    {
        get { return needToLoadPayload && payloadLength > 0; }
    }
}
226 }