Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
SortedVIntList.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using Lucene.Net.Support;
20 using DocIdSet = Lucene.Net.Search.DocIdSet;
21 using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
22 
23 namespace Lucene.Net.Util
24 {
25 
26  /// <summary> Stores and iterate on sorted integers in compressed form in RAM. <br/>
27  /// The code for compressing the differences between ascending integers was
28  /// borrowed from <see cref="Lucene.Net.Store.IndexInput" /> and
29  /// <see cref="Lucene.Net.Store.IndexOutput" />.<p/>
30  /// <b>NOTE:</b> this class assumes the stored integers are doc Ids (hence why it
31  /// extends <see cref="DocIdSet" />). Therefore its <see cref="Iterator()" /> assumes <see cref="DocIdSetIterator.NO_MORE_DOCS" />
32  /// can be used as sentinel. If you intent to use
33  /// this value, then make sure it's not used during search flow.
34  /// </summary>
35  public class SortedVIntList:DocIdSet
36  {
37  private class AnonymousClassDocIdSetIterator:DocIdSetIterator
38  {
39  public AnonymousClassDocIdSetIterator(SortedVIntList enclosingInstance)
40  {
41  InitBlock(enclosingInstance);
42  }
43  private void InitBlock(SortedVIntList enclosingInstance)
44  {
45  this.enclosingInstance = enclosingInstance;
46  }
47  private SortedVIntList enclosingInstance;
48  public SortedVIntList Enclosing_Instance
49  {
50  get
51  {
52  return enclosingInstance;
53  }
54 
55  }
56  internal int bytePos = 0;
57  internal int lastInt = 0;
58  internal int doc = - 1;
59 
60  private void Advance()
61  {
62  // See Lucene.Net.Store.IndexInput.readVInt()
63  sbyte b = Enclosing_Instance.bytes[bytePos++];
64  lastInt += (b & Lucene.Net.Util.SortedVIntList.VB1);
65  for (int s = Lucene.Net.Util.SortedVIntList.BIT_SHIFT; (b & ~ Lucene.Net.Util.SortedVIntList.VB1) != 0; s += Lucene.Net.Util.SortedVIntList.BIT_SHIFT)
66  {
67  b = Enclosing_Instance.bytes[bytePos++];
68  lastInt += ((b & Lucene.Net.Util.SortedVIntList.VB1) << s);
69  }
70  }
71 
72  public override int DocID()
73  {
74  return doc;
75  }
76 
77  public override int NextDoc()
78  {
79  if (bytePos >= Enclosing_Instance.lastBytePos)
80  {
81  doc = NO_MORE_DOCS;
82  }
83  else
84  {
85  Advance();
86  doc = lastInt;
87  }
88  return doc;
89  }
90 
91  public override int Advance(int target)
92  {
93  while (bytePos < Enclosing_Instance.lastBytePos)
94  {
95  Advance();
96  if (lastInt >= target)
97  {
98  return doc = lastInt;
99  }
100  }
101  return doc = NO_MORE_DOCS;
102  }
103  }
104  /// <summary>When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set,
105  /// a SortedVIntList representing the index numbers of the set bits
106  /// will be smaller than that BitSet.
107  /// </summary>
108  internal const int BITS2VINTLIST_SIZE = 8;
109 
110  private int size;
111  private sbyte[] bytes;
112  private int lastBytePos;
113 
114  /// <summary> Create a SortedVIntList from all elements of an array of integers.
115  ///
116  /// </summary>
117  /// <param name="sortedInts"> A sorted array of non negative integers.
118  /// </param>
119  public SortedVIntList(params int[] sortedInts):this(sortedInts, sortedInts.Length)
120  {
121  }
122 
123  /// <summary> Create a SortedVIntList from an array of integers.</summary>
124  /// <param name="sortedInts"> An array of sorted non negative integers.
125  /// </param>
126  /// <param name="inputSize"> The number of integers to be used from the array.
127  /// </param>
128  public SortedVIntList(int[] sortedInts, int inputSize)
129  {
130  SortedVIntListBuilder builder = new SortedVIntListBuilder(this);
131  for (int i = 0; i < inputSize; i++)
132  {
133  builder.AddInt(sortedInts[i]);
134  }
135  builder.Done();
136  }
137 
138  /// <summary> Create a SortedVIntList from a BitSet.</summary>
139  /// <param name="bits"> A bit set representing a set of integers.
140  /// </param>
141  public SortedVIntList(System.Collections.BitArray bits)
142  {
143  SortedVIntListBuilder builder = new SortedVIntListBuilder(this);
144  int nextInt = BitSetSupport.NextSetBit(bits, 0);
145  while (nextInt != - 1)
146  {
147  builder.AddInt(nextInt);
148  nextInt = BitSetSupport.NextSetBit(bits, nextInt + 1);
149  }
150  builder.Done();
151  }
152 
153  /// <summary> Create a SortedVIntList from an OpenBitSet.</summary>
154  /// <param name="bits"> A bit set representing a set of integers.
155  /// </param>
157  {
158  SortedVIntListBuilder builder = new SortedVIntListBuilder(this);
159  int nextInt = bits.NextSetBit(0);
160  while (nextInt != - 1)
161  {
162  builder.AddInt(nextInt);
163  nextInt = bits.NextSetBit(nextInt + 1);
164  }
165  builder.Done();
166  }
167 
168  /// <summary> Create a SortedVIntList.</summary>
169  /// <param name="docIdSetIterator"> An iterator providing document numbers as a set of integers.
170  /// This DocIdSetIterator is iterated completely when this constructor
171  /// is called and it must provide the integers in non
172  /// decreasing order.
173  /// </param>
174  public SortedVIntList(DocIdSetIterator docIdSetIterator)
175  {
176  SortedVIntListBuilder builder = new SortedVIntListBuilder(this);
177  int doc;
178  while ((doc = docIdSetIterator.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
179  {
180  builder.AddInt(doc);
181  }
182  builder.Done();
183  }
184 
185 
186  private class SortedVIntListBuilder
187  {
188  private void InitBlock(SortedVIntList enclosingInstance)
189  {
190  this.enclosingInstance = enclosingInstance;
191  }
192  private SortedVIntList enclosingInstance;
193  public SortedVIntList Enclosing_Instance
194  {
195  get
196  {
197  return enclosingInstance;
198  }
199 
200  }
201  private int lastInt = 0;
202 
203  internal SortedVIntListBuilder(SortedVIntList enclosingInstance)
204  {
205  InitBlock(enclosingInstance);
206  Enclosing_Instance.InitBytes();
207  lastInt = 0;
208  }
209 
210  internal virtual void AddInt(int nextInt)
211  {
212  int diff = nextInt - lastInt;
213  if (diff < 0)
214  {
215  throw new System.ArgumentException("Input not sorted or first element negative.");
216  }
217 
218  if ((Enclosing_Instance.lastBytePos + Enclosing_Instance.MAX_BYTES_PER_INT) > Enclosing_Instance.bytes.Length)
219  {
220  // biggest possible int does not fit
221  Enclosing_Instance.ResizeBytes((Enclosing_Instance.bytes.Length * 2) + Enclosing_Instance.MAX_BYTES_PER_INT);
222  }
223 
224  // See Lucene.Net.Store.IndexOutput.writeVInt()
225  while ((diff & ~ Lucene.Net.Util.SortedVIntList.VB1) != 0)
226  {
227  // The high bit of the next byte needs to be set.
228  Enclosing_Instance.bytes[Enclosing_Instance.lastBytePos++] = (sbyte) ((diff & Lucene.Net.Util.SortedVIntList.VB1) | ~ Lucene.Net.Util.SortedVIntList.VB1);
229  diff = Number.URShift(diff, Lucene.Net.Util.SortedVIntList.BIT_SHIFT);
230  }
231  Enclosing_Instance.bytes[Enclosing_Instance.lastBytePos++] = (sbyte) diff; // Last byte, high bit not set.
232  Enclosing_Instance.size++;
233  lastInt = nextInt;
234  }
235 
236  internal virtual void Done()
237  {
238  Enclosing_Instance.ResizeBytes(Enclosing_Instance.lastBytePos);
239  }
240  }
241 
242 
243  private void InitBytes()
244  {
245  size = 0;
246  bytes = new sbyte[128]; // initial byte size
247  lastBytePos = 0;
248  }
249 
250  private void ResizeBytes(int newSize)
251  {
252  if (newSize != bytes.Length)
253  {
254  sbyte[] newBytes = new sbyte[newSize];
255  Array.Copy(bytes, 0, newBytes, 0, lastBytePos);
256  bytes = newBytes;
257  }
258  }
259 
260  private const int VB1 = 0x7F;
261  private const int BIT_SHIFT = 7;
262  private int MAX_BYTES_PER_INT = (31 / BIT_SHIFT) + 1;
263 
264  /// <value> The total number of sorted integers. </value>
265  public virtual int Size
266  {
267  get { return size; }
268  }
269 
270  /// <value> The size of the byte array storing the compressed sorted integers. </value>
271  public virtual int ByteSize
272  {
273  get { return bytes.Length; }
274  }
275 
276  /// <summary>This DocIdSet implementation is cacheable. </summary>
277  public override bool IsCacheable
278  {
279  get { return true; }
280  }
281 
282  /// <returns> An iterator over the sorted integers.
283  /// </returns>
284  public override DocIdSetIterator Iterator()
285  {
286  return new AnonymousClassDocIdSetIterator(this);
287  }
288  }
289 }