Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
OpenBitSetIterator.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
19 
20 namespace Lucene.Net.Util
21 {
22 
23  /// <summary>An iterator to iterate over set bits in an OpenBitSet.
24  /// This is faster than nextSetBit() for iterating over the complete set of bits,
25  /// especially when the density of the bits set is high.
26  ///
27  /// </summary>
28  /// <version> $Id$
29  /// </version>
31  {
32 
33  // The General Idea: instead of having an array per byte that has
34  // the offsets of the next set bit, that array could be
35  // packed inside a 32 bit integer (8 4 bit numbers). That
36  // should be faster than accessing an array for each index, and
37  // the total array size is kept smaller (256*sizeof(int))=1K
38  // NOTE: Removed protected access for CLS-Compliance
39  /*protected*/ internal static readonly uint[] bitlist = new uint[]
40  {
41  0x0, 0x1, 0x2, 0x21, 0x3, 0x31, 0x32, 0x321, 0x4, 0x41,
42  0x42, 0x421, 0x43, 0x431, 0x432, 0x4321, 0x5, 0x51,
43  0x52, 0x521, 0x53, 0x531, 0x532, 0x5321, 0x54, 0x541,
44  0x542, 0x5421, 0x543, 0x5431, 0x5432, 0x54321, 0x6,
45  0x61, 0x62, 0x621, 0x63, 0x631, 0x632, 0x6321, 0x64,
46  0x641, 0x642, 0x6421, 0x643, 0x6431, 0x6432, 0x64321,
47  0x65, 0x651, 0x652, 0x6521, 0x653, 0x6531, 0x6532,
48  0x65321, 0x654, 0x6541, 0x6542, 0x65421, 0x6543,
49  0x65431, 0x65432, 0x654321, 0x7, 0x71, 0x72, 0x721,
50  0x73, 0x731, 0x732, 0x7321, 0x74, 0x741, 0x742, 0x7421,
51  0x743, 0x7431, 0x7432, 0x74321, 0x75, 0x751, 0x752,
52  0x7521, 0x753, 0x7531, 0x7532, 0x75321, 0x754, 0x7541,
53  0x7542, 0x75421, 0x7543, 0x75431, 0x75432, 0x754321,
54  0x76, 0x761, 0x762, 0x7621, 0x763, 0x7631, 0x7632,
55  0x76321, 0x764, 0x7641, 0x7642, 0x76421, 0x7643,
56  0x76431, 0x76432, 0x764321, 0x765, 0x7651, 0x7652,
57  0x76521, 0x7653, 0x76531, 0x76532, 0x765321, 0x7654,
58  0x76541, 0x76542, 0x765421, 0x76543, 0x765431, 0x765432
59  , 0x7654321, 0x8, 0x81, 0x82, 0x821, 0x83, 0x831, 0x832
60  , 0x8321, 0x84, 0x841, 0x842, 0x8421, 0x843, 0x8431,
61  0x8432, 0x84321, 0x85, 0x851, 0x852, 0x8521, 0x853,
62  0x8531, 0x8532, 0x85321, 0x854, 0x8541, 0x8542, 0x85421
63  , 0x8543, 0x85431, 0x85432, 0x854321, 0x86, 0x861,
64  0x862, 0x8621, 0x863, 0x8631, 0x8632, 0x86321, 0x864,
65  0x8641, 0x8642, 0x86421, 0x8643, 0x86431, 0x86432,
66  0x864321, 0x865, 0x8651, 0x8652, 0x86521, 0x8653,
67  0x86531, 0x86532, 0x865321, 0x8654, 0x86541, 0x86542,
68  0x865421, 0x86543, 0x865431, 0x865432, 0x8654321, 0x87,
69  0x871, 0x872, 0x8721, 0x873, 0x8731, 0x8732, 0x87321,
70  0x874, 0x8741, 0x8742, 0x87421, 0x8743, 0x87431,
71  0x87432, 0x874321, 0x875, 0x8751, 0x8752, 0x87521,
72  0x8753, 0x87531, 0x87532, 0x875321, 0x8754, 0x87541,
73  0x87542, 0x875421, 0x87543, 0x875431, 0x875432,
74  0x8754321, 0x876, 0x8761, 0x8762, 0x87621, 0x8763,
75  0x87631, 0x87632, 0x876321, 0x8764, 0x87641, 0x87642,
76  0x876421, 0x87643, 0x876431, 0x876432, 0x8764321,
77  0x8765, 0x87651, 0x87652, 0x876521, 0x87653, 0x876531,
78  0x876532, 0x8765321, 0x87654,
79  0x876541, 0x876542, 0x8765421, 0x876543, 0x8765431,
80  0x8765432, 0x87654321
81  };
82  /// <summary>** the python code that generated bitlist
83  /// def bits2int(val):
84  /// arr=0
85  /// for shift in range(8,0,-1):
86  /// if val &amp; 0x80:
87  /// arr = (arr &lt;&lt; 4) | shift
88  /// val = val &lt;&lt; 1
89  /// return arr
90  /// def int_table():
91  /// tbl = [ hex(bits2int(val)).strip('L') for val in range(256) ]
92  /// return ','.join(tbl)
93  /// ****
94  /// </summary>
95 
96  // hmmm, what about an iterator that finds zeros though,
97  // or a reverse iterator... should they be separate classes
98  // for efficiency, or have a common root interface? (or
99  // maybe both? could ask for a SetBitsIterator, etc...
100 
101  private readonly long[] arr;
102  private readonly int words;
103  private int i = - 1;
104  private long word;
105  private int wordShift;
106  private int indexArray;
107  private int curDocId = - 1;
108 
109  public OpenBitSetIterator(OpenBitSet obs):this(obs.Bits, obs.NumWords)
110  {
111  }
112 
113  public OpenBitSetIterator(long[] bits, int numWords)
114  {
115  arr = bits;
116  words = numWords;
117  }
118 
119  // 64 bit shifts
120  private void Shift()
121  {
122  if ((int) word == 0)
123  {
124  wordShift += 32; word = (long) ((ulong) word >> 32);
125  }
126  if ((word & 0x0000FFFF) == 0)
127  {
128  wordShift += 16; word = (long) ((ulong) word >> 16);
129  }
130  if ((word & 0x000000FF) == 0)
131  {
132  wordShift += 8; word = (long) ((ulong) word >> 8);
133  }
134  indexArray = (int) bitlist[word & 0xff];
135  }
136 
137  /*/// <summary>** alternate shift implementations
138  /// // 32 bit shifts, but a long shift needed at the end
139  /// private void shift2() {
140  /// int y = (int)word;
141  /// if (y==0) {wordShift +=32; y = (int)(word >>>32); }
142  /// if ((y & 0x0000FFFF) == 0) { wordShift +=16; y>>>=16; }
143  /// if ((y & 0x000000FF) == 0) { wordShift +=8; y>>>=8; }
144  /// indexArray = bitlist[y & 0xff];
145  /// word >>>= (wordShift +1);
146  /// }
147  /// private void shift3() {
148  /// int lower = (int)word;
149  /// int lowByte = lower & 0xff;
150  /// if (lowByte != 0) {
151  /// indexArray=bitlist[lowByte];
152  /// return;
153  /// }
154  /// shift();
155  /// }
156  /// ****
157  /// </summary>*/
158 
159  public override int NextDoc()
160  {
161  if (indexArray == 0)
162  {
163  if (word != 0)
164  {
165  word = (long) ((ulong) word >> 8);
166  wordShift += 8;
167  }
168 
169  while (word == 0)
170  {
171  if (++i >= words)
172  {
173  return curDocId = NO_MORE_DOCS;
174  }
175  word = arr[i];
176  wordShift = - 1; // loop invariant code motion should move this
177  }
178 
179  // after the first time, should I go with a linear search, or
180  // stick with the binary search in shift?
181  Shift();
182  }
183 
184  int bitIndex = (indexArray & 0x0f) + wordShift;
185  indexArray = (int) ((uint) indexArray >> 4);
186  // should i<<6 be cached as a separate variable?
187  // it would only save one cycle in the best circumstances.
188  return curDocId = (i << 6) + bitIndex;
189  }
190 
191  public override int Advance(int target)
192  {
193  indexArray = 0;
194  i = target >> 6;
195  if (i >= words)
196  {
197  word = 0; // setup so next() will also return -1
198  return curDocId = NO_MORE_DOCS;
199  }
200  wordShift = target & 0x3f;
201  word = (long) ((ulong) arr[i] >> wordShift);
202  if (word != 0)
203  {
204  wordShift--; // compensate for 1 based arrIndex
205  }
206  else
207  {
208  while (word == 0)
209  {
210  if (++i >= words)
211  {
212  return curDocId = NO_MORE_DOCS;
213  }
214  word = arr[i];
215  }
216  wordShift = - 1;
217  }
218 
219  Shift();
220 
221  int bitIndex = (indexArray & 0x0f) + wordShift;
222  indexArray = (int) ((uint) indexArray >> 4);
223  // should i<<6 be cached as a separate variable?
224  // it would only save one cycle in the best circumstances.
225  return curDocId = (i << 6) + bitIndex;
226  }
227 
228  public override int DocID()
229  {
230  return curDocId;
231  }
232  }
233 }