Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
BitVector.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using Lucene.Net.Support;
20 using Directory = Lucene.Net.Store.Directory;
21 using IndexInput = Lucene.Net.Store.IndexInput;
22 using IndexOutput = Lucene.Net.Store.IndexOutput;
23 
24 namespace Lucene.Net.Util
25 {
26 
27  /// <summary>Optimized implementation of a vector of bits. This is more-or-less like
28  /// java.util.BitSet, but also includes the following:
29  /// <list type="bullet">
30  /// <item>a count() method, which efficiently computes the number of one bits;</item>
31  /// <item>optimized read from and write to disk;</item>
32  /// <item>inlinable get() method;</item>
33  /// <item>store and load, as bit set or d-gaps, depending on sparseness;</item>
34  /// </list>
35  /// </summary>
36  public sealed class BitVector : ICloneable
37  {
38 
39  private byte[] bits;
40  private int size;
41  private int count;
42 
43  /// <summary>Constructs a vector capable of holding <c>n</c> bits. </summary>
44  public BitVector(int n)
45  {
46  size = n;
47  bits = new byte[(size >> 3) + 1];
48  count = 0;
49  }
50 
51  internal BitVector(byte[] bits, int size)
52  {
53  this.bits = bits;
54  this.size = size;
55  count = -1;
56  }
57 
58  public System.Object Clone()
59  {
60  byte[] copyBits = new byte[bits.Length];
61  Array.Copy(bits, 0, copyBits, 0, bits.Length);
62  BitVector clone = new BitVector(copyBits, size);
63  clone.count = count;
64  return clone;
65  }
66 
67  /// <summary>Sets the value of <c>bit</c> to one. </summary>
68  public void Set(int bit)
69  {
70  if (bit >= size)
71  {
72  throw new System. IndexOutOfRangeException("Index of bound " + bit);
73  }
74  bits[bit >> 3] |= (byte) (1 << (bit & 7));
75  count = - 1;
76  }
77 
78  /// <summary>Sets the value of <c>bit</c> to true, and
79  /// returns true if bit was already set
80  /// </summary>
81  public bool GetAndSet(int bit)
82  {
83  if (bit >= size)
84  {
85  throw new System. IndexOutOfRangeException("Index of bound " + bit);
86  }
87  int pos = bit >> 3;
88  int v = bits[pos];
89  int flag = 1 << (bit & 7);
90  if ((flag & v) != 0)
91  return true;
92  else
93  {
94  bits[pos] = (byte) (v | flag);
95  if (count != - 1)
96  count++;
97  return false;
98  }
99  }
100 
101  /// <summary>Sets the value of <c>bit</c> to zero. </summary>
102  public void Clear(int bit)
103  {
104  if (bit >= size)
105  {
106  throw new System.IndexOutOfRangeException("Index of bound " + bit);
107  }
108  bits[bit >> 3] &= (byte) (~ (1 << (bit & 7)));
109  count = - 1;
110  }
111 
112  /// <summary>Returns <c>true</c> if <c>bit</c> is one and
113  /// <c>false</c> if it is zero.
114  /// </summary>
115  public bool Get(int bit)
116  {
117  System.Diagnostics.Debug.Assert(bit >= 0 && bit < size, "bit " + bit + " is out of bounds 0.." +(size - 1));
118  return (bits[bit >> 3] & (1 << (bit & 7))) != 0;
119  }
120 
121  /// <summary>Returns the number of bits in this vector. This is also one greater than
122  /// the number of the largest valid bit number.
123  /// </summary>
124  public int Size()
125  {
126  return size;
127  }
128 
129  /// <summary>Returns the total number of one bits in this vector. This is efficiently
130  /// computed and cached, so that, if the vector is not changed, no
131  /// recomputation is done for repeated calls.
132  /// </summary>
133  public int Count()
134  {
135  // if the vector has been modified
136  if (count == - 1)
137  {
138  int c = 0;
139  int end = bits.Length;
140  for (int i = 0; i < end; i++)
141  c += BYTE_COUNTS[bits[i] & 0xFF]; // sum bits per byte
142  count = c;
143  }
144  return count;
145  }
146 
147  /// <summary>
148  /// For testing
149  /// </summary>
150  [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
151  public int GetRecomputedCount()
152  {
153  int c = 0;
154  int end = bits.Length;
155  for (int i = 0; i < end; i++)
156  c += BYTE_COUNTS[bits[i] & 0xFF]; // sum bits per byte
157  return c;
158  }
159 
160  private static readonly byte[] BYTE_COUNTS = new byte[]{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
161 
162 
163  /// <summary>Writes this vector to the file <c>name</c> in Directory
164  /// <c>d</c>, in a format that can be read by the constructor
165  /// <see cref="BitVector(Directory, String)" />.
166  /// </summary>
167  public void Write(Directory d, System.String name)
168  {
169  IndexOutput output = d.CreateOutput(name);
170  try
171  {
172  if (IsSparse())
173  {
174  WriteDgaps(output); // sparse bit-set more efficiently saved as d-gaps.
175  }
176  else
177  {
178  WriteBits(output);
179  }
180  }
181  finally
182  {
183  output.Close();
184  }
185  }
186 
187  /// <summary>Write as a bit set </summary>
188  private void WriteBits(IndexOutput output)
189  {
190  output.WriteInt(Size()); // write size
191  output.WriteInt(Count()); // write count
192  output.WriteBytes(bits, bits.Length);
193  }
194 
195  /// <summary>Write as a d-gaps list </summary>
196  private void WriteDgaps(IndexOutput output)
197  {
198  output.WriteInt(- 1); // mark using d-gaps
199  output.WriteInt(Size()); // write size
200  output.WriteInt(Count()); // write count
201  int last = 0;
202  int n = Count();
203  int m = bits.Length;
204  for (int i = 0; i < m && n > 0; i++)
205  {
206  if (bits[i] != 0)
207  {
208  output.WriteVInt(i - last);
209  output.WriteByte(bits[i]);
210  last = i;
211  n -= BYTE_COUNTS[bits[i] & 0xFF];
212  }
213  }
214  }
215 
216  /// <summary>Indicates if the bit vector is sparse and should be saved as a d-gaps list, or dense, and should be saved as a bit set. </summary>
217  private bool IsSparse()
218  {
219  // note: order of comparisons below set to favor smaller values (no binary range search.)
220  // note: adding 4 because we start with ((int) -1) to indicate d-gaps format.
221  // note: we write the d-gap for the byte number, and the byte (bits[i]) itself, therefore
222  // multiplying count by (8+8) or (8+16) or (8+24) etc.:
223  // - first 8 for writing bits[i] (1 byte vs. 1 bit), and
224  // - second part for writing the byte-number d-gap as vint.
225  // note: factor is for read/write of byte-arrays being faster than vints.
226  int factor = 10;
227  if (bits.Length < (1 << 7))
228  return factor * (4 + (8 + 8) * Count()) < Size();
229  if (bits.Length < (1 << 14))
230  return factor * (4 + (8 + 16) * Count()) < Size();
231  if (bits.Length < (1 << 21))
232  return factor * (4 + (8 + 24) * Count()) < Size();
233  if (bits.Length < (1 << 28))
234  return factor * (4 + (8 + 32) * Count()) < Size();
235  return factor * (4 + (8 + 40) * Count()) < Size();
236  }
237 
238  /// <summary>Constructs a bit vector from the file <c>name</c> in Directory
239  /// <c>d</c>, as written by the <see cref="Write" /> method.
240  /// </summary>
241  public BitVector(Directory d, System.String name)
242  {
243  IndexInput input = d.OpenInput(name);
244  try
245  {
246  size = input.ReadInt(); // read size
247  if (size == - 1)
248  {
249  ReadDgaps(input);
250  }
251  else
252  {
253  ReadBits(input);
254  }
255  }
256  finally
257  {
258  input.Close();
259  }
260  }
261 
262  /// <summary>Read as a bit set </summary>
263  private void ReadBits(IndexInput input)
264  {
265  count = input.ReadInt(); // read count
266  bits = new byte[(size >> 3) + 1]; // allocate bits
267  input.ReadBytes(bits, 0, bits.Length);
268  }
269 
270  /// <summary>read as a d-gaps list </summary>
271  private void ReadDgaps(IndexInput input)
272  {
273  size = input.ReadInt(); // (re)read size
274  count = input.ReadInt(); // read count
275  bits = new byte[(size >> 3) + 1]; // allocate bits
276  int last = 0;
277  int n = Count();
278  while (n > 0)
279  {
280  last += input.ReadVInt();
281  bits[last] = input.ReadByte();
282  n -= BYTE_COUNTS[bits[last] & 0xFF];
283  }
284  }
285 
286  /// <summary> Retrieve a subset of this BitVector.
287  ///
288  /// </summary>
289  /// <param name="start">starting index, inclusive
290  /// </param>
291  /// <param name="end">ending index, exclusive
292  /// </param>
293  /// <returns> subset
294  /// </returns>
295  public BitVector Subset(int start, int end)
296  {
297  if (start < 0 || end > Size() || end < start)
298  throw new System.IndexOutOfRangeException();
299  // Special case -- return empty vector is start == end
300  if (end == start)
301  return new BitVector(0);
302  byte[] bits = new byte[(Number.URShift((end - start - 1), 3)) + 1];
303  int s = Number.URShift(start, 3);
304  for (int i = 0; i < bits.Length; i++)
305  {
306  int cur = 0xFF & this.bits[i + s];
307  int next = i + s + 1 >= this.bits.Length?0:0xFF & this.bits[i + s + 1];
308  bits[i] = (byte) ((Number.URShift(cur, (start & 7))) | ((next << (8 - (start & 7)))));
309  }
310  int bitsToClear = (bits.Length * 8 - (end - start)) % 8;
311  bits[bits.Length - 1] &= (byte) (~ (0xFF << (8 - bitsToClear)));
312  return new BitVector(bits, end - start);
313  }
314  }
315 }