Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
OpenBitSet.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using Lucene.Net.Support;
20 using DocIdSet = Lucene.Net.Search.DocIdSet;
21 using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
22 
23 namespace Lucene.Net.Util
24 {
25 
79 
80  [Serializable]
81  public class OpenBitSet:DocIdSet, System.ICloneable
82  {
83  protected internal long[] internalbits;
84  protected internal int wlen; // number of words (elements) used in the array
85 
91  public OpenBitSet(long numBits)
92  {
93  internalbits = new long[Bits2words(numBits)];
94  wlen = internalbits.Length;
95  }
96 
97  public OpenBitSet():this(64)
98  {
99  }
100 
114  public OpenBitSet(long[] bits, int numWords)
115  {
116  this.internalbits = bits;
117  this.wlen = numWords;
118  }
119 
120  public override DocIdSetIterator Iterator()
121  {
122  return new OpenBitSetIterator(internalbits, wlen);
123  }
124 
126  public override bool IsCacheable
127  {
128  get { return true; }
129  }
130 
132  public virtual long Capacity()
133  {
134  return internalbits.Length << 6;
135  }
136 
140  public virtual long Size()
141  {
142  return Capacity();
143  }
144 
146  public virtual bool IsEmpty()
147  {
148  return Cardinality() == 0;
149  }
150 
152  [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Performance", "CA1819:PropertiesShouldNotReturnArrays")]
153  public virtual long[] Bits
154  {
155  set { this.internalbits = value; }
156  get { return internalbits; }
157  }
158 
160  public virtual int NumWords
161  {
162  get { return wlen; }
163  set { this.wlen = value; }
164  }
165 
166 
168  public virtual bool Get(int index)
169  {
170  int i = index >> 6; // div 64
171  // signed shift will keep a negative index and force an
172  // array-index-out-of-bounds-exception, removing the need for an explicit check.
173  if (i >= internalbits.Length)
174  return false;
175 
176  int bit = index & 0x3f; // mod 64
177  long bitmask = 1L << bit;
178  return (internalbits[i] & bitmask) != 0;
179  }
180 
181 
185  public virtual bool FastGet(int index)
186  {
187  int i = index >> 6; // div 64
188  // signed shift will keep a negative index and force an
189  // array-index-out-of-bounds-exception, removing the need for an explicit check.
190  int bit = index & 0x3f; // mod 64
191  long bitmask = 1L << bit;
192  return (internalbits[i] & bitmask) != 0;
193  }
194 
195 
196 
198  public virtual bool Get(long index)
199  {
200  int i = (int) (index >> 6); // div 64
201  if (i >= internalbits.Length)
202  return false;
203  int bit = (int) index & 0x3f; // mod 64
204  long bitmask = 1L << bit;
205  return (internalbits[i] & bitmask) != 0;
206  }
207 
211  public virtual bool FastGet(long index)
212  {
213  int i = (int) (index >> 6); // div 64
214  int bit = (int) index & 0x3f; // mod 64
215  long bitmask = 1L << bit;
216  return (internalbits[i] & bitmask) != 0;
217  }
218 
219  /*
220  // alternate implementation of get()
221  public boolean get1(int index) {
222  int i = index >> 6; // div 64
223  int bit = index & 0x3f; // mod 64
224  return ((bits[i]>>>bit) & 0x01) != 0;
225  // this does a long shift and a bittest (on x86) vs
226  // a long shift, and a long AND, (the test for zero is prob a no-op)
227  // testing on a P4 indicates this is slower than (bits[i] & bitmask) != 0;
228  }
229  */
230 
231 
235  public virtual int GetBit(int index)
236  {
237  int i = index >> 6; // div 64
238  int bit = index & 0x3f; // mod 64
239  return ((int )((ulong) (internalbits[i]) >> bit)) & 0x01;
240  }
241 
242 
243  /*
244  public boolean get2(int index) {
245  int word = index >> 6; // div 64
246  int bit = index & 0x0000003f; // mod 64
247  return (bits[word] << bit) < 0; // hmmm, this would work if bit order were reversed
248  // we could right shift and check for parity bit, if it was available to us.
249  }
250  */
251 
253  public virtual void Set(long index)
254  {
255  int wordNum = ExpandingWordNum(index);
256  int bit = (int) index & 0x3f;
257  long bitmask = 1L << bit;
258  internalbits[wordNum] |= bitmask;
259  }
260 
261 
265  public virtual void FastSet(int index)
266  {
267  int wordNum = index >> 6; // div 64
268  int bit = index & 0x3f; // mod 64
269  long bitmask = 1L << bit;
270  internalbits[wordNum] |= bitmask;
271  }
272 
276  public virtual void FastSet(long index)
277  {
278  int wordNum = (int) (index >> 6);
279  int bit = (int) index & 0x3f;
280  long bitmask = 1L << bit;
281  internalbits[wordNum] |= bitmask;
282  }
283 
291  public virtual void Set(long startIndex, long endIndex)
292  {
293  if (endIndex <= startIndex)
294  return ;
295 
296  int startWord = (int) (startIndex >> 6);
297 
298  // since endIndex is one past the end, this is index of the last
299  // word to be changed.
300  int endWord = ExpandingWordNum(endIndex - 1);
301 
302  long startmask = - 1L << (int) startIndex;
303  long endmask = (long) (0xffffffffffffffffUL >> (int) - endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
304 
305  if (startWord == endWord)
306  {
307  internalbits[startWord] |= (startmask & endmask);
308  return ;
309  }
310 
311  internalbits[startWord] |= startmask;
312  for (int i = startWord + 1; i < endWord; i++)
313  internalbits[i] = -1L;
314  internalbits[endWord] |= endmask;
315  }
316 
317 
318 
319  protected internal virtual int ExpandingWordNum(long index)
320  {
321  int wordNum = (int) (index >> 6);
322  if (wordNum >= wlen)
323  {
324  EnsureCapacity(index + 1);
325  wlen = wordNum + 1;
326  }
327  return wordNum;
328  }
329 
330 
334  public virtual void FastClear(int index)
335  {
336  int wordNum = index >> 6;
337  int bit = index & 0x03f;
338  long bitmask = 1L << bit;
339  internalbits[wordNum] &= ~ bitmask;
340  // hmmm, it takes one more instruction to clear than it does to set... any
341  // way to work around this? If there were only 63 bits per word, we could
342  // use a right shift of 10111111...111 in binary to position the 0 in the
343  // correct place (using sign extension).
344  // Could also use Long.rotateRight() or rotateLeft() *if* they were converted
345  // by the JVM into a native instruction.
346  // bits[word] &= Long.rotateLeft(0xfffffffe,bit);
347  }
348 
352  public virtual void FastClear(long index)
353  {
354  int wordNum = (int) (index >> 6); // div 64
355  int bit = (int) index & 0x3f; // mod 64
356  long bitmask = 1L << bit;
357  internalbits[wordNum] &= ~ bitmask;
358  }
359 
361  public virtual void Clear(long index)
362  {
363  int wordNum = (int) (index >> 6); // div 64
364  if (wordNum >= wlen)
365  return ;
366  int bit = (int) index & 0x3f; // mod 64
367  long bitmask = 1L << bit;
368  internalbits[wordNum] &= ~ bitmask;
369  }
370 
378  public virtual void Clear(int startIndex, int endIndex)
379  {
380  if (endIndex <= startIndex)
381  return ;
382 
383  int startWord = (startIndex >> 6);
384  if (startWord >= wlen)
385  return ;
386 
387  // since endIndex is one past the end, this is index of the last
388  // word to be changed.
389  int endWord = ((endIndex - 1) >> 6);
390 
391  long startmask = - 1L << startIndex;
392  long endmask = (long) (0xffffffffffffffffUL >> - endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
393 
394  // invert masks since we are clearing
395  startmask = ~ startmask;
396  endmask = ~ endmask;
397 
398  if (startWord == endWord)
399  {
400  internalbits[startWord] &= (startmask | endmask);
401  return ;
402  }
403 
404  internalbits[startWord] &= startmask;
405 
406  int middle = System.Math.Min(wlen, endWord);
407  for (int i = startWord + 1; i < middle; i++)
408  internalbits[i] = 0L;
409  if (endWord < wlen)
410  {
411  internalbits[endWord] &= endmask;
412  }
413  }
414 
415 
423  public virtual void Clear(long startIndex, long endIndex)
424  {
425  if (endIndex <= startIndex)
426  return ;
427 
428  int startWord = (int) (startIndex >> 6);
429  if (startWord >= wlen)
430  return ;
431 
432  // since endIndex is one past the end, this is index of the last
433  // word to be changed.
434  int endWord = (int) ((endIndex - 1) >> 6);
435 
436  long startmask = - 1L << (int) startIndex;
437  long endmask = (long) (0xffffffffffffffffUL >> (int) - endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
438 
439  // invert masks since we are clearing
440  startmask = ~ startmask;
441  endmask = ~ endmask;
442 
443  if (startWord == endWord)
444  {
445  internalbits[startWord] &= (startmask | endmask);
446  return ;
447  }
448 
449  internalbits[startWord] &= startmask;
450 
451  int middle = System.Math.Min(wlen, endWord);
452  for (int i = startWord + 1; i < middle; i++)
453  internalbits[i] = 0L;
454  if (endWord < wlen)
455  {
456  internalbits[endWord] &= endmask;
457  }
458  }
459 
460 
461 
465  public virtual bool GetAndSet(int index)
466  {
467  int wordNum = index >> 6; // div 64
468  int bit = index & 0x3f; // mod 64
469  long bitmask = 1L << bit;
470  bool val = (internalbits[wordNum] & bitmask) != 0;
471  internalbits[wordNum] |= bitmask;
472  return val;
473  }
474 
478  public virtual bool GetAndSet(long index)
479  {
480  int wordNum = (int) (index >> 6); // div 64
481  int bit = (int) index & 0x3f; // mod 64
482  long bitmask = 1L << bit;
483  bool val = (internalbits[wordNum] & bitmask) != 0;
484  internalbits[wordNum] |= bitmask;
485  return val;
486  }
487 
491  public virtual void FastFlip(int index)
492  {
493  int wordNum = index >> 6; // div 64
494  int bit = index & 0x3f; // mod 64
495  long bitmask = 1L << bit;
496  internalbits[wordNum] ^= bitmask;
497  }
498 
502  public virtual void FastFlip(long index)
503  {
504  int wordNum = (int) (index >> 6); // div 64
505  int bit = (int) index & 0x3f; // mod 64
506  long bitmask = 1L << bit;
507  internalbits[wordNum] ^= bitmask;
508  }
509 
511  public virtual void Flip(long index)
512  {
513  int wordNum = ExpandingWordNum(index);
514  int bit = (int) index & 0x3f; // mod 64
515  long bitmask = 1L << bit;
516  internalbits[wordNum] ^= bitmask;
517  }
518 
522  public virtual bool FlipAndGet(int index)
523  {
524  int wordNum = index >> 6; // div 64
525  int bit = index & 0x3f; // mod 64
526  long bitmask = 1L << bit;
527  internalbits[wordNum] ^= bitmask;
528  return (internalbits[wordNum] & bitmask) != 0;
529  }
530 
534  public virtual bool FlipAndGet(long index)
535  {
536  int wordNum = (int) (index >> 6); // div 64
537  int bit = (int) index & 0x3f; // mod 64
538  long bitmask = 1L << bit;
539  internalbits[wordNum] ^= bitmask;
540  return (internalbits[wordNum] & bitmask) != 0;
541  }
542 
550  public virtual void Flip(long startIndex, long endIndex)
551  {
552  if (endIndex <= startIndex)
553  return ;
554  int startWord = (int) (startIndex >> 6);
555 
556  // since endIndex is one past the end, this is index of the last
557  // word to be changed.
558  int endWord = ExpandingWordNum(endIndex - 1);
559 
560  /* Grrr, java shifting wraps around so -1L>>>64 == -1
561  * for that reason, make sure not to use endmask if the bits to flip will
562  * be zero in the last word (redefine endWord to be the last changed...)
563  long startmask = -1L << (startIndex & 0x3f); // example: 11111...111000
564  long endmask = -1L >>> (64-(endIndex & 0x3f)); // example: 00111...111111
565  ***/
566 
567  long startmask = - 1L << (int) startIndex;
568  long endmask = (long) (0xffffffffffffffffUL >> (int) - endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
569 
570  if (startWord == endWord)
571  {
572  internalbits[startWord] ^= (startmask & endmask);
573  return ;
574  }
575 
576  internalbits[startWord] ^= startmask;
577 
578  for (int i = startWord + 1; i < endWord; i++)
579  {
580  internalbits[i] = ~ internalbits[i];
581  }
582 
583  internalbits[endWord] ^= endmask;
584  }
585 
586 
587  /*
588  public static int pop(long v0, long v1, long v2, long v3) {
589  // derived from pop_array by setting last four elems to 0.
590  // exchanges one pop() call for 10 elementary operations
591  // saving about 7 instructions... is there a better way?
592  long twosA=v0 & v1;
593  long ones=v0^v1;
594 
595  long u2=ones^v2;
596  long twosB =(ones&v2)|(u2&v3);
597  ones=u2^v3;
598 
599  long fours=(twosA&twosB);
600  long twos=twosA^twosB;
601 
602  return (pop(fours)<<2)
603  + (pop(twos)<<1)
604  + pop(ones);
605 
606  }
607  */
608 
609 
612  public virtual long Cardinality()
613  {
614  return BitUtil.Pop_array(internalbits, 0, wlen);
615  }
616 
620  public static long IntersectionCount(OpenBitSet a, OpenBitSet b)
621  {
622  return BitUtil.Pop_intersect(a.internalbits, b.internalbits, 0, System.Math.Min(a.wlen, b.wlen));
623  }
624 
628  public static long UnionCount(OpenBitSet a, OpenBitSet b)
629  {
630  long tot = BitUtil.Pop_union(a.internalbits, b.internalbits, 0, System.Math.Min(a.wlen, b.wlen));
631  if (a.wlen < b.wlen)
632  {
633  tot += BitUtil.Pop_array(b.internalbits, a.wlen, b.wlen - a.wlen);
634  }
635  else if (a.wlen > b.wlen)
636  {
637  tot += BitUtil.Pop_array(a.internalbits, b.wlen, a.wlen - b.wlen);
638  }
639  return tot;
640  }
641 
646  public static long AndNotCount(OpenBitSet a, OpenBitSet b)
647  {
648  long tot = BitUtil.Pop_andnot(a.internalbits, b.internalbits, 0, System.Math.Min(a.wlen, b.wlen));
649  if (a.wlen > b.wlen)
650  {
651  tot += BitUtil.Pop_array(a.internalbits, b.wlen, a.wlen - b.wlen);
652  }
653  return tot;
654  }
655 
659  public static long XorCount(OpenBitSet a, OpenBitSet b)
660  {
661  long tot = BitUtil.Pop_xor(a.internalbits, b.internalbits, 0, System.Math.Min(a.wlen, b.wlen));
662  if (a.wlen < b.wlen)
663  {
664  tot += BitUtil.Pop_array(b.internalbits, a.wlen, b.wlen - a.wlen);
665  }
666  else if (a.wlen > b.wlen)
667  {
668  tot += BitUtil.Pop_array(a.internalbits, b.wlen, a.wlen - b.wlen);
669  }
670  return tot;
671  }
672 
673 
677  public virtual int NextSetBit(int index)
678  {
679  int i = index >> 6;
680  if (i >= wlen)
681  return - 1;
682  int subIndex = index & 0x3f; // index within the word
683  long word = internalbits[i] >> subIndex; // skip all the bits to the right of index
684 
685  if (word != 0)
686  {
687  return (i << 6) + subIndex + BitUtil.Ntz(word);
688  }
689 
690  while (++i < wlen)
691  {
692  word = internalbits[i];
693  if (word != 0)
694  return (i << 6) + BitUtil.Ntz(word);
695  }
696 
697  return - 1;
698  }
699 
703  public virtual long NextSetBit(long index)
704  {
705  int i = (int) (index >> 6);
706  if (i >= wlen)
707  return - 1;
708  int subIndex = (int) index & 0x3f; // index within the word
709  long word = (long) ((ulong) internalbits[i] >> subIndex); // skip all the bits to the right of index
710 
711  if (word != 0)
712  {
713  return (((long) i) << 6) + (subIndex + BitUtil.Ntz(word));
714  }
715 
716  while (++i < wlen)
717  {
718  word = internalbits[i];
719  if (word != 0)
720  return (((long) i) << 6) + BitUtil.Ntz(word);
721  }
722 
723  return - 1;
724  }
725 
726 
727 
728 
729  public virtual System.Object Clone()
730  {
731  try
732  {
733  OpenBitSet obs = new OpenBitSet((long[]) internalbits.Clone(), wlen);
734  //obs.bits = new long[obs.bits.Length];
735  //obs.bits.CopyTo(obs.bits, 0); // hopefully an array clone is as fast(er) than arraycopy
736  return obs;
737  }
738  catch (System.Exception e)
739  {
740  throw new System.SystemException(e.Message, e);
741  }
742  }
743 
745  public virtual void Intersect(OpenBitSet other)
746  {
747  int newLen = System.Math.Min(this.wlen, other.wlen);
748  long[] thisArr = this.internalbits;
749  long[] otherArr = other.internalbits;
750  // testing against zero can be more efficient
751  int pos = newLen;
752  while (--pos >= 0)
753  {
754  thisArr[pos] &= otherArr[pos];
755  }
756  if (this.wlen > newLen)
757  {
758  // fill zeros from the new shorter length to the old length
759  for (int i = newLen; i < this.wlen; i++)
760  internalbits[i] = 0L;
761  }
762  this.wlen = newLen;
763  }
764 
766  public virtual void Union(OpenBitSet other)
767  {
768  int newLen = System.Math.Max(wlen, other.wlen);
769  EnsureCapacityWords(newLen);
770 
771  long[] thisArr = this.internalbits;
772  long[] otherArr = other.internalbits;
773  int pos = System.Math.Min(wlen, other.wlen);
774  while (--pos >= 0)
775  {
776  thisArr[pos] |= otherArr[pos];
777  }
778  if (this.wlen < newLen)
779  {
780  Array.Copy(otherArr, this.wlen, thisArr, this.wlen, newLen - this.wlen);
781  }
782  this.wlen = newLen;
783  }
784 
785 
787  public virtual void Remove(OpenBitSet other)
788  {
789  int idx = System.Math.Min(wlen, other.wlen);
790  long[] thisArr = this.internalbits;
791  long[] otherArr = other.internalbits;
792  while (--idx >= 0)
793  {
794  thisArr[idx] &= ~ otherArr[idx];
795  }
796  }
797 
799  public virtual void Xor(OpenBitSet other)
800  {
801  int newLen = System.Math.Max(wlen, other.wlen);
802  EnsureCapacityWords(newLen);
803 
804  long[] thisArr = this.internalbits;
805  long[] otherArr = other.internalbits;
806  int pos = System.Math.Min(wlen, other.wlen);
807  while (--pos >= 0)
808  {
809  thisArr[pos] ^= otherArr[pos];
810  }
811  if (this.wlen < newLen)
812  {
813  Array.Copy(otherArr, this.wlen, thisArr, this.wlen, newLen - this.wlen);
814  }
815  this.wlen = newLen;
816  }
817 
818 
819  // some BitSet compatability methods
820 
821  //* see <see cref="intersect" /> */
822  public virtual void And(OpenBitSet other)
823  {
824  Intersect(other);
825  }
826 
827  //* see <see cref="union" /> */
828  public virtual void Or(OpenBitSet other)
829  {
830  Union(other);
831  }
832 
833  //* see <see cref="andNot" /> */
834  public virtual void AndNot(OpenBitSet other)
835  {
836  Remove(other);
837  }
838 
840  public virtual bool Intersects(OpenBitSet other)
841  {
842  int pos = System.Math.Min(this.wlen, other.wlen);
843  long[] thisArr = this.internalbits;
844  long[] otherArr = other.internalbits;
845  while (--pos >= 0)
846  {
847  if ((thisArr[pos] & otherArr[pos]) != 0)
848  return true;
849  }
850  return false;
851  }
852 
853 
854 
858  public virtual void EnsureCapacityWords(int numWords)
859  {
860  if (internalbits.Length < numWords)
861  {
862  internalbits = ArrayUtil.Grow(internalbits, numWords);
863  }
864  }
865 
869  public virtual void EnsureCapacity(long numBits)
870  {
871  EnsureCapacityWords(Bits2words(numBits));
872  }
873 
877  public virtual void TrimTrailingZeros()
878  {
879  int idx = wlen - 1;
880  while (idx >= 0 && internalbits[idx] == 0)
881  idx--;
882  wlen = idx + 1;
883  }
884 
886  public static int Bits2words(long numBits)
887  {
888  return (int) ((((numBits - 1) >> 6)) + 1);
889  }
890 
891 
893  public override bool Equals(System.Object o)
894  {
895  if (this == o)
896  return true;
897  if (!(o is OpenBitSet))
898  return false;
899  OpenBitSet a;
900  OpenBitSet b = (OpenBitSet) o;
901  // make a the larger set.
902  if (b.wlen > this.wlen)
903  {
904  a = b; b = this;
905  }
906  else
907  {
908  a = this;
909  }
910 
911  // check for any set bits out of the range of b
912  for (int i = a.wlen - 1; i >= b.wlen; i--)
913  {
914  if (a.internalbits[i] != 0)
915  return false;
916  }
917 
918  for (int i = b.wlen - 1; i >= 0; i--)
919  {
920  if (a.internalbits[i] != b.internalbits[i])
921  return false;
922  }
923 
924  return true;
925  }
926 
927  public override int GetHashCode()
928  {
929  // Start with a zero hash and use a mix that results in zero if the input is zero.
930  // This effectively truncates trailing zeros without an explicit check.
931  long h = 0;
932  for (int i = internalbits.Length; --i >= 0; )
933  {
934  h ^= internalbits[i];
935  h = (h << 1) | (Number.URShift(h, 63)); // rotate left
936  }
937  // fold leftmost bits into right and add a constant to prevent
938  // empty sets from returning 0, which is too common.
939  return (int)(((h >> 32) ^ h) + 0x98761234);
940  }
941 
942 
943  }
944 }