Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
NumericUtils.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using Lucene.Net.Documents;
20 using Lucene.Net.Search;
21 using Lucene.Net.Support;
22 using NumericTokenStream = Lucene.Net.Analysis.NumericTokenStream;
23 
24 namespace Lucene.Net.Util
25 {
26 
65  public sealed class NumericUtils
66  {
67 
68  private NumericUtils()
69  {
70  } // no instance!
71 
75  public const int PRECISION_STEP_DEFAULT = 4;
76 
80  public static char SHIFT_START_LONG = (char) 0x20;
81 
87  public const int BUF_SIZE_LONG = 63 / 7 + 2;
88 
92  public static char SHIFT_START_INT = (char) 0x60;
93 
99  public const int BUF_SIZE_INT = 31 / 7 + 2;
100 
113  public static int LongToPrefixCoded(long val, int shift, char[] buffer)
114  {
115  if (shift > 63 || shift < 0)
116  throw new System.ArgumentException("Illegal shift value, must be 0..63");
117  int nChars = (63 - shift) / 7 + 1, len = nChars + 1;
118  buffer[0] = (char) (SHIFT_START_LONG + shift);
119  ulong sortableBits = BitConverter.ToUInt64(BitConverter.GetBytes(val), 0) ^ 0x8000000000000000L;
120  sortableBits = sortableBits >> shift;
121  while (nChars >= 1)
122  {
123  // Store 7 bits per character for good efficiency when UTF-8 encoding.
124  // The whole number is right-justified so that lucene can prefix-encode
125  // the terms more efficiently.
126  buffer[nChars--] = (char) (sortableBits & 0x7f);
127  sortableBits = sortableBits >> 7;
128  }
129  return len;
130  }
131 
139  public static System.String LongToPrefixCoded(long val, int shift)
140  {
141  char[] buffer = new char[BUF_SIZE_LONG];
142  int len = LongToPrefixCoded(val, shift, buffer);
143  return new System.String(buffer, 0, len);
144  }
145 
151  public static System.String LongToPrefixCoded(long val)
152  {
153  return LongToPrefixCoded(val, 0);
154  }
155 
168  public static int IntToPrefixCoded(int val, int shift, char[] buffer)
169  {
170  if (shift > 31 || shift < 0)
171  throw new System.ArgumentException("Illegal shift value, must be 0..31");
172  int nChars = (31 - shift) / 7 + 1, len = nChars + 1;
173  buffer[0] = (char) (SHIFT_START_INT + shift);
174  int sortableBits = val ^ unchecked((int) 0x80000000);
175  sortableBits = Number.URShift(sortableBits, shift);
176  while (nChars >= 1)
177  {
178  // Store 7 bits per character for good efficiency when UTF-8 encoding.
179  // The whole number is right-justified so that lucene can prefix-encode
180  // the terms more efficiently.
181  buffer[nChars--] = (char) (sortableBits & 0x7f);
182  sortableBits = Number.URShift(sortableBits, 7);
183  }
184  return len;
185  }
186 
194  public static System.String IntToPrefixCoded(int val, int shift)
195  {
196  char[] buffer = new char[BUF_SIZE_INT];
197  int len = IntToPrefixCoded(val, shift, buffer);
198  return new System.String(buffer, 0, len);
199  }
200 
206  public static System.String IntToPrefixCoded(int val)
207  {
208  return IntToPrefixCoded(val, 0);
209  }
210 
220  public static long PrefixCodedToLong(System.String prefixCoded)
221  {
222  int shift = prefixCoded[0] - SHIFT_START_LONG;
223  if (shift > 63 || shift < 0)
224  throw new System.FormatException("Invalid shift value in prefixCoded string (is encoded value really a LONG?)");
225  ulong sortableBits = 0UL;
226  for (int i = 1, len = prefixCoded.Length; i < len; i++)
227  {
228  sortableBits <<= 7;
229  char ch = prefixCoded[i];
230  if (ch > 0x7f)
231  {
232  throw new System.FormatException("Invalid prefixCoded numerical value representation (char " + System.Convert.ToString((int) ch, 16) + " at position " + i + " is invalid)");
233  }
234  sortableBits |= (ulong) ch;
235  }
236  return BitConverter.ToInt64(BitConverter.GetBytes((sortableBits << shift) ^ 0x8000000000000000L), 0);
237  }
238 
248  public static int PrefixCodedToInt(System.String prefixCoded)
249  {
250  int shift = prefixCoded[0] - SHIFT_START_INT;
251  if (shift > 31 || shift < 0)
252  throw new System.FormatException("Invalid shift value in prefixCoded string (is encoded value really an INT?)");
253  int sortableBits = 0;
254  for (int i = 1, len = prefixCoded.Length; i < len; i++)
255  {
256  sortableBits <<= 7;
257  char ch = prefixCoded[i];
258  if (ch > 0x7f)
259  {
260  throw new System.FormatException("Invalid prefixCoded numerical value representation (char " + System.Convert.ToString((int) ch, 16) + " at position " + i + " is invalid)");
261  }
262  sortableBits |= (int) ch;
263  }
264  return (sortableBits << shift) ^ unchecked((int) 0x80000000);
265  }
266 
274  public static long DoubleToSortableLong(double val)
275  {
276  long f = BitConverter.DoubleToInt64Bits(val); // {{Aroush-2.9}} will this work the same as 'java.lang.Double.doubleToRawLongBits()'?
277  if (f < 0)
278  f ^= 0x7fffffffffffffffL;
279  return f;
280  }
281 
285  public static System.String DoubleToPrefixCoded(double val)
286  {
287  return LongToPrefixCoded(DoubleToSortableLong(val));
288  }
289 
293  public static double SortableLongToDouble(long val)
294  {
295  if (val < 0)
296  val ^= 0x7fffffffffffffffL;
297  return BitConverter.Int64BitsToDouble(val);
298  }
299 
303  public static double PrefixCodedToDouble(System.String val)
304  {
305  return SortableLongToDouble(PrefixCodedToLong(val));
306  }
307 
315  public static int FloatToSortableInt(float val)
316  {
317  int f = BitConverter.ToInt32(BitConverter.GetBytes(val), 0);
318  if (f < 0)
319  f ^= 0x7fffffff;
320  return f;
321  }
322 
326  public static System.String FloatToPrefixCoded(float val)
327  {
328  return IntToPrefixCoded(FloatToSortableInt(val));
329  }
330 
334  public static float SortableIntToFloat(int val)
335  {
336  if (val < 0)
337  val ^= 0x7fffffff;
338  return BitConverter.ToSingle(BitConverter.GetBytes(val), 0);
339  }
340 
344  public static float PrefixCodedToFloat(System.String val)
345  {
346  return SortableIntToFloat(PrefixCodedToInt(val));
347  }
348 
356  public static void SplitLongRange(LongRangeBuilder builder, int precisionStep, long minBound, long maxBound)
357  {
358  SplitRange(builder, 64, precisionStep, minBound, maxBound);
359  }
360 
368  public static void SplitIntRange(IntRangeBuilder builder, int precisionStep, int minBound, int maxBound)
369  {
370  SplitRange(builder, 32, precisionStep, (long) minBound, (long) maxBound);
371  }
372 
374  private static void SplitRange(System.Object builder, int valSize, int precisionStep, long minBound, long maxBound)
375  {
376  if (precisionStep < 1)
377  throw new System.ArgumentException("precisionStep must be >=1");
378  if (minBound > maxBound)
379  return ;
380  for (int shift = 0; ; shift += precisionStep)
381  {
382  // calculate new bounds for inner precision
383  long diff = 1L << (shift + precisionStep);
384  long mask = ((1L << precisionStep) - 1L) << shift;
385  bool hasLower = (minBound & mask) != 0L;
386  bool hasUpper = (maxBound & mask) != mask;
387  long nextMinBound = (hasLower?(minBound + diff):minBound) & ~ mask;
388  long nextMaxBound = (hasUpper?(maxBound - diff):maxBound) & ~ mask;
389  bool lowerWrapped = nextMinBound < minBound,
390  upperWrapped = nextMaxBound > maxBound;
391 
392  if (shift+precisionStep>=valSize || nextMinBound>nextMaxBound || lowerWrapped || upperWrapped)
393  {
394  // We are in the lowest precision or the next precision is not available.
395  AddRange(builder, valSize, minBound, maxBound, shift);
396  // exit the split recursion loop
397  break;
398  }
399 
400  if (hasLower)
401  AddRange(builder, valSize, minBound, minBound | mask, shift);
402  if (hasUpper)
403  AddRange(builder, valSize, maxBound & ~ mask, maxBound, shift);
404 
405  // recurse to next precision
406  minBound = nextMinBound;
407  maxBound = nextMaxBound;
408  }
409  }
410 
412  private static void AddRange(System.Object builder, int valSize, long minBound, long maxBound, int shift)
413  {
414  // for the max bound set all lower bits (that were shifted away):
415  // this is important for testing or other usages of the splitted range
416  // (e.g. to reconstruct the full range). The prefixEncoding will remove
417  // the bits anyway, so they do not hurt!
418  maxBound |= (1L << shift) - 1L;
419  // delegate to correct range builder
420  switch (valSize)
421  {
422 
423  case 64:
424  ((LongRangeBuilder) builder).AddRange(minBound, maxBound, shift);
425  break;
426 
427  case 32:
428  ((IntRangeBuilder) builder).AddRange((int) minBound, (int) maxBound, shift);
429  break;
430 
431  default:
432  // Should not happen!
433  throw new System.ArgumentException("valSize must be 32 or 64.");
434 
435  }
436  }
437 
443  public abstract class LongRangeBuilder
444  {
445 
449  public virtual void AddRange(System.String minPrefixCoded, System.String maxPrefixCoded)
450  {
451  throw new System.NotSupportedException();
452  }
453 
457  public virtual void AddRange(long min, long max, int shift)
458  {
459  AddRange(Lucene.Net.Util.NumericUtils.LongToPrefixCoded(min, shift), Lucene.Net.Util.NumericUtils.LongToPrefixCoded(max, shift));
460  }
461  }
462 
468  public abstract class IntRangeBuilder
469  {
470 
474  public virtual void AddRange(System.String minPrefixCoded, System.String maxPrefixCoded)
475  {
476  throw new System.NotSupportedException();
477  }
478 
482  public virtual void AddRange(int min, int max, int shift)
483  {
484  AddRange(Lucene.Net.Util.NumericUtils.IntToPrefixCoded(min, shift), Lucene.Net.Util.NumericUtils.IntToPrefixCoded(max, shift));
485  }
486  }
487  }
488 }