Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
IndexableBinaryStringTools.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using Lucene.Net.Support;
20 
21 // {{Aroush-2.9}} Port issue? Both of those were treated as: System.IO.MemoryStream
22 //using CharBuffer = java.nio.CharBuffer;
23 //using ByteBuffer = java.nio.ByteBuffer;
24 
25 namespace Lucene.Net.Util
26 {
27 
55  {
56 
// Table of packing layouts used by Encode and Decode. Both methods step through it
// with a case index that wraps back to 0 after the last entry.
// Three-argument entries are (initialShift, middleShift, finalShift) and read three bytes;
// two-argument entries are (initialShift, finalShift) and read two bytes.
private static readonly CodingCase[] CODING_CASES = new CodingCase[]
{
    new CodingCase(7, 1),
    new CodingCase(14, 6, 2),
    new CodingCase(13, 5, 3),
    new CodingCase(12, 4, 4),
    new CodingCase(11, 3, 5),
    new CodingCase(10, 2, 6),
    new CodingCase(9, 1, 7),
    new CodingCase(8, 0)
};
58 
59  // Export only static methods
61  {
62  }
63 
/// <summary>
/// Returns the number of chars needed to encode the given byte sequence.
/// </summary>
/// <param name="original">The byte sequence that would be encoded.</param>
/// <returns>
/// 0 when <paramref name="original"/> is empty; otherwise
/// <c>(8 * Count + 14) / 15 + 1</c> using integer division.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="original"/> is null.</exception>
public static int GetEncodedLength(System.Collections.Generic.List<byte> original)
{
    if (original == null)
    {
        throw new ArgumentNullException("original");
    }

    int byteCount = original.Count;
    if (byteCount == 0)
    {
        return 0;
    }

    // Use long intermediates: byteCount * 8 overflows a 32-bit int once the
    // input exceeds roughly 268 million bytes. The final quotient always fits in an int.
    return (int) ((8L * byteCount + 14L) / 15L) + 1;
}
76 
/// <summary>
/// Returns the number of bytes that decoding the given char sequence produces.
/// </summary>
/// <param name="encoded">
/// The encoded char sequence. Its last element is read as the count of full bytes
/// held by the final data char.
/// </param>
/// <returns>
/// 0 when <paramref name="encoded"/> has fewer than two chars; otherwise
/// <c>((Count - 2) * 15 + 7) / 8</c> (integer division) plus the value of the last char.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="encoded"/> is null.</exception>
public static int GetDecodedLength(System.Collections.Generic.List<char> encoded)
{
    if (encoded == null)
    {
        throw new ArgumentNullException("encoded");
    }

    int numChars = encoded.Count - 1;
    if (numChars <= 0)
    {
        return 0;
    }

    int numFullBytesInFinalChar = encoded[encoded.Count - 1];
    int numEncodedChars = numChars - 1;

    // Use long intermediates: numEncodedChars * 15 overflows a 32-bit int
    // once the input exceeds roughly 143 million chars.
    return (int) ((numEncodedChars * 15L + 7L) / 8L) + numFullBytesInFinalChar;
}
99 
/// <summary>
/// Encodes the bytes of <paramref name="input"/> into <paramref name="output"/>.
/// Every data char is masked with 0x7FFF, and one trailing char records the
/// number of full bytes held by the final data char.
/// </summary>
/// <param name="input">The bytes to encode.</param>
/// <param name="output">
/// Receives the encoded chars. On return its Count equals
/// <c>GetEncodedLength(input)</c>: it is grown with '\0' chars or shortened as
/// needed, so no chars from a previous use remain past the encoded data.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="input"/> or <paramref name="output"/> is null.
/// </exception>
public static void Encode(System.Collections.Generic.List<byte> input, System.Collections.Generic.List<char> output)
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }
    if (output == null)
    {
        throw new ArgumentNullException("output");
    }

    int outputLength = GetEncodedLength(input);

    // Size the output to exactly the encoded length. GetDecodedLength and Decode
    // read the LAST element as the count char, so leftover chars from a longer,
    // reused list would corrupt a later decode.
    if (output.Count > outputLength)
    {
        output.RemoveRange(outputLength, output.Count - outputLength);
    }
    else
    {
        // only adjust capacity if needed
        if (output.Capacity < outputLength)
        {
            output.Capacity = outputLength;
        }
        while (output.Count < outputLength)
        {
            output.Add(Char.MinValue);
        }
    }

    if (input.Count == 0)
    {
        // Nothing to encode; output has already been emptied above.
        return;
    }

    int inputByteNum = 0;
    int caseNum = 0;
    int outputCharNum = 0;
    CodingCase codingCase;

    // Emit one char per iteration for as long as the current case's bytes are all available.
    for (; inputByteNum + CODING_CASES[caseNum].numBytes <= input.Count; ++outputCharNum)
    {
        codingCase = CODING_CASES[caseNum];
        if (2 == codingCase.numBytes)
        {
            output[outputCharNum] = (char)(((input[inputByteNum] & 0xFF) << codingCase.initialShift)
                + ((Number.URShift((input[inputByteNum + 1] & 0xFF), codingCase.finalShift)) & codingCase.finalMask)
                & (short)0x7FFF);
        }
        else
        {
            // numBytes is 3
            output[outputCharNum] = (char)(((input[inputByteNum] & 0xFF) << codingCase.initialShift)
                + ((input[inputByteNum + 1] & 0xFF) << codingCase.middleShift)
                + ((Number.URShift((input[inputByteNum + 2] & 0xFF), codingCase.finalShift)) & codingCase.finalMask)
                & (short)0x7FFF);
        }
        inputByteNum += codingCase.advanceBytes;
        if (++caseNum == CODING_CASES.Length)
        {
            caseNum = 0;
        }
    }

    // Produce final char (if any) and trailing count char.
    codingCase = CODING_CASES[caseNum];

    if (inputByteNum + 1 < input.Count)
    {
        // Two bytes remain; codingCase.numBytes must be 3.
        output[outputCharNum++] = (char) ((((input[inputByteNum] & 0xFF) << codingCase.initialShift)
            + ((input[inputByteNum + 1] & 0xFF) << codingCase.middleShift))
            & (short) 0x7FFF);
        // Add trailing char containing the number of full bytes in final char
        output[outputCharNum++] = (char) 1;
    }
    else if (inputByteNum < input.Count)
    {
        // One byte remains.
        output[outputCharNum++] = (char) (((input[inputByteNum] & 0xFF) << codingCase.initialShift) & (short) 0x7FFF);
        // Add trailing char containing the number of full bytes in final char
        output[outputCharNum++] = caseNum == 0 ? (char) 1 : (char) 0;
    }
    else
    {
        // No left over bits - last char is completely filled.
        // Add trailing char containing the number of full bytes in final char
        output[outputCharNum++] = (char) 1;
    }
}
179 
/// <summary>
/// Decodes the chars of <paramref name="input"/> into <paramref name="output"/>.
/// The last char of <paramref name="input"/> is read as the number of full bytes
/// held by the final data char (see <c>GetDecodedLength</c>).
/// </summary>
/// <param name="input">The encoded chars.</param>
/// <param name="output">
/// Receives the decoded bytes. On return its Count equals
/// <c>GetDecodedLength(input)</c>: it is grown with zero bytes or shortened as
/// needed, so no bytes from a previous use remain past the decoded data.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="input"/> or <paramref name="output"/> is null.
/// </exception>
public static void Decode(System.Collections.Generic.List<char> input, System.Collections.Generic.List<byte> output)
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }
    if (output == null)
    {
        throw new ArgumentNullException("output");
    }

    int numOutputBytes = GetDecodedLength(input);

    // Size the output to exactly the decoded length so a longer, reused list
    // does not keep stale trailing bytes.
    if (output.Count > numOutputBytes)
    {
        output.RemoveRange(numOutputBytes, output.Count - numOutputBytes);
    }
    else
    {
        if (output.Capacity < numOutputBytes)
        {
            output.Capacity = numOutputBytes;
        }
        while (output.Count < numOutputBytes)
        {
            output.Add(Byte.MinValue);
        }
    }

    // Nothing to write when the decoded length is 0. This covers empty input and
    // input with a single char; the latter previously fell through to the
    // final-char code and indexed output[0] on an empty list.
    if (numOutputBytes <= 0)
    {
        return;
    }

    int caseNum = 0;
    int outputByteNum = 0;
    int inputCharNum = 0;
    short inputChar;
    CodingCase codingCase;

    // Every char except the final data char and the trailing count char.
    for (; inputCharNum < input.Count - 2; ++inputCharNum)
    {
        codingCase = CODING_CASES[caseNum];
        inputChar = (short) input[inputCharNum];
        if (2 == codingCase.numBytes)
        {
            if (0 == caseNum)
            {
                // Case 0 starts a fresh byte, so assign instead of accumulating.
                output[outputByteNum] = (byte) (Number.URShift(inputChar, codingCase.initialShift));
            }
            else
            {
                output[outputByteNum] = (byte) (output[outputByteNum] + (byte) (Number.URShift(inputChar, codingCase.initialShift)));
            }
            output[outputByteNum + 1] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
        }
        else
        {
            // numBytes is 3
            output[outputByteNum] = (byte) (output[outputByteNum] + (byte) (Number.URShift(inputChar, codingCase.initialShift)));
            output[outputByteNum + 1] = (byte) (Number.URShift((inputChar & codingCase.middleMask), codingCase.middleShift));
            output[outputByteNum + 2] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
        }
        outputByteNum += codingCase.advanceBytes;
        if (++caseNum == CODING_CASES.Length)
        {
            caseNum = 0;
        }
    }

    // Handle final char
    inputChar = (short) input[inputCharNum];
    codingCase = CODING_CASES[caseNum];
    if (0 == caseNum)
    {
        output[outputByteNum] = 0;
    }
    output[outputByteNum] = (byte) (output[outputByteNum] + (byte) (Number.URShift(inputChar, codingCase.initialShift)));

    // Only write the bytes that the decoded length says the final char holds.
    int bytesLeft = numOutputBytes - outputByteNum;
    if (bytesLeft > 1)
    {
        if (2 == codingCase.numBytes)
        {
            output[outputByteNum + 1] = (byte) (Number.URShift((inputChar & codingCase.finalMask), codingCase.finalShift));
        }
        else
        {
            // numBytes is 3
            output[outputByteNum + 1] = (byte) (Number.URShift((inputChar & codingCase.middleMask), codingCase.middleShift));
            if (bytesLeft > 2)
            {
                output[outputByteNum + 2] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
            }
        }
    }
}
273 
/// <summary>
/// Decodes <paramref name="input"/> into a newly allocated byte list.
/// </summary>
/// <param name="input">The encoded chars.</param>
/// <returns>A new list containing the decoded bytes.</returns>
public static System.Collections.Generic.List<byte> Decode(System.Collections.Generic.List<char> input)
{
    // Start from a zero-filled list that already has the decoded length.
    byte[] zeroed = new byte[GetDecodedLength(input)];
    System.Collections.Generic.List<byte> decoded = new System.Collections.Generic.List<byte>(zeroed);

    Decode(input, decoded);
    return decoded;
}
294 
/// <summary>
/// Encodes <paramref name="input"/> into a newly allocated char list.
/// </summary>
/// <param name="input">The bytes to encode.</param>
/// <returns>A new list containing the encoded chars.</returns>
public static System.Collections.Generic.List<char> Encode(System.Collections.Generic.List<byte> input)
{
    // Start from a '\0'-filled list that already has the encoded length.
    char[] blank = new char[GetEncodedLength(input)];
    System.Collections.Generic.List<char> encoded = new System.Collections.Generic.List<char>(blank);

    Encode(input, encoded);
    return encoded;
}
313 
// Holds the shift amounts and masks for one packing layout. Encode and Decode
// branch on numBytes (2 or 3) and move their byte index forward by advanceBytes.
internal class CodingCase
{
    internal int numBytes;
    internal int initialShift;
    internal int middleShift;
    internal int finalShift;
    internal int advanceBytes = 2;
    internal short middleMask;
    internal short finalMask;

    // Three-byte layout: advanceBytes keeps its default of 2.
    internal CodingCase(int initialShift, int middleShift, int finalShift)
    {
        numBytes = 3;
        this.initialShift = initialShift;
        this.middleShift = middleShift;
        this.finalShift = finalShift;
        finalMask = MaskForFinalShift(finalShift);
        middleMask = (short) ((short) 0xFF << middleShift);
    }

    // Two-byte layout: middleShift and middleMask stay 0.
    internal CodingCase(int initialShift, int finalShift)
    {
        numBytes = 2;
        this.initialShift = initialShift;
        this.finalShift = finalShift;
        finalMask = MaskForFinalShift(finalShift);
        // A non-zero final shift advances by one byte; otherwise by two.
        advanceBytes = (finalShift != 0) ? 1 : 2;
    }

    // 0xFF shifted right (unsigned) by finalShift.
    private static short MaskForFinalShift(int finalShift)
    {
        return (short) (Number.URShift((short) 0xFF, finalShift));
    }
}
341  }
342 }