20 namespace Lucene.Net.Util
// UTF-16 surrogate code-unit ranges.
// 0xD800-0xDBFF: high (leading) surrogates.
65 public const int UNI_SUR_HIGH_START = 0xD800;
66 public const int UNI_SUR_HIGH_END = 0xDBFF;
// 0xDC00-0xDFFF: low (trailing) surrogates.
67 public const int UNI_SUR_LOW_START = 0xDC00;
68 public const int UNI_SUR_LOW_END = 0xDFFF;
// U+FFFD, the Unicode replacement character.
69 public const int UNI_REPLACEMENT_CHAR = 0xFFFD;
// Largest code point that fits in a single UTF-16 code unit. UTF8toUTF16 compares
// each decoded code point against it to choose between one char and a surrogate pair.
71 private const long UNI_MAX_BMP = 0x0000FFFF;
// Helpers for splitting a code point above 0xFFFF into a surrogate pair:
// HALF_BASE (0x10000) is subtracted first, then the value is split into the bits
// above HALF_SHIFT (10) and the low 10 bits selected by HALF_MASK (0x3FF).
// HALF_SHIFT and HALF_MASK are declared as long; the shift site casts HALF_SHIFT to int.
73 private const int HALF_BASE = 0x0010000;
74 private const long HALF_SHIFT = 10;
75 private const long HALF_MASK = 0x3FFL;
// Output byte buffer, created with an initial capacity of 10 bytes.
// The UTF16toUTF8 methods write into the `result` array of the UTF8Result they are
// given and replace it with a larger array when they run out of room.
// NOTE(review): the enclosing type's header is not visible in this excerpt;
// presumably this is the UTF8Result type - confirm.
79 public byte[] result =
new byte[10];
// Prepares a byte buffer of at least newLength elements when the current one is too small.
// NOTE(review): only the first part of this method is visible in this excerpt. The
// statements after the copy (presumably installing newArray as `result` and recording
// the new length) are not shown - confirm against the full file.
82 public void SetLength(
int newLength)
// Reallocate only when the existing array cannot hold newLength elements.
84 if (result.Length < newLength)
// The replacement array is sized at 1.5 x newLength (truncated to int), i.e. larger than requested.
86 byte[] newArray =
new byte[(int) (1.5 * newLength)];
// Carry over the first `length` elements of the old array.
87 Array.Copy(result, 0, newArray, 0, length);
// Output buffer of UTF-16 code units, created with an initial capacity of 10 chars.
// NOTE(review): the enclosing type's header is not visible in this excerpt;
// presumably this is the UTF16Result type used by UTF8toUTF16 - confirm.
96 public char[] result =
new char[10];
// Bookkeeping array, initial capacity 10. UTF8toUTF16 indexes it by input byte position:
// at the byte where a character starts it stores the index in `result` where that
// character's output begins, and it stores -1 for each following byte of the same character.
97 public int[] offsets =
new int[10];
// Prepares a char buffer of at least newLength elements when the current one is too small.
// NOTE(review): only the first part of this method is visible in this excerpt. The
// statements after the copy (presumably installing newArray as `result` and recording
// the new length) are not shown - confirm against the full file.
100 public void SetLength(
int newLength)
// Reallocate only when the existing array cannot hold newLength elements.
102 if (result.Length < newLength)
// The replacement array is sized at 1.5 x newLength (truncated to int), i.e. larger than requested.
104 char[] newArray =
new char[(int) (1.5 * newLength)];
// Carry over the first `length` elements of the old array.
105 Array.Copy(result, 0, newArray, 0, length);
// NOTE(review): the member that contains this statement begins outside the visible
// excerpt, so its name and any preceding steps cannot be confirmed from here.
// Copies the first `length` chars of other.result into `result`, starting at index 0 in both.
114 Array.Copy(other.
result, 0, result, 0, length);
// Encodes UTF-16 code units read from `source`, beginning at `offset`, as UTF-8 bytes
// written to result.result; the number of bytes written is stored in result.length.
// This overload has no length parameter.
// NOTE(review): the loop header and its exit condition are not visible in this excerpt.
// The comparison against 0xffff below suggests that a 0xffff char marks the end of the
// input - confirm against the full file.
122 public static void UTF16toUTF8(
char[] source,
int offset, UTF8Result result)
// Local alias for the output buffer; kept in sync with result.result when it is regrown.
127 byte[] out_Renamed = result.result;
// Fetch the next code unit and advance the read position.
132 int code = (int) source[i++];
// One input step appends at most 4 bytes, so make sure that much room remains.
134 if (upto + 4 > out_Renamed.Length)
// Grow by doubling and keep the bytes written so far.
136 byte[] newOut =
new byte[2 * out_Renamed.Length];
// Doubling is assumed to be sufficient; this is only checked through Debug.Assert.
137 System.Diagnostics.Debug.Assert(newOut.Length >= upto + 4);
138 Array.Copy(out_Renamed, 0, newOut, 0, upto);
139 result.result = out_Renamed = newOut;
// One-byte form: the value is stored unchanged.
// (The condition guarding this branch is not visible in this excerpt.)
142 out_Renamed[upto++] = (byte) code;
// Two-byte form for values below 0x800: 110xxxxx 10xxxxxx.
143 else if (code < 0x800)
145 out_Renamed[upto++] = (byte) (0xC0 | (code >> 6));
146 out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F));
// Values outside the surrogate range 0xD800-0xDFFF: three-byte form 1110xxxx 10xxxxxx 10xxxxxx.
148 else if (code < 0xD800 || code > 0xDFFF)
153 out_Renamed[upto++] = (byte) (0xE0 | (code >> 12));
154 out_Renamed[upto++] = (byte) (0x80 | ((code >> 6) & 0x3F));
155 out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F));
// Surrogate handling: only a high surrogate (< 0xDC00) whose successor is not 0xffff
// is considered as the start of a pair.
161 if (code < 0xDC00 && source[i] != 0xffff)
// Peek at the following code unit without consuming it yet.
163 int utf32 = (int) source[i];
// A valid pair requires the successor to be a low surrogate.
165 if (utf32 >= 0xDC00 && utf32 <= 0xDFFF)
// Merge the pair into one code point. Subtracting 0xD7C0 rather than 0xD800 folds in
// the required +0x10000, because 0x40 << 10 == 0x10000.
167 utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF);
// Four-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
169 out_Renamed[upto++] = (byte) (0xF0 | (utf32 >> 18));
170 out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 12) & 0x3F));
171 out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 6) & 0x3F));
172 out_Renamed[upto++] = (byte) (0x80 | (utf32 & 0x3F));
// 0xEF 0xBF 0xBD is the UTF-8 encoding of U+FFFD, the replacement character.
// NOTE(review): presumably emitted when the surrogate is not part of a valid pair;
// the enclosing branch structure is not visible here - confirm.
178 out_Renamed[upto++] = (byte) (0xEF);
179 out_Renamed[upto++] = (byte) (0xBF);
180 out_Renamed[upto++] = (byte) (0xBD);
// Record how many bytes were produced.
184 result.length = upto;
// Encodes UTF-16 code units from `source` as UTF-8 bytes written to result.result and
// stores the number of bytes written in result.length. `end` (offset + length) bounds
// the look-ahead used for surrogate pairs.
// NOTE(review): the loop header is not visible in this excerpt; presumably it iterates
// from `offset` up to `end` - confirm against the full file.
191 public static void UTF16toUTF8(
char[] source,
int offset,
int length, UTF8Result result)
// Index one past the last code unit to encode.
196 int end = offset + length;
// Local alias for the output buffer; kept in sync with result.result when it is regrown.
197 byte[] out_Renamed = result.result;
// Fetch the next code unit and advance the read position.
202 int code = (int) source[i++];
// One input step appends at most 4 bytes, so make sure that much room remains.
204 if (upto + 4 > out_Renamed.Length)
// Grow by doubling and keep the bytes written so far.
206 byte[] newOut =
new byte[2 * out_Renamed.Length];
// Doubling is assumed to be sufficient; this is only checked through Debug.Assert.
207 System.Diagnostics.Debug.Assert(newOut.Length >= upto + 4);
208 Array.Copy(out_Renamed, 0, newOut, 0, upto);
209 result.result = out_Renamed = newOut;
// One-byte form: the value is stored unchanged.
// (The condition guarding this branch is not visible in this excerpt.)
212 out_Renamed[upto++] = (byte) code;
// Two-byte form for values below 0x800: 110xxxxx 10xxxxxx.
213 else if (code < 0x800)
215 out_Renamed[upto++] = (byte) (0xC0 | (code >> 6));
216 out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F));
// Values outside the surrogate range 0xD800-0xDFFF: three-byte form 1110xxxx 10xxxxxx 10xxxxxx.
218 else if (code < 0xD800 || code > 0xDFFF)
220 out_Renamed[upto++] = (byte) (0xE0 | (code >> 12));
221 out_Renamed[upto++] = (byte) (0x80 | ((code >> 6) & 0x3F));
222 out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F));
// Surrogate handling: a pair is only attempted for a high surrogate (< 0xDC00) that has
// a successor inside the requested range (i < end) and that successor is not 0xffff.
228 if (code < 0xDC00 && i < end && source[i] != 0xffff)
// Peek at the following code unit without consuming it yet.
230 int utf32 = (int) source[i];
// A valid pair requires the successor to be a low surrogate.
232 if (utf32 >= 0xDC00 && utf32 <= 0xDFFF)
// Merge the pair into one code point. Subtracting 0xD7C0 rather than 0xD800 folds in
// the required +0x10000, because 0x40 << 10 == 0x10000.
234 utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF);
// Four-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
236 out_Renamed[upto++] = (byte) (0xF0 | (utf32 >> 18));
237 out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 12) & 0x3F));
238 out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 6) & 0x3F));
239 out_Renamed[upto++] = (byte) (0x80 | (utf32 & 0x3F));
// 0xEF 0xBF 0xBD is the UTF-8 encoding of U+FFFD, the replacement character.
// NOTE(review): presumably emitted when the surrogate is not part of a valid pair;
// the enclosing branch structure is not visible here - confirm.
245 out_Renamed[upto++] = (byte) (0xEF);
246 out_Renamed[upto++] = (byte) (0xBF);
247 out_Renamed[upto++] = (byte) (0xBD);
// Record how many bytes were produced.
251 result.length = upto;
// Encodes the chars s[offset] .. s[offset + length - 1] as UTF-8 bytes written to
// result.result and stores the number of bytes written in result.length.
258 public static void UTF16toUTF8(System.String s,
int offset,
int length, UTF8Result result)
// Index one past the last char to encode.
260 int end = offset + length;
// Local alias for the output buffer; kept in sync with result.result when it is regrown.
262 byte[] out_Renamed = result.result;
// Walk the requested slice one UTF-16 code unit at a time.
265 for (
int i = offset; i < end; i++)
267 int code = (int) s[i];
// One input step appends at most 4 bytes, so make sure that much room remains.
269 if (upto + 4 > out_Renamed.Length)
// Grow by doubling and keep the bytes written so far.
271 byte[] newOut =
new byte[2 * out_Renamed.Length];
// Doubling is assumed to be sufficient; this is only checked through Debug.Assert.
272 System.Diagnostics.Debug.Assert(newOut.Length >= upto + 4);
273 Array.Copy(out_Renamed, 0, newOut, 0, upto);
274 result.result = out_Renamed = newOut;
// One-byte form: the value is stored unchanged.
// (The condition guarding this branch is not visible in this excerpt.)
277 out_Renamed[upto++] = (byte) code;
// Two-byte form for values below 0x800: 110xxxxx 10xxxxxx.
278 else if (code < 0x800)
280 out_Renamed[upto++] = (byte) (0xC0 | (code >> 6));
281 out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F));
// Values outside the surrogate range 0xD800-0xDFFF: three-byte form 1110xxxx 10xxxxxx 10xxxxxx.
283 else if (code < 0xD800 || code > 0xDFFF)
285 out_Renamed[upto++] = (byte) (0xE0 | (code >> 12));
286 out_Renamed[upto++] = (byte) (0x80 | ((code >> 6) & 0x3F));
287 out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F));
// Surrogate handling: a pair is only attempted for a high surrogate (< 0xDC00) that is
// not the last char of the requested slice, so s[i + 1] stays inside the slice.
293 if (code < 0xDC00 && (i < end - 1))
// Peek at the following char.
// NOTE(review): the statement that skips it once the pair is accepted is not visible here.
295 int utf32 = (int) s[i + 1];
// A valid pair requires the successor to be a low surrogate.
297 if (utf32 >= 0xDC00 && utf32 <= 0xDFFF)
// Merge the pair into one code point. Subtracting 0xD7C0 rather than 0xD800 folds in
// the required +0x10000, because 0x40 << 10 == 0x10000.
299 utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF);
// Four-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
301 out_Renamed[upto++] = (byte) (0xF0 | (utf32 >> 18));
302 out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 12) & 0x3F));
303 out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 6) & 0x3F));
304 out_Renamed[upto++] = (byte) (0x80 | (utf32 & 0x3F));
// 0xEF 0xBF 0xBD is the UTF-8 encoding of U+FFFD, the replacement character.
// NOTE(review): presumably emitted when the surrogate is not part of a valid pair;
// the enclosing branch structure is not visible here - confirm.
310 out_Renamed[upto++] = (byte) (0xEF);
311 out_Renamed[upto++] = (byte) (0xBF);
312 out_Renamed[upto++] = (byte) (0xBD);
// Record how many bytes were produced.
316 result.length = upto;
// Decodes UTF-8 bytes from `utf8` into UTF-16 chars stored in result.result, keeps
// result.offsets up to date as a map from input byte index to output char index, and
// stores the resulting char count in result.length.
// NOTE(review): the decode loop header, the initialisation of `upto`, and the conditions
// selecting the 1/2/3/4-byte branches are not visible in this excerpt. None of the
// visible lines check that the continuation bytes read below lie before `end` or are
// well-formed - confirm whether callers guarantee valid input.
324 public static void UTF8toUTF16(byte[] utf8,
int offset,
int length, UTF16Result result)
// Index one past the last input byte.
327 int end = offset + length;
// Local alias for the output buffer; kept in sync with result.result when it is regrown.
328 char[] out_Renamed = result.result;
// offsets is indexed by byte position and needs a slot at index `end` as well,
// so grow it (to 2 * end entries, preserving existing contents) when it is too short.
329 if (result.offsets.Length <= end)
331 int[] newOffsets =
new int[2 * end];
332 Array.Copy(result.offsets, 0, newOffsets, 0, result.offsets.Length);
333 result.offsets = newOffsets;
335 int[] offsets = result.offsets;
// An entry of -1 marks a byte that is not the first byte of a character (see the
// stores of -1 below).
// NOTE(review): the loop body is not visible; presumably it moves `upto` back to the
// first byte of the character - confirm.
340 while (offsets[upto] == - 1)
// Resume writing at the char index recorded for this byte position.
343 int outUpto = offsets[upto];
// Reserve room for `length` more chars: a UTF-8 sequence never decodes to more chars
// than it has bytes, so this is an upper bound. Only the first outUpto chars are kept.
346 if (outUpto + length >= out_Renamed.Length)
348 char[] newOut =
new char[2 * (outUpto + length)];
349 Array.Copy(out_Renamed, 0, newOut, 0, outUpto);
350 result.result = out_Renamed = newOut;
// Lead byte of the next character, widened to int.
356 int b = utf8[upto] & 0xff;
// Remember where this character's output starts, then step past the lead byte.
359 offsets[upto++] = outUpto;
// One-byte case: the lead byte is expected to be below 0x80.
363 System.Diagnostics.Debug.Assert(b < 0x80);
// Two-byte case: 5 payload bits from the lead byte plus 6 from the continuation byte.
368 ch = ((b & 0x1f) << 6) + (utf8[upto] & 0x3f);
// Mark the continuation byte as "not a character start".
369 offsets[upto++] = - 1;
// Three-byte case: 4 payload bits from the lead byte plus 6 from each of two continuation bytes.
373 ch = ((b & 0xf) << 12) + ((utf8[upto] & 0x3f) << 6) + (utf8[upto + 1] & 0x3f);
374 offsets[upto++] = - 1;
375 offsets[upto++] = - 1;
// Four-byte case: the lead byte is expected to be below 0xf8; 3 payload bits from it
// plus 6 from each of three continuation bytes.
379 System.Diagnostics.Debug.Assert(b < 0xf8);
380 ch = ((b & 0x7) << 18) + ((utf8[upto] & 0x3f) << 12) + ((utf8[upto + 1] & 0x3f) << 6) + (utf8[upto + 2] & 0x3f);
381 offsets[upto++] = - 1;
382 offsets[upto++] = - 1;
383 offsets[upto++] = - 1;
// Code points up to 0xFFFF fit in a single UTF-16 code unit.
386 if (ch <= UNI_MAX_BMP)
389 out_Renamed[outUpto++] = (char) ch;
// Larger code points become a surrogate pair: subtract 0x10000, then the bits above the
// low 10 plus 0xD800 form the high surrogate and the low 10 bits plus 0xDC00 the low one.
394 int chHalf = ch - HALF_BASE;
// HALF_SHIFT is declared as long, hence the cast to int for the shift count.
395 out_Renamed[outUpto++] = (char) ((chHalf >> (
int) HALF_SHIFT) + UNI_SUR_HIGH_START);
396 out_Renamed[outUpto++] = (char) ((chHalf & HALF_MASK) + UNI_SUR_LOW_START);
// Record the output position for the byte index reached, then publish the char count.
400 offsets[upto] = outUpto;
401 result.length = outUpto;