Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
IndexInput.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using Lucene.Net.Support;
20 
21 namespace Lucene.Net.Store
22 {
23 
/// <summary>Abstract base class for input from a file in a <see cref="Directory" />. A
/// random-access input stream. Used for all Lucene index input operations.
/// </summary>
/// <remarks>
/// Subclasses supply the primitive operations (<see cref="ReadByte()" />,
/// <see cref="ReadBytes(byte[], int, int)" />, <see cref="Seek(long)" />,
/// <see cref="FilePointer" />, <see cref="Length()" /> and <c>Dispose(bool)</c>);
/// every other reader in this class is built on top of those primitives.
/// </remarks>
/// <seealso cref="Directory">
/// </seealso>
public abstract class IndexInput : System.ICloneable, IDisposable
{
    // True if we are reading the old (modified UTF8) string format.
    // Copied to clones because Clone() performs a memberwise copy.
    private bool preUTF8Strings;

    /// <summary>Reads and returns a single byte.</summary>
    /// <returns>the next byte of the input</returns>
    /// <seealso cref="IndexOutput.WriteByte(byte)">
    /// </seealso>
    public abstract byte ReadByte();

    /// <summary>Reads a specified number of bytes into an array at the specified offset.</summary>
    /// <param name="b">the array to read bytes into
    /// </param>
    /// <param name="offset">the offset in the array to start storing bytes
    /// </param>
    /// <param name="len">the number of bytes to read
    /// </param>
    /// <seealso cref="IndexOutput.WriteBytes(byte[],int)">
    /// </seealso>
    public abstract void ReadBytes(byte[] b, int offset, int len);

    /// <summary>Reads a specified number of bytes into an array at the
    /// specified offset with control over whether the read
    /// should be buffered (callers who have their own buffer
    /// should pass in "false" for useBuffer). Currently only
    /// <see cref="BufferedIndexInput" /> respects this parameter.
    /// </summary>
    /// <param name="b">the array to read bytes into
    /// </param>
    /// <param name="offset">the offset in the array to start storing bytes
    /// </param>
    /// <param name="len">the number of bytes to read
    /// </param>
    /// <param name="useBuffer">set to false if the caller will handle
    /// buffering.
    /// </param>
    /// <seealso cref="IndexOutput.WriteBytes(byte[],int)">
    /// </seealso>
    public virtual void ReadBytes(byte[] b, int offset, int len, bool useBuffer)
    {
        // This base implementation ignores useBuffer entirely and simply
        // delegates to the three-argument overload.
        ReadBytes(b, offset, len);
    }

    /// <summary>Reads four bytes and returns an int.</summary>
    /// <returns>the four bytes combined with the first byte read as the most significant</returns>
    /// <seealso cref="IndexOutput.WriteInt(int)">
    /// </seealso>
    public virtual int ReadInt()
    {
        // Operands are evaluated left to right, so the first byte read ends up
        // in the highest-order position.
        return ((ReadByte() & 0xFF) << 24) | ((ReadByte() & 0xFF) << 16) | ((ReadByte() & 0xFF) << 8) | (ReadByte() & 0xFF);
    }

    /// <summary>Reads an int stored in variable-length format. Reads between one and
    /// five bytes. Smaller values take fewer bytes. Negative numbers are not
    /// supported.
    /// </summary>
    /// <returns>the decoded value</returns>
    /// <seealso cref="IndexOutput.WriteVInt(int)">
    /// </seealso>
    public virtual int ReadVInt()
    {
        // Each byte carries seven payload bits (low-order group first); a set
        // high bit means another byte follows.
        byte b = ReadByte();
        int i = b & 0x7F;
        for (int shift = 7; (b & 0x80) != 0; shift += 7)
        {
            b = ReadByte();
            i |= (b & 0x7F) << shift;
        }
        return i;
    }

    /// <summary>Reads eight bytes and returns a long.</summary>
    /// <returns>two consecutive ints combined, the first one forming the upper 32 bits</returns>
    /// <seealso cref="IndexOutput.WriteLong(long)">
    /// </seealso>
    public virtual long ReadLong()
    {
        // The mask keeps the second int from sign-extending into the upper half.
        return (((long) ReadInt()) << 32) | (ReadInt() & 0xFFFFFFFFL);
    }

    /// <summary>Reads a long stored in variable-length format. Reads between one and
    /// nine bytes. Smaller values take fewer bytes. Negative numbers are not
    /// supported.
    /// </summary>
    /// <returns>the decoded value</returns>
    public virtual long ReadVLong()
    {
        // Same scheme as ReadVInt, accumulated into a 64-bit value.
        byte b = ReadByte();
        long i = b & 0x7F;
        for (int shift = 7; (b & 0x80) != 0; shift += 7)
        {
            b = ReadByte();
            i |= (b & 0x7FL) << shift;
        }
        return i;
    }

    /// <summary>Call this if <see cref="ReadString()" /> should read characters stored
    /// in the old modified UTF8 format (length in java chars
    /// and java's modified UTF8 encoding). This is used for
    /// indices written pre-2.4. See LUCENE-510 for details.
    /// </summary>
    public virtual void SetModifiedUTF8StringsMode()
    {
        preUTF8Strings = true;
    }

    /// <summary>Reads a string.</summary>
    /// <remarks>
    /// In the default mode the string is stored as a VInt byte count followed by
    /// that many UTF-8 bytes. After <see cref="SetModifiedUTF8StringsMode()" /> has
    /// been called, the old format (VInt char count followed by modified UTF-8
    /// characters) is read instead.
    /// </remarks>
    /// <returns>the decoded string</returns>
    /// <seealso cref="IndexOutput.WriteString(String)">
    /// </seealso>
    public virtual string ReadString()
    {
        if (preUTF8Strings)
        {
            return ReadModifiedUTF8String();
        }

        int length = ReadVInt();
        byte[] bytes = new byte[length];
        ReadBytes(bytes, 0, length);
        return System.Text.Encoding.UTF8.GetString(bytes, 0, length);
    }

    // Reads a string in the old format: a VInt char count followed by that
    // many modified UTF-8 encoded characters.
    private string ReadModifiedUTF8String()
    {
        int length = ReadVInt();
        char[] chars = new char[length];
        ReadChars(chars, 0, length);
        return new string(chars, 0, length);
    }

    /// <summary>Reads Lucene's old "modified UTF-8" encoded
    /// characters into an array.
    /// </summary>
    /// <param name="buffer">the array to read characters into
    /// </param>
    /// <param name="start">the offset in the array to start storing characters
    /// </param>
    /// <param name="length">the number of characters to read
    /// </param>
    /// <remarks>Deprecated -- please use ReadString or ReadBytes
    /// instead, and construct the string
    /// from those utf8 bytes.
    /// </remarks>
    /// <seealso cref="IndexOutput.WriteChars(String,int,int)">
    /// </seealso>
    [Obsolete("-- please use ReadString or ReadBytes instead, and construct the string from those utf8 bytes")]
    public virtual void ReadChars(char[] buffer, int start, int length)
    {
        int end = start + length;
        for (int i = start; i < end; i++)
        {
            byte b = ReadByte();
            if ((b & 0x80) == 0)
            {
                // High bit clear: the character fits in this single byte.
                buffer[i] = (char) (b & 0x7F);
            }
            else if ((b & 0xE0) != 0xE0)
            {
                // Top three bits are not all set: two-byte form, five bits
                // from the lead byte and six from the next one.
                buffer[i] = (char) (((b & 0x1F) << 6) | (ReadByte() & 0x3F));
            }
            else
            {
                // Three-byte form: four bits from the lead byte and six from
                // each of the two following bytes.
                buffer[i] = (char) (((b & 0x0F) << 12) | ((ReadByte() & 0x3F) << 6) | (ReadByte() & 0x3F));
            }
        }
    }

    /// <summary> Expert
    ///
    /// Similar to <see cref="ReadChars(char[], int, int)" /> but does not do any conversion operations on the bytes it is reading in. It still
    /// has to invoke <see cref="ReadByte()" /> just as <see cref="ReadChars(char[], int, int)" /> does, but it does not need a buffer to store anything
    /// and it does not have to do any of the bitwise operations, since we don't actually care what is in the byte except to determine
    /// how many more bytes to read.
    /// </summary>
    /// <param name="length">The number of chars to read
    /// </param>
    /// <remarks>Deprecated: this method operates on old "modified utf8" encoded
    /// strings.
    /// </remarks>
    [Obsolete("this method operates on old \"modified utf8\" encoded strings")]
    public virtual void SkipChars(int length)
    {
        for (int i = 0; i < length; i++)
        {
            byte b = ReadByte();
            if ((b & 0x80) == 0)
            {
                // Do nothing, we only need one byte.
            }
            else if ((b & 0xE0) != 0xE0)
            {
                ReadByte(); // Read an additional byte.
            }
            else
            {
                // Read two additional bytes.
                ReadByte();
                ReadByte();
            }
        }
    }

    /// <summary>Closes the stream by forwarding to <see cref="Dispose()" />.</summary>
    [Obsolete("Use Dispose() instead.")]
    public void Close()
    {
        Dispose();
    }

    /// <summary>Closes the stream to further operations. </summary>
    public void Dispose()
    {
        Dispose(true);

        // Standard dispose pattern: once cleanup has run deterministically, a
        // finalizer declared by a derived type no longer needs to run.
        GC.SuppressFinalize(this);
    }

    /// <summary>Releases the resources held by this stream.</summary>
    /// <param name="disposing">passed as <c>true</c> by <see cref="Dispose()" />
    /// </param>
    protected abstract void Dispose(bool disposing);

    /// <summary>Returns the current position in this file, where the next read will
    /// occur.
    /// </summary>
    /// <seealso cref="Seek(long)">
    /// </seealso>
    public abstract long FilePointer { get; }

    /// <summary>Sets current position in this file, where the next read will occur.</summary>
    /// <param name="pos">the position to move to
    /// </param>
    /// <seealso cref="FilePointer">
    /// </seealso>
    public abstract void Seek(long pos);

    /// <summary>The number of bytes in the file. </summary>
    public abstract long Length();

    /// <summary>Returns a clone of this stream.
    ///
    /// <para>Clones of a stream access the same data, and are positioned at the same
    /// point as the stream they were cloned from.</para>
    ///
    /// <para>Expert: Subclasses must ensure that clones may be positioned at
    /// different points in the input from each other and from the stream they
    /// were cloned from.</para>
    /// </summary>
    /// <returns>a memberwise (shallow) copy of this instance</returns>
    public virtual object Clone()
    {
        // Any failure while copying is allowed to propagate instead of being
        // swallowed and surfacing later as a null clone at the call site.
        return MemberwiseClone();
    }

    /// <summary>Reads a string-to-string map: an int entry count (read with
    /// <see cref="ReadInt()" />) followed by that many key/value pairs, each
    /// read with <see cref="ReadString()" />.
    /// </summary>
    /// <returns>the map that was read; if a key occurs more than once, the last value read wins</returns>
    public virtual System.Collections.Generic.IDictionary<string, string> ReadStringStringMap()
    {
        var map = new HashMap<string, string>();
        int count = ReadInt();
        for (int i = 0; i < count; i++)
        {
            // Key first, then value -- the order of the two reads matters.
            string key = ReadString();
            string val = ReadString();
            map[key] = val;
        }

        return map;
    }
}
290 }