Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
CompoundFileWriter.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using Directory = Lucene.Net.Store.Directory;
21 using IndexInput = Lucene.Net.Store.IndexInput;
22 using IndexOutput = Lucene.Net.Store.IndexOutput;
23 
24 namespace Lucene.Net.Index
25 {
26 
27 
/// <summary>Combines multiple files into a single compound file.
/// The file format:<br/>
/// <list type="bullet">
/// <item>VInt fileCount</item>
/// <item>{Directory}
/// fileCount entries with the following structure:
/// <list type="bullet">
/// <item>long dataOffset</item>
/// <item>String fileName</item>
/// </list>
/// </item>
/// <item>{File Data}
/// fileCount entries with the raw data of the corresponding file</item>
/// </list>
///
/// The fileCount integer indicates how many files are contained in this compound
/// file. The {directory} that follows has that many entries. Each directory entry
/// contains a long pointer to the start of this file's data section, and a String
/// with that file's name.
/// </summary>
public sealed class CompoundFileWriter : IDisposable
{
    /// <summary>Bookkeeping record for one source file queued for merging.</summary>
    private sealed class FileEntry
    {
        /// <summary>Name of the source file.</summary>
        internal string file;

        /// <summary>Position in the compound stream of this file's directory entry.</summary>
        internal long directoryOffset;

        /// <summary>Position in the compound stream where this file's data starts.</summary>
        internal long dataOffset;
    }

    private readonly Directory directory;
    private readonly string fileName;

    // Names already queued; used to reject duplicates in AddFile.
    private readonly HashSet<string> ids;

    // Queued files, in the order they were added.
    private readonly LinkedList<FileEntry> entries;

    // Set once Dispose() has started the merge; no files may be added afterwards.
    private bool merged;

    // Optional; may be null, in which case no progress is reported while copying.
    private readonly SegmentMerger.CheckAbort checkAbort;

    /// <summary>Create the compound stream in the specified file. The file name is the
    /// entire name (no extensions are added).
    /// </summary>
    /// <exception cref="ArgumentNullException">if <c>dir</c> or <c>name</c> is null</exception>
    public CompoundFileWriter(Directory dir, string name)
        : this(dir, name, null)
    {
    }

    /// <summary>Create the compound stream in the specified file, optionally reporting
    /// copy progress to <c>checkAbort</c> (which may be null).
    /// </summary>
    /// <exception cref="ArgumentNullException">if <c>dir</c> or <c>name</c> is null</exception>
    internal CompoundFileWriter(Directory dir, string name, SegmentMerger.CheckAbort checkAbort)
    {
        if (dir == null)
        {
            throw new ArgumentNullException("dir");
        }

        if (name == null)
        {
            throw new ArgumentNullException("name");
        }

        this.checkAbort = checkAbort;
        directory = dir;
        fileName = name;
        ids = new HashSet<string>();
        entries = new LinkedList<FileEntry>();
    }

    /// <summary>Returns the directory of the compound file. </summary>
    public Directory Directory
    {
        get { return directory; }
    }

    /// <summary>Returns the name of the compound file. </summary>
    public string Name
    {
        get { return fileName; }
    }

    /// <summary>Add a source stream. <c>file</c> is the string by which the
    /// sub-stream will be known in the compound stream.
    /// </summary>
    /// <exception cref="InvalidOperationException">if the merge has already been performed</exception>
    /// <exception cref="ArgumentNullException">if <c>file</c> is null</exception>
    /// <exception cref="ArgumentException">if a file with the same name has been added already</exception>
    public void AddFile(string file)
    {
        if (merged)
        {
            throw new InvalidOperationException("Can't add extensions after merge has been called");
        }

        if (file == null)
        {
            throw new ArgumentNullException("file");
        }

        // HashSet<T>.Add reports a duplicate by returning false; it never throws,
        // so the return value must be checked to reject a repeated name.
        if (!ids.Add(file))
        {
            throw new ArgumentException("File " + file + " already added");
        }

        entries.AddLast(new FileEntry { file = file });
    }

    /// <summary>Performs the merge; equivalent to calling <c>Dispose()</c>.</summary>
    [Obsolete("Use Dispose() instead")]
    public void Close()
    {
        Dispose();
    }

    /// <summary>Merge the files added up to now.
    /// All added files are combined sequentially into the compound stream:
    /// first the entry count, then a directory of (data offset, file name)
    /// records, then the raw data of each file. This method itself does not
    /// delete the source files.
    /// </summary>
    /// <exception cref="InvalidOperationException">if the merge has already been
    /// performed, or if no file has been added to this object</exception>
    public void Dispose()
    {
        // Extract into protected method if class ever becomes unsealed

        // TODO: Dispose shouldn't throw exceptions!
        // InvalidOperationException derives from SystemException, so callers that
        // catch SystemException keep working.
        if (merged)
        {
            throw new InvalidOperationException("Merge already performed");
        }

        if (entries.Count == 0)
        {
            throw new InvalidOperationException("No entries to merge have been defined");
        }

        merged = true;

        // open the compound stream
        IndexOutput os = null;
        try
        {
            os = directory.CreateOutput(fileName);

            // Write the number of entries
            os.WriteVInt(entries.Count);

            // Write the directory with all offsets at 0.
            // Remember the positions of directory entries so that we can
            // adjust the offsets later
            long totalSize = 0;
            foreach (FileEntry fe in entries)
            {
                fe.directoryOffset = os.FilePointer;
                os.WriteLong(0); // placeholder, patched once the data offset is known
                os.WriteString(fe.file);
                totalSize += directory.FileLength(fe.file);
            }

            // Pre-allocate size of file as optimization --
            // this can potentially help IO performance as
            // we write the file and also later during
            // searching. It also uncovers a disk-full
            // situation earlier and hopefully without
            // actually filling disk to 100%:
            long finalLength = totalSize + os.FilePointer;
            os.SetLength(finalLength);

            // Open the files and copy their data into the stream.
            // Remember the locations of each file's data section.
            var buffer = new byte[16384];
            foreach (FileEntry fe in entries)
            {
                fe.dataOffset = os.FilePointer;
                CopyFile(fe, os, buffer);
            }

            // Write the data offsets into the directory of the compound stream
            foreach (FileEntry fe in entries)
            {
                os.Seek(fe.directoryOffset);
                os.WriteLong(fe.dataOffset);
            }

            System.Diagnostics.Debug.Assert(finalLength == os.Length);

            // Close the output stream. Set the os to null before trying to
            // close so that if an exception occurs during the close, the
            // finally clause below will not attempt to close the stream
            // the second time.
            IndexOutput tmp = os;
            os = null;
            tmp.Close();
        }
        finally
        {
            // Only reached with a non-null stream when an exception is already
            // propagating from the try block.
            if (os != null)
            {
                try
                {
                    os.Close();
                }
                catch (System.IO.IOException)
                {
                    // Deliberately ignored so the original failure is not masked.
                }
            }
        }
    }

    /// <summary>Copy the contents of the given source file into the
    /// provided output stream. Use the provided buffer for moving data
    /// to reduce memory allocation.
    /// </summary>
    /// <exception cref="System.IO.IOException">if the number of bytes written to
    /// <c>os</c> does not match the length of the source file</exception>
    private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
    {
        IndexInput input = null;
        try
        {
            long startPtr = os.FilePointer;

            input = directory.OpenInput(source.file);
            long length = input.Length();
            long remainder = length;
            int chunk = buffer.Length;

            while (remainder > 0)
            {
                var len = (int) Math.Min(chunk, remainder);
                input.ReadBytes(buffer, 0, len, false);
                os.WriteBytes(buffer, len);
                remainder -= len;
                if (checkAbort != null)
                {
                    // Roughly every 2 MB we will check if
                    // it's time to abort
                    checkAbort.Work(80);
                }
            }

            // Verify that remainder is 0
            if (remainder != 0)
            {
                throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
            }

            // Verify that the output length diff is equal to original file
            long endPtr = os.FilePointer;
            long diff = endPtr - startPtr;
            if (diff != length)
            {
                throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
            }
        }
        finally
        {
            if (input != null)
            {
                input.Close();
            }
        }
    }
}
275 }