Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
MergePolicy.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using Directory = Lucene.Net.Store.Directory;
21 
22 namespace Lucene.Net.Index
23 {
24 
25  /// <summary> <p/>Expert: a MergePolicy determines the sequence of
26  /// primitive merge operations to be used for overall merge
27  /// and optimize operations.<p/>
28  ///
29  /// <p/>Whenever the segments in an index have been altered by
30  /// <see cref="IndexWriter" />, either the addition of a newly
31  /// flushed segment, addition of many segments from
32  /// addIndexes* calls, or a previous merge that may now need
33  /// to cascade, <see cref="IndexWriter" /> invokes <see cref="FindMerges" />
34  /// to give the MergePolicy a chance to pick
35  /// merges that are now required. This method returns a
36  /// <see cref="MergeSpecification" /> instance describing the set of
37  /// merges that should be done, or null if no merges are
38  /// necessary. When IndexWriter.optimize is called, it calls
39  /// <see cref="FindMergesForOptimize" /> and the MergePolicy should
40  /// then return the necessary merges.<p/>
41  ///
42  /// <p/>Note that the policy can return more than one merge at
43  /// a time. In this case, if the writer is using <see cref="SerialMergeScheduler" />,
44  /// the merges will be run
45  /// sequentially but if it is using <see cref="ConcurrentMergeScheduler" />
46  /// they will be run concurrently.<p/>
47  ///
48  /// <p/>The default MergePolicy is <see cref="LogByteSizeMergePolicy" />.
49  /// <p/>
50  ///
51  /// <p/><b>NOTE:</b> This API is new and still experimental
52  /// (subject to change suddenly in the next release)<p/>
53  ///
54  /// <p/><b>NOTE</b>: This class typically requires access to
55  /// package-private APIs (e.g. <c>SegmentInfos</c>) to do its job;
56  /// if you implement your own MergePolicy, you'll need to put
57  /// it in package Lucene.Net.Index in order to use
58  /// these APIs.
59  /// </summary>
60 
61  public abstract class MergePolicy : IDisposable
62  {
63 
/// <summary>OneMerge provides the information necessary to perform
/// an individual primitive merge operation, resulting in
/// a single new segment. The merge spec includes the
/// subset of segments to be merged as well as whether the
/// new segment should use the compound file format.
/// </summary>
public class OneMerge
{
    internal SegmentInfo info; // used by IndexWriter
    internal bool mergeDocStores; // used by IndexWriter
    internal bool optimize; // used by IndexWriter
    internal bool registerDone; // used by IndexWriter
    internal long mergeGen; // used by IndexWriter
    internal bool isExternal; // used by IndexWriter
    internal int maxNumSegmentsOptimize; // used by IndexWriter
    internal SegmentReader[] readers; // used by IndexWriter
    internal SegmentReader[] readersClone; // used by IndexWriter
    internal SegmentInfos segments; // the segments handed to the constructor
    internal bool useCompoundFile; // the flag handed to the constructor
    internal bool aborted; // set by Abort(), read by IsAborted()/CheckAborted()
    internal System.Exception error; // set by SetException(), read by GetException()

    /// <summary>Creates a merge over the given segments.</summary>
    /// <param name="segments">the segments to be merged; must be non-null and
    /// contain at least one segment
    /// </param>
    /// <param name="useCompoundFile">whether the new segment should use the
    /// compound file format
    /// </param>
    /// <exception cref="ArgumentNullException">if <paramref name="segments" /> is null</exception>
    /// <exception cref="ArgumentException">if <paramref name="segments" /> is empty</exception>
    public OneMerge(SegmentInfos segments, bool useCompoundFile)
    {
        // Fail with a descriptive argument exception instead of a
        // NullReferenceException from the Count access below.
        if (segments == null)
            throw new ArgumentNullException("segments");
        if (0 == segments.Count)
            throw new ArgumentException("segments must include at least one segment", "segments");
        this.segments = segments;
        this.useCompoundFile = useCompoundFile;
    }

    /// <summary>Record that an exception occurred while executing
    /// this merge. The assignment is made while holding the lock on
    /// this instance.
    /// </summary>
    /// <param name="error">the exception to remember</param>
    internal virtual void SetException(System.Exception error)
    {
        lock (this)
        {
            this.error = error;
        }
    }

    /// <summary>Retrieve the previous exception set by <see cref="SetException" />.
    /// The read is made while holding the lock on this instance.
    /// </summary>
    /// <returns>the stored exception, or null if none has been stored</returns>
    internal virtual System.Exception GetException()
    {
        lock (this)
        {
            return error;
        }
    }

    /// <summary>Mark this merge as aborted. If this is called
    /// before the merge is committed then the merge will
    /// not be committed.
    /// </summary>
    internal virtual void Abort()
    {
        lock (this)
        {
            aborted = true;
        }
    }

    /// <summary>Returns true if this merge was aborted. </summary>
    internal virtual bool IsAborted()
    {
        lock (this)
        {
            return aborted;
        }
    }

    /// <summary>Throws if this merge has been marked as aborted;
    /// otherwise does nothing.
    /// </summary>
    /// <param name="dir">directory passed to <see cref="SegString" /> when
    /// building the exception message
    /// </param>
    /// <exception cref="MergeAbortedException">if the aborted flag is set</exception>
    internal virtual void CheckAborted(Directory dir)
    {
        lock (this)
        {
            if (aborted)
                throw new MergeAbortedException("merge is aborted: " + SegString(dir));
        }
    }

    /// <summary>Builds a one-line description of this merge: the
    /// space-separated descriptions of the segments being merged, followed
    /// by " into NAME" when the target segment info is set, and the
    /// " [optimize]" / " [mergeDocStores]" markers when those flags are set.
    /// </summary>
    /// <param name="dir">directory forwarded to each segment's own SegString call</param>
    /// <returns>the description text</returns>
    internal virtual String SegString(Directory dir)
    {
        var b = new System.Text.StringBuilder();
        int numSegments = segments.Count;
        for (int i = 0; i < numSegments; i++)
        {
            if (i > 0)
                b.Append(' ');
            b.Append(segments.Info(i).SegString(dir));
        }
        if (info != null)
            b.Append(" into ").Append(info.name);
        if (optimize)
            b.Append(" [optimize]");
        if (mergeDocStores)
        {
            b.Append(" [mergeDocStores]");
        }
        return b.ToString();
    }

    /// <summary>Public read-only view of the internal segments field
    /// (judging by the name, intended for NUnit tests).
    /// </summary>
    public SegmentInfos segments_ForNUnit
    {
        get { return segments; }
    }
}
174 
/// <summary> Describes a batch of merges to perform: it is nothing more
/// than a list of <see cref="OneMerge" /> instances.
/// </summary>
public class MergeSpecification
{
    /// <summary> The merges collected in this specification, in the order added.</summary>
    public IList<OneMerge> merges = new List<OneMerge>();

    /// <summary> Appends a merge to <see cref="merges" />.</summary>
    /// <param name="merge">the merge to append</param>
    public virtual void Add(OneMerge merge)
    {
        merges.Add(merge);
    }

    /// <summary> Renders the specification as text: a "MergeSpec:" header line
    /// followed by one numbered (1-based) entry per merge, each entry being
    /// that merge's own segment description.
    /// </summary>
    /// <param name="dir">directory forwarded to each merge's SegString call</param>
    /// <returns>the rendered text</returns>
    public virtual String SegString(Directory dir)
    {
        var text = new System.Text.StringBuilder("MergeSpec:\n");
        // The element count is read once, before the loop starts.
        for (int index = 0, total = merges.Count; index < total; index++)
        {
            text.Append(" ");
            text.Append(index + 1);
            text.Append(": ");
            text.Append(merges[index].SegString(dir));
        }
        return text.ToString();
    }
}
202 
/// <summary>Signals a problem encountered while executing a merge and
/// remembers the <see cref="Directory" /> of the affected index.
/// </summary>
[Serializable]
public class MergeException : System.SystemException
{
    // Directory supplied at construction time; exposed through the Directory property.
    private readonly Directory dir;

    /// <summary>Creates the exception from a descriptive message.</summary>
    /// <param name="message">text describing the failure</param>
    /// <param name="dir">directory of the index that hit the problem</param>
    public MergeException(string message, Directory dir)
        : base(message)
    {
        this.dir = dir;
    }

    /// <summary>Creates the exception as a wrapper around an underlying cause.
    /// A null message is handed to the base class; only the inner exception is set.
    /// </summary>
    /// <param name="exc">the underlying exception, stored as the inner exception</param>
    /// <param name="dir">directory of the index that hit the problem</param>
    public MergeException(System.Exception exc, Directory dir)
        : base(null, exc)
    {
        this.dir = dir;
    }

    /// <summary>Returns the <see cref="Directory" /> of the index that hit
    /// the exception.
    /// </summary>
    public virtual Directory Directory
    {
        get
        {
            return this.dir;
        }
    }
}
229 
/// <summary>I/O exception raised when a merge turns out to have been aborted
/// (see <c>OneMerge.CheckAborted</c>).
/// </summary>
[Serializable]
public class MergeAbortedException : System.IO.IOException
{
    /// <summary>Creates the exception with the fixed message "merge is aborted".</summary>
    public MergeAbortedException()
        : this("merge is aborted")
    {
    }

    /// <summary>Creates the exception with a caller-supplied message.</summary>
    /// <param name="message">text describing the aborted merge</param>
    public MergeAbortedException(string message)
        : base(message)
    {
    }
}
240 
/// <summary> The <see cref="IndexWriter" /> supplied to the constructor.</summary>
protected internal IndexWriter writer;

/// <summary> Initializes the policy, remembering the given writer in
/// <see cref="writer" />. No validation is performed on the argument.
/// </summary>
/// <param name="writer">the writer to associate with this policy</param>
protected MergePolicy(IndexWriter writer)
{
    this.writer = writer;
}
247 
/// <summary> Decides which merge operations the index needs right now.
/// <see cref="IndexWriter" /> invokes this every time the set of segments
/// changes. The call is always made while synchronized on the
/// <see cref="IndexWriter" /> instance, so only one thread at a time
/// will be inside this method.
/// </summary>
/// <param name="segmentInfos">the total set of segments in the index</param>
/// <returns>a <see cref="MergeSpecification" /> describing the merges that
/// should be done, or null if no merges are necessary
/// </returns>
public abstract MergeSpecification FindMerges(SegmentInfos segmentInfos);
257 
/// <summary> Decides which merge operations are needed in order to optimize
/// the index. <see cref="IndexWriter" /> invokes this when its
/// <see cref="IndexWriter.Optimize()" /> method is called. The call is always
/// made while synchronized on the <see cref="IndexWriter" /> instance, so only
/// one thread at a time will be inside this method.
/// </summary>
/// <param name="segmentInfos">the total set of segments in the index</param>
/// <param name="maxSegmentCount">requested maximum number of segments in the
/// index (currently this is always 1)
/// </param>
/// <param name="segmentsToOptimize">the specific SegmentInfo instances that
/// must be merged away; this may be a subset of all SegmentInfos
/// </param>
/// <returns>the merges needed to carry out the optimize</returns>
public abstract MergeSpecification FindMergesForOptimize(
    SegmentInfos segmentInfos,
    int maxSegmentCount,
    ISet<SegmentInfo> segmentsToOptimize);
275 
/// <summary> Decides which merge operations are needed in order to expunge
/// all deletes from the index.
/// </summary>
/// <param name="segmentInfos">the total set of segments in the index</param>
/// <returns>the merges needed to expunge the deletes</returns>
public abstract MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos);
283 
/// <summary> Release all resources for the policy. Obsolete alias that
/// simply forwards to <see cref="Dispose()" />.
/// </summary>
[Obsolete("Use Dispose() instead")]
public void Close()
{
    this.Dispose();
}
290 
/// <summary> Release all resources for the policy. Delegates the actual
/// cleanup to <see cref="Dispose(bool)" /> with <c>disposing</c> set to true.
/// </summary>
public void Dispose()
{
    Dispose(true);
    // Standard dispose pattern: once cleanup has run explicitly, a finalizer
    // declared by a subclass no longer needs to run for this instance.
    GC.SuppressFinalize(this);
}

/// <summary> Performs the actual resource cleanup; implemented by subclasses.</summary>
/// <param name="disposing">true when invoked from <see cref="Dispose()" />
/// </param>
protected abstract void Dispose(bool disposing);
298 
/// <summary> Tells whether a newly flushed segment (i.e. one that does not
/// come from a merge) should use the compound file format.
/// </summary>
/// <param name="segments">segment infos supplied by the caller; presumably the
/// segments currently in the index (confirm against IndexWriter)
/// </param>
/// <param name="newSegment">the newly flushed segment in question</param>
/// <returns>true if the compound file format should be used</returns>
public abstract bool UseCompoundFile(SegmentInfos segments, SegmentInfo newSegment);
303 
/// <summary> Tells whether the doc store files should use the compound
/// file format.
/// </summary>
/// <param name="segments">segment infos supplied by the caller; presumably the
/// segments currently in the index (confirm against IndexWriter)
/// </param>
/// <returns>true if the compound file format should be used</returns>
public abstract bool UseCompoundDocStore(SegmentInfos segments);
308  }
309 }