Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
MergeDocIDRemapper.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using Lucene.Net.Support;
20 
21 namespace Lucene.Net.Index
22 {
23 
24  /// <summary>Remaps docIDs after a merge has completed, where the
25  /// merged segments had at least one deletion. This is used
26  /// to renumber the buffered deletes in IndexWriter when a
27  /// merge of segments with deletions commits.
28  /// </summary>
29 
30  sealed class MergeDocIDRemapper
31  {
32  internal int[] starts; // used for binary search of mapped docID
33  internal int[] newStarts; // starts, minus the deletes
34  internal int[][] docMaps; // maps docIDs in the merged set
35  internal int minDocID; // minimum docID that needs renumbering
36  internal int maxDocID; // 1+ the max docID that needs renumbering
37  internal int docShift; // total # deleted docs that were compacted by this merge
38 
39  public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount)
40  {
41  this.docMaps = docMaps;
42  SegmentInfo firstSegment = merge.segments.Info(0);
43  int i = 0;
44  while (true)
45  {
46  SegmentInfo info = infos.Info(i);
47  if (info.Equals(firstSegment))
48  break;
49  minDocID += info.docCount;
50  i++;
51  }
52 
53  int numDocs = 0;
54  for (int j = 0; j < docMaps.Length; i++, j++)
55  {
56  numDocs += infos.Info(i).docCount;
57  System.Diagnostics.Debug.Assert(infos.Info(i).Equals(merge.segments.Info(j)));
58  }
59  maxDocID = minDocID + numDocs;
60 
61  starts = new int[docMaps.Length];
62  newStarts = new int[docMaps.Length];
63 
64  starts[0] = minDocID;
65  newStarts[0] = minDocID;
66  for (i = 1; i < docMaps.Length; i++)
67  {
68  int lastDocCount = merge.segments.Info(i - 1).docCount;
69  starts[i] = starts[i - 1] + lastDocCount;
70  newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1];
71  }
72  docShift = numDocs - mergedDocCount;
73 
74  // There are rare cases when docShift is 0. It happens
75  // if you try to delete a docID that's out of bounds,
76  // because the SegmentReader still allocates deletedDocs
77  // and pretends it has deletions ... so we can't make
78  // this assert here
79  // assert docShift > 0;
80 
81  // Make sure it all adds up:
82  System.Diagnostics.Debug.Assert(docShift == maxDocID -(newStarts [docMaps.Length - 1] + merge.segments.Info(docMaps.Length - 1).docCount - delCounts [docMaps.Length - 1]));
83  }
84 
85  public int Remap(int oldDocID)
86  {
87  if (oldDocID < minDocID)
88  // Unaffected by merge
89  return oldDocID;
90  else if (oldDocID >= maxDocID)
91  // This doc was "after" the merge, so simple shift
92  return oldDocID - docShift;
93  else
94  {
95  // Binary search to locate this document & find its new docID
96  int lo = 0; // search starts array
97  int hi = docMaps.Length - 1; // for first element less
98 
99  while (hi >= lo)
100  {
101  int mid = Number.URShift((lo + hi), 1);
102  int midValue = starts[mid];
103  if (oldDocID < midValue)
104  hi = mid - 1;
105  else if (oldDocID > midValue)
106  lo = mid + 1;
107  else
108  {
109  // found a match
110  while (mid + 1 < docMaps.Length && starts[mid + 1] == midValue)
111  {
112  mid++; // scan to last match
113  }
114  if (docMaps[mid] != null)
115  return newStarts[mid] + docMaps[mid][oldDocID - starts[mid]];
116  else
117  return newStarts[mid] + oldDocID - starts[mid];
118  }
119  }
120  if (docMaps[hi] != null)
121  return newStarts[hi] + docMaps[hi][oldDocID - starts[hi]];
122  else
123  return newStarts[hi] + oldDocID - starts[hi];
124  }
125  }
126  }
127 }