Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
TermInfosReader.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using Lucene.Net.Support;
20 using Lucene.Net.Util;
21 using Lucene.Net.Util.Cache;
22 using Directory = Lucene.Net.Store.Directory;
23 
24 namespace Lucene.Net.Index
25 {
26 
31 
32  sealed class TermInfosReader : IDisposable
33  {
/// <summary>Directory the segment files are opened from (constructor argument <c>dir</c>).</summary>
private readonly Directory directory;

/// <summary>Segment name used as the prefix of the terms and terms-index file names.</summary>
private readonly string segment;

/// <summary>Field metadata handed to every <see cref="SegmentTermEnum"/> this reader opens.</summary>
private readonly FieldInfos fieldInfos;

/// <summary>Set once <see cref="Dispose"/> has run to completion; later calls return immediately.</summary>
private bool isDisposed;

/// <summary>
/// Enumerator over the segment's terms file. It is cloned by <see cref="Terms()"/>
/// and disposed together with this reader.
/// </summary>
private readonly SegmentTermEnum origEnum;

/// <summary>Term count copied from <c>origEnum.size</c> in the constructor.</summary>
private readonly long size;

// The three arrays below are parallel: entry i describes the i-th retained
// entry of the terms index. All three are null when the reader was created
// with indexDivisor == -1 (terms index not loaded).
private readonly Term[] indexTerms;
private readonly TermInfo[] indexInfos;
private readonly long[] indexPointers;

/// <summary>
/// <c>origEnum.indexInterval * indexDivisor</c> when the terms index is loaded,
/// otherwise -1.
/// </summary>
private readonly int totalIndexInterval;

/// <summary>Capacity passed to the term-info cache created in <c>GetThreadResources</c>.</summary>
private const int DEFAULT_CACHE_SIZE = 1024;
51 
/// <summary>
/// Bundle of lookup state handed out by <c>GetThreadResources</c>: a private
/// term enumerator plus a cache of term lookups.
/// </summary>
private sealed class ThreadResources
{
    /// <summary>Enumerator that lookups scan and seek; created via <c>Terms()</c>.</summary>
    internal SegmentTermEnum termEnum;

    /// <summary>
    /// Cache of recently looked-up terms and their <see cref="TermInfo"/>.
    /// <c>GetThreadResources</c> populates it with a <c>SimpleLRUCache</c>.
    /// </summary>
    internal Cache<Term, TermInfo> termInfoCache;
}
60 
/// <summary>
/// Opens the terms file of a segment and, unless <paramref name="indexDivisor"/>
/// is -1, loads every <paramref name="indexDivisor"/>-th entry of the terms
/// index into memory.
/// </summary>
/// <param name="dir">Directory containing the segment files.</param>
/// <param name="seg">Segment name (file-name prefix).</param>
/// <param name="fis">Field metadata passed to the term enumerators.</param>
/// <param name="readBufferSize">Buffer size passed to <c>Directory.OpenInput</c>.</param>
/// <param name="indexDivisor">-1 to skip loading the terms index, otherwise a value &gt;= 1.</param>
/// <exception cref="System.ArgumentException">
/// <paramref name="indexDivisor"/> is neither -1 nor greater than 0.
/// </exception>
internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
{
    if (indexDivisor != -1 && indexDivisor < 1)
    {
        throw new System.ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
    }

    bool initialized = false;
    try
    {
        directory = dir;
        segment = seg;
        fieldInfos = fis;

        string termsFileName = segment + "." + IndexFileNames.TERMS_EXTENSION;
        origEnum = new SegmentTermEnum(directory.OpenInput(termsFileName, readBufferSize), fieldInfos, false);
        size = origEnum.size;

        if (indexDivisor == -1)
        {
            // Caller asked us not to load the terms index.
            totalIndexInterval = -1;
            indexTerms = null;
            indexInfos = null;
            indexPointers = null;
        }
        else
        {
            // Load the terms index, keeping one entry out of every indexDivisor.
            totalIndexInterval = origEnum.indexInterval * indexDivisor;

            string indexFileName = segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION;
            var indexEnum = new SegmentTermEnum(directory.OpenInput(indexFileName, readBufferSize), fieldInfos, true);
            try
            {
                int retainedCount = 1 + ((int) indexEnum.size - 1) / indexDivisor;

                indexTerms = new Term[retainedCount];
                indexInfos = new TermInfo[retainedCount];
                indexPointers = new long[retainedCount];

                int slot = 0;
                while (indexEnum.Next())
                {
                    indexTerms[slot] = indexEnum.Term;
                    indexInfos[slot] = indexEnum.TermInfo();
                    indexPointers[slot] = indexEnum.indexPointer;

                    // Step over the entries that the divisor tells us to drop,
                    // stopping early if the enumeration runs out.
                    int consumed = 1;
                    while (consumed < indexDivisor && indexEnum.Next())
                    {
                        consumed++;
                    }

                    slot++;
                }
            }
            finally
            {
                indexEnum.Close();
            }
        }

        initialized = true;
    }
    finally
    {
        // With lock-less commits it is possible (and fine) to hit a
        // FileNotFound exception above. Close whatever was opened so far
        // instead of waiting for a GC to do it.
        if (!initialized)
        {
            Dispose();
        }
    }
}
133 
/// <summary>The <c>skipInterval</c> value reported by the underlying terms enumerator.</summary>
public int SkipInterval
{
    get
    {
        return origEnum.skipInterval;
    }
}
138 
/// <summary>The <c>maxSkipLevels</c> value reported by the underlying terms enumerator.</summary>
public int MaxSkipLevels
{
    get
    {
        return origEnum.maxSkipLevels;
    }
}
143 
/// <summary>
/// Disposes the terms enumerator (when one was created) and the
/// <c>threadResources</c> holder. Calls after the first completed one do nothing.
/// </summary>
public void Dispose()
{
    // Move to a protected method if this class ever becomes unsealed.
    if (!isDisposed)
    {
        // origEnum can still be null when the constructor failed before opening it.
        if (origEnum != null)
        {
            origEnum.Dispose();
        }

        threadResources.Dispose();

        isDisposed = true;
    }
}
155 
/// <summary>Returns the term count that was read from the terms enumerator at construction.</summary>
internal long Size()
{
    return this.size;
}
161 
/// <summary>
/// Returns the <see cref="ThreadResources"/> stored in <c>threadResources</c>,
/// creating and storing a new instance (fresh enumerator clone plus an LRU
/// cache of <c>DEFAULT_CACHE_SIZE</c> entries) when none is present yet.
/// </summary>
private ThreadResources GetThreadResources()
{
    ThreadResources existing = threadResources.Get();
    if (existing != null)
    {
        return existing;
    }

    ThreadResources created = new ThreadResources();
    created.termEnum = Terms();
    // The cache does not need to be thread-safe: it is only used by one
    // thread at a time.
    created.termInfoCache = new SimpleLRUCache<Term, TermInfo>(DEFAULT_CACHE_SIZE);
    threadResources.Set(created);
    return created;
}
174 
175 
/// <summary>
/// Binary-searches <c>indexTerms</c> for <paramref name="term"/>.
/// </summary>
/// <returns>
/// The position of an equal index term when one exists; otherwise the position
/// of the last index term that compares below <paramref name="term"/>
/// (-1 when every index term compares above it).
/// </returns>
private int GetIndexOffset(Term term)
{
    int low = 0;
    int high = indexTerms.Length - 1;

    while (low <= high)
    {
        // Unsigned shift keeps the midpoint correct even if low + high overflows.
        int middle = Number.URShift(low + high, 1);
        int comparison = term.CompareTo(indexTerms[middle]);

        if (comparison == 0)
        {
            return middle;
        }

        if (comparison < 0)
        {
            high = middle - 1;
        }
        else
        {
            low = middle + 1;
        }
    }

    return high;
}
195 
/// <summary>
/// Repositions <paramref name="enumerator"/> at the index entry
/// <paramref name="indexOffset"/>, using the file pointer, term and term info
/// recorded for that entry.
/// </summary>
private void SeekEnum(SegmentTermEnum enumerator, int indexOffset)
{
    long filePointer = indexPointers[indexOffset];
    // Widen before multiplying so the product is computed in 64 bits.
    long termPosition = ((long) indexOffset * totalIndexInterval) - 1;

    enumerator.Seek(filePointer, termPosition, indexTerms[indexOffset], indexInfos[indexOffset]);
}
200 
/// <summary>
/// Looks up <paramref name="term"/> with the term-info cache enabled.
/// </summary>
/// <returns>The matching <see cref="TermInfo"/>, or null when the term is not found.</returns>
internal TermInfo Get(Term term)
{
    const bool useCache = true;
    return Get(term, useCache);
}
206 
/// <summary>
/// Looks up <paramref name="term"/>, optionally consulting and filling the
/// term-info cache, and leaves this caller's enumerator positioned by the scan.
/// </summary>
/// <param name="term">Term to find.</param>
/// <param name="useCache">When false the cache is neither read nor written.</param>
/// <returns>The matching <see cref="TermInfo"/>, or null when the reader is empty or the term is absent.</returns>
private TermInfo Get(Term term, bool useCache)
{
    if (size == 0)
    {
        return null;
    }

    EnsureIndexIsRead();

    ThreadResources resources = GetThreadResources();
    Cache<Term, TermInfo> cache = useCache ? resources.termInfoCache : null;

    if (useCache)
    {
        // A recent lookup of the same term can be answered straight from the cache.
        TermInfo cached = cache.Get(term);
        if (cached != null)
        {
            return cached;
        }
    }

    // Optimize sequential access: first try scanning the enumerator from
    // where it already is, without seeking.
    SegmentTermEnum enumerator = resources.termEnum;
    if (enumerator.Term != null
        && ((enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0)
            || term.CompareTo(enumerator.Term) >= 0))
    {
        int enumOffset = (int) (enumerator.position / totalIndexInterval) + 1;
        if (indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0)
        {
            // The target lies before the next index entry: no seek required.
            int numScans = enumerator.ScanTo(term);
            if (enumerator.Term == null || term.CompareTo(enumerator.Term) != 0)
            {
                return null;
            }

            TermInfo scanned = enumerator.TermInfo();
            // Only cache the result when the scan skipped more than one
            // dictionary entry. This keeps range or wildcard queries, which
            // walk many terms in order, from wiping out the cache.
            if (cache != null && numScans > 1)
            {
                cache.Put(term, scanned);
            }

            return scanned;
        }
    }

    // Random access: seek to the nearest index entry, then scan forward.
    SeekEnum(enumerator, GetIndexOffset(term));
    enumerator.ScanTo(term);
    if (enumerator.Term == null || term.CompareTo(enumerator.Term) != 0)
    {
        return null;
    }

    TermInfo found = enumerator.TermInfo();
    if (cache != null)
    {
        cache.Put(term, found);
    }

    return found;
}
279 
/// <summary>
/// Verifies that the terms index was loaded, i.e. that the reader was not
/// constructed with <c>indexDivisor == -1</c>.
/// </summary>
/// <exception cref="InvalidOperationException">
/// The terms index arrays are null. <see cref="InvalidOperationException"/>
/// derives from <see cref="SystemException"/>, so handlers written against the
/// previously thrown base type still catch it.
/// </exception>
private void EnsureIndexIsRead()
{
    if (indexTerms == null)
    {
        throw new InvalidOperationException("terms index was not loaded when this reader was created");
    }
}
287 
/// <summary>
/// Returns the enumerator position of <paramref name="term"/> within the segment.
/// </summary>
/// <param name="term">Term to locate.</param>
/// <returns>
/// <c>enumerator.position</c> once the enumerator sits on an equal term, or -1
/// when the reader is empty or no equal term is found.
/// </returns>
internal long GetPosition(Term term)
{
    if (size == 0)
    {
        return -1;
    }

    EnsureIndexIsRead();
    int indexOffset = GetIndexOffset(term);

    SegmentTermEnum enumerator = GetThreadResources().termEnum;
    SeekEnum(enumerator, indexOffset);

    // Advance while the current term still sorts before the target.
    // Get(Term, bool) treats enumerator.Term as possibly null after scanning,
    // so the same guard is applied here instead of handing null to CompareTo
    // (presumably what happens once the enumeration is exhausted - confirm
    // against SegmentTermEnum).
    while (enumerator.Term != null && term.CompareTo(enumerator.Term) > 0 && enumerator.Next())
    {
    }

    Term current = enumerator.Term;
    if (current != null && term.CompareTo(current) == 0)
    {
        return enumerator.position;
    }

    return -1;
}
309 
/// <summary>
/// Returns a new enumerator obtained by cloning the reader's terms enumerator.
/// </summary>
public SegmentTermEnum Terms()
{
    object copy = origEnum.Clone();
    return (SegmentTermEnum) copy;
}
315 
/// <summary>
/// Returns a clone of this caller's enumerator after it has been repositioned
/// by a lookup of <paramref name="term"/>.
/// </summary>
public SegmentTermEnum Terms(Term term)
{
    // The lookup is done only for its side effect of repositioning the
    // enumerator, so the cache is bypassed and the result is ignored.
    Get(term, false);

    SegmentTermEnum positioned = GetThreadResources().termEnum;
    return (SegmentTermEnum) positioned.Clone();
}
324  }
325 }