Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
FieldCacheSanityChecker.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 
21 using IndexReader = Lucene.Net.Index.IndexReader;
22 using FieldCache = Lucene.Net.Search.FieldCache;
23 using CacheEntry = Lucene.Net.Search.CacheEntry;
24 
25 namespace Lucene.Net.Util
26 {
27 
28  /// <summary> Provides methods for sanity checking that entries in the FieldCache
29  /// are not wasteful or inconsistent.
30  /// <p/>
31  /// <p/>
32  /// Lucene 2.9 introduced numerous enhancements into how the FieldCache
33  /// is used by the low levels of Lucene searching (for Sorting and
34  /// ValueSourceQueries) to improve both the speed for Sorting, as well
35  /// as reopening of IndexReaders. But these changes have shifted the
36  /// usage of FieldCache from "top level" IndexReaders (frequently a
37  /// MultiReader or DirectoryReader) down to the leaf level SegmentReaders.
38  /// As a result, existing applications that directly access the FieldCache
39  /// may find RAM usage increase significantly when upgrading to 2.9 or
40  /// later. This class provides an API for these applications (or their
41  /// unit tests) to check at run time if the FieldCache contains "insane"
42  /// usages of the FieldCache.
43  /// <p/>
44  /// <p/>
45  /// <b>EXPERIMENTAL API:</b> This API is considered extremely advanced and
46  /// experimental. It may be removed or altered w/o warning in future releases
47  /// of Lucene.
48  /// <p/>
49  /// </summary>
50  /// <seealso cref="FieldCache">
51  /// </seealso>
52  /// <seealso cref="FieldCacheSanityChecker.Insanity">
53  /// </seealso>
54  /// <seealso cref="FieldCacheSanityChecker.InsanityType">
55  /// </seealso>
56  public sealed class FieldCacheSanityChecker
57  {
58 
/// <summary> Optional estimator handed to every CacheEntry before checking;
/// when it is null, Check performs no size estimation.
/// </summary>
private RamUsageEstimator ramCalc = null;

/// <summary> Creates a checker that has no RamUsageEstimator configured.</summary>
public FieldCacheSanityChecker()
{
    /* NOOP */
}
/// <summary> Installs the estimator that will be used to estimate the size of
/// all CacheEntry objects this checker deals with.
/// </summary>
/// <param name="r">The estimator to use for subsequent checks.</param>
public void SetRamUsageEstimator(RamUsageEstimator r)
{
    this.ramCalc = r;
}
71 
72 
/// <summary> Quick and dirty convenience method: pulls every entry out of
/// the given cache and runs the sanity check on them.
/// </summary>
/// <param name="cache">The FieldCache whose entries should be inspected.</param>
/// <returns>The problems found among the cache's entries.</returns>
/// <seealso cref="Check">
/// </seealso>
public static Insanity[] CheckSanity(FieldCache cache)
{
    CacheEntry[] entries = cache.GetCacheEntries();
    return CheckSanity(entries);
}
80 
/// <summary> Quick and dirty convenience method that instantiates an instance with
/// "good defaults" and uses it to test the CacheEntrys.
/// </summary>
/// <param name="cacheEntries">The entries to inspect.</param>
/// <returns>The result of running Check on the entries with a freshly
/// created checker.</returns>
/// <seealso cref="Check">
/// </seealso>
public static Insanity[] CheckSanity(params CacheEntry[] cacheEntries)
{
    FieldCacheSanityChecker sanityChecker = new FieldCacheSanityChecker();
    // doesn't check for interned
    // NOTE(review): presumably that is what the 'false' argument selects -- confirm
    // against RamUsageEstimator.
    sanityChecker.SetRamUsageEstimator(new RamUsageEstimator(false));
    return sanityChecker.Check(cacheEntries);
}
93 
94 
/// <summary> Tests a CacheEntry[] for indication of "insane" cache usage.
/// <p/>
/// NOTE: FieldCache CreationPlaceholder objects are ignored.
/// (:TODO: is this a bad idea? are we masking a real problem?)
/// <p/>
/// </summary>
/// <param name="cacheEntries">The entries to examine; null or an empty array
/// produces an empty result.</param>
/// <returns>Every value-mismatch problem followed by every sub-reader problem
/// that was detected; never null.</returns>
public Insanity[] Check(params CacheEntry[] cacheEntries)
{
    if (null == cacheEntries || 0 == cacheEntries.Length)
    {
        return new Insanity[0];
    }

    // Let each entry compute its size up front when an estimator was supplied.
    if (null != ramCalc)
    {
        for (int i = 0; i < cacheEntries.Length; i++)
        {
            cacheEntries[i].EstimateSize(ramCalc);
        }
    }

    // the indirect mapping lets MapOfSet dedup identical valIds for us
    //
    // maps the (valId) identity hash code of cache values to
    // sets of CacheEntry instances
    MapOfSets<int, CacheEntry> valIdToItems = new MapOfSets<int, CacheEntry>(new Dictionary<int, HashSet<CacheEntry>>(17));
    // maps ReaderField keys to Sets of ValueIds
    MapOfSets<ReaderField, int> readerFieldToValIds = new MapOfSets<ReaderField, int>(new Dictionary<ReaderField, HashSet<int>>(17));

    // any keys that we know result in more than one valId
    HashSet<ReaderField> valMismatchKeys = new HashSet<ReaderField>();

    // iterate over all the cacheEntries to get the mappings we'll need
    for (int i = 0; i < cacheEntries.Length; i++)
    {
        CacheEntry item = cacheEntries[i];
        System.Object val = item.Value;

        if (val is Lucene.Net.Search.CreationPlaceholder)
        {
            continue;
        }

        ReaderField rf = new ReaderField(item.ReaderKey, item.FieldName);

        // Use the identity hash rather than the virtual GetHashCode(): two
        // distinct value objects must not share an id merely because their
        // type overrides GetHashCode(), and a null value must not throw.
        System.Int32 valId = System.Runtime.CompilerServices.RuntimeHelpers.GetHashCode(val);

        // indirect mapping, so the MapOfSet will dedup identical valIds for us
        valIdToItems.Put(valId, item);
        // more than one valId recorded for the same reader+field is a mismatch
        if (1 < readerFieldToValIds.Put(rf, valId))
        {
            valMismatchKeys.Add(rf);
        }
    }

    List<Insanity> insanity = new List<Insanity>(valMismatchKeys.Count * 3);

    insanity.AddRange(CheckValueMismatch(valIdToItems, readerFieldToValIds, valMismatchKeys));
    insanity.AddRange(CheckSubreaders(valIdToItems, readerFieldToValIds));

    return insanity.ToArray();
}
154 
/// <summary> Internal helper method used by Check that iterates over
/// valMismatchKeys and generates a list of Insanity instances
/// accordingly. The MapOfSets are used to populate the Insanity objects.
/// </summary>
/// <param name="valIdToItems">Maps each value id to the cache entries holding that value.</param>
/// <param name="readerFieldToValIds">Maps each reader+field pair to the value ids seen for it.</param>
/// <param name="valMismatchKeys">The reader+field pairs known to have more than one value id.</param>
/// <returns>One Insanity per key in valMismatchKeys; empty when there are none.</returns>
/// <seealso cref="InsanityType.VALUEMISMATCH">
/// </seealso>
private List<Insanity> CheckValueMismatch(MapOfSets<int,CacheEntry> valIdToItems,
    MapOfSets<ReaderField,int> readerFieldToValIds,
    HashSet<ReaderField> valMismatchKeys)
{
    List<Insanity> found = new List<Insanity>(valMismatchKeys.Count * 3);

    // nothing to report unless some ReaderField has multiple values
    if (valMismatchKeys.Count == 0)
    {
        return found;
    }

    IDictionary<ReaderField, HashSet<int>> idsByReaderField = readerFieldToValIds.Map;
    IDictionary<int, HashSet<CacheEntry>> entriesByValId = valIdToItems.Map;

    foreach (ReaderField key in valMismatchKeys)
    {
        // gather every entry behind every value id recorded for this key
        List<CacheEntry> offenders = new List<CacheEntry>(valMismatchKeys.Count * 2);
        foreach (int valId in idsByReaderField[key])
        {
            offenders.AddRange(entriesByValId[valId]);
        }

        System.String message = "Multiple distinct value objects for " + key.ToString();
        found.Add(new Insanity(InsanityType.VALUEMISMATCH, message, offenders.ToArray()));
    }

    return found;
}
191 
/// <summary> Internal helper method used by Check that iterates over
/// the keys of readerFieldToValIds and generates a list of Insanity
/// instances whenever two (or more) ReaderField instances are found
/// that have an ancestry relationship.
/// </summary>
/// <param name="valIdToItems">Maps each value id to the cache entries holding that value.</param>
/// <param name="readerFieldToValIds">Maps each reader+field pair to the value ids seen for it.</param>
/// <returns>One Insanity per ancestor that has cached descendants.</returns>
/// <seealso cref="InsanityType.SUBREADER">
/// </seealso>
private List<Insanity> CheckSubreaders(MapOfSets<int,CacheEntry> valIdToItems,
    MapOfSets<ReaderField,int> readerFieldToValIds)
{
    List<Insanity> found = new List<Insanity>(23);

    // ancestor -> descendants that also have cache entries; the MapOfSets is
    // only a wrapper that writes through to this dictionary
    Dictionary<ReaderField, HashSet<ReaderField>> descendantsByAncestor = new Dictionary<ReaderField, HashSet<ReaderField>>(17);
    MapOfSets<ReaderField, ReaderField> ancestorMap = new MapOfSets<ReaderField, ReaderField>(descendantsByAncestor);

    IDictionary<int, HashSet<CacheEntry>> entriesByValId = valIdToItems.Map;
    IDictionary<ReaderField, HashSet<int>> idsByReaderField = readerFieldToValIds.Map;

    HashSet<ReaderField> visited = new HashSet<ReaderField>();

    foreach (ReaderField candidate in idsByReaderField.Keys)
    {
        if (!visited.Contains(candidate))
        {
            foreach (object descendantKey in GetAllDecendentReaderKeys(candidate.readerKey))
            {
                ReaderField descendant = new ReaderField(descendantKey, candidate.fieldName);

                HashSet<ReaderField> inherited;
                if (descendantsByAncestor.TryGetValue(descendant, out inherited))
                {
                    // this descendant was already processed as an ancestor and
                    // had problems of its own: adopt them and drop its record
                    ancestorMap.Put(candidate, descendant);
                    ancestorMap.PutAll(candidate, inherited);
                    descendantsByAncestor.Remove(descendant);
                }
                else if (idsByReaderField.ContainsKey(descendant))
                {
                    // there are cache entries for the descendant
                    ancestorMap.Put(candidate, descendant);
                }

                visited.Add(descendant);
            }

            visited.Add(candidate);
        }
    }

    // every remaining mapping represents an Insanity
    foreach (KeyValuePair<ReaderField, HashSet<ReaderField>> pair in descendantsByAncestor)
    {
        ReaderField ancestor = pair.Key;
        HashSet<ReaderField> descendants = pair.Value;

        List<CacheEntry> offenders = new List<CacheEntry>(descendants.Count * 2);

        // the ancestor's own entries go in first
        foreach (int ancestorValId in idsByReaderField[ancestor])
        {
            offenders.AddRange(entriesByValId[ancestorValId]);
        }

        // followed by the entries of each descendant
        foreach (ReaderField member in descendants)
        {
            foreach (int memberValId in idsByReaderField[member])
            {
                offenders.AddRange(entriesByValId[memberValId]);
            }
        }

        System.String message = "Found caches for decendents of " + ancestor.ToString();
        found.Add(new Insanity(InsanityType.SUBREADER, message, offenders.ToArray()));
    }

    return found;
}
270 
/// <summary> Checks if the seed is an IndexReader, and if so walks the
/// hierarchy of sub-readers, building up a list of the FieldCacheKey
/// objects of every sub-reader encountered. A collected key is itself
/// expanded only when it is an IndexReader.
/// </summary>
/// <param name="seed">The reader key to start from.</param>
/// <returns>The collected descendant keys, excluding the seed itself.</returns>
private System.Collections.IList GetAllDecendentReaderKeys(System.Object seed)
{
    // work list: grows while we walk it, so index-based iteration is required
    List<object> pending = new List<object>(17);
    pending.Add(seed);

    for (int cursor = 0; cursor < pending.Count; cursor++)
    {
        IndexReader reader = pending[cursor] as IndexReader;
        if (reader == null)
        {
            continue;
        }

        IndexReader[] children = reader.GetSequentialSubReaders();
        if (children == null)
        {
            continue;
        }

        foreach (IndexReader child in children)
        {
            pending.Add(child.FieldCacheKey);
        }
    }

    // drop the first element, because it was the seed
    return pending.GetRange(1, pending.Count - 1);
}
294 
/// <summary> Simple pair object for using "readerKey + fieldName" as a Map key.
/// Two instances are equal when they hold the very same readerKey object
/// (reference comparison) and equal field names.
/// </summary>
private sealed class ReaderField
{
    public readonly System.Object readerKey;
    public readonly System.String fieldName;

    public ReaderField(System.Object readerKey, System.String fieldName)
    {
        this.readerKey = readerKey;
        this.fieldName = fieldName;
    }

    /// <summary> Combines the identity hash of the reader key with the hash of
    /// the field name.
    /// </summary>
    public override int GetHashCode()
    {
        // Equals compares readerKey by reference, so hash it by identity too
        // (this also tolerates a null key). The product may exceed the int
        // range; wrap instead of throwing when compiled with checked arithmetic.
        unchecked
        {
            return System.Runtime.CompilerServices.RuntimeHelpers.GetHashCode(readerKey) * fieldName.GetHashCode();
        }
    }

    public override bool Equals(System.Object that)
    {
        ReaderField other = that as ReaderField;
        if (other == null)
        {
            return false;
        }

        return System.Object.ReferenceEquals(this.readerKey, other.readerKey)
            && this.fieldName.Equals(other.fieldName);
    }

    /// <summary> Renders the pair as "readerKey+fieldName".</summary>
    public override System.String ToString()
    {
        return readerKey.ToString() + "+" + fieldName;
    }
}
322 
/// <summary> Simple container for a collection of related CacheEntry objects that
/// in conjunction with each other represent some "insane" usage of the
/// FieldCache.
/// </summary>
public sealed class Insanity
{
    private readonly InsanityType type;
    private readonly System.String msg;
    private readonly CacheEntry[] entries;

    /// <summary> Creates a new Insanity record.</summary>
    /// <param name="type">Kind of problem; must not be null.</param>
    /// <param name="msg">Description of the problem.</param>
    /// <param name="entries">The entries involved; must be non-null and non-empty.</param>
    /// <exception cref="System.ArgumentException">If type is null, or entries is null or empty.</exception>
    public Insanity(InsanityType type, System.String msg, params CacheEntry[] entries)
    {
        if (type == null)
        {
            throw new System.ArgumentException("Insanity requires non-null InsanityType");
        }

        bool noEntries = entries == null || entries.Length == 0;
        if (noEntries)
        {
            throw new System.ArgumentException("Insanity requires non-null/non-empty CacheEntry[]");
        }

        this.type = type;
        this.msg = msg;
        this.entries = entries;
    }

    /// <summary> Type of insane behavior this object represents</summary>
    public InsanityType Type
    {
        get { return type; }
    }

    /// <summary> Description of the insane behavior</summary>
    public string Msg
    {
        get { return msg; }
    }

    /// <summary> CacheEntry objects which suggest a problem</summary>
    public CacheEntry[] GetCacheEntries()
    {
        return entries;
    }

    /// <summary> Multi-line representation of this Insanity object, starting with
    /// the Type and Msg, followed by each CacheEntry.ToString() on its
    /// own line prefaced by a tab character
    /// </summary>
    public override System.String ToString()
    {
        System.Text.StringBuilder text = new System.Text.StringBuilder();

        // header line: "<type>: <msg>"
        text.Append(type);
        text.Append(": ");
        if (msg != null)
        {
            text.Append(msg);
        }
        text.Append('\n');

        // one tab-indented line per entry
        foreach (CacheEntry entry in entries)
        {
            text.Append('\t');
            text.Append(entry.ToString());
            text.Append('\n');
        }

        return text.ToString();
    }
}
388 
/// <summary> An enumeration of the different types of "insane" behavior that
/// may be detected in a FieldCache.
/// </summary>
/// <seealso cref="InsanityType.SUBREADER">
/// </seealso>
/// <seealso cref="InsanityType.VALUEMISMATCH">
/// </seealso>
/// <seealso cref="InsanityType.EXPECTED">
/// </seealso>
public sealed class InsanityType
{
    private readonly System.String label;

    internal InsanityType(System.String label)
    {
        this.label = label;
    }

    /// <summary> Returns the label this type was created with.</summary>
    public override System.String ToString()
    {
        return this.label;
    }

    /// <summary> Indicates an overlap in cache usage on a given field
    /// in sub/super readers.
    /// </summary>
    public static readonly InsanityType SUBREADER = new InsanityType("SUBREADER");

    /// <summary> <p/>
    /// Indicates entries have the same reader+fieldname but
    /// different cached values. This can happen if different datatypes,
    /// or parsers are used -- and while it's not necessarily a bug
    /// it's typically an indication of a possible problem.
    /// <p/>
    /// <p/>
    /// NOTE: Only the reader, fieldname, and cached value are actually
    /// tested -- if two cache entries have different parsers or datatypes but
    /// the cached values are the same Object (== not just equal()) this method
    /// does not consider that a red flag. This allows for subtle variations
    /// in the way a Parser is specified (null vs DEFAULT_LONG_PARSER, etc...)
    /// <p/>
    /// </summary>
    public static readonly InsanityType VALUEMISMATCH = new InsanityType("VALUEMISMATCH");

    /// <summary> Indicates an expected bit of "insanity". This may be useful for
    /// clients that wish to preserve/log information about insane usage
    /// but indicate that it was expected.
    /// </summary>
    public static readonly InsanityType EXPECTED = new InsanityType("EXPECTED");
}
438  }
439 }