Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
RecursivePrefixTreeFilter.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using System.Diagnostics;
21 using Lucene.Net.Search;
22 using Lucene.Net.Spatial.Prefix.Tree;
23 using Lucene.Net.Spatial.Util;
24 using Lucene.Net.Util;
25 using Spatial4n.Core.Shapes;
26 
27 namespace Lucene.Net.Spatial.Prefix
28 {
29  /// <summary>
30  /// Performs a spatial intersection filter against a field indexed with {@link SpatialPrefixTree}, a Trie.
31  /// SPT yields terms (grids) at length 1 and at greater lengths corresponding to greater precisions.
32  /// This filter recursively traverses each grid length and uses methods on {@link Shape} to efficiently know
33  /// that all points at a prefix fit in the shape or not to either short-circuit unnecessary traversals or to efficiently
34  /// load all enclosed points.
35  /// </summary>
37  {
38  /* TODOs for future:
39 
40 Can a polygon query shape be optimized / made-simpler at recursive depths (e.g. intersection of shape + cell box)
41 
42 RE "scan" threshold:
43 // IF configured to do so, we could use term.freq() as an estimate on the number of places at this depth. OR, perhaps
44 // make estimates based on the total known term count at this level?
45 if (!scan) {
46  //Make some estimations on how many points there are at this level and how few there would need to be to set
47  // !scan to false.
48  long termsThreshold = (long) estimateNumberIndexedTerms(cell.length(),queryShape.getDocFreqExpenseThreshold(cell));
49  long thisOrd = termsEnum.ord();
50  scan = (termsEnum.seek(thisOrd+termsThreshold+1) == TermsEnum.SeekStatus.END
51  || !cell.contains(termsEnum.term()));
52  termsEnum.seek(thisOrd);//return to last position
53 }
54 
55 */
56 
57  private readonly String fieldName;
58  private readonly SpatialPrefixTree grid;
59  private readonly Shape queryShape;
60  private readonly int prefixGridScanLevel;//at least one less than grid.getMaxLevels()
61  private readonly int detailLevel;
62 
63  public RecursivePrefixTreeFilter(String fieldName, SpatialPrefixTree grid, Shape queryShape, int prefixGridScanLevel,
64  int detailLevel)
65  {
66  this.fieldName = fieldName;
67  this.grid = grid;
68  this.queryShape = queryShape;
69  this.prefixGridScanLevel = Math.Max(1, Math.Min(prefixGridScanLevel, grid.GetMaxLevels() - 1));
70  this.detailLevel = detailLevel;
71  Debug.Assert(detailLevel <= grid.GetMaxLevels());
72  }
73 
74  public override DocIdSet GetDocIdSet(Index.IndexReader reader /*, Bits acceptDocs*/)
75  {
76  var bits = new OpenBitSet(reader.MaxDoc);
77  var terms = new TermsEnumCompatibility(reader, fieldName);
78  var term = terms.Next();
79  if (term == null)
80  return null;
81  Node scanCell = null;
82 
83  //cells is treated like a stack. LinkedList conveniently has bulk add to beginning. It's in sorted order so that we
84  // always advance forward through the termsEnum index.
85  var cells = new LinkedList<Node>(
86  grid.GetWorldNode().GetSubCells(queryShape));
87 
88  //This is a recursive algorithm that starts with one or more "big" cells, and then recursively dives down into the
89  // first such cell that intersects with the query shape. It's a depth first traversal because we don't move onto
90  // the next big cell (breadth) until we're completely done considering all smaller cells beneath it. For a given
91  // cell, if it's *within* the query shape then we can conveniently short-circuit the depth traversal and
92  // grab all documents assigned to this cell/term. For an intersection of the cell and query shape, we either
93  // recursively step down another grid level or we decide heuristically (via prefixGridScanLevel) that there aren't
94  // that many points, and so we scan through all terms within this cell (i.e. the term starts with the cell's term),
95  // seeing which ones are within the query shape.
96  while (cells.Count > 0)
97  {
98  Node cell = cells.First.Value; cells.RemoveFirst();
99  var cellTerm = cell.GetTokenString();
100  var seekStat = terms.Seek(cellTerm);
101  if (seekStat == TermsEnumCompatibility.SeekStatus.END)
102  break;
103  if (seekStat == TermsEnumCompatibility.SeekStatus.NOT_FOUND)
104  continue;
105  if (cell.GetLevel() == detailLevel || cell.IsLeaf())
106  {
107  terms.Docs(bits);
108  }
109  else
110  {//any other intersection
111  //If the next indexed term is the leaf marker, then add all of them
112  var nextCellTerm = terms.Next();
113  Debug.Assert(nextCellTerm.Text.StartsWith(cellTerm));
114  scanCell = grid.GetNode(nextCellTerm.Text, scanCell);
115  if (scanCell.IsLeaf())
116  {
117  terms.Docs(bits);
118  term = terms.Next();//move pointer to avoid potential redundant addDocs() below
119  }
120 
121  //Decide whether to continue to divide & conquer, or whether it's time to scan through terms beneath this cell.
122  // Scanning is a performance optimization trade-off.
123  bool scan = cell.GetLevel() >= prefixGridScanLevel;//simple heuristic
124 
125  if (!scan)
126  {
127  //Divide & conquer
128  var lst = cell.GetSubCells(queryShape);
129  for (var i = lst.Count - 1; i >= 0; i--) //add to beginning
130  {
131  cells.AddFirst(lst[i]);
132  }
133  }
134  else
135  {
136  //Scan through all terms within this cell to see if they are within the queryShape. No seek()s.
137  for (var t = terms.Term(); t != null && t.Text.StartsWith(cellTerm); t = terms.Next())
138  {
139  scanCell = grid.GetNode(t.Text, scanCell);
140  int termLevel = scanCell.GetLevel();
141  if (termLevel > detailLevel)
142  continue;
143  if (termLevel == detailLevel || scanCell.IsLeaf())
144  {
145  //TODO should put more thought into implications of box vs point
146  Shape cShape = termLevel == grid.GetMaxLevels() ? scanCell.GetCenter() : scanCell.GetShape();
147  if (queryShape.Relate(cShape) == SpatialRelation.DISJOINT)
148  continue;
149 
150  terms.Docs(bits);
151  }
152  }//term loop
153  }
154  }
155  }//cell loop
156 
157  return bits;
158  }
159 
160  public override string ToString()
161  {
162  return "GeoFilter{fieldName='" + fieldName + '\'' + ", shape=" + queryShape + '}';
163  }
164 
165  public override bool Equals(object o)
166  {
167  if (this == o) return true;
168  var that = o as RecursivePrefixTreeFilter;
169 
170  if (that == null) return false;
171 
172  if (!fieldName.Equals(that.fieldName)) return false;
173  //note that we don't need to look at grid since for the same field it should be the same
174  if (prefixGridScanLevel != that.prefixGridScanLevel) return false;
175  if (detailLevel != that.detailLevel) return false;
176  if (!queryShape.Equals(that.queryShape)) return false;
177 
178  return true;
179  }
180 
181  public override int GetHashCode()
182  {
183  int result = fieldName.GetHashCode();
184  result = 31 * result + queryShape.GetHashCode();
185  result = 31 * result + detailLevel;
186  return result;
187  }
188  }
189 }