Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
TopDocsCollector.cs
Go to the documentation of this file.
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 
18 using System;
19 using Lucene.Net.Util;
20 
namespace Lucene.Net.Search
{
    /// <summary> Base class for collectors that produce a
    /// <see cref="Lucene.Net.Search.TopDocs" /> result. It is built around a single
    /// constructor taking a <see cref="PriorityQueue{T}" />, and exposes that queue
    /// together with a running hit counter to derived classes.<br/>
    /// Derived classes can customise the output by overriding
    /// <see cref="PopulateResults" />, <see cref="NewTopDocs" /> and
    /// <see cref="TotalHits" />; the <c>TopDocs</c> overloads themselves are not virtual.
    /// </summary>
    public abstract class TopDocsCollector<T> : Collector where T : ScoreDoc
    {
        /// <summary> Returned when TopDocs is called with illegal parameters, or when
        /// there simply aren't (enough) results. </summary>
        protected internal static readonly TopDocs EMPTY_TOPDOCS = new TopDocs(0, new ScoreDoc[0], float.NaN);

        /// <summary> Priority queue holding the top documents. What 'top' means depends
        /// on the PriorityQueue implementation: HitQueue, for example, keeps the
        /// highest-scoring documents, while other queues may order documents by
        /// different criteria.
        /// </summary>
        protected internal PriorityQueue<T> pq;

        /// <summary>Number of documents the collector has encountered in total. </summary>
        protected internal int internalTotalHits;

        /// <summary> Creates a collector that stores its hits in the given queue. </summary>
        /// <param name="pq">The priority queue used to hold the top documents.</param>
        protected internal TopDocsCollector(PriorityQueue<T> pq)
        {
            this.pq = pq;
        }

        // Number of entries that may be read from pq as results: the smaller of the
        // hit counter and the queue size. The queue may have been pre-filled with
        // sentinel values, in which case it can hold more entries than there are hits.
        private int CollectedCount()
        {
            int queued = pq.Size();
            return internalTotalHits < queued ? internalTotalHits : queued;
        }

        /// <summary> Fills the results array with ScoreDoc instances popped from the
        /// queue. Override this if a different ScoreDoc type should be returned.
        /// </summary>
        /// <param name="results">Array receiving the popped entries.</param>
        /// <param name="howMany">Number of entries to pop; slots
        /// <c>howMany - 1</c> down to <c>0</c> are written, in that order.</param>
        protected internal virtual void PopulateResults(ScoreDoc[] results, int howMany)
        {
            // Each Pop() yields the 'least' remaining element, so fill back to front.
            int slot = howMany;
            while (slot > 0)
            {
                slot--;
                results[slot] = pq.Pop();
            }
        }

        /// <summary> Wraps the given results in a <see cref="Lucene.Net.Search.TopDocs" />
        /// instance. A null <c>results</c> means there is nothing to return, either
        /// because nothing was collected or because the arguments passed to TopDocs
        /// were invalid; <see cref="EMPTY_TOPDOCS" /> is returned in that case.
        /// </summary>
        /// <param name="results">The collected results, or null.</param>
        /// <param name="start">Start offset of the requested range (not used by this
        /// implementation).</param>
        public /*protected internal*/ virtual TopDocs NewTopDocs(ScoreDoc[] results, int start)
        {
            if (results == null)
            {
                return EMPTY_TOPDOCS;
            }

            return new TopDocs(internalTotalHits, results);
        }

        /// <summary>The total number of documents that matched this query. </summary>
        public virtual int TotalHits
        {
            get
            {
                return internalTotalHits;
            }
        }

        /// <summary>Returns the top docs that were collected by this collector. </summary>
        public TopDocs TopDocs()
        {
            return TopDocs(0, CollectedCount());
        }

        /// <summary> Returns the documents in the range [start .. pq.size()) that were
        /// collected by this collector. If start >= pq.size(), an empty TopDocs is
        /// returned.<br/>
        /// This method is convenient when the application always asks for the last
        /// results, starting from the last 'page'.<br/>
        /// <b>NOTE:</b> you cannot call this method more than once for each search
        /// execution, because the results are popped off the queue. If you need several
        /// ranges with different <c>start</c> values, call <see cref="TopDocs()" /> and
        /// work with the returned <see cref="Lucene.Net.Search.TopDocs" /> object, which
        /// will contain all the results this search execution collected.
        /// </summary>
        /// <param name="start">Zero-based index of the first result to return.</param>
        public TopDocs TopDocs(int start)
        {
            return TopDocs(start, CollectedCount());
        }

        /// <summary> Returns the documents in the range [start .. start+howMany) that
        /// were collected by this collector. If start >= pq.size(), an empty TopDocs is
        /// returned, and if pq.size() - start &lt; howMany, only the available documents
        /// in [start .. pq.size()) are returned.<br/>
        /// This method is useful when the search application supports pagination of
        /// results; it also limits memory use by allocating only as many slots as
        /// requested by howMany.<br/>
        /// <b>NOTE:</b> you cannot call this method more than once for each search
        /// execution, because the results are popped off the queue. If you need several
        /// different ranges, call <see cref="TopDocs()" /> and work with the returned
        /// <see cref="Lucene.Net.Search.TopDocs" /> object, which will contain all the
        /// results this search execution collected.
        /// </summary>
        /// <param name="start">Zero-based index of the first result to return.</param>
        /// <param name="howMany">Maximum number of results to return.</param>
        public TopDocs TopDocs(int start, int howMany)
        {
            int available = CollectedCount();

            // No exception for invalid or out-of-range arguments: the caller simply
            // gets whatever NewTopDocs produces for a null result set.
            bool rangeIsValid = start >= 0 && start < available && howMany > 0;
            if (!rangeIsValid)
            {
                return NewTopDocs(null, start);
            }

            // start < available is guaranteed here, so only the count needs clamping.
            int count = Math.Min(available - start, howMany);
            ScoreDoc[] page = new ScoreDoc[count];

            // Pop() returns the 'least' element, so everything that ranks below the
            // requested range has to be discarded first. Callers usually ask for the
            // last howMany results, in which case nothing is discarded.
            int surplus = pq.Size() - start - count;
            while (surplus > 0)
            {
                pq.Pop();
                surplus--;
            }

            // Move the requested range out of the queue.
            PopulateResults(page, count);

            return NewTopDocs(page, start);
        }
    }
}