Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
Collector.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 
20 using IndexReader = Lucene.Net.Index.IndexReader;
21 
namespace Lucene.Net.Search
{
    /// <summary>
    /// Expert: receives the raw hits of a search so that sorting, custom
    /// filtering, collation and similar processing can be layered on top.
    /// </summary>
    /// <remarks>
    /// <para>
    /// All of Lucene's core collectors derive from <c>Collector</c>. In most
    /// applications one of the following can be used as-is, or
    /// <see cref="TopDocsCollector{T}" /> can be subclassed, rather than
    /// implementing <c>Collector</c> from scratch:
    /// </para>
    /// <list type="bullet">
    /// <item><see cref="TopDocsCollector{T}" />: abstract base class for
    /// collectors from which the top N docs, by some criteria, are retrieved
    /// once collection has finished.</item>
    /// <item><see cref="TopScoreDocCollector" />: concrete subclass of
    /// <see cref="TopDocsCollector{T}" /> that orders hits by score and then
    /// docID. The <see cref="IndexSearcher" /> search methods that take no
    /// explicit <see cref="Sort" /> use it internally, which likely makes it
    /// the most frequently used collector.</item>
    /// <item><see cref="TopFieldCollector" />: subclass of
    /// <see cref="TopDocsCollector{T}" /> that orders hits by a given
    /// <see cref="Sort" /> (sort by field). The <see cref="IndexSearcher" />
    /// search methods that take an explicit <see cref="Sort" /> use it
    /// internally.</item>
    /// <item><see cref="TimeLimitingCollector" />: wraps another
    /// <c>Collector</c> and aborts the search once it has taken too much
    /// time.</item>
    /// <item><see cref="PositiveScoresOnlyCollector" />: wraps another
    /// <c>Collector</c> and drops hits whose score is &lt;= 0.0.</item>
    /// </list>
    /// <para>
    /// Scoring is decoupled from collection: if the score is never requested
    /// it is never computed. A collector that needs scores should implement
    /// <see cref="SetScorer" /> so that it keeps the supplied
    /// <see cref="Scorer" />, and call <see cref="Scorer.Score()" /> from
    /// within <see cref="Collect(int)" /> to obtain the current hit's score.
    /// If the score of one hit may be requested more than once, use
    /// <see cref="ScoreCachingWrappingScorer" />.
    /// </para>
    /// <para>
    /// <b>NOTE:</b> the doc passed to <see cref="Collect(int)" /> is relative
    /// to the current reader. To map it into the docID space of the
    /// Multi*Reader, re-base it using the docBase recorded from the most
    /// recent <see cref="SetNextReader" /> call. A simple collector that
    /// gathers docIDs into a bit set:
    /// </para>
    /// <code>
    /// class BitSetCollector : Collector
    /// {
    ///     private readonly BitArray bits;
    ///     private int docBase;
    ///
    ///     public BitSetCollector(BitArray bits)
    ///     {
    ///         this.bits = bits;
    ///     }
    ///
    ///     // ignore scorer
    ///     public override void SetScorer(Scorer scorer)
    ///     {
    ///     }
    ///
    ///     // accept docs out of order (for a bit set it doesn't matter)
    ///     public override bool AcceptsDocsOutOfOrder
    ///     {
    ///         get { return true; }
    ///     }
    ///
    ///     public override void Collect(int doc)
    ///     {
    ///         bits.Set(doc + docBase, true);
    ///     }
    ///
    ///     public override void SetNextReader(IndexReader reader, int docBase)
    ///     {
    ///         this.docBase = docBase;
    ///     }
    /// }
    ///
    /// Searcher searcher = new IndexSearcher(indexReader);
    /// BitArray bits = new BitArray(indexReader.MaxDoc);
    /// searcher.Search(query, new BitSetCollector(bits));
    /// </code>
    /// <para>
    /// Re-basing is not always required; a collector that merely counts the
    /// total number of hits, for example, can skip it.
    /// </para>
    /// <para>
    /// <b>NOTE:</b> before 2.9, Lucene silently discarded hits with
    /// score &lt;= 0. From 2.9 on, the core collectors no longer do so. Such
    /// hits are very unusual (a negative query boost, or a function query
    /// returning negative custom scores, could produce them). To restore the
    /// old behavior, use <see cref="PositiveScoresOnlyCollector" />.
    /// </para>
    /// <para>
    /// <b>NOTE:</b> this API is experimental and might change in incompatible
    /// ways in the next release.
    /// </para>
    /// </remarks>
    /// <since>2.9</since>
    public abstract class Collector
    {
        /// <summary>
        /// Invoked ahead of successive <see cref="Collect(int)" /> calls.
        /// An implementation that needs the score of the document handed to
        /// <see cref="Collect(int)" /> should keep the supplied scorer and
        /// call its <c>Score()</c> method when the score is required.
        /// </summary>
        /// <param name="scorer">Scorer for the hits that are about to be collected.</param>
        public abstract void SetScorer(Scorer scorer);

        /// <summary>
        /// Invoked once per document matching the query, with the unbased
        /// (reader-relative) document number.
        /// </summary>
        /// <remarks>
        /// This runs inside an inner search loop. For good search
        /// performance, implementations should not call
        /// <see cref="Searcher.Doc(int)" /> or
        /// <see cref="Lucene.Net.Index.IndexReader.Document(int)" /> for
        /// every hit; doing so can slow searches by an order of magnitude or
        /// more.
        /// </remarks>
        /// <param name="doc">Unbased document number of the matching document.</param>
        public abstract void Collect(int doc);

        /// <summary>
        /// Invoked before hits are collected from each IndexReader. Every doc
        /// id subsequently passed to <see cref="Collect(int)" /> corresponds
        /// to <paramref name="reader" />.
        /// </summary>
        /// <param name="reader">The next IndexReader.</param>
        /// <param name="docBase">Value to add to the current IndexReader's
        /// internal document ids in order to re-base the ids received by
        /// <see cref="Collect(int)" />.</param>
        public abstract void SetNextReader(IndexReader reader, int docBase);

        /// <summary>
        /// Gets whether this collector can do without the matching docIDs
        /// being delivered to <see cref="Collect" /> in int sort order
        /// (smallest to largest).
        /// </summary>
        /// <remarks>
        /// <para>
        /// Most Lucene Query implementations visit matching docIDs in order.
        /// Some queries, however (currently limited to certain cases of
        /// <see cref="BooleanQuery" />), can search faster when the
        /// <c>Collector</c> lets them deliver docIDs out of order.
        /// </para>
        /// <para>
        /// Many collectors don't mind receiving docIDs out of order, so it is
        /// important that they return <c>true</c> here.
        /// </para>
        /// </remarks>
        /// <value><c>true</c> if docIDs may be delivered out of order;
        /// <c>false</c> if they must arrive in increasing order.</value>
        public abstract bool AcceptsDocsOutOfOrder { get; }
    }
}