Lucene.Net
3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
Main Page
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Properties
Pages
core
Search
Collector.cs
Go to the documentation of this file.
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
18
using
System;
19
20
using
IndexReader
= Lucene.Net.Index.IndexReader;
21
22
namespace Lucene.Net.Search
{
    /// <summary>
    /// Expert: collectors are primarily meant to be used to gather raw results
    /// from a search, and to implement sorting or custom result filtering,
    /// collation, etc.
    /// </summary>
    /// <remarks>
    /// <para>
    /// Lucene's core collectors are derived from Collector. Likely your
    /// application can use one of these classes, or subclass
    /// <see cref="TopDocsCollector{T}" />, instead of implementing Collector
    /// directly:
    /// </para>
    /// <list type="bullet">
    /// <item><description>
    /// <see cref="TopDocsCollector{T}" /> is an abstract base class that assumes
    /// you will retrieve the top N docs, according to some criteria, after
    /// collection is done.
    /// </description></item>
    /// <item><description>
    /// <see cref="TopScoreDocCollector" /> is a concrete subclass of
    /// <see cref="TopDocsCollector{T}" /> and sorts according to score + docID.
    /// This is used internally by the <see cref="IndexSearcher" /> search
    /// methods that do not take an explicit <see cref="Sort" />. It is likely
    /// the most frequently used collector.
    /// </description></item>
    /// <item><description>
    /// <see cref="TopFieldCollector" /> subclasses
    /// <see cref="TopDocsCollector{T}" /> and sorts according to a specified
    /// <see cref="Sort" /> object (sort by field). This is used internally by
    /// the <see cref="IndexSearcher" /> search methods that take an explicit
    /// <see cref="Sort" />.
    /// </description></item>
    /// <item><description>
    /// <see cref="TimeLimitingCollector" />, which wraps any other Collector and
    /// aborts the search if it's taken too much time.
    /// </description></item>
    /// <item><description>
    /// <see cref="PositiveScoresOnlyCollector" /> wraps any other Collector and
    /// prevents collection of hits whose score is &lt;= 0.0.
    /// </description></item>
    /// </list>
    /// <para>
    /// Collector decouples the score from the collected doc: the score
    /// computation is skipped entirely if it's not needed. Collectors that do
    /// need the score should implement the <see cref="SetScorer(Scorer)" />
    /// method, to hold onto the passed <see cref="Scorer" /> instance, and call
    /// <see cref="Scorer.Score()" /> within the <see cref="Collect(int)" />
    /// method to compute the current hit's score. If your collector may request
    /// the score for a single hit multiple times, you should use
    /// <see cref="ScoreCachingWrappingScorer" />.
    /// </para>
    /// <para>
    /// <b>NOTE:</b> The doc that is passed to the <see cref="Collect(int)" />
    /// method is relative to the current reader. If your collector needs to
    /// resolve this to the docID space of the Multi*Reader, you must re-base it
    /// by recording the docBase from the most recent
    /// <see cref="SetNextReader(IndexReader, int)" /> call. Here's a simple
    /// example showing how to collect docIDs into a bit array:
    /// </para>
    /// <code>
    /// class BitArrayCollector : Collector
    /// {
    ///     private readonly System.Collections.BitArray bits;
    ///     private int docBase;
    ///
    ///     public BitArrayCollector(System.Collections.BitArray bits)
    ///     {
    ///         this.bits = bits;
    ///     }
    ///
    ///     // ignore scorer
    ///     public override void SetScorer(Scorer scorer)
    ///     {
    ///     }
    ///
    ///     // accept docs out of order (for a bit array it doesn't matter)
    ///     public override bool AcceptsDocsOutOfOrder
    ///     {
    ///         get { return true; }
    ///     }
    ///
    ///     public override void Collect(int doc)
    ///     {
    ///         bits.Set(doc + docBase, true);
    ///     }
    ///
    ///     public override void SetNextReader(IndexReader reader, int docBase)
    ///     {
    ///         this.docBase = docBase;
    ///     }
    /// }
    ///
    /// Searcher searcher = new IndexSearcher(indexReader);
    /// System.Collections.BitArray bits = new System.Collections.BitArray(indexReader.MaxDoc);
    /// searcher.Search(query, new BitArrayCollector(bits));
    /// </code>
    /// <para>
    /// Not all collectors will need to rebase the docID. For example, a
    /// collector that simply counts the total number of hits would skip it.
    /// </para>
    /// <para>
    /// <b>NOTE:</b> Prior to 2.9, Lucene silently filtered out hits with score
    /// &lt;= 0. As of 2.9, the core Collectors no longer do that. It's very
    /// unusual to have such hits (a negative query boost, or function query
    /// returning negative custom scores, could cause it to happen). If you need
    /// that behavior, use <see cref="PositiveScoresOnlyCollector" />.
    /// </para>
    /// <para>
    /// <b>NOTE:</b> This API is experimental and might change in incompatible
    /// ways in the next release.
    /// </para>
    /// <para>Since: 2.9</para>
    /// </remarks>
    public abstract class Collector
    {
        /// <summary>
        /// Called before successive calls to <see cref="Collect(int)" />.
        /// Implementations that need the score of the current document
        /// (passed-in to <see cref="Collect(int)" />) should save the passed-in
        /// Scorer and call <see cref="Scorer.Score()" /> when needed.
        /// </summary>
        /// <param name="scorer">
        /// Scorer to keep if the score of collected documents is needed.
        /// </param>
        public abstract void SetScorer(Scorer scorer);

        /// <summary>
        /// Called once for every document matching a query, with the unbased
        /// document number.
        /// </summary>
        /// <remarks>
        /// Note: This is called in an inner search loop. For good search
        /// performance, implementations of this method should not call
        /// <see cref="Searcher.Doc(int)" /> or
        /// <see cref="Lucene.Net.Index.IndexReader.Document(int)" /> on every
        /// hit. Doing so can slow searches by an order of magnitude or more.
        /// </remarks>
        /// <param name="doc">
        /// Document number of the hit, relative to the current reader (not yet
        /// re-based).
        /// </param>
        public abstract void Collect(int doc);

        /// <summary>
        /// Called before collecting from each IndexReader. All doc ids in
        /// <see cref="Collect(int)" /> will correspond to
        /// <paramref name="reader" />.
        /// </summary>
        /// <remarks>
        /// Add <paramref name="docBase" /> to the current IndexReader's internal
        /// document id to re-base ids in <see cref="Collect(int)" />.
        /// </remarks>
        /// <param name="reader">Next IndexReader.</param>
        /// <param name="docBase">
        /// Offset to add to the reader-relative document ids received by
        /// <see cref="Collect(int)" /> in order to re-base them.
        /// </param>
        public abstract void SetNextReader(IndexReader reader, int docBase);

        /// <summary>
        /// Return <c>true</c> if this collector does not require the matching
        /// docIDs to be delivered in int sort order (smallest to largest) to
        /// <see cref="Collect(int)" />.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Most Lucene Query implementations will visit matching docIDs in
        /// order. However, some queries (currently limited to certain cases of
        /// <see cref="BooleanQuery" />) can achieve faster searching if the
        /// <c>Collector</c> allows them to deliver the docIDs out of order.
        /// </para>
        /// <para>
        /// Many collectors don't mind getting docIDs out of order, so it's
        /// important to return <c>true</c> here.
        /// </para>
        /// </remarks>
        /// <value>
        /// <c>true</c> if docIDs may be delivered out of order; otherwise
        /// <c>false</c>.
        /// </value>
        public abstract bool AcceptsDocsOutOfOrder { get; }
    }
}
Generated on Thu Jan 3 2013 02:34:12 for Lucene.Net by
1.8.3