Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
BooleanScorer.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 
20 using IndexReader = Lucene.Net.Index.IndexReader;
21 
22 namespace Lucene.Net.Search
23 {
24 
25  /* Description from Doug Cutting (excerpted from
26  * LUCENE-1483):
27  *
28  * BooleanScorer uses a ~16k array to score windows of
29  * docs. So it scores docs 0-16k first, then docs 16-32k,
30  * etc. For each window it iterates through all query terms
31  * and accumulates a score in table[doc%16k]. It also stores
32  * in the table a bitmask representing which terms
33  * contributed to the score. Non-zero scores are chained in
34  * a linked list. At the end of scoring each window it then
35  * iterates through the linked list and, if the bitmask
36  * matches the boolean constraints, collects a hit. For
37  * boolean queries with lots of frequent terms this can be
38  * much faster, since it does not need to update a priority
39  * queue for each posting, instead performing constant-time
40  * operations per posting. The only downside is that it
41  * results in hits being delivered out-of-order within the
42  * window, which means it cannot be nested within other
43  * scorers. But it works well as a top-level scorer.
44  *
45  * The new BooleanScorer2 implementation instead works by
46  * merging priority queues of postings, albeit with some
47  * clever tricks. For example, a pure conjunction (all terms
48  * required) does not require a priority queue. Instead it
49  * sorts the posting streams at the start, then repeatedly
50  * skips the first to to the last. If the first ever equals
51  * the last, then there's a hit. When some terms are
52  * required and some terms are optional, the conjunction can
53  * be evaluated first, then the optional terms can all skip
54  * to the match and be added to the score. Thus the
55  * conjunction can reduce the number of priority queue
56  * updates for the optional terms. */
57 
58  public sealed class BooleanScorer:Scorer
59  {
60  private void InitBlock()
61  {
62  bucketTable = new BucketTable();
63  }
64 
65  private sealed class BooleanScorerCollector:Collector
66  {
67  private BucketTable bucketTable;
68  private int mask;
69  private Scorer scorer;
70 
71  public BooleanScorerCollector(int mask, BucketTable bucketTable)
72  {
73  this.mask = mask;
74  this.bucketTable = bucketTable;
75  }
76  public override void Collect(int doc)
77  {
78  BucketTable table = bucketTable;
79  int i = doc & Lucene.Net.Search.BooleanScorer.BucketTable.MASK;
80  Bucket bucket = table.buckets[i];
81  if (bucket == null)
82  table.buckets[i] = bucket = new Bucket();
83 
84  if (bucket.doc != doc)
85  {
86  // invalid bucket
87  bucket.doc = doc; // set doc
88  bucket.score = scorer.Score(); // initialize score
89  bucket.bits = mask; // initialize mask
90  bucket.coord = 1; // initialize coord
91 
92  bucket.next = table.first; // push onto valid list
93  table.first = bucket;
94  }
95  else
96  {
97  // valid bucket
98  bucket.score += scorer.Score(); // increment score
99  bucket.bits |= mask; // add bits in mask
100  bucket.coord++; // increment coord
101  }
102  }
103 
104  public override void SetNextReader(IndexReader reader, int docBase)
105  {
106  // not needed by this implementation
107  }
108 
109  public override void SetScorer(Scorer scorer)
110  {
111  this.scorer = scorer;
112  }
113 
114  public override bool AcceptsDocsOutOfOrder
115  {
116  get { return true; }
117  }
118  }
119 
120  // An internal class which is used in score(Collector, int) for setting the
121  // current score. This is required since Collector exposes a setScorer method
122  // and implementations that need the score will call scorer.score().
123  // Therefore the only methods that are implemented are score() and doc().
124  private sealed class BucketScorer:Scorer
125  {
126 
127  internal float score;
128  internal int doc = NO_MORE_DOCS;
129 
130  public BucketScorer():base(null)
131  {
132  }
133 
134  public override int Advance(int target)
135  {
136  return NO_MORE_DOCS;
137  }
138 
139  public override int DocID()
140  {
141  return doc;
142  }
143 
144  public override int NextDoc()
145  {
146  return NO_MORE_DOCS;
147  }
148 
149  public override float Score()
150  {
151  return score;
152  }
153  }
154 
155  internal sealed class Bucket
156  {
157  internal int doc = - 1; // tells if bucket is valid
158  internal float score; // incremental score
159  internal int bits; // used for bool constraints
160  internal int coord; // count of terms in score
161  internal Bucket next; // next valid bucket
162  }
163 
165  internal sealed class BucketTable
166  {
167  private void InitBlock()
168  {
169  buckets = new Bucket[SIZE];
170  }
171  public const int SIZE = 1 << 11;
172  public static readonly int MASK;
173 
174  internal Bucket[] buckets;
175  internal Bucket first = null; // head of valid list
176 
177  public BucketTable()
178  {
179  InitBlock();
180  }
181 
182  public Collector NewCollector(int mask)
183  {
184  return new BooleanScorerCollector(mask, this);
185  }
186 
187  public int Size()
188  {
189  return SIZE;
190  }
191  static BucketTable()
192  {
193  MASK = SIZE - 1;
194  }
195  }
196 
197  internal sealed class SubScorer
198  {
199  public Scorer scorer;
200  public bool required = false;
201  public bool prohibited = false;
202  public Collector collector;
203  public SubScorer next;
204 
205  public SubScorer(Scorer scorer, bool required, bool prohibited, Collector collector, SubScorer next)
206  {
207  this.scorer = scorer;
208  this.required = required;
209  this.prohibited = prohibited;
210  this.collector = collector;
211  this.next = next;
212  }
213  }
214 
215  private SubScorer scorers = null;
216  private BucketTable bucketTable;
217  private int maxCoord = 1;
218  private float[] coordFactors;
219  private int requiredMask = 0;
220  private int prohibitedMask = 0;
221  private int nextMask = 1;
222  private int minNrShouldMatch;
223  private int end;
224  private Bucket current;
225  private int doc = - 1;
226 
227  public /*internal*/ BooleanScorer(Similarity similarity, int minNrShouldMatch,
228  System.Collections.Generic.List<Scorer> optionalScorers, System.Collections.Generic.List<Scorer> prohibitedScorers)
229  : base(similarity)
230  {
231  InitBlock();
232  this.minNrShouldMatch = minNrShouldMatch;
233 
234  if (optionalScorers != null && optionalScorers.Count > 0)
235  {
236  foreach (Scorer scorer in optionalScorers)
237  {
238  maxCoord++;
239  if (scorer.NextDoc() != NO_MORE_DOCS)
240  {
241  scorers = new SubScorer(scorer, false, false, bucketTable.NewCollector(0), scorers);
242  }
243  }
244  }
245 
246  if (prohibitedScorers != null && prohibitedScorers.Count > 0)
247  {
248  foreach(Scorer scorer in prohibitedScorers)
249  {
250  int mask = nextMask;
251  nextMask = nextMask << 1;
252  prohibitedMask |= mask; // update prohibited mask
253  if (scorer.NextDoc() != NO_MORE_DOCS)
254  {
255  scorers = new SubScorer(scorer, false, true, bucketTable.NewCollector(mask), scorers);
256  }
257  }
258  }
259 
260  coordFactors = new float[maxCoord];
261  Similarity sim = Similarity;
262  for (int i = 0; i < maxCoord; i++)
263  {
264  coordFactors[i] = sim.Coord(i, maxCoord - 1);
265  }
266  }
267 
268  // firstDocID is ignored since nextDoc() initializes 'current'
269  public /*protected internal*/ override bool Score(Collector collector, int max, int firstDocID)
270  {
271  bool more;
272  Bucket tmp;
273  BucketScorer bs = new BucketScorer();
274  // The internal loop will set the score and doc before calling collect.
275  collector.SetScorer(bs);
276  do
277  {
278  bucketTable.first = null;
279 
280  while (current != null)
281  {
282  // more queued
283 
284  // check prohibited & required
285  if ((current.bits & prohibitedMask) == 0 && (current.bits & requiredMask) == requiredMask)
286  {
287 
288  if (current.doc >= max)
289  {
290  tmp = current;
291  current = current.next;
292  tmp.next = bucketTable.first;
293  bucketTable.first = tmp;
294  continue;
295  }
296 
297  if (current.coord >= minNrShouldMatch)
298  {
299  bs.score = current.score * coordFactors[current.coord];
300  bs.doc = current.doc;
301  collector.Collect(current.doc);
302  }
303  }
304 
305  current = current.next; // pop the queue
306  }
307 
308  if (bucketTable.first != null)
309  {
310  current = bucketTable.first;
311  bucketTable.first = current.next;
312  return true;
313  }
314 
315  // refill the queue
316  more = false;
317  end += BucketTable.SIZE;
318  for (SubScorer sub = scorers; sub != null; sub = sub.next)
319  {
320  int subScorerDocID = sub.scorer.DocID();
321  if (subScorerDocID != NO_MORE_DOCS)
322  {
323  more |= sub.scorer.Score(sub.collector, end, subScorerDocID);
324  }
325  }
326  current = bucketTable.first;
327  }
328  while (current != null || more);
329 
330  return false;
331  }
332 
333  public override int Advance(int target)
334  {
335  throw new System.NotSupportedException();
336  }
337 
338  public override int DocID()
339  {
340  return doc;
341  }
342 
343  public override int NextDoc()
344  {
345  bool more;
346  do
347  {
348  while (bucketTable.first != null)
349  {
350  // more queued
351  current = bucketTable.first;
352  bucketTable.first = current.next; // pop the queue
353 
354  // check prohibited & required, and minNrShouldMatch
355  if ((current.bits & prohibitedMask) == 0 && (current.bits & requiredMask) == requiredMask && current.coord >= minNrShouldMatch)
356  {
357  return doc = current.doc;
358  }
359  }
360 
361  // refill the queue
362  more = false;
363  end += BucketTable.SIZE;
364  for (SubScorer sub = scorers; sub != null; sub = sub.next)
365  {
366  Scorer scorer = sub.scorer;
367  sub.collector.SetScorer(scorer);
368  int doc = scorer.DocID();
369  while (doc < end)
370  {
371  sub.collector.Collect(doc);
372  doc = scorer.NextDoc();
373  }
374  more |= (doc != NO_MORE_DOCS);
375  }
376  }
377  while (bucketTable.first != null || more);
378 
379  return this.doc = NO_MORE_DOCS;
380  }
381 
382  public override float Score()
383  {
384  return current.score * coordFactors[current.coord];
385  }
386 
387  public override void Score(Collector collector)
388  {
389  Score(collector, System.Int32.MaxValue, NextDoc());
390  }
391 
392  public override System.String ToString()
393  {
394  System.Text.StringBuilder buffer = new System.Text.StringBuilder();
395  buffer.Append("boolean(");
396  for (SubScorer sub = scorers; sub != null; sub = sub.next)
397  {
398  buffer.Append(sub.scorer.ToString());
399  buffer.Append(" ");
400  }
401  buffer.Append(")");
402  return buffer.ToString();
403  }
404  }
405 }