Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
BooleanQuery.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections;
20 using Lucene.Net.Index;
21 using Lucene.Net.Support;
22 using IndexReader = Lucene.Net.Index.IndexReader;
23 using ToStringUtils = Lucene.Net.Util.ToStringUtils;
24 using Occur = Lucene.Net.Search.Occur;
25 
26 namespace Lucene.Net.Search
27 {
28 
29  /// <summary>A Query that matches documents matching boolean combinations of other
30  /// queries, e.g. <see cref="TermQuery" />s, <see cref="PhraseQuery" />s or other
31  /// BooleanQuerys.
32  /// </summary>
33  [Serializable]
34  public class BooleanQuery : Query, System.Collections.Generic.IEnumerable<BooleanClause>, System.ICloneable
35  {
/// <summary>
/// Similarity delegator used when coord scoring is disabled. It hands the given
/// <see cref="Similarity" /> to the <see cref="SimilarityDelegator" /> base
/// constructor and overrides <see cref="Coord(int,int)" /> to always return 1.0f.
/// </summary>
[Serializable]
private class AnonymousClassSimilarityDelegator : SimilarityDelegator
{
    // The BooleanQuery that created this delegator.
    private BooleanQuery enclosingInstance;

    internal AnonymousClassSimilarityDelegator(BooleanQuery enclosingInstance, Lucene.Net.Search.Similarity Param1)
        : base(Param1)
    {
        InitBlock(enclosingInstance);
    }

    /// <summary>Gets the query that created this delegator.</summary>
    public BooleanQuery Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    /// <summary>Always returns 1.0f, regardless of the overlap values.</summary>
    public override float Coord(int overlap, int maxOverlap)
    {
        return 1.0f;
    }

    // Records the owning query; called from the constructor.
    private void InitBlock(BooleanQuery enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;
    }
}
61 
62  private static int _maxClauses = 1024;
63 
/// <summary>Thrown when an attempt is made to add more than <see cref="MaxClauseCount" />
/// clauses. This typically happens if
/// a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery
/// is expanded to many terms during search.
/// </summary>
[Serializable]
public class TooManyClauses : System.SystemException
{
    /// <summary>Creates the exception; the message is produced by <see cref="Message" />.</summary>
    public TooManyClauses()
    {
    }

    /// <summary>
    /// Deserialization constructor. A [Serializable] exception type needs this
    /// constructor so that the runtime serializers can recreate an instance.
    /// </summary>
    /// <param name="info">Holds the serialized object data.</param>
    /// <param name="context">Describes the source or destination of the serialized data.</param>
    protected TooManyClauses(System.Runtime.Serialization.SerializationInfo info, System.Runtime.Serialization.StreamingContext context)
        : base(info, context)
    {
    }

    /// <summary>Gets a message reporting the current value of the static clause limit.</summary>
    public override System.String Message
    {
        get
        {
            // Reads the limit at the time the message is requested, not when the exception was created.
            return "maxClauseCount is set to " + Lucene.Net.Search.BooleanQuery._maxClauses;
        }
    }
}
81 
/// <summary>
/// Gets or sets the maximum number of clauses a query may hold (the backing field starts at 1024).
/// <see cref="Add(BooleanClause)" /> throws <see cref="TooManyClauses" /> once a query
/// already holds this many clauses.
/// </summary>
/// <exception cref="ArgumentException">The value being set is smaller than 1.</exception>
public static int MaxClauseCount
{
    get
    {
        return _maxClauses;
    }
    set
    {
        if (value >= 1)
        {
            _maxClauses = value;
            return;
        }

        // Anything below 1 is rejected and the current limit is left untouched.
        throw new ArgumentException("maxClauseCount must be >= 1");
    }
}
96 
97  private EquatableList<BooleanClause> clauses = new EquatableList<BooleanClause>();
98  private bool disableCoord;
99 
/// <summary>Creates a boolean query without clauses and with coord scoring left enabled.</summary>
public BooleanQuery()
    : this(false)
{
}
104 
/// <summary>
/// Creates a boolean query without clauses, optionally switching off the
/// <see cref="Similarity.Coord(int,int)" /> factor during scoring.
/// The coord factor does not make sense for most automatically generated queries,
/// such as <see cref="WildcardQuery" /> and <see cref="FuzzyQuery" />.
/// </summary>
/// <param name="disableCoord">true to disable <see cref="Similarity.Coord(int,int)" /> in scoring.</param>
public BooleanQuery(bool disableCoord)
{
    this.disableCoord = disableCoord;
}
119 
/// <summary>
/// Tells whether <see cref="Similarity.Coord(int,int)" /> is disabled in scoring
/// for this query instance.
/// </summary>
/// <returns>The flag that was passed to <see cref="BooleanQuery(bool)" />, or false if the default constructor was used.</returns>
/// <seealso cref="BooleanQuery(bool)" />
public virtual bool IsCoordDisabled()
{
    bool coordDisabled = this.disableCoord;
    return coordDisabled;
}
129 
/// <summary>
/// Returns the similarity supplied by the base class. When coord scoring is
/// disabled, that similarity is wrapped in a delegator whose Coord always returns 1.0f.
/// </summary>
/// <param name="searcher">Passed through to the base implementation.</param>
public override Similarity GetSimilarity(Searcher searcher)
{
    Similarity inherited = base.GetSimilarity(searcher);
    if (!disableCoord)
    {
        // coord stays active: use the inherited similarity unchanged
        return inherited;
    }

    // coord was disabled on construction: wrap to neutralize the factor
    return new AnonymousClassSimilarityDelegator(this, inherited);
}
142 
143  protected internal int minNrShouldMatch = 0;
144 
/// <summary>
/// Gets or sets the minimum number of optional BooleanClauses that must be
/// satisfied for a document to match.
/// <para>
/// By default no optional clauses are necessary for a match
/// (unless there are no required clauses). Once a value is set here,
/// that many optional clauses are required.
/// </para>
/// <para>
/// This setting is independent of which specific clauses are required
/// or prohibited; the number is compared only against the count of
/// matching optional clauses.
/// </para>
/// </summary>
public virtual int MinimumNumberShouldMatch
{
    get
    {
        return this.minNrShouldMatch;
    }
    set
    {
        minNrShouldMatch = value;
    }
}
164 
/// <summary>
/// Wraps <paramref name="query" /> and <paramref name="occur" /> in a new
/// <see cref="BooleanClause" /> and adds it through <see cref="Add(BooleanClause)" />.
/// </summary>
/// <param name="query">The sub-query for the new clause.</param>
/// <param name="occur">How the new clause takes part in matching.</param>
/// <exception cref="TooManyClauses">The query already holds the maximum number of clauses.</exception>
/// <seealso cref="MaxClauseCount" />
public virtual void Add(Query query, Occur occur)
{
    BooleanClause clause = new BooleanClause(query, occur);
    Add(clause);
}
175 
/// <summary>Appends a clause to this boolean query.</summary>
/// <param name="clause">The clause to append.</param>
/// <exception cref="TooManyClauses">The query already holds the maximum number of clauses.</exception>
/// <seealso cref="MaxClauseCount" />
public virtual void Add(BooleanClause clause)
{
    // The limit is checked before the clause is appended, so the list never grows past it here.
    bool limitReached = clauses.Count >= _maxClauses;
    if (limitReached)
    {
        throw new TooManyClauses();
    }

    clauses.Add(clause);
}
187 
/// <summary>Returns the clauses of this query as an array (the result of ToArray on the clause list).</summary>
public virtual BooleanClause[] GetClauses()
{
    BooleanClause[] clauseArray = clauses.ToArray();
    return clauseArray;
}
193 
/// <summary>
/// Gets the list of clauses in this query. This is the query's own backing list,
/// not a copy, so changes made through it bypass the limit check performed by
/// <see cref="Add(BooleanClause)" />.
/// </summary>
public virtual System.Collections.Generic.List<BooleanClause> Clauses
{
    get
    {
        return this.clauses;
    }
}
199 
/// <summary>
/// Returns an enumerator over the clauses in this query.
/// </summary>
/// <returns>The enumerator obtained from the clause list.</returns>
public System.Collections.Generic.IEnumerator<BooleanClause> GetEnumerator()
{
    System.Collections.Generic.IEnumerator<BooleanClause> clauseEnumerator = clauses.GetEnumerator();
    return clauseEnumerator;
}
208  /// <summary> Expert: the Weight for BooleanQuery, used to
209  /// normalize, score and explain these queries.
210  ///
211  /// <p/>NOTE: this API and implementation is subject to
212  /// change suddenly in the next release.<p/>
213  /// </summary>
214  [Serializable]
215  protected internal class BooleanWeight:Weight
216  {
// The BooleanQuery this weight was created for.
private BooleanQuery enclosingInstance;

/// <summary>Gets the query this weight was created for.</summary>
public BooleanQuery Enclosing_Instance
{
    get { return enclosingInstance; }
}

// Records the owning query; called first thing in the constructor.
private void InitBlock(BooleanQuery enclosingInstance)
{
    this.enclosingInstance = enclosingInstance;
}
230  /// <summary>The Similarity implementation. </summary>
231  protected internal Similarity similarity;
232  protected internal System.Collections.Generic.List<Weight> weights;
233 
/// <summary>
/// Creates the weight for a boolean query: stores the owning query, obtains its
/// similarity for <paramref name="searcher" />, and creates one sub-weight per
/// clause, in clause order.
/// </summary>
/// <param name="enclosingInstance">The boolean query this weight belongs to.</param>
/// <param name="searcher">Searcher used to obtain the similarity and to create the sub-weights.</param>
public BooleanWeight(BooleanQuery enclosingInstance, Searcher searcher)
{
    InitBlock(enclosingInstance);
    this.similarity = Enclosing_Instance.GetSimilarity(searcher);

    // weights[i] corresponds to clauses[i]; the other methods rely on this pairing.
    weights = new System.Collections.Generic.List<Weight>(Enclosing_Instance.clauses.Count);
    foreach (BooleanClause clause in Enclosing_Instance.clauses)
    {
        Weight clauseWeight = clause.Query.CreateWeight(searcher);
        weights.Add(clauseWeight);
    }
}
244 
/// <summary>Gets the boolean query this weight was created for.</summary>
public override Query Query
{
    get
    {
        return Enclosing_Instance;
    }
}
249 
/// <summary>Gets the boost of the owning boolean query.</summary>
public override float Value
{
    get
    {
        return Enclosing_Instance.Boost;
    }
}
254 
/// <summary>
/// Sums the squared weights of all non-prohibited clauses and multiplies the
/// total by the square of the owning query's boost.
/// </summary>
/// <returns>The boosted sum over the non-prohibited sub-weights.</returns>
public override float GetSumOfSquaredWeights()
{
    float total = 0.0f;
    for (int clauseIndex = 0; clauseIndex < weights.Count; clauseIndex++)
    {
        // Every sub-weight is asked, prohibited or not, in case the call has side effects.
        float clauseSum = weights[clauseIndex].GetSumOfSquaredWeights();
        if (Enclosing_Instance.clauses[clauseIndex].IsProhibited)
        {
            // prohibited clauses do not contribute to the sum
            continue;
        }
        total += clauseSum;
    }

    // apply the query boost, squared, to the accumulated sum
    float boost = Enclosing_Instance.Boost;
    total *= boost * boost;
    return total;
}
271 
272 
/// <summary>
/// Multiplies <paramref name="norm" /> by the owning query's boost and passes the
/// result to every sub-weight.
/// </summary>
/// <param name="norm">The normalization factor before this query's boost is applied.</param>
public override void Normalize(float norm)
{
    float boostedNorm = norm * Enclosing_Instance.Boost;
    for (int i = 0; i < weights.Count; i++)
    {
        // prohibited clauses are normalized too, in case the call has side effects
        weights[i].Normalize(boostedNorm);
    }
}
282 
/// <summary>
/// Builds an explanation of this boolean query's score for <paramref name="doc" />.
/// Each clause whose weight yields a scorer is explained in turn. Matching
/// non-prohibited clauses are summed. A match on a prohibited clause or a miss on
/// a required clause makes the whole explanation a non-match, as does matching
/// fewer optional clauses than the minimum. The sum is multiplied by the
/// similarity's coord factor unless that factor is exactly 1.0f.
/// </summary>
/// <param name="reader">Index reader passed to the sub-weights.</param>
/// <param name="doc">Document id passed to the sub-weights' Explain.</param>
/// <returns>The sum explanation, or a product explanation wrapping it when coord applies.</returns>
public override Explanation Explain(IndexReader reader, int doc)
{
    int minShouldMatch = Enclosing_Instance.MinimumNumberShouldMatch;

    ComplexExplanation sumExpl = new ComplexExplanation();
    sumExpl.Description = "sum of:";

    int matchedClauses = 0;   // numerator passed to Coord
    int scorableClauses = 0;  // denominator passed to Coord
    float scoreSum = 0.0f;
    bool violated = false;
    int optionalMatches = 0;

    // Clauses and weights are walked in lockstep: the n-th weight belongs to the n-th clause.
    System.Collections.Generic.IEnumerator<BooleanClause> clauseIter = Enclosing_Instance.clauses.GetEnumerator();
    foreach (Weight weight in weights)
    {
        clauseIter.MoveNext();
        BooleanClause clause = clauseIter.Current;

        if (weight.Scorer(reader, true, true) == null)
        {
            // a clause without a scorer is left out of the explanation entirely
            continue;
        }

        Explanation clauseExpl = weight.Explain(reader, doc);
        bool prohibited = clause.IsProhibited;
        if (!prohibited)
        {
            scorableClauses++;
        }

        if (!clauseExpl.IsMatch)
        {
            if (clause.IsRequired)
            {
                Explanation missing = new Explanation(0.0f, "no match on required clause (" + clause.Query.ToString() + ")");
                missing.AddDetail(clauseExpl);
                sumExpl.AddDetail(missing);
                violated = true;
            }
            continue;
        }

        if (prohibited)
        {
            Explanation forbidden = new Explanation(0.0f, "match on prohibited clause (" + clause.Query.ToString() + ")");
            forbidden.AddDetail(clauseExpl);
            sumExpl.AddDetail(forbidden);
            violated = true;
        }
        else
        {
            sumExpl.AddDetail(clauseExpl);
            scoreSum += clauseExpl.Value;
            matchedClauses++;
        }

        if (clause.Occur == Occur.SHOULD)
        {
            optionalMatches++;
        }
    }

    if (violated)
    {
        sumExpl.Match = false;
        sumExpl.Value = 0.0f;
        sumExpl.Description = "Failure to meet condition(s) of required/prohibited clause(s)";
        return sumExpl;
    }

    if (optionalMatches < minShouldMatch)
    {
        sumExpl.Match = false;
        sumExpl.Value = 0.0f;
        sumExpl.Description = "Failure to match minimum number of optional clauses: " + minShouldMatch;
        return sumExpl;
    }

    sumExpl.Match = matchedClauses > 0;
    sumExpl.Value = scoreSum;

    float coordFactor = similarity.Coord(matchedClauses, scorableClauses);
    if (coordFactor == 1.0f)
    {
        // coord is a no-op: return the sum directly instead of wrapping it
        return sumExpl;
    }

    ComplexExplanation product = new ComplexExplanation(sumExpl.IsMatch, scoreSum * coordFactor, "product of:");
    product.AddDetail(sumExpl);
    product.AddDetail(new Explanation(coordFactor, "coord(" + matchedClauses + "/" + scorableClauses + ")"));
    return product;
}
365 
/// <summary>
/// Creates the scorer for this boolean query. Sub-scorers are collected per clause
/// kind (required, prohibited, optional). A <c>BooleanScorer</c> is returned when
/// out-of-order scoring is allowed for a top-level scorer with no required
/// sub-scorers and fewer than 32 prohibited ones; otherwise a <c>BooleanScorer2</c>.
/// </summary>
/// <param name="reader">Index reader passed to the sub-weights.</param>
/// <param name="scoreDocsInOrder">true if the caller needs documents scored in order.</param>
/// <param name="topScorer">true if the returned scorer is used as a top-level scorer.</param>
/// <returns>
/// A scorer, or null when a required clause has no scorer, when there are neither
/// required nor optional sub-scorers, or when there are fewer optional sub-scorers
/// than the minimum number that should match.
/// </returns>
public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
{
    var required = new System.Collections.Generic.List<Scorer>();
    var prohibited = new System.Collections.Generic.List<Scorer>();
    var optional = new System.Collections.Generic.List<Scorer>();

    // Clauses and weights are walked in lockstep: the n-th weight belongs to the n-th clause.
    System.Collections.Generic.IEnumerator<BooleanClause> clauseIter = Enclosing_Instance.clauses.GetEnumerator();
    for (int i = 0; i < weights.Count; i++)
    {
        clauseIter.MoveNext();
        BooleanClause clause = clauseIter.Current;
        Scorer subScorer = weights[i].Scorer(reader, true, false);

        if (subScorer == null)
        {
            if (clause.IsRequired)
            {
                // a required clause without a scorer means no scorer for the whole query
                return null;
            }
            continue;
        }

        if (clause.IsRequired)
        {
            required.Add(subScorer);
        }
        else if (clause.IsProhibited)
        {
            prohibited.Add(subScorer);
        }
        else
        {
            optional.Add(subScorer);
        }
    }

    int minShouldMatch = Enclosing_Instance.minNrShouldMatch;

    // Check if we can return a BooleanScorer
    bool canUseBooleanScorer = !scoreDocsInOrder && topScorer && required.Count == 0 && prohibited.Count < 32;
    if (canUseBooleanScorer)
    {
        return new BooleanScorer(similarity, minShouldMatch, optional, prohibited);
    }

    // no required and no optional sub-scorers at all
    bool nothingToScore = required.Count == 0 && optional.Count == 0;

    // not enough optional sub-scorers to ever reach the minimum, so no document can match
    bool tooFewOptional = optional.Count < minShouldMatch;

    if (nothingToScore || tooFewOptional)
    {
        return null;
    }

    // Return a BooleanScorer2
    return new BooleanScorer2(similarity, minShouldMatch, required, prohibited, optional);
}
421 
/// <summary>
/// Tells whether the scorer created by this weight may score documents out of
/// order. The answer mirrors the conditions under which <c>Scorer</c> picks the
/// out-of-order <c>BooleanScorer</c>: no required clauses and fewer than 32
/// prohibited clauses.
/// </summary>
/// <returns>
/// false if any clause is required or if there are 32 or more prohibited
/// clauses; true otherwise.
/// </returns>
public override bool GetScoresDocsOutOfOrder()
{
    int numProhibited = 0;
    foreach (BooleanClause c in Enclosing_Instance.clauses)
    {
        if (c.IsRequired)
        {
            return false; // BS2 (in-order) will be used by scorer()
        }
        else if (c.IsProhibited)
        {
            ++numProhibited;
        }
    }

    // Scorer() only uses BooleanScorer while prohibited.Count < 32, so 32 itself
    // must already count as "cannot use BS" here (the previous check was "> 32").
    if (numProhibited >= 32)
    {
        // cannot use BS
        return false;
    }

    // scorer() will return an out-of-order scorer if requested.
    return true;
}
446  }
447 
/// <summary>Creates a new <see cref="BooleanWeight" /> for this query and the given searcher.</summary>
/// <param name="searcher">Searcher passed on to the weight's constructor.</param>
public override Weight CreateWeight(Searcher searcher)
{
    BooleanWeight weight = new BooleanWeight(this, searcher);
    return weight;
}
452 
/// <summary>
/// Rewrites this query. A query with exactly one non-prohibited clause and no
/// minimum-should-match requirement collapses to that clause's rewritten
/// sub-query, with this query's boost multiplied in. Otherwise every clause is
/// rewritten, and a modified clone is returned only if some clause changed.
/// </summary>
/// <param name="reader">Index reader passed to the sub-queries' Rewrite.</param>
/// <returns>The collapsed sub-query, a clone with rewritten clauses, or this instance if nothing changed.</returns>
public override Query Rewrite(IndexReader reader)
{
    bool singleClause = minNrShouldMatch == 0 && clauses.Count == 1;
    if (singleClause)
    {
        // optimize 1-clause queries
        BooleanClause only = clauses[0];
        if (!only.IsProhibited)
        {
            Query collapsed = only.Query.Rewrite(reader); // rewrite first

            if (Boost == 1.0f)
            {
                // no boost to carry over: the rewritten clause stands in for this query
                return collapsed;
            }

            if (collapsed == only.Query)
            {
                // rewrite was a no-op: clone so the boost is not applied to the caller's sub-query
                collapsed = (Query) collapsed.Clone();
            }
            collapsed.Boost = Boost * collapsed.Boost;
            return collapsed;
        }
    }

    // recursively rewrite; the clone is created lazily, on the first clause that changes
    BooleanQuery rewrittenCopy = null;
    for (int i = 0; i < clauses.Count; i++)
    {
        BooleanClause current = clauses[i];
        Query rewritten = current.Query.Rewrite(reader);
        if (rewritten == current.Query)
        {
            continue; // clause unchanged
        }

        if (rewrittenCopy == null)
        {
            rewrittenCopy = (BooleanQuery) this.Clone();
        }
        rewrittenCopy.clauses[i] = new BooleanClause(rewritten, current.Occur);
    }

    // some clauses rewrote -> the clone; none did -> this instance
    return rewrittenCopy ?? this;
}
498 
/// <summary>Asks the sub-query of every clause, in order, to add its terms to <paramref name="terms" />.</summary>
/// <param name="terms">The set that the sub-queries add their terms to.</param>
public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
{
    for (int i = 0; i < clauses.Count; i++)
    {
        Query subQuery = clauses[i].Query;
        subQuery.ExtractTerms(terms);
    }
}
507 
/// <summary>
/// Clones this query via the base class and gives the copy the result of
/// cloning this query's clause list.
/// </summary>
/// <returns>The cloned <see cref="BooleanQuery" />.</returns>
public override System.Object Clone()
{
    BooleanQuery copy = (BooleanQuery) base.Clone();
    EquatableList<BooleanClause> clausesCopy = (EquatableList<BooleanClause>) this.clauses.Clone();
    copy.clauses = clausesCopy;
    return copy;
}
514 
/// <summary>
/// Prints a user-readable version of this query. Clauses are separated by single
/// spaces, prohibited clauses are prefixed with "-" and required ones with "+",
/// and nested boolean queries are wrapped in parentheses. The whole query is
/// parenthesized when it has a boost other than 1 or a minimum-should-match above
/// zero, followed by "~" plus that minimum and then the boost suffix, as applicable.
/// </summary>
/// <param name="field">Field name passed to the sub-queries' ToString.</param>
public override System.String ToString(System.String field)
{
    System.Text.StringBuilder text = new System.Text.StringBuilder();
    float boost = Boost;
    int minShouldMatch = MinimumNumberShouldMatch;

    bool wrapAll = (boost != 1.0) || (minShouldMatch > 0);
    if (wrapAll)
    {
        text.Append("(");
    }

    for (int i = 0; i < clauses.Count; i++)
    {
        if (i > 0)
        {
            // single space between consecutive clauses
            text.Append(" ");
        }

        BooleanClause clause = clauses[i];
        if (clause.IsProhibited)
        {
            text.Append("-");
        }
        else if (clause.IsRequired)
        {
            text.Append("+");
        }

        Query subQuery = clause.Query;
        if (subQuery == null)
        {
            text.Append("null");
        }
        else if (subQuery is BooleanQuery)
        {
            // wrap sub-bools in parens
            text.Append("(");
            text.Append(subQuery.ToString(field));
            text.Append(")");
        }
        else
        {
            text.Append(subQuery.ToString(field));
        }
    }

    if (wrapAll)
    {
        text.Append(")");
    }

    if (minShouldMatch > 0)
    {
        text.Append('~');
        text.Append(minShouldMatch);
    }

    if (boost != 1.0f)
    {
        text.Append(ToStringUtils.Boost(boost));
    }

    return text.ToString();
}
575 
/// <summary>
/// Returns true iff <c>o</c> is a <see cref="BooleanQuery" /> with the same boost,
/// an equal clause list, the same minimum-should-match value and the same
/// coord setting as this query.
/// </summary>
public override bool Equals(System.Object o)
{
    BooleanQuery that = o as BooleanQuery;
    if (object.ReferenceEquals(that, null))
    {
        // null or not a BooleanQuery
        return false;
    }

    if (!(this.Boost == that.Boost))
    {
        return false;
    }
    if (!this.clauses.Equals(that.clauses))
    {
        return false;
    }
    if (this.MinimumNumberShouldMatch != that.MinimumNumberShouldMatch)
    {
        return false;
    }
    return this.disableCoord == that.disableCoord;
}
587 
/// <summary>
/// Returns a hash code value for this object, combining the bit pattern of the
/// boost with the clause list's hash code, the minimum-should-match value and
/// the coord setting.
/// </summary>
public override int GetHashCode()
{
    // unchecked: the additions may wrap around, which is fine for a hash code and
    // must not raise OverflowException when the assembly is compiled with checked arithmetic.
    unchecked
    {
        int boostBits = BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0);

        // '+' binds tighter than '^', so the sum is formed first and then XOR-ed
        // with the boost bits; the grouping is spelled out here to keep the value unchanged.
        int rest = clauses.GetHashCode() + MinimumNumberShouldMatch + (disableCoord ? 17 : 0);
        return boostBits ^ rest;
    }
}
593 
// Non-generic enumeration: hands out the same enumerator as the generic GetEnumerator().
IEnumerator IEnumerable.GetEnumerator()
{
    System.Collections.Generic.IEnumerator<BooleanClause> typedEnumerator = GetEnumerator();
    return typedEnumerator;
}
598  }
599 }