Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
DisjunctionMaxQuery.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using Lucene.Net.Index;
20 using Lucene.Net.Support;
21 using IndexReader = Lucene.Net.Index.IndexReader;
22 
23 namespace Lucene.Net.Search
24 {
25 
26  /// <summary> A query that generates the union of documents produced by its subqueries, and that scores each document with the maximum
27  /// score for that document as produced by any subquery, plus a tie breaking increment for any additional matching subqueries.
28  /// This is useful when searching for a word in multiple fields with different boost factors (so that the fields cannot be
29  /// combined equivalently into a single search field). We want the primary score to be the one associated with the highest boost,
30  /// not the sum of the field scores (as BooleanQuery would give).
31  /// If the query is "albino elephant" this ensures that "albino" matching one field and "elephant" matching
32  /// another gets a higher score than "albino" matching both fields.
33  /// To get this result, use both BooleanQuery and DisjunctionMaxQuery: for each term a DisjunctionMaxQuery searches for it in
34  /// each field, while the set of these DisjunctionMaxQuery's is combined into a BooleanQuery.
35  /// The tie breaker capability allows results that include the same term in multiple fields to be judged better than results that
36  /// include this term in only the best of those multiple fields, without confusing this with the better case of two different terms
37  /// in the multiple fields.
38  /// </summary>
39  [Serializable]
40  public class DisjunctionMaxQuery : Query, System.Collections.Generic.IEnumerable<Query>, System.ICloneable
41  {
42 
43  /* The subqueries */
44  private EquatableList<Query> disjuncts = new EquatableList<Query>();
45 
46  /* Multiple of the non-max disjunct scores added into our final score. Non-zero values support tie-breaking. */
47  private float tieBreakerMultiplier = 0.0f;
48 
/// <summary>
/// Creates a new DisjunctionMaxQuery without any subqueries.
/// Subqueries are supplied afterwards through the Add methods.
/// </summary>
/// <param name="tieBreakerMultiplier">
/// The score of each non-maximum disjunct for a document is multiplied by this weight
/// and added into the final score. If non-zero, the value should be small, on the order
/// of 0.1, which says that 10 occurrences of a word in a lower-scored field that is also
/// in a higher-scored field is just as good as a unique word in the lower-scored field
/// (i.e., one that is not in any higher-scored field).
/// </param>
public DisjunctionMaxQuery(float tieBreakerMultiplier)
{
    // Replaces the 0.0f default assigned by the field initializer.
    this.tieBreakerMultiplier = tieBreakerMultiplier;
}
59 
/// <summary>
/// Creates a new DisjunctionMaxQuery that is pre-populated with the given subqueries.
/// </summary>
/// <param name="disjuncts">
/// A collection of all the disjuncts (subqueries) to add.
/// </param>
/// <param name="tieBreakerMultiplier">
/// The weight to give to each matching non-maximum disjunct.
/// </param>
public DisjunctionMaxQuery(System.Collections.Generic.ICollection<Query> disjuncts, float tieBreakerMultiplier)
    : this(tieBreakerMultiplier)
{
    // The tie breaker has been stored by the chained constructor; now add the subqueries.
    Add(disjuncts);
}
70 
/// <summary>Appends a single subquery to this disjunction.</summary>
/// <param name="query">
/// The disjunct to append to the list of subqueries.
/// </param>
public virtual void Add(Query query)
{
    this.disjuncts.Add(query);
}
78 
/// <summary>
/// Appends every query of the given collection to this disjunction.
/// </summary>
/// <param name="disjuncts">
/// The collection whose queries are appended to the list of subqueries.
/// </param>
public virtual void Add(System.Collections.Generic.ICollection<Query> disjuncts)
{
    // "this." is required: the parameter shadows the field of the same name.
    EquatableList<Query> target = this.disjuncts;
    target.AddRange(disjuncts);
}
86 
/// <summary>Returns an enumerator over the disjuncts (subqueries) of this query.</summary>
/// <returns>
/// The enumerator obtained from the underlying list of disjuncts.
/// </returns>
public virtual System.Collections.Generic.IEnumerator<Query> GetEnumerator()
{
    System.Collections.Generic.IEnumerator<Query> enumerator = disjuncts.GetEnumerator();
    return enumerator;
}
92 
/* Non-generic enumeration simply forwards to the generic GetEnumerator() of this query. */
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
{
    return this.GetEnumerator();
}
97 
/// <summary>
/// Expert: the Weight for DisjunctionMaxQuery, used to normalize, score and explain
/// these queries.
///
/// <p/>NOTE: this API and implementation is subject to
/// change suddenly in the next release.<p/>
/// </summary>
[Serializable]
protected internal class DisjunctionMaxWeight : Weight
{
    /* The DisjunctionMaxQuery this weight was created for */
    private DisjunctionMaxQuery enclosingInstance;

    /// <summary>The Similarity implementation, taken from the searcher at construction time.</summary>
    protected internal Similarity similarity;

    /// <summary>The Weights for our subqueries, in 1-1 correspondence with disjuncts.</summary>
    protected internal System.Collections.Generic.List<Weight> weights = new System.Collections.Generic.List<Weight>();

    /// <summary>The query that owns this weight.</summary>
    public DisjunctionMaxQuery Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    /// <summary>
    /// Constructs the Weight for the given query as searched by the given searcher,
    /// creating one sub-weight per disjunct, in disjunct order.
    /// </summary>
    /// <param name="enclosingInstance">the query this weight belongs to</param>
    /// <param name="searcher">the searcher supplying the Similarity and used to create the sub-weights</param>
    public DisjunctionMaxWeight(DisjunctionMaxQuery enclosingInstance, Searcher searcher)
    {
        this.enclosingInstance = enclosingInstance;
        this.similarity = searcher.Similarity;

        EquatableList<Query> subqueries = enclosingInstance.disjuncts;
        for (int i = 0; i < subqueries.Count; i++)
        {
            weights.Add(subqueries[i].CreateWeight(searcher));
        }
    }

    /// <summary>Our associated DisjunctionMaxQuery.</summary>
    public override Query Query
    {
        get { return Enclosing_Instance; }
    }

    /// <summary>The boost of our associated query.</summary>
    public override float Value
    {
        get { return Enclosing_Instance.Boost; }
    }

    /// <summary>
    /// Computes the sum of squared weights of us applied to our subqueries; used for normalization.
    /// The largest sub-weight counts in full, the remaining ones are scaled by the squared
    /// tie breaker, and the total is scaled by the squared boost.
    /// </summary>
    public override float GetSumOfSquaredWeights()
    {
        float largest = 0.0f;
        float total = 0.0f;
        for (int i = 0; i < weights.Count; i++)
        {
            float contribution = weights[i].GetSumOfSquaredWeights();
            total += contribution;
            largest = System.Math.Max(largest, contribution);
        }

        float boost = Enclosing_Instance.Boost;
        float tieBreaker = Enclosing_Instance.tieBreakerMultiplier;
        return (((total - largest) * tieBreaker * tieBreaker) + largest) * boost * boost;
    }

    /// <summary>Applies the computed normalization factor, multiplied by our boost, to every sub-weight.</summary>
    /// <param name="norm">the normalization factor to apply</param>
    public override void Normalize(float norm)
    {
        float boostedNorm = norm * Enclosing_Instance.Boost; // Incorporate our boost
        for (int i = 0; i < weights.Count; i++)
        {
            weights[i].Normalize(boostedNorm);
        }
    }

    /// <summary>Creates the scorer used to score our associated DisjunctionMaxQuery.</summary>
    /// <param name="reader">the reader to score against</param>
    /// <param name="scoreDocsInOrder">not used here; sub-scorers are always requested in-order</param>
    /// <param name="topScorer">not used here; sub-scorers are never requested as top scorers</param>
    /// <returns>
    /// a DisjunctionMaxScorer over the sub-scorers that have at least one document,
    /// or null when no sub-scorer has any document
    /// </returns>
    public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
    {
        Scorer[] candidates = new Scorer[weights.Count];
        int usable = 0;
        foreach (Weight subWeight in weights)
        {
            Scorer candidate = subWeight.Scorer(reader, true, false);
            if (candidate == null)
            {
                continue;
            }
            // Advance to the first document; scorers without any document are dropped.
            if (candidate.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
            {
                continue;
            }
            candidates[usable] = candidate;
            usable++;
        }

        if (usable == 0)
        {
            return null; // all scorers did not have documents
        }
        return new DisjunctionMaxScorer(Enclosing_Instance.tieBreakerMultiplier, similarity, candidates, usable);
    }

    /// <summary>Explains the score we computed for doc.</summary>
    /// <param name="reader">the reader the document belongs to</param>
    /// <param name="doc">the document to explain</param>
    /// <returns>
    /// the single sub-weight's explanation when there is exactly one disjunct; otherwise a
    /// combined explanation holding the details of all matching sub-weights
    /// </returns>
    public override Explanation Explain(IndexReader reader, int doc)
    {
        if (Enclosing_Instance.disjuncts.Count == 1)
        {
            return weights[0].Explain(reader, doc);
        }

        float tieBreaker = Enclosing_Instance.tieBreakerMultiplier;
        ComplexExplanation summary = new ComplexExplanation();
        if (tieBreaker == 0.0f)
        {
            summary.Description = "max of:";
        }
        else
        {
            summary.Description = "max plus " + tieBreaker + " times others of:";
        }

        float best = 0.0f;
        float total = 0.0f;
        foreach (Weight subWeight in weights)
        {
            Explanation detail = subWeight.Explain(reader, doc);
            if (!detail.IsMatch)
            {
                continue;
            }
            // One matching disjunct is enough for the whole disjunction to match.
            summary.Match = true;
            summary.AddDetail(detail);
            total += detail.Value;
            best = System.Math.Max(best, detail.Value);
        }

        summary.Value = best + (total - best) * tieBreaker;
        return summary;
    }
} // end of DisjunctionMaxWeight inner class
220 
/// <summary>Creates the Weight used to score this query.</summary>
/// <param name="searcher">the searcher handed on to the new DisjunctionMaxWeight</param>
/// <returns>a new DisjunctionMaxWeight bound to this query and the given searcher</returns>
public override Weight CreateWeight(Searcher searcher)
{
    Weight weight = new DisjunctionMaxWeight(this, searcher);
    return weight;
}
226 
/// <summary>Optimizes our representation and the representations of our subqueries.</summary>
/// <param name="reader">the IndexReader we query</param>
/// <returns>
/// For a single disjunct: that disjunct's rewritten form, with our boost folded in when
/// the boost differs from 1. Otherwise: this instance when no subquery changed, or a
/// clone of this query in which every changed subquery is replaced by its rewritten form.
/// </returns>
public override Query Rewrite(IndexReader reader)
{
    int count = disjuncts.Count;

    if (count == 1)
    {
        Query only = disjuncts[0];
        Query rewritten = only.Rewrite(reader);
        if (Boost == 1.0f)
        {
            return rewritten;
        }
        // Never change the boost on the caller's own subquery: clone it first
        // when rewriting returned the very same instance.
        if (rewritten == only)
        {
            rewritten = (Query) rewritten.Clone();
        }
        rewritten.Boost = Boost * rewritten.Boost;
        return rewritten;
    }

    // The copy is created lazily, on the first subquery that actually changes.
    DisjunctionMaxQuery copy = null;
    for (int index = 0; index < count; index++)
    {
        Query original = disjuncts[index];
        Query replacement = original.Rewrite(reader);
        if (replacement == original)
        {
            continue;
        }
        if (copy == null)
        {
            copy = (DisjunctionMaxQuery) this.Clone();
        }
        copy.disjuncts[index] = replacement;
    }

    if (copy == null)
    {
        return this;
    }
    return copy;
}
264 
/// <summary>Creates a shallow copy of us -- used in rewriting if necessary.</summary>
/// <returns>
/// A copy of us that owns its own list of disjuncts but reuses (does not copy)
/// the subqueries themselves.
/// </returns>
public override System.Object Clone()
{
    // Start from the base class copy so that the state kept by Query is carried over.
    DisjunctionMaxQuery clone = (DisjunctionMaxQuery) base.Clone();
    // Give the copy its own list so that replacing a disjunct in the copy (as Rewrite
    // does) leaves this instance's list untouched.
    clone.disjuncts = (EquatableList<Query>) this.disjuncts.Clone();
    return clone;
}
274 
/// <summary>
/// Asks every subquery, in disjunct order, to add its terms to the given set.
/// </summary>
/// <param name="terms">the set handed to each subquery's ExtractTerms</param>
public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
{
    for (int i = 0; i < disjuncts.Count; i++)
    {
        Query subquery = disjuncts[i];
        subquery.ExtractTerms(terms);
    }
}
283 
/// <summary>Pretty-prints this query.</summary>
/// <param name="field">the field to which we are applied</param>
/// <returns>
/// A string of the form "(disjunct1 | disjunct2 | ... | disjunctn)~tieBreaker^boost".
/// The "~tieBreaker" part is only present for a non-zero tie breaker and the "^boost"
/// part only for a boost other than 1. BooleanQuery disjuncts are wrapped in parentheses.
/// </returns>
public override System.String ToString(System.String field)
{
    System.Text.StringBuilder text = new System.Text.StringBuilder("(");

    int count = disjuncts.Count;
    for (int index = 0; index < count; index++)
    {
        if (index > 0)
        {
            text.Append(" | ");
        }

        Query disjunct = disjuncts[index];
        bool needsParens = disjunct is BooleanQuery; // wrap sub-bools in parens
        if (needsParens)
        {
            text.Append("(");
        }
        text.Append(disjunct.ToString(field));
        if (needsParens)
        {
            text.Append(")");
        }
    }
    text.Append(")");

    if (tieBreakerMultiplier != 0.0f)
    {
        text.Append("~").Append(tieBreakerMultiplier);
    }
    if (Boost != 1.0f)
    {
        text.Append("^").Append(Boost);
    }
    return text.ToString();
}
322 
/// <summary>Returns true iff we represent the same query as o.</summary>
/// <param name="o">another object</param>
/// <returns>
/// true iff o is a DisjunctionMaxQuery with the same boost, the same tie breaker and
/// an equal list of subqueries
/// </returns>
public override bool Equals(System.Object o)
{
    DisjunctionMaxQuery other = o as DisjunctionMaxQuery;
    if (object.ReferenceEquals(other, null))
    {
        // Covers both null and objects of any other type.
        return false;
    }
    if (this.Boost != other.Boost)
    {
        return false;
    }
    if (this.tieBreakerMultiplier != other.tieBreakerMultiplier)
    {
        return false;
    }
    return this.disjuncts.Equals(other.disjuncts);
}
335 
/// <summary>Computes a hash code for hashing us.</summary>
/// <returns>
/// the sum of the raw 32-bit patterns of the boost and of the tie breaker and the
/// hash code of the list of disjuncts
/// </returns>
public override int GetHashCode()
{
    // Reinterpret each float's four bytes as an int rather than converting its numeric value.
    int boostBits = BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0);
    int tieBreakerBits = BitConverter.ToInt32(BitConverter.GetBytes(tieBreakerMultiplier), 0);
    return boostBits + tieBreakerBits + disjuncts.GetHashCode();
}
343  }
344 }