Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
MultiPhraseQuery.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using Lucene.Net.Support;
20 using IndexReader = Lucene.Net.Index.IndexReader;
21 using MultipleTermPositions = Lucene.Net.Index.MultipleTermPositions;
22 using Term = Lucene.Net.Index.Term;
23 using TermPositions = Lucene.Net.Index.TermPositions;
24 using ToStringUtils = Lucene.Net.Util.ToStringUtils;
25 
26 namespace Lucene.Net.Search
27 {
28 
29  /// <summary> MultiPhraseQuery is a generalized version of PhraseQuery, with an added
30  /// method <see cref="Add(Term[])" />.
31  /// To use this class to search for the phrase "Microsoft app*", first call
32  /// Add(Term) on the term "Microsoft", then find all terms that have "app" as a
33  /// prefix using IndexReader.Terms(Term), and call MultiPhraseQuery.Add(Term[])
34  /// to add them to the query.
35  ///
36  /// </summary>
37  /// <version> 1.0
38  /// </version>
39  [Serializable]
40  public class MultiPhraseQuery:Query
41  {
// Field name shared by every term of the phrase; remains null until the
// first Add call records it.
private string field;

// One entry per phrase position: the alternative terms accepted there.
private System.Collections.Generic.List<Term[]> termArrays = new System.Collections.Generic.List<Term[]>();

// Relative position of each termArrays entry (kept index-aligned with it).
private System.Collections.Generic.List<int> positions = new System.Collections.Generic.List<int>();

// Backing store of the Slop property; an int field starts out as 0.
private int slop;
47 
/// <summary>
/// The phrase slop of this query. Reading returns the stored value and
/// assigning replaces it; no validation is performed on assignment.
/// </summary>
/// <seealso cref="PhraseQuery.Slop" />
public virtual int Slop
{
    get
    {
        return this.slop;
    }
    set
    {
        this.slop = value;
    }
}
56 
/// <summary>
/// Appends a single term at the next position of the phrase by wrapping it
/// in a one-element array and delegating to <see cref="Add(Term[])" />.
/// </summary>
/// <param name="term">The term to append.</param>
/// <seealso cref="PhraseQuery.Add(Term)" />
public virtual void Add(Term term)
{
    Term[] single = new Term[1];
    single[0] = term;
    Add(single);
}
64 
/// <summary>
/// Appends a group of alternative terms at the next position of the phrase;
/// any one of them may match there. The position used is 0 for the first
/// group and one past the most recently recorded position afterwards.
/// </summary>
/// <param name="terms">The alternative terms for the new position.</param>
/// <seealso cref="PhraseQuery.Add(Term)" />
public virtual void Add(Term[] terms)
{
    int recorded = positions.Count;
    int nextPosition = recorded > 0 ? positions[recorded - 1] + 1 : 0;

    Add(terms, nextPosition);
}
79 
/// <summary>
/// Adds a group of alternative terms at an explicit relative position within
/// the phrase. The first group added determines the field of the query; every
/// term of every later group must use that same field.
/// </summary>
/// <seealso cref="PhraseQuery.Add(Term, int)" />
/// <param name="terms">The alternative terms for this position; must contain
/// at least one term and no null entries.</param>
/// <param name="position">The relative position of these terms in the phrase.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="terms" /> is null.</exception>
/// <exception cref="System.ArgumentException"><paramref name="terms" /> is empty,
/// contains a null entry, or contains a term from a different field.</exception>
public virtual void Add(Term[] terms, int position)
{
    if (terms == null)
    {
        throw new System.ArgumentNullException("terms");
    }
    if (terms.Length == 0)
    {
        // An empty group cannot match anything and would later break code
        // that reads terms[0] (Scorer, ToString).
        throw new System.ArgumentException("At least one term is required for a phrase position", "terms");
    }

    // Validate against a candidate field and only commit it once every term
    // has passed, so a rejected call leaves the query untouched.
    System.String expectedField = field;
    for (int i = 0; i < terms.Length; i++)
    {
        Term term = terms[i];
        if ((System.Object) term == null)
        {
            throw new System.ArgumentException("Phrase terms must not be null", "terms");
        }

        if (i == 0 && termArrays.Count == 0)
        {
            // The very first term added defines the field of the whole query.
            expectedField = term.Field;
        }

        // Field names are compared by reference here.
        // NOTE(review): presumably Term interns its field names - confirm.
        if ((System.Object) term.Field != (System.Object) expectedField)
        {
            throw new System.ArgumentException("All phrase terms must be in the same field (" + expectedField + "): " + term);
        }
    }

    field = expectedField;
    termArrays.Add(terms);
    positions.Add(position);
}
105 
/// <summary>
/// Exposes the per-position term arrays of this multiphrase through a
/// read-only wrapper around the internal list. The arrays inside are the
/// stored instances rather than copies, so callers must not modify them.
/// </summary>
/// <returns>A read-only List&lt;Term[]&gt; view of the terms.</returns>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
public virtual System.Collections.Generic.IList<Term[]> GetTermArrays()
{
    System.Collections.Generic.IList<Term[]> readOnlyView = termArrays.AsReadOnly();
    return readOnlyView;
}
114 
/// <summary>
/// Copies the relative positions of the terms in this phrase into a newly
/// allocated array, one element per added term group.
/// </summary>
/// <returns>A fresh array holding the recorded positions in insertion order.</returns>
public virtual int[] GetPositions()
{
    int[] snapshot = new int[positions.Count];
    positions.CopyTo(snapshot);
    return snapshot;
}
123 
/// <summary>
/// Adds every term of every phrase position of this query to
/// <paramref name="terms" />.
/// </summary>
/// <param name="terms">The set that receives the terms.</param>
public override void ExtractTerms(System.Collections.Generic.ISet<Term> terms)
{
    for (int i = 0; i < termArrays.Count; i++)
    {
        Term[] alternatives = termArrays[i];
        terms.UnionWith(alternatives);
    }
}
132 
133 
134  [Serializable]
135  private class MultiPhraseWeight:Weight
136  {
// Remembers the outer query so the rest of this weight can reach it
// through Enclosing_Instance.
private void InitBlock(MultiPhraseQuery enclosingInstance)
{
    MultiPhraseQuery owner = enclosingInstance;
    this.enclosingInstance = owner;
}
// The MultiPhraseQuery this weight belongs to; assigned by InitBlock.
private MultiPhraseQuery enclosingInstance;

/// <summary>The query instance that owns this weight.</summary>
public MultiPhraseQuery Enclosing_Instance
{
    get { return this.enclosingInstance; }
}

// Similarity obtained from the owning query in the constructor.
private Similarity similarity;

// Backing value of the Value property; written by Normalize.
private float value_Renamed;

// Sum of the per-term idf values accumulated in the constructor.
private float idf;

// Normalization factor most recently passed to Normalize.
private float queryNorm;

// Written by GetSumOfSquaredWeights, then scaled by Normalize.
private float queryWeight;
155 
/// <summary>
/// Builds the weight for <paramref name="enclosingInstance" />: stores the
/// owning query, fetches its similarity for <paramref name="searcher" />, and
/// accumulates the idf of every term at every phrase position.
/// </summary>
/// <param name="enclosingInstance">The query this weight is created for.</param>
/// <param name="searcher">Supplies MaxDoc and per-term document frequencies.</param>
public MultiPhraseWeight(MultiPhraseQuery enclosingInstance, Searcher searcher)
{
    InitBlock(enclosingInstance);
    this.similarity = Enclosing_Instance.GetSimilarity(searcher);

    // The phrase idf is the sum of the idf of each individual term.
    int maxDoc = searcher.MaxDoc;
    System.Collections.Generic.List<Term[]> groups = enclosingInstance.termArrays;
    for (int g = 0; g < groups.Count; g++)
    {
        Term[] alternatives = groups[g];
        for (int t = 0; t < alternatives.Length; t++)
        {
            int docFreq = searcher.DocFreq(alternatives[t]);
            idf += similarity.Idf(docFreq, maxDoc);
        }
    }
}
171 
/// <summary>The query this weight was created for (the enclosing MultiPhraseQuery).</summary>
public override Query Query
{
    get
    {
        return Enclosing_Instance;
    }
}
176 
/// <summary>The weight value most recently computed by Normalize.</summary>
public override float Value
{
    get
    {
        return this.value_Renamed;
    }
}
181 
/// <summary>
/// Stores the un-normalized query weight (accumulated idf times the boost of
/// the owning query) and returns its square.
/// </summary>
/// <returns>The square of the freshly computed query weight.</returns>
public override float GetSumOfSquaredWeights()
{
    float boost = Enclosing_Instance.Boost;
    queryWeight = idf * boost;
    return queryWeight * queryWeight;
}
187 
/// <summary>
/// Records the normalization factor, scales the stored query weight by it,
/// and derives the value reported by <see cref="Value" /> as the normalized
/// query weight multiplied by idf.
/// </summary>
/// <param name="queryNorm">The normalization factor to apply.</param>
public override void Normalize(float queryNorm)
{
    this.queryNorm = queryNorm;
    queryWeight = queryWeight * queryNorm;
    value_Renamed = queryWeight * idf;
}
194 
/// <summary>
/// Creates the scorer for this phrase against <paramref name="reader" />.
/// One TermPositions is opened per phrase position: a MultipleTermPositions
/// when the position has several alternative terms, otherwise the reader's
/// positions for the single term. A slop of zero yields an ExactPhraseScorer;
/// any other slop yields a SloppyPhraseScorer.
/// </summary>
/// <param name="reader">The index reader to score against.</param>
/// <param name="scoreDocsInOrder">Not consulted by this implementation.</param>
/// <param name="topScorer">Not consulted by this implementation.</param>
/// <returns>The scorer, or null when the query has no terms or a term
/// positions enumerator could not be obtained.</returns>
public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
{
    MultiPhraseQuery owner = Enclosing_Instance;
    int positionCount = owner.termArrays.Count;
    if (positionCount == 0)
    {
        // Nothing to match when no terms were added.
        return null;
    }

    TermPositions[] enumerators = new TermPositions[positionCount];
    for (int i = 0; i < enumerators.Length; i++)
    {
        Term[] alternatives = owner.termArrays[i];

        TermPositions current;
        if (alternatives.Length <= 1)
        {
            current = reader.TermPositions(alternatives[0]);
        }
        else
        {
            current = new MultipleTermPositions(reader, alternatives);
        }

        if (current == null)
        {
            return null;
        }

        enumerators[i] = current;
    }

    int[] offsets = owner.GetPositions();
    byte[] norms = reader.Norms(owner.field);
    if (owner.slop != 0)
    {
        return new SloppyPhraseScorer(this, enumerators, offsets, similarity, owner.slop, norms);
    }
    return new ExactPhraseScorer(this, enumerators, offsets, similarity, norms);
}
223 
/// <summary>
/// Builds an explanation of the score of document <paramref name="doc" />.
/// The result is the product of a query-weight part (boost, when it is not
/// 1.0, times idf times queryNorm) and a field-weight part (tf of the phrase
/// frequency times idf times the field norm).
/// </summary>
/// <param name="reader">The index reader the document belongs to.</param>
/// <param name="doc">The document number to explain.</param>
/// <returns>A "no matching docs" explanation when no scorer can be created;
/// the field-weight explanation alone when the query weight equals 1.0;
/// otherwise the combined explanation.</returns>
public override Explanation Explain(IndexReader reader, int doc)
{
    ComplexExplanation combined = new ComplexExplanation();
    combined.Description = "weight(" + Query + " in " + doc + "), product of:";

    // Shared by the query-weight and the field-weight parts.
    Explanation idfPart = new Explanation(idf, "idf(" + Query + ")");

    // ---- query weight ----
    Explanation queryPart = new Explanation();
    queryPart.Description = "queryWeight(" + Query + "), product of:";

    float boost = Enclosing_Instance.Boost;
    Explanation boostPart = new Explanation(boost, "boost");
    if (boost != 1.0f)
    {
        // The boost is only listed as a detail when it is not 1.0.
        queryPart.AddDetail(boostPart);
    }

    queryPart.AddDetail(idfPart);

    Explanation normPart = new Explanation(queryNorm, "queryNorm");
    queryPart.AddDetail(normPart);

    queryPart.Value = boostPart.Value * idfPart.Value * normPart.Value;

    combined.AddDetail(queryPart);

    // ---- field weight ----
    ComplexExplanation fieldPart = new ComplexExplanation();
    fieldPart.Description = "fieldWeight(" + Query + " in " + doc + "), product of:";

    PhraseScorer phraseScorer = (PhraseScorer)Scorer(reader, true, false);
    if (phraseScorer == null)
    {
        return new Explanation(0.0f, "no matching docs");
    }

    Explanation tfPart = new Explanation();
    int landedOn = phraseScorer.Advance(doc);
    // The phrase frequency counts only if the scorer stopped exactly on doc.
    float phraseFreq = 0.0f;
    if (landedOn == doc)
    {
        phraseFreq = phraseScorer.CurrentFreq();
    }
    tfPart.Value = similarity.Tf(phraseFreq);
    tfPart.Description = "tf(phraseFreq=" + phraseFreq + ")";
    fieldPart.AddDetail(tfPart);
    fieldPart.AddDetail(idfPart);

    Explanation fieldNormPart = new Explanation();
    byte[] norms = reader.Norms(Enclosing_Instance.field);
    // A norm of 1.0 is used when the reader returns no norms for the field.
    float fieldNorm = 1.0f;
    if (norms != null)
    {
        fieldNorm = Similarity.DecodeNorm(norms[doc]);
    }
    fieldNormPart.Value = fieldNorm;
    fieldNormPart.Description = "fieldNorm(field=" + Enclosing_Instance.field + ", doc=" + doc + ")";
    fieldPart.AddDetail(fieldNormPart);

    fieldPart.Match = tfPart.IsMatch;
    fieldPart.Value = tfPart.Value * idfPart.Value * fieldNormPart.Value;

    combined.AddDetail(fieldPart);
    combined.Match = fieldPart.Match;

    // ---- combine both parts ----
    combined.Value = queryPart.Value * fieldPart.Value;

    if (queryPart.Value == 1.0f)
    {
        return fieldPart;
    }

    return combined;
}
287  }
288 
/// <summary>
/// Rewrites a query that has exactly one phrase position into a BooleanQuery
/// with one optional (SHOULD) TermQuery per alternative term, carrying over
/// this query's boost. Any other query is returned unchanged.
/// </summary>
/// <param name="reader">Not consulted by this implementation.</param>
/// <returns>The rewritten BooleanQuery, or this instance.</returns>
public override Query Rewrite(IndexReader reader)
{
    if (termArrays.Count != 1)
    {
        return this;
    }

    // Single position: a phrase of one slot is just "any of these terms".
    Term[] alternatives = termArrays[0];
    BooleanQuery disjunction = new BooleanQuery(true);
    foreach (Term alternative in alternatives)
    {
        disjunction.Add(new TermQuery(alternative), Occur.SHOULD);
    }
    disjunction.Boost = Boost;
    return disjunction;
}
308 
/// <summary>Creates the MultiPhraseWeight of this query for <paramref name="searcher" />.</summary>
/// <param name="searcher">The searcher handed to the weight's constructor.</param>
/// <returns>A new weight bound to this query.</returns>
public override Weight CreateWeight(Searcher searcher)
{
    MultiPhraseWeight weight = new MultiPhraseWeight(this, searcher);
    return weight;
}
313 
/// <summary>
/// Prints a user-readable version of this query. The output is an optional
/// <c>field:</c> prefix (written when the query has a field and it differs
/// from <paramref name="f" />), the phrase in double quotes with positions
/// separated by spaces and multi-term positions wrapped in parentheses, a
/// <c>~slop</c> suffix when the slop is not zero, and finally the text
/// produced by ToStringUtils.Boost for this query's boost.
/// </summary>
/// <param name="f">The default field; the prefix is omitted when it equals
/// the field of this query.</param>
/// <returns>The textual form of the query.</returns>
public override System.String ToString(System.String f)
{
    System.Text.StringBuilder buffer = new System.Text.StringBuilder();

    // field stays null until the first term is added, so it must be checked
    // before it is dereferenced; a query without terms has no prefix to print.
    if (field != null && !field.Equals(f))
    {
        buffer.Append(field);
        buffer.Append(":");
    }

    buffer.Append("\"");
    bool first = true;
    foreach (Term[] terms in termArrays)
    {
        if (!first)
        {
            buffer.Append(" ");
        }
        first = false;

        if (terms.Length > 1)
        {
            buffer.Append("(");
            for (int j = 0; j < terms.Length; j++)
            {
                buffer.Append(terms[j].Text);
                if (j < terms.Length - 1)
                    buffer.Append(" ");
            }
            buffer.Append(")");
        }
        else if (terms.Length == 1)
        {
            buffer.Append(terms[0].Text);
        }
        // An empty group contributes no text instead of failing on terms[0].
    }
    buffer.Append("\"");

    if (slop != 0)
    {
        buffer.Append("~");
        buffer.Append(slop);
    }

    buffer.Append(ToStringUtils.Boost(Boost));

    return buffer.ToString();
}
367 
368 
/// <summary>
/// Returns true if <c>o</c> is a MultiPhraseQuery with the same boost and
/// slop, the same number of term groups with pairwise matching contents (as
/// decided by Compare.CompareTermArrays), and the same sequence of positions.
/// </summary>
/// <param name="o">The object to compare with.</param>
public override bool Equals(System.Object o)
{
    MultiPhraseQuery other = o as MultiPhraseQuery;
    if ((System.Object) other == null)
    {
        return false;
    }

    if (this.Boost != other.Boost || this.slop != other.slop)
    {
        return false;
    }

    // Term groups: same count, then pairwise comparison.
    int groupCount = this.termArrays.Count;
    if (groupCount != other.termArrays.Count)
    {
        return false;
    }
    for (int i = 0; i < groupCount; i++)
    {
        bool sameGroup = Compare.CompareTermArrays(this.termArrays[i], other.termArrays[i]);
        if (!sameGroup)
        {
            return false;
        }
    }

    // Positions: same count, then pairwise comparison.
    int positionCount = this.positions.Count;
    if (positionCount != other.positions.Count)
    {
        return false;
    }
    for (int i = 0; i < positionCount; i++)
    {
        if (this.positions[i] != other.positions[i])
        {
            return false;
        }
    }

    return true;
}
411 
/// <summary>
/// Returns a hash code value for this object, combining the raw bits of the
/// boost, the slop, the hash of the term arrays, the sum of the position
/// hashes, and a fixed constant with XOR.
/// </summary>
public override int GetHashCode()
{
    int positionSum = 0;
    for (int i = 0; i < positions.Count; i++)
    {
        positionSum += positions[i].GetHashCode();
    }

    // Reinterpret the float boost as its 32-bit pattern.
    int boostBits = BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0);
    return boostBits ^ slop ^ TermArraysHashCode() ^ positionSum ^ 0x4AC65113;
}
422 
// Computes the hash of termArrays: starts at 1 and, for each group in order,
// multiplies the running value by 31 and adds the group's hash (0 for a null
// group). Java uses Arrays.hashCode(termArray) for the per-group hash.
private int TermArraysHashCode()
{
    int running = 1;
    for (int i = 0; i < termArrays.Count; i++)
    {
        Term[] group = termArrays[i];

        int groupHash;
        if (group == null)
        {
            groupHash = 0;
        }
        else
        {
            groupHash = ArraysHashCode(group);
        }

        running = 31 * running + groupHash;
    }
    return running;
}
434 
// Hashes one term array: 0 for a null array; otherwise starts at 1 and, for
// each element in order, multiplies the running value by 31 and adds the
// element's hash code (0 for a null element).
private int ArraysHashCode(Term[] termArray)
{
    if (termArray == null)
    {
        return 0;
    }

    int running = 1;
    foreach (Term term in termArray)
    {
        int termHash = 0;
        if (term != null)
        {
            termHash = term.GetHashCode();
        }
        running = 31 * running + termHash;
    }
    return running;
}
450 
// Compares two lists of term arrays: they are equal when they have the same
// number of entries and the entries at each index are equal according to
// TermEquals (which treats two null arrays as equal and a null array as
// different from any non-null one).
//
// The lists are walked by index so that both sides advance together; reading
// an enumerator's Current without first calling MoveNext on it yields only the
// default value and would make every comparison fail.
private bool TermArraysEquals(System.Collections.Generic.List<Term[]> termArrays1, System.Collections.Generic.List<Term[]> termArrays2)
{
    int count = termArrays1.Count;
    if (count != termArrays2.Count)
    {
        return false;
    }

    for (int i = 0; i < count; i++)
    {
        if (!TermEquals(termArrays1[i], termArrays2[i]))
        {
            return false;
        }
    }
    return true;
}
471 
/// <summary>
/// Compares two arrays element by element. Two null arrays are equal; a null
/// array never equals a non-null one; otherwise the arrays are equal when
/// they have the same length and every pair of elements at the same index is
/// equal. Elements are compared null-safely: two null elements are equal and
/// a null element differs from any non-null element.
/// </summary>
/// <param name="array1">The first array, or null.</param>
/// <param name="array2">The second array, or null.</param>
/// <returns>true when the arrays are considered equal.</returns>
public static bool TermEquals(System.Array array1, System.Array array2)
{
    if (array1 == null || array2 == null)
    {
        // Equal only when both are missing.
        return array1 == null && array2 == null;
    }

    int length = array1.Length;
    if (length != array2.Length)
    {
        return false;
    }

    for (int index = 0; index < length; index++)
    {
        // The static Object.Equals handles null on either side, so a null
        // element is compared rather than dereferenced.
        if (!System.Object.Equals(array1.GetValue(index), array2.GetValue(index)))
        {
            return false;
        }
    }
    return true;
}
495  }
496 }