Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
PhraseQuery.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using Lucene.Net.Support;
20 using IndexReader = Lucene.Net.Index.IndexReader;
21 using Term = Lucene.Net.Index.Term;
22 using TermPositions = Lucene.Net.Index.TermPositions;
23 using ToStringUtils = Lucene.Net.Util.ToStringUtils;
24 using IDFExplanation = Lucene.Net.Search.Explanation.IDFExplanation;
25 
26 namespace Lucene.Net.Search
27 {
28 
29  /// <summary>A Query that matches documents containing a particular sequence of terms.
30  /// A PhraseQuery is built by QueryParser for input like <c>"new york"</c>.
31  ///
32  /// <p/>This query may be combined with other terms or queries with a <see cref="BooleanQuery" />.
33  /// </summary>
34  [Serializable]
35  public class PhraseQuery:Query
36  {
// Field name shared by all terms of the phrase; assigned when the first term is added.
private string field;

// Terms of the phrase in the order they were added.
private EquatableList<Term> terms = new EquatableList<Term>(4);

// Relative position of each term; positions[i] belongs to terms[i].
private EquatableList<int> positions = new EquatableList<int>(4);

// Largest position added so far; ToString(string) sizes its buffer from it.
private int maxPosition;

// Backing field of the Slop property; zero unless a caller sets it.
private int slop;

/// <summary>Creates a phrase query that does not contain any terms yet.</summary>
public PhraseQuery()
{
}
47 
/// <summary>Gets or sets the slop: the number of other words allowed between the
/// words of the query phrase.
/// <p/>A value of zero (the default) asks for an exact phrase match; larger values
/// make the query behave like a <c>WITHIN</c> or <c>NEAR</c> operator.
/// <p/>The slop is an edit distance whose unit is one move of a query term out of
/// its position. Swapping two words costs two moves (the first move stacks the
/// words on the same position), so the slop must be at least two before
/// re-ordered phrases can match.
/// <p/>Closer matches score higher than sloppier ones, so results come back
/// ordered by exactness.
/// </summary>
public virtual int Slop
{
    get
    {
        return this.slop;
    }
    set
    {
        this.slop = value;
    }
}
65 
/// <summary>Appends a term to the query phrase.
/// The term is placed one position after the most recently added term, or at
/// position zero when the phrase is still empty.
/// </summary>
/// <param name="term">The term to append.</param>
public virtual void Add(Term term)
{
    int known = positions.Count;

    // Continue right after the last recorded position; an empty phrase starts at 0.
    int next = (known > 0) ? positions[known - 1] + 1 : 0;

    Add(term, next);
}
77 
/// <summary>Adds a term to the query phrase at an explicitly given relative position.
/// Explicit positions make it possible to place several terms on the same position
/// or to leave gaps between terms (for example where stopwords were removed).
/// </summary>
/// <param name="term">The term to add. The first term added fixes the field of the
/// query; every later term must use the same field name.
/// </param>
/// <param name="position">Relative position of the term within the phrase.
/// </param>
/// <exception cref="System.ArgumentException">The field of <paramref name="term"/>
/// differs from the field of the terms already in the phrase.</exception>
public virtual void Add(Term term, int position)
{
    if (terms.Count == 0)
    {
        // The first term decides which field the whole phrase belongs to.
        field = term.Field;
    }
    else if (!string.Equals(term.Field, field, System.StringComparison.Ordinal))
    {
        // Compare the field names by value. A reference comparison only succeeds
        // when both names are the very same string object, so it would reject a
        // term whose field has identical text but lives in a different instance.
        throw new System.ArgumentException("All phrase terms must be in the same field: " + term);
    }

    terms.Add(term);
    positions.Add(position);

    // Remember the highest position seen; ToString(string) sizes its buffer from it.
    if (position > maxPosition)
    {
        maxPosition = position;
    }
}
102 
/// <summary>Returns the terms of this phrase as an array, in the order they are
/// stored in the query.</summary>
public virtual Term[] GetTerms()
{
    Term[] snapshot = terms.ToArray();
    return snapshot;
}
108 
/// <summary>Returns the relative positions of the terms in this phrase as a newly
/// allocated array; element <c>i</c> is the position of the i-th term.</summary>
public virtual int[] GetPositions()
{
    // Same element-by-element copy as a manual loop, done by the list itself.
    return positions.ToArray();
}
117 
/// <summary>Weight for the enclosing <see cref="PhraseQuery"/>. It obtains the idf of
/// the phrase terms from the query's <see cref="Similarity"/>, creates the exact or
/// sloppy phrase scorer, and builds score explanations.
/// </summary>
[Serializable]
private class PhraseWeight : Weight
{
    // The phrase query this weight was created for.
    private PhraseQuery enclosingInstance;
    private Similarity similarity;
    // Assigned by Normalize: normalized query weight multiplied by idf.
    private float value_Renamed;
    private float idf;
    private float queryNorm;
    private float queryWeight;
    private IDFExplanation idfExp;

    /// <summary>Creates the weight and asks the query's similarity for the idf
    /// explanation of the phrase terms.</summary>
    public PhraseWeight(PhraseQuery enclosingInstance, Searcher searcher)
    {
        this.enclosingInstance = enclosingInstance;
        this.similarity = Enclosing_Instance.GetSimilarity(searcher);

        this.idfExp = this.similarity.IdfExplain(Enclosing_Instance.terms, searcher);
        this.idf = this.idfExp.Idf;
    }

    /// <summary>The phrase query that owns this weight.</summary>
    public PhraseQuery Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    public override System.String ToString()
    {
        return "weight(" + Enclosing_Instance + ")";
    }

    /// <summary>The query this weight belongs to (the enclosing phrase query).</summary>
    public override Query Query
    {
        get
        {
            return Enclosing_Instance;
        }
    }

    /// <summary>The value computed by the last call to <c>Normalize</c>.</summary>
    public override float Value
    {
        get
        {
            return value_Renamed;
        }
    }

    /// <summary>Stores idf multiplied by the query boost as the query weight and
    /// returns the square of that weight.</summary>
    public override float GetSumOfSquaredWeights()
    {
        // query weight = idf * boost
        queryWeight = idf * Enclosing_Instance.Boost;

        // callers receive the squared weight
        return queryWeight * queryWeight;
    }

    /// <summary>Records the query norm, scales the query weight by it and derives
    /// the final weight value.</summary>
    public override void Normalize(float queryNorm)
    {
        this.queryNorm = queryNorm;

        // scale the query weight by the norm
        queryWeight = queryWeight * queryNorm;

        // multiply by idf once more for the document side
        value_Renamed = queryWeight * idf;
    }

    /// <summary>Creates the scorer for this phrase on the given reader.
    /// Returns <c>null</c> when the phrase has no terms or when the reader yields no
    /// <see cref="TermPositions"/> for one of them. A slop of zero selects the exact
    /// phrase scorer; any other slop selects the sloppy one.
    /// </summary>
    public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
    {
        PhraseQuery phrase = Enclosing_Instance;
        int termCount = phrase.terms.Count;

        // Nothing to score for an empty phrase.
        if (termCount == 0)
        {
            return null;
        }

        TermPositions[] termPositions = new TermPositions[termCount];
        for (int i = 0; i < termCount; i++)
        {
            TermPositions current = reader.TermPositions(phrase.terms[i]);
            if (current == null)
            {
                return null;
            }
            termPositions[i] = current;
        }

        int[] offsets = phrase.GetPositions();
        byte[] norms = reader.Norms(phrase.field);

        if (phrase.slop != 0)
        {
            return new SloppyPhraseScorer(this, termPositions, offsets, similarity, phrase.slop, norms);
        }

        // slop == 0: exact phrase matching
        return new ExactPhraseScorer(this, termPositions, offsets, similarity, norms);
    }

    /// <summary>Explains the score of document <paramref name="doc"/> as the product
    /// of a query weight part (boost, idf, queryNorm) and a field weight part
    /// (tf, idf, fieldNorm). When the query weight equals 1 only the field weight
    /// explanation is returned; when no scorer is available the result is a zero
    /// explanation described as "no matching docs".
    /// </summary>
    public override Explanation Explain(IndexReader reader, int doc)
    {
        PhraseQuery phrase = Enclosing_Instance;

        Explanation result = new Explanation();
        result.Description = "weight(" + Query + " in " + doc + "), product of:";

        // Text of the idf explanation, used in the idf description below.
        System.Text.StringBuilder docFreqText = new System.Text.StringBuilder();
        docFreqText.Append(idfExp.Explain());

        // The phrase as quoted, space separated term texts.
        System.Text.StringBuilder quotedPhrase = new System.Text.StringBuilder();
        quotedPhrase.Append('\"');
        int termCount = phrase.terms.Count;
        for (int i = 0; i < termCount; i++)
        {
            if (i > 0)
            {
                quotedPhrase.Append(" ");
            }
            quotedPhrase.Append(phrase.terms[i].Text);
        }
        quotedPhrase.Append('\"');

        Explanation idfExpl = new Explanation(idf, "idf(" + phrase.field + ":" + docFreqText + ")");

        // ---- query weight: boost * idf * queryNorm ----
        Explanation queryExpl = new Explanation();
        queryExpl.Description = "queryWeight(" + Query + "), product of:";

        Explanation boostExpl = new Explanation(phrase.Boost, "boost");
        if (phrase.Boost != 1.0f)
        {
            // A boost of 1 is left out of the details but still enters the product.
            queryExpl.AddDetail(boostExpl);
        }
        queryExpl.AddDetail(idfExpl);

        Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
        queryExpl.AddDetail(queryNormExpl);

        queryExpl.Value = boostExpl.Value * idfExpl.Value * queryNormExpl.Value;
        result.AddDetail(queryExpl);

        // ---- field weight: tf * idf * fieldNorm ----
        Explanation fieldExpl = new Explanation();
        fieldExpl.Description = "fieldWeight(" + phrase.field + ":" + quotedPhrase + " in " + doc + "), product of:";

        PhraseScorer phraseScorer = (PhraseScorer) Scorer(reader, true, false);
        if (phraseScorer == null)
        {
            return new Explanation(0.0f, "no matching docs");
        }

        // The phrase frequency counts only if the scorer lands exactly on doc.
        float phraseFreq;
        int landedOn = phraseScorer.Advance(doc);
        if (landedOn == doc)
        {
            phraseFreq = phraseScorer.CurrentFreq();
        }
        else
        {
            phraseFreq = 0.0f;
        }

        Explanation tfExplanation = new Explanation();
        tfExplanation.Value = similarity.Tf(phraseFreq);
        tfExplanation.Description = "tf(phraseFreq=" + phraseFreq + ")";

        fieldExpl.AddDetail(tfExplanation);
        fieldExpl.AddDetail(idfExpl);

        // A reader without norms for the field contributes a neutral norm of 1.
        byte[] fieldNorms = reader.Norms(phrase.field);
        float fieldNorm;
        if (fieldNorms != null)
        {
            fieldNorm = Similarity.DecodeNorm(fieldNorms[doc]);
        }
        else
        {
            fieldNorm = 1.0f;
        }

        Explanation fieldNormExpl = new Explanation();
        fieldNormExpl.Value = fieldNorm;
        fieldNormExpl.Description = "fieldNorm(field=" + phrase.field + ", doc=" + doc + ")";
        fieldExpl.AddDetail(fieldNormExpl);

        fieldExpl.Value = tfExplanation.Value * idfExpl.Value * fieldNormExpl.Value;
        result.AddDetail(fieldExpl);

        // ---- combine both parts ----
        result.Value = queryExpl.Value * fieldExpl.Value;

        // With a query weight of exactly 1 the field weight alone is the answer.
        if (queryExpl.Value == 1.0f)
        {
            return fieldExpl;
        }
        return result;
    }
}
279 
/// <summary>Creates the weight for this query. A phrase made of exactly one term
/// is handed to a <see cref="TermQuery"/> carrying the same boost; every other
/// phrase gets a phrase weight of its own.
/// </summary>
public override Weight CreateWeight(Searcher searcher)
{
    if (terms.Count != 1)
    {
        return new PhraseWeight(this, searcher);
    }

    // Single-term shortcut: a plain term query does the work.
    Query single = new TermQuery(terms[0]);
    single.Boost = Boost;
    return single.CreateWeight(searcher);
}
292 
/// <summary>Adds every term of this phrase to <paramref name="queryTerms"/>.</summary>
/// <param name="queryTerms">The set that receives the terms.</param>
/// <seealso cref="Lucene.Net.Search.Query.ExtractTerms(System.Collections.Generic.ISet{Term})">
/// </seealso>
public override void ExtractTerms(System.Collections.Generic.ISet<Term> queryTerms)
{
    queryTerms.UnionWith(this.terms);
}
299 
/// <summary>Builds a user-readable form of this query: an optional
/// <c>field:</c> prefix, the quoted phrase, <c>~slop</c> when the slop is not zero,
/// and the boost suffix produced by <see cref="ToStringUtils"/>.
/// Terms sharing a position are joined with <c>|</c>; positions without a term are
/// shown as <c>?</c>.
/// </summary>
/// <param name="f">Field name to compare against; the prefix is omitted when it
/// equals the field of this query.</param>
public override System.String ToString(System.String f)
{
    System.Text.StringBuilder text = new System.Text.StringBuilder();

    bool showField = field != null && !field.Equals(f);
    if (showField)
    {
        text.Append(field).Append(":");
    }

    text.Append("\"");

    // One slot per position from 0 up to the highest position in the phrase.
    System.String[] slots = new System.String[maxPosition + 1];
    for (int t = 0; t < terms.Count; t++)
    {
        int slot = positions[t];
        System.String existing = slots[slot];
        slots[slot] = (existing == null)
            ? terms[t].Text
            : existing + "|" + terms[t].Text;
    }

    for (int p = 0; p < slots.Length; p++)
    {
        if (p > 0)
        {
            text.Append(' ');
        }

        // An empty slot is a gap in the phrase.
        text.Append(slots[p] ?? "?");
    }

    text.Append("\"");

    if (slop != 0)
    {
        text.Append("~").Append(slop);
    }

    text.Append(ToStringUtils.Boost(Boost));
    return text.ToString();
}
354 
/// <summary>Returns true iff <c>o</c> is a <see cref="PhraseQuery"/> with the same
/// boost, slop, terms and positions as this query.</summary>
public override bool Equals(System.Object o)
{
    PhraseQuery other = o as PhraseQuery;
    if (object.ReferenceEquals(other, null))
    {
        return false;
    }

    if (!(this.Boost == other.Boost))
    {
        return false;
    }
    if (this.slop != other.slop)
    {
        return false;
    }
    if (!this.terms.Equals(other.terms))
    {
        return false;
    }
    return this.positions.Equals(other.positions);
}
363 
/// <summary>Returns a hash code built by XOR-ing the bit pattern of the boost,
/// the slop, and the hash codes of the term and position lists.</summary>
public override int GetHashCode()
{
    // Reinterpret the float boost as its 32-bit integer representation.
    int hash = BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0);

    hash ^= slop;
    hash ^= terms.GetHashCode();
    hash ^= positions.GetHashCode();
    return hash;
}
369  }
370 }