Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
PhraseScorer.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 
20 using TermPositions = Lucene.Net.Index.TermPositions;
21 
22 namespace Lucene.Net.Search
23 {
24 
/// <summary>Expert: Scoring functionality for phrase queries.
/// <br/>A document matches when it contains the phrase-query terms at
/// "valid" positions. What counts as valid depends on the kind of phrase
/// query: an exact phrase query requires the terms in adjacent locations,
/// whereas a sloppy phrase query tolerates some distance between them.
/// For every document that contains all of the phrase query terms, the
/// abstract method <see cref="PhraseFreq()" /> supplied by the extending class
/// is called to compute the frequency of the phrase in that document.
/// A non zero frequency means a match.
/// </summary>
internal abstract class PhraseScorer : Scorer
{
    // Only used to build the ToString() text (its Value is copied into value_Renamed).
    private Weight weight;

    // Per-document norm bytes; when null, Score() returns the raw score unnormalized.
    protected internal byte[] norms;

    // Copy of weight.Value taken at construction time.
    protected internal float value_Renamed;

    // True until the first NextDoc()/Advance() call has been made.
    private bool firstTime = true;

    // Latest result of a Next()/SkipTo() call on a PhrasePositions; once false, scanning stops.
    private bool more = true;

    // Queue used by Sort() to re-order the PhrasePositions list.
    protected internal PhraseQueue pq;

    // Head and tail of the singly linked list of PhrasePositions (linked through 'next').
    protected internal PhrasePositions first;
    protected internal PhrasePositions last;

    // Phrase frequency in the current doc, as computed by PhraseFreq().
    private float freq;

    /// <summary>Builds the scorer and links one <c>PhrasePositions</c> per entry
    /// of <paramref name="tps"/> into a list, in array order.</summary>
    /// <param name="weight">weight whose <c>Value</c> is multiplied into every score</param>
    /// <param name="tps">term positions, one per phrase term</param>
    /// <param name="offsets">phrase offset of each term, parallel to <paramref name="tps"/></param>
    /// <param name="similarity">similarity handed to the base scorer</param>
    /// <param name="norms">norm bytes indexed by document id, or null</param>
    internal PhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, byte[] norms)
        : base(similarity)
    {
        this.norms = norms;
        this.weight = weight;
        this.value_Renamed = weight.Value;

        // Convert tps to a list of phrase positions.
        // Note: a phrase-position differs from a term-position in that its
        // position reflects the phrase offset: pp.pos = tp.pos - offset.
        // That makes a matching (exact) phrase easy to spot: all
        // PhrasePositions then carry exactly the same position.
        for (int idx = 0; idx < tps.Length; idx++)
        {
            PhrasePositions node = new PhrasePositions(tps[idx], offsets[idx]);
            if (last == null)
            {
                // list is still empty: the new node becomes the head
                first = node;
            }
            else
            {
                // append after the current tail
                last.next = node;
            }
            last = node;
        }

        // construct an empty pq sized for all phrase terms
        pq = new PhraseQueue(tps.Length);
        first.doc = -1;
    }

    /// <summary>Returns the doc of the head of the PhrasePositions list
    /// (-1 right after construction, NO_MORE_DOCS once scanning has failed).</summary>
    public override int DocID()
    {
        return first.doc;
    }

    /// <summary>Moves to the next document for which <see cref="PhraseFreq()" />
    /// is non zero.</summary>
    /// <returns>the new value of <c>first.doc</c>, which is NO_MORE_DOCS when
    /// no further match was found</returns>
    public override int NextDoc()
    {
        if (!firstTime)
        {
            if (more)
            {
                // trigger further scanning
                more = last.Next();
            }
        }
        else
        {
            Init();
            firstTime = false;
        }

        bool matched = DoNext();
        if (!matched)
        {
            first.doc = NO_MORE_DOCS;
        }
        return first.doc;
    }

    /// <summary>Next without the initial increment: searches from the current
    /// state for a document on which head and tail agree and whose phrase
    /// frequency is non zero.</summary>
    /// <returns>true if a match was found, false when there are no more matches</returns>
    private bool DoNext()
    {
        while (more)
        {
            // find a doc with all the terms
            while (more && first.doc < last.doc)
            {
                more = first.SkipTo(last.doc); // skip first up to last
                FirstToLast();                 // and move it to the end
            }

            if (!more)
            {
                break;
            }

            // found a doc with all of the terms: check for the phrase
            freq = PhraseFreq();
            if (freq != 0.0f)
            {
                return true; // found a match
            }

            // no match: trigger further scanning
            more = last.Next();
        }
        return false; // no more matches
    }

    /// <summary>Scores the current document: <c>Tf(freq) * value_Renamed</c>,
    /// multiplied by the decoded norm of the document when norms are present.</summary>
    public override float Score()
    {
        float raw = Similarity.Tf(freq) * value_Renamed; // raw score
        if (norms == null)
        {
            return raw;
        }
        return raw * Similarity.DecodeNorm(norms[first.doc]); // normalize
    }

    /// <summary>Calls <c>SkipTo(target)</c> on the listed PhrasePositions
    /// (stopping at the first call that returns false), re-sorts them, and
    /// then searches for the next match from there.</summary>
    /// <param name="target">document id passed to each <c>SkipTo</c> call</param>
    /// <returns>the new value of <c>first.doc</c>, which is NO_MORE_DOCS when
    /// no match was found</returns>
    public override int Advance(int target)
    {
        firstTime = false;

        PhrasePositions cursor = first;
        while (more && cursor != null)
        {
            more = cursor.SkipTo(target);
            cursor = cursor.next;
        }

        if (more)
        {
            Sort(); // re-sort
        }

        bool matched = DoNext();
        if (!matched)
        {
            first.doc = NO_MORE_DOCS;
        }
        return first.doc;
    }

    /// <summary>
    /// Phrase frequency in the current doc, as last computed by PhraseFreq().
    /// </summary>
    /// <returns>the stored frequency value</returns>
    public float CurrentFreq()
    {
        return freq;
    }

    /// <summary> For a document containing all the phrase query terms, compute the
    /// frequency of the phrase in that document.
    /// A non zero frequency means a match.
    /// <br/>Note that containing all phrase terms does not guarantee a match -
    /// the terms have to be found in matching locations.
    /// </summary>
    /// <returns> frequency of the phrase in current doc, 0 if not found.
    /// </returns>
    protected internal abstract float PhraseFreq();

    /// <summary>First-use setup: calls <c>Next()</c> on the listed
    /// PhrasePositions (stopping at the first call that returns false) and
    /// sorts the list if every call succeeded.</summary>
    private void Init()
    {
        PhrasePositions cursor = first;
        while (more && cursor != null)
        {
            more = cursor.Next();
            cursor = cursor.next;
        }

        if (more)
        {
            Sort();
        }
    }

    /// <summary>Clears the queue, adds every listed PhrasePositions to it and
    /// rebuilds the list from the queue via <see cref="PqToList()" />.</summary>
    private void Sort()
    {
        pq.Clear();

        PhrasePositions cursor = first;
        while (cursor != null)
        {
            pq.Add(cursor);
            cursor = cursor.next;
        }

        PqToList();
    }

    /// <summary>Empties the queue into the linked list: elements are appended
    /// in the order the queue pops them, and the final tail's <c>next</c> is null.</summary>
    protected internal void PqToList()
    {
        first = null;
        last = null;

        while (pq.Top() != null)
        {
            PhrasePositions popped = pq.Pop();
            if (last == null)
            {
                first = popped;
            }
            else
            {
                // append after the current tail
                last.next = popped;
            }
            last = popped;
            popped.next = null;
        }
    }

    /// <summary>Moves the head of the list to its end; the element that
    /// followed it becomes the new head.</summary>
    protected internal void FirstToLast()
    {
        PhrasePositions head = first;
        last.next = head;  // hook the old head behind the old tail
        last = head;       // it is the tail from now on
        first = head.next; // its former successor leads the list
        head.next = null;  // terminate the list at the new tail
    }

    /// <summary>Returns <c>"scorer(" + weight + ")"</c>.</summary>
    public override string ToString()
    {
        return string.Concat("scorer(", weight, ")");
    }
}
224 }