Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
QueryScorer.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using Lucene.Net.Analysis;
21 using Lucene.Net.Analysis.Tokenattributes;
22 using Lucene.Net.Index;
23 using Lucene.Net.Index.Memory;
24 using Lucene.Net.Search.Spans;
25 using Lucene.Net.Support;
26 using Lucene.Net.Util;
27 
28 namespace Lucene.Net.Search.Highlight
29 {
30 
31  ///<summary>
32  /// <see cref="IScorer"/> implementation which scores text fragments by the number of
33  /// unique query terms found. This class converts appropriate <see cref="Query"/>s to
34  /// <see cref="SpanQuery"/>s and attempts to score only those terms that participated in
35  /// generating the 'hit' on the document.
36  /// </summary>
37  public class QueryScorer : IScorer
38  {
39  private float totalScore;
40  private ISet<String> foundTerms;
41  private IDictionary<String, WeightedSpanTerm> fieldWeightedSpanTerms;
42  private float maxTermWeight;
43  private int position = -1;
44  private String defaultField;
45  private ITermAttribute termAtt;
46  private IPositionIncrementAttribute posIncAtt;
47  private bool expandMultiTermQuery = true;
48  private Query query;
49  private String field;
50  private IndexReader reader;
51  private bool skipInitExtractor;
52  private bool wrapToCaching = true;
53 
54  /// <summary>
55  /// Constructs a new QueryScorer instance
56  /// </summary>
57  /// <param name="query">Query to use for highlighting</param>
58  public QueryScorer(Query query)
59  {
60  Init(query, null, null, true);
61  }
62 
63  /// <summary>
64  /// Constructs a new QueryScorer instance
65  /// </summary>
66  /// <param name="query">Query to use for highlighting</param>
67  /// <param name="field">Field to highlight - pass null to ignore fields</param>
68  public QueryScorer(Query query, String field)
69  {
70  Init(query, field, null, true);
71  }
72 
73  /// <summary>
74  /// Constructs a new QueryScorer instance
75  /// </summary>
76  /// <param name="query">Query to use for highlighting</param>
77  /// <param name="reader"><see cref="IndexReader"/> to use for quasi tf/idf scoring</param>
78  /// <param name="field">Field to highlight - pass null to ignore fields</param>
79  public QueryScorer(Query query, IndexReader reader, String field)
80  {
81  Init(query, field, reader, true);
82  }
83 
84  /// <summary>
85  /// Constructs a new QueryScorer instance
86  /// </summary>
87  /// <param name="query">Query to use for highlighting</param>
88  /// <param name="reader"><see cref="IndexReader"/> to use for quasi tf/idf scoring</param>
89  /// <param name="field">Field to highlight - pass null to ignore fields</param>
90  /// <param name="defaultField">The default field for queries with the field name unspecified</param>
91  public QueryScorer(Query query, IndexReader reader, String field, String defaultField)
92  {
93  this.defaultField = StringHelper.Intern(defaultField);
94  Init(query, field, reader, true);
95  }
96 
97 
98  /// <summary>
99  /// Constructs a new QueryScorer instance
100  /// </summary>
101  /// <param name="query">Query to use for highlighting</param>
102  /// <param name="field">Field to highlight - pass null to ignore fields</param>
103  /// <param name="defaultField">The default field for queries with the field name unspecified</param>
104  public QueryScorer(Query query, String field, String defaultField)
105  {
106  this.defaultField = StringHelper.Intern(defaultField);
107  Init(query, field, null, true);
108  }
109 
110  /// <summary>
111  /// Constructs a new QueryScorer instance
112  /// </summary>
113  /// <param name="weightedTerms">an array of pre-created <see cref="WeightedSpanTerm"/>s</param>
114  public QueryScorer(WeightedSpanTerm[] weightedTerms)
115  {
116  this.fieldWeightedSpanTerms = new HashMap<String, WeightedSpanTerm>(weightedTerms.Length);
117 
118  foreach (WeightedSpanTerm t in weightedTerms)
119  {
120  WeightedSpanTerm existingTerm = fieldWeightedSpanTerms[t.Term];
121 
122  if ((existingTerm == null) ||
123  (existingTerm.Weight < t.Weight))
124  {
125  // if a term is defined more than once, always use the highest
126  // scoring Weight
127  fieldWeightedSpanTerms[t.Term] = t;
128  maxTermWeight = Math.Max(maxTermWeight, t.Weight);
129  }
130  }
131  skipInitExtractor = true;
132  }
133 
134  /// <seealso cref="IScorer.FragmentScore"/>
135  public float FragmentScore
136  {
137  get { return totalScore; }
138  }
139 
140  /// <summary>
141  /// The highest weighted term (useful for passing to GradientFormatter to set top end of coloring scale).
142  /// </summary>
143  public float MaxTermWeight
144  {
145  get { return maxTermWeight; }
146  }
147 
148  /// <seealso cref="IScorer.GetTokenScore"/>
149  public float GetTokenScore()
150  {
151  position += posIncAtt.PositionIncrement;
152  String termText = termAtt.Term;
153 
154  WeightedSpanTerm weightedSpanTerm;
155 
156  if ((weightedSpanTerm = fieldWeightedSpanTerms[termText]) == null)
157  {
158  return 0;
159  }
160 
161  if (weightedSpanTerm.IsPositionSensitive() &&
162  !weightedSpanTerm.CheckPosition(position))
163  {
164  return 0;
165  }
166 
167  float score = weightedSpanTerm.Weight;
168 
169  // found a query term - is it unique in this doc?
170  if (!foundTerms.Contains(termText))
171  {
172  totalScore += score;
173  foundTerms.Add(termText);
174  }
175 
176  return score;
177  }
178 
179  /// <seealso cref="IScorer.Init"/>
180  public TokenStream Init(TokenStream tokenStream)
181  {
182  position = -1;
183  termAtt = tokenStream.AddAttribute<ITermAttribute>();
184  posIncAtt = tokenStream.AddAttribute<IPositionIncrementAttribute>();
185  if (!skipInitExtractor)
186  {
187  if (fieldWeightedSpanTerms != null)
188  {
189  fieldWeightedSpanTerms.Clear();
190  }
191  return InitExtractor(tokenStream);
192  }
193  return null;
194  }
195 
196  /// <summary>
197  /// Retrieve the <see cref="WeightedSpanTerm"/> for the specified token. Useful for passing
198  /// Span information to a <see cref="IFragmenter"/>.
199  /// </summary>
200  /// <param name="token">token to get {@link WeightedSpanTerm} for</param>
201  /// <returns>WeightedSpanTerm for token</returns>
202  public WeightedSpanTerm GetWeightedSpanTerm(String token)
203  {
204  return fieldWeightedSpanTerms[token];
205  }
206 
207  private void Init(Query query, String field, IndexReader reader, bool expandMultiTermQuery)
208  {
209  this.reader = reader;
210  this.expandMultiTermQuery = expandMultiTermQuery;
211  this.query = query;
212  this.field = field;
213  }
214 
215  private TokenStream InitExtractor(TokenStream tokenStream)
216  {
217  WeightedSpanTermExtractor qse = defaultField == null
218  ? new WeightedSpanTermExtractor()
219  : new WeightedSpanTermExtractor(defaultField);
220 
221  qse.ExpandMultiTermQuery = expandMultiTermQuery;
222  qse.SetWrapIfNotCachingTokenFilter(wrapToCaching);
223  if (reader == null)
224  {
225  this.fieldWeightedSpanTerms = qse.GetWeightedSpanTerms(query,
226  tokenStream, field);
227  }
228  else
229  {
230  this.fieldWeightedSpanTerms = qse.GetWeightedSpanTermsWithScores(query,
231  tokenStream, field, reader);
232  }
233  if (qse.IsCachedTokenStream)
234  {
235  return qse.TokenStream;
236  }
237 
238  return null;
239  }
240 
241  /// <seealso cref="IScorer.StartFragment"/>
242  public void StartFragment(TextFragment newFragment)
243  {
244  foundTerms = Support.Compatibility.SetFactory.CreateHashSet<string>();
245  totalScore = 0;
246  }
247 
248  /// <summary>
249  /// Controls whether or not multi-term queries are expanded
250  /// against a <see cref="MemoryIndex"/> <see cref="IndexReader"/>.
251  /// </summary>
252  public bool IsExpandMultiTermQuery
253  {
254  get { return expandMultiTermQuery; }
255  set { this.expandMultiTermQuery = value; }
256  }
257 
258  /// <summary>
259  /// By default, <see cref="TokenStream"/>s that are not of the type
260  /// <see cref="CachingTokenFilter"/> are wrapped in a <see cref="CachingTokenFilter"/> to
261  /// ensure an efficient reset - if you are already using a different caching
262  /// <see cref="TokenStream"/> impl and you don't want it to be wrapped, set this to
263  /// false.
264  /// </summary>
265  public void SetWrapIfNotCachingTokenFilter(bool wrap)
266  {
267  this.wrapToCaching = wrap;
268  }
269  }
270 }