Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
QueryTermScorer.cs
Go to the documentation of this file.
1 /*
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements. See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership. The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License. You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied. See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20 */
21 
22 using System;
23 using System.Collections.Generic;
24 using System.Linq;
25 using System.Text;
26 using Lucene.Net.Analysis;
27 using Lucene.Net.Analysis.Tokenattributes;
28 using Lucene.Net.Index;
29 using Lucene.Net.Support;
30 
namespace Lucene.Net.Search.Highlight
{
    /// <summary>
    /// <see cref="IScorer"/> implementation which scores a text fragment by summing
    /// the weights of the distinct query terms found in it. When constructed from a
    /// query, this class uses <see cref="QueryTermExtractor"/> to determine the query
    /// terms and their weights.
    /// </summary>
    /// <remarks>
    /// <see cref="Init"/> must be called before <see cref="GetTokenScore"/>, because
    /// the term attribute read by <see cref="GetTokenScore"/> is only assigned there.
    /// </remarks>
    // TODO: provide option to boost score of fragments near beginning of document
    // based on fragment.getFragNum()
    public class QueryTermScorer : IScorer
    {
        // Written by StartFragment; not read anywhere inside this class.
        private TextFragment currentTextFragment = null;

        // Terms already counted towards totalScore for the current fragment.
        // Initialised eagerly so GetTokenScore is safe even before the first
        // StartFragment call.
        private HashSet<String> uniqueTermsInFragment = new HashSet<String>();

        private float totalScore = 0;
        private readonly float maxTermWeight;
        private readonly HashMap<String, WeightedTerm> termsToFind;

        private ITermAttribute termAtt;

        /// <summary>
        /// Creates a scorer for the terms extracted from <paramref name="query"/>
        /// by <see cref="QueryTermExtractor"/>.
        /// </summary>
        /// <param name="query">a Lucene query (ideally rewritten using query.rewrite
        /// before being passed to this class and the searcher)</param>
        public QueryTermScorer(Query query)
            : this(QueryTermExtractor.GetTerms(query))
        {
        }

        /// <summary>
        /// Creates a scorer for the terms extracted from <paramref name="query"/>
        /// by <see cref="QueryTermExtractor"/> for the given field.
        /// </summary>
        /// <param name="query">a Lucene query (ideally rewritten using query.rewrite
        /// before being passed to this class and the searcher)</param>
        /// <param name="fieldName">the field name which is used to match query terms</param>
        public QueryTermScorer(Query query, String fieldName)
            : this(QueryTermExtractor.GetTerms(query, false, fieldName))
        {
        }

        /// <summary>
        /// Creates a scorer whose term weights are obtained from
        /// <c>QueryTermExtractor.GetIdfWeightedTerms</c>.
        /// </summary>
        /// <param name="query">a Lucene query (ideally rewritten using query.rewrite
        /// before being passed to this class and the searcher)</param>
        /// <param name="reader">used to compute IDF which can be used to a) score
        /// selected fragments better b) use graded highlights, e.g. set font color
        /// intensity</param>
        /// <param name="fieldName">the field on which Inverse Document Frequency (IDF)
        /// calculations are based</param>
        public QueryTermScorer(Query query, IndexReader reader, String fieldName)
            : this(QueryTermExtractor.GetIdfWeightedTerms(query, reader, fieldName))
        {
        }

        /// <summary>
        /// Creates a scorer from an explicit list of weighted terms. If a term
        /// appears more than once, the entry with the highest weight is kept.
        /// </summary>
        /// <param name="weightedTerms">the terms to look for, with their weights</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="weightedTerms"/> is <c>null</c>.
        /// </exception>
        public QueryTermScorer(WeightedTerm[] weightedTerms)
        {
            if (weightedTerms == null)
            {
                throw new ArgumentNullException("weightedTerms");
            }

            termsToFind = new HashMap<String, WeightedTerm>();
            foreach (WeightedTerm candidate in weightedTerms)
            {
                // The null check relies on the HashMap indexer yielding null for an
                // absent key (a plain Dictionary would throw instead).
                WeightedTerm existingTerm = termsToFind[candidate.Term];
                if ((existingTerm == null) || (existingTerm.Weight < candidate.Weight))
                {
                    // if a term is defined more than once, always use the highest
                    // scoring Weight
                    termsToFind[candidate.Term] = candidate;
                    maxTermWeight = Math.Max(maxTermWeight, candidate.Weight);
                }
            }
        }

        /// <summary>
        /// Registers the term attribute on <paramref name="tokenStream"/> so that
        /// <see cref="GetTokenScore"/> can read the text of the current token.
        /// </summary>
        /// <param name="tokenStream">the stream whose tokens will be scored</param>
        /// <returns>
        /// Always <c>null</c>. NOTE(review): presumably this tells the caller to keep
        /// using the stream it passed in - confirm against the IScorer contract.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="tokenStream"/> is <c>null</c>.
        /// </exception>
        public TokenStream Init(TokenStream tokenStream)
        {
            if (tokenStream == null)
            {
                throw new ArgumentNullException("tokenStream");
            }

            termAtt = tokenStream.AddAttribute<ITermAttribute>();
            return null;
        }

        /// <summary>
        /// Begins scoring a new fragment: forgets the terms seen so far and resets
        /// the fragment score to zero.
        /// </summary>
        /// <param name="newFragment">the fragment about to be scored</param>
        public void StartFragment(TextFragment newFragment)
        {
            uniqueTermsInFragment = new HashSet<String>();
            currentTextFragment = newFragment;
            totalScore = 0;
        }

        /// <summary>
        /// Scores the token currently exposed by the term attribute obtained in
        /// <see cref="Init"/>.
        /// </summary>
        /// <returns>
        /// The weight of the matching query term, or 0 if the token is not a query
        /// term. The weight is added to <see cref="FragmentScore"/> only the first
        /// time the term is seen in the current fragment.
        /// </returns>
        public float GetTokenScore()
        {
            String termText = termAtt.Term;

            WeightedTerm queryTerm = termsToFind[termText];
            if (queryTerm == null)
            {
                // not a query term - return
                return 0;
            }

            // found a query term - count it once per fragment; HashSet.Add returns
            // false when the term was already recorded, which avoids a separate
            // Contains lookup
            if (uniqueTermsInFragment.Add(termText))
            {
                totalScore += queryTerm.Weight;
            }
            return queryTerm.Weight;
        }

        /// <summary>
        /// Gets the score accumulated for the current fragment: the sum of the
        /// weights of the distinct query terms seen since the last
        /// <see cref="StartFragment"/> call.
        /// </summary>
        public float FragmentScore
        {
            get { return totalScore; }
        }

        /// <summary>
        /// Called when all fragments have been processed. Does nothing.
        /// </summary>
        public void AllFragmentsProcessed()
        {
            // this class has no special operations to perform at end of processing
        }

        /// <summary>
        /// Gets the highest weight among the terms this scorer looks for (useful for
        /// passing to GradientFormatter to set the top end of the coloring scale).
        /// </summary>
        public float MaxTermWeight
        {
            get { return maxTermWeight; }
        }
    }
}