docs/3.0.3/_query_term_scorer_8cs_source.html

/*

 *

 * Licensed to the Apache Software Foundation (ASF) under one

 * or more contributor license agreements.  See the NOTICE file

 * distributed with this work for additional information

 * regarding copyright ownership.  The ASF licenses this file

 * to you under the Apache License, Version 2.0 (the

 * "License"); you may not use this file except in compliance

 * with the License.  You may obtain a copy of the License at

 *

 *   http://www.apache.org/licenses/LICENSE-2.0

 *

 * Unless required by applicable law or agreed to in writing,

 * software distributed under the License is distributed on an

 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

 * KIND, either express or implied.  See the License for the

 * specific language governing permissions and limitations

 * under the License.

 *

*/


using System;

using System.Collections.Generic;

using System.Linq;

using System.Text;

using Lucene.Net.Analysis;

using Lucene.Net.Analysis.Tokenattributes;

using Lucene.Net.Index;

using Lucene.Net.Support;


namespace Lucene.Net.Search.Highlight

{

    /*

 * {@link IScorer} implementation which scores text fragments by the number of

 * unique query terms found. This class uses the {@link QueryTermExtractor}

 * class to determine the query terms and their boosts to be used.

 */

    // TODO: provide option to boost score of fragments near beginning of document

    // based on fragment.getFragNum()

    /// <summary>
    /// <see cref="IScorer"/> implementation that scores a text fragment by summing the
    /// weights of the distinct query terms found in it. A term contributes to the
    /// fragment score at most once per fragment, no matter how often it occurs.
    /// </summary>
    public class QueryTermScorer : IScorer
    {
        // Assigned in StartFragment; not read anywhere else in this class.
        private TextFragment currentTextFragment = null;

        // Query terms already counted towards the current fragment's score.
        // Created eagerly so that GetTokenScore does not fail with a null reference
        // when it is called before the first StartFragment.
        private HashSet<String> uniqueTermsInFragment = new HashSet<String>();

        // Running score of the current fragment; reset by StartFragment.
        private float totalScore = 0;

        // Largest weight among the terms stored in termsToFind.
        private float maxTermWeight = 0;

        // Term text -> the highest-weighted WeightedTerm supplied for that text.
        private readonly HashMap<String, WeightedTerm> termsToFind;

        // Term attribute of the token stream handed to Init; read by GetTokenScore.
        private ITermAttribute termAtt;

        /// <summary>
        /// Creates a scorer from the terms returned by
        /// <c>QueryTermExtractor.GetTerms(query)</c>.
        /// </summary>
        /// <param name="query">a Lucene query (ideally rewritten using query.rewrite before
        /// being passed to this class and the searcher)</param>
        public QueryTermScorer(Query query)
            : this(QueryTermExtractor.GetTerms(query))
        {
        }

        /// <summary>
        /// Creates a scorer from the terms returned by
        /// <c>QueryTermExtractor.GetTerms(query, false, fieldName)</c>.
        /// </summary>
        /// <param name="query">a Lucene query (ideally rewritten using query.rewrite before
        /// being passed to this class and the searcher)</param>
        /// <param name="fieldName">the Field name which is used to match Query terms</param>
        public QueryTermScorer(Query query, String fieldName)
            : this(QueryTermExtractor.GetTerms(query, false, fieldName))
        {
        }

        /// <summary>
        /// Creates a scorer from the terms returned by
        /// <c>QueryTermExtractor.GetIdfWeightedTerms(query, reader, fieldName)</c>.
        /// </summary>
        /// <param name="query">a Lucene query (ideally rewritten using query.rewrite before
        /// being passed to this class and the searcher)</param>
        /// <param name="reader">used to compute IDF which can be used to a) score selected
        /// fragments better b) use graded highlights eg set font color intensity</param>
        /// <param name="fieldName">the field on which Inverse Document Frequency (IDF)
        /// calculations are based</param>
        public QueryTermScorer(Query query, IndexReader reader, String fieldName)
            : this(QueryTermExtractor.GetIdfWeightedTerms(query, reader, fieldName))
        {
        }

        /// <summary>
        /// Creates a scorer from an explicit set of weighted terms. When the same term text
        /// appears more than once, only the entry with the highest weight is kept.
        /// </summary>
        /// <param name="weightedTerms">the terms to look for, with their weights</param>
        /// <exception cref="ArgumentNullException"><paramref name="weightedTerms"/> is null</exception>
        public QueryTermScorer(WeightedTerm[] weightedTerms)
        {
            if (weightedTerms == null)
            {
                throw new ArgumentNullException("weightedTerms");
            }

            termsToFind = new HashMap<String, WeightedTerm>();
            foreach (WeightedTerm candidate in weightedTerms)
            {
                // NOTE(review): the null test below relies on HashMap's indexer yielding null
                // for a term that has not been stored yet - confirm against Lucene.Net.Support.HashMap.
                WeightedTerm existingTerm = termsToFind[candidate.Term];
                if (existingTerm == null || existingTerm.Weight < candidate.Weight)
                {
                    // if a term is defined more than once, always use the highest scoring weight
                    termsToFind[candidate.Term] = candidate;
                    maxTermWeight = Math.Max(maxTermWeight, candidate.Weight);
                }
            }
        }

        /// <summary>
        /// Registers an <see cref="ITermAttribute"/> on the given stream and remembers it so
        /// that <see cref="GetTokenScore"/> can read the current token's text.
        /// </summary>
        /// <param name="tokenStream">the stream whose tokens will be scored</param>
        /// <returns>always null. NOTE(review): presumably this tells the caller to keep using
        /// its own stream - confirm against the IScorer contract.</returns>
        public TokenStream Init(TokenStream tokenStream)
        {
            termAtt = tokenStream.AddAttribute<ITermAttribute>();
            return null;
        }

        /// <summary>
        /// Begins scoring a new fragment: forgets the terms seen so far and resets the
        /// fragment score to zero.
        /// </summary>
        /// <param name="newFragment">the fragment about to be scored</param>
        public void StartFragment(TextFragment newFragment)
        {
            uniqueTermsInFragment = new HashSet<String>();
            currentTextFragment = newFragment;
            totalScore = 0;
        }

        /// <summary>
        /// Scores the token currently exposed by the term attribute captured in
        /// <see cref="Init"/>.
        /// </summary>
        /// <returns>0 when the token is not one of the query terms; otherwise the weight of
        /// the matching query term. The weight is also added to the fragment score the first
        /// time the term is seen in the current fragment.</returns>
        public float GetTokenScore()
        {
            String termText = termAtt.Term;

            WeightedTerm queryTerm = termsToFind[termText];
            if (queryTerm == null)
            {
                // not a query term
                return 0;
            }

            // HashSet.Add reports whether the term was newly inserted, so a single call both
            // records the term and tells us whether it should count towards the fragment score.
            if (uniqueTermsInFragment.Add(termText))
            {
                totalScore += queryTerm.Weight;
            }

            return queryTerm.Weight;
        }

        /// <summary>
        /// The score accumulated for the current fragment since the last call to
        /// <see cref="StartFragment"/>.
        /// </summary>
        public float FragmentScore
        {
            get { return totalScore; }
        }

        /// <summary>
        /// Called once all fragments have been processed; this class has nothing to do here.
        /// </summary>
        public void AllFragmentsProcessed()
        {
            // this class has no special operations to perform at end of processing
        }

        /// <summary>
        /// The highest weight among the terms this scorer looks for (useful for passing to
        /// GradientFormatter to set the top end of the coloring scale).
        /// </summary>
        public float MaxTermWeight
        {
            get { return maxTermWeight; }
        }
    }

}