19 using System.Collections;
20 using System.Collections.Generic;
24 using Lucene.Net.Search;
25 using Lucene.Net.Index;
26 using Lucene.Net.Analysis;
27 using Lucene.Net.Analysis.Tokenattributes;
28 using Lucene.Net.Support;
29 using Lucene.Net.Util;
31 namespace Lucene.Net.Search
// Cached rewritten form of this query; null until the rewrite logic stores its result here.
51 Query rewrittenQuery = null;
// Field/term specifications appended by the public AddTerms; also part of Equals/GetHashCode.
52 EquatableList<FieldVals> fieldVals =
new EquatableList<FieldVals>();
// Capacity of the per-source-term queue that collects candidate variants.
56 int MAX_VARIANTS_PER_TERM = 50;
// Passed to every FuzzyTermQuery created while rewriting; also part of Equals/GetHashCode.
57 bool ignoreTF =
false;
// Capacity handed to the overall ScoreTermQueue; also part of Equals/GetHashCode.
58 private int maxNumTerms;
// Hash code built from analyzer, fieldVals, ignoreTF and maxNumTerms, i.e. the same
// fields the visible Equals comparisons use. Null analyzer/fieldVals contribute 0.
// NOTE(review): the declarations of `prime` and `result` and the final return are not
// visible in this excerpt.
60 public override int GetHashCode()
64 result = prime * result + ((analyzer == null) ? 0 : analyzer.GetHashCode());
65 result = prime * result
66 + ((fieldVals == null) ? 0 : fieldVals.GetHashCode());
// Two distinct constants so that true and false hash differently.
67 result = prime * result + (ignoreTF ? 1231 : 1237);
68 result = prime * result + maxNumTerms;
// Equality over analyzer, fieldVals, ignoreTF and maxNumTerms.
// NOTE(review): the null check on `obj`, the cast that produces `other`, the
// `analyzer == null` guard and the return statements are not visible in this excerpt.
72 public override bool Equals(Object obj)
// Requires the exact same runtime type; a subclass instance is never equal.
78 if (GetType() != obj.GetType())
// Analyzer comparison (presumably the null-on-this branch; the guard is not visible here).
83 if (other.analyzer != null)
86 else if (!analyzer.Equals(other.analyzer))
// fieldVals comparison with explicit handling of a null list on this side.
88 if (fieldVals == null)
90 if (other.fieldVals != null)
93 else if (!fieldVals.Equals(other.fieldVals))
95 if (ignoreTF != other.ignoreTF)
97 if (maxNumTerms != other.maxNumTerms)
// Initialisation fragment (the enclosing constructor header is not visible in this excerpt).
// Creates the overall score queue with capacity maxNumTerms and stores the analyzer and
// the term limit on the instance.
110 q =
new ScoreTermQueue(maxNumTerms);
111 this.analyzer = analyzer;
112 this.maxNumTerms = maxNumTerms;
// Values captured for one AddTerms call: the text to analyse, the target field,
// and the fuzzy-matching parameters.
117 internal String queryString;
118 internal String fieldName;
119 internal float minSimilarity;
120 internal int prefixLength;
// Stores the supplied values. Parameter order here is (name, similarity, length,
// queryString), which differs from the public AddTerms parameter order.
// NOTE(review): the assignment of `name` to fieldName is not visible in this excerpt.
121 public FieldVals(String name,
float similarity,
int length, String queryString)
124 minSimilarity = similarity;
125 prefixLength = length;
126 this.queryString = queryString;
// Hash code over fieldName, minSimilarity, prefixLength and queryString; null strings
// contribute 0.
// NOTE(review): the declarations of `prime`/`result` and the return are not visible here.
129 public override int GetHashCode()
133 result = prime * result
134 + ((fieldName == null) ? 0 : fieldName.GetHashCode());
// Uses the raw 32-bit pattern of the float, matching the bitwise comparison in Equals.
135 result = prime * result + BitConverter.ToInt32(BitConverter.GetBytes(minSimilarity),0);
136 result = prime * result + prefixLength;
137 result = prime * result
138 + ((queryString == null) ? 0 : queryString.GetHashCode());
// Field-by-field equality over fieldName, minSimilarity, prefixLength and queryString.
// NOTE(review): the null check on `obj` and the return statements are not visible here.
142 public override bool Equals(Object obj)
// Requires the exact same runtime type.
148 if (GetType() != obj.GetType())
150 FieldVals other = (FieldVals)obj;
151 if (fieldName == null)
153 if (other.fieldName != null)
156 else if (!fieldName.Equals(other.fieldName))
// Compares the raw bit patterns of the floats rather than using ==, so the result
// depends on the exact encoding (e.g. 0.0f and -0.0f have different bits).
158 if (BitConverter.ToInt32(BitConverter.GetBytes(minSimilarity), 0) != BitConverter.ToInt32(BitConverter.GetBytes(other.minSimilarity), 0))
161 if (prefixLength != other.prefixLength)
163 if (queryString == null)
165 if (other.queryString != null)
168 else if (!queryString.Equals(other.queryString))
// Registers another piece of query text by appending a FieldVals entry to fieldVals.
//   queryString   - text stored on the entry (analysed later by the private AddTerms overload)
//   fieldName     - field stored on the entry
//   minSimilarity - stored as FieldVals.minSimilarity
//   prefixLength  - stored as FieldVals.prefixLength
// Note the argument reordering: FieldVals takes the field name first and the query string last.
184 public void AddTerms(String queryString, String fieldName,
float minSimilarity,
int prefixLength)
186 fieldVals.Add(
new FieldVals(fieldName, minSimilarity, prefixLength, queryString));
// Analyses one FieldVals entry and feeds scored term variants into the shared queue `q`.
// NOTE(review): several lines of this method (loop headers, the declarations of termAtt,
// fe, possibleMatch, score, minScore, startTerm, numVariants, sim and df) are not visible
// in this excerpt; the comments below describe only the visible statements.
190 private void AddTerms(
IndexReader reader, FieldVals f)
// Nothing to analyse when no query text was supplied.
192 if (f.queryString == null)
return;
// Tokenise the query text with the configured analyzer for the target field.
// NOTE(review): no Close/Dispose of `ts` is visible in this excerpt - confirm it is released.
193 TokenStream ts = analyzer.TokenStream(f.fieldName,
new System.IO.StringReader(f.queryString));
// Total document count, later passed to sim.Idf as the numDocs argument.
196 int corpusNumDocs = reader.
NumDocs();
// Term carrying only the field name; its use is not visible here (the name suggests it
// serves as a template for creating further terms - TODO confirm).
197 Term internSavingTemplateTerm =
new Term(f.fieldName);
// Token texts already handled, so a repeated token is expanded only once.
198 HashSet<string> processedTerms =
new HashSet<string>();
201 String term = termAtt.
Term;
202 if (!processedTerms.Contains(term))
204 processedTerms.Add(term);
// Bounded queue holding the candidate variants of this one token.
205 ScoreTermQueue variantsQ =
new ScoreTermQueue(MAX_VARIANTS_PER_TERM);
// Running sum of the document frequencies of the variants seen for this token.
216 int totalVariantDocFreqs = 0;
220 if (possibleMatch != null)
223 totalVariantDocFreqs += fe.
DocFreq();
// Admit the candidate while the queue has room, or when it beats the current minimum score.
225 if (variantsQ.Size() < MAX_VARIANTS_PER_TERM || score > minScore)
227 ScoreTerm st =
new ScoreTerm(possibleMatch, score, startTerm);
228 variantsQ.InsertWithOverflow(st);
// Refresh the admission threshold from the queue's top element.
229 minScore = variantsQ.Top().Score;
// Integer division: the average document frequency is truncated.
// NOTE(review): a guard against numVariants == 0 is not visible in this excerpt - confirm one exists.
236 int avgDf = totalVariantDocFreqs / numVariants;
// Drain the per-token queue into the shared queue, rescoring each variant.
245 int size = variantsQ.Size();
246 for (
int i = 0; i < size; i++)
248 ScoreTerm st = variantsQ.Pop();
// New score = (old score squared) * Idf(df, corpusNumDocs).
249 st.Score = (st.Score * st.Score) * sim.Idf(df, corpusNumDocs);
250 q.InsertWithOverflow(st);
// Builds the expanded query from the queued ScoreTerms and caches the result.
// NOTE(review): the method header and the declarations of `bq`, `size` and `termVariants`
// are not visible in this excerpt; the comments below describe only the visible statements.
// Reuse the cached result when one has already been stored.
259 if (rewrittenQuery != null)
261 return rewrittenQuery;
// Visit every registered FieldVals entry (loop body not visible; presumably it calls the
// private AddTerms overload - TODO confirm).
264 foreach (FieldVals f
in fieldVals)
// Groups the surviving ScoreTerms by the source term they were derived from.
277 HashMap<Term, List<ScoreTerm>> variantQueries =
new HashMap<Term, List<ScoreTerm>>();
279 for (
int i = 0; i < size; i++)
281 ScoreTerm st = q.Pop();
// Look up the existing group for this source term.
// NOTE(review): this relies on the HashMap indexer tolerating a missing key - confirm.
282 var l = variantQueries[st.fuzziedSourceTerm];
// Start a new group for this source term (the condition guarding this is not visible here).
285 l =
new List<ScoreTerm>();
286 variantQueries.Add(st.fuzziedSourceTerm, l);
// Turn each group into clauses of `bq`.
291 foreach(var variants
in variantQueries.Values)
// A group with a single variant is added directly as an optional (SHOULD) clause.
293 if (variants.Count == 1)
296 ScoreTerm st = variants[0];
297 TermQuery tq =
new FuzzyTermQuery(st.Term, ignoreTF);
299 bq.Add(tq,
Occur.SHOULD);
// Otherwise every variant goes into `termVariants`, which is then added to `bq` as one
// optional clause.
304 foreach(ScoreTerm st
in variants)
306 TermQuery tq =
new FuzzyTermQuery(st.Term, ignoreTF);
308 termVariants.Add(tq,
Occur.SHOULD);
310 bq.Add(termVariants,
Occur.SHOULD);
// Cache the result so the early return at the top is taken next time.
316 this.rewrittenQuery = bq;
// A candidate term together with its score and the source term it was derived from.
// NOTE(review): the member holding the term itself and part of the constructor body are
// not visible in this excerpt.
323 private class ScoreTerm
// Mutable score; read and reassigned when variants are moved into the shared queue.
326 public float Score {
get;
set; }
// The source term this variant was generated for; used as the grouping key during rewrite.
328 internal Term fuzziedSourceTerm;
330 public ScoreTerm(
Term term,
float score,
Term fuzziedSourceTerm)
334 this.fuzziedSourceTerm = fuzziedSourceTerm;
// Creates a queue for the given size (constructor body not visible in this excerpt).
340 public ScoreTermQueue(
int size)
// Ordering used by the queue: a lower score is "less". When scores are exactly equal,
// the term that compares greater is treated as "less".
348 public override bool LessThan(ScoreTerm termA, ScoreTerm termB)
350 if (termA.Score == termB.Score)
351 return termA.Term.CompareTo(termB.Term) > 0;
353 return termA.Score < termB.Score;
// Term query that remembers an ignoreTF flag and wraps the similarity it uses.
//   t        - term passed to the base constructor
//   ignoreTF - flag stored on this instance
363 public FuzzyTermQuery(
Term t,
bool ignoreTF): base(t)
365 this.ignoreTF = ignoreTF;
// Wrap the base similarity in a delegator that holds a reference to this query
// (the enclosing method header and its return are not visible in this excerpt).
370 Similarity result = base.GetSimilarity(searcher);
371 result =
new AnonymousSimilarityDelegator(
this,result);
// The FuzzyTermQuery this delegator was created for.
377 FuzzyTermQuery parent = null;
// Passes the wrapped similarity to the base constructor and keeps the owning query.
378 public AnonymousSimilarityDelegator(FuzzyTermQuery parent,
Similarity result) : base(result)
380 this.parent = parent;
// Term-frequency factor. The visible path defers to the base implementation.
// NOTE(review): lines before the return are not visible; presumably they special-case
// parent.ignoreTF - TODO confirm.
383 public override float Tf(
float freq)
389 return base.Tf(freq);
// Inverse-document-frequency factor (body not visible in this excerpt).
392 public override float Idf(
int docFreq,
int numDocs)
// String form of this query for the given field (body not visible in this excerpt).
404 public override String ToString(String field)
// Accessor for the ignoreTF flag (body not visible; presumably returns the field - TODO confirm).
410 public bool IsIgnoreTF()
// Sets the ignoreTF flag on this instance.
// NOTE(review): no reset of the cached rewrittenQuery is visible here, so a change made
// after a rewrite may not affect the cached result - confirm.
416 public void SetIgnoreTF(
bool ignoreTF)
418 this.ignoreTF = ignoreTF;