23 using System.Collections.Generic;
27 using Lucene.Net.Analysis;
28 using Lucene.Net.Index;
29 using Lucene.Net.Index.Memory;
30 using Lucene.Net.Search.Spans;
31 using Lucene.Net.Store;
32 using Lucene.Net.Support;
33 using Lucene.Net.Util;
35 namespace Lucene.Net.Search.Highlight
// Field name the extractor is currently restricted to. FieldNameComparator
// treats a null value as "accept every field".
43 private String fieldName;
// Per-field cache of IndexReader instances (initial capacity 10).
// GetReaderForField stores into it and CloseReaders reads its Values.
45 private IDictionary<String, IndexReader> readers =
new HashMap<String, IndexReader>(10);
// Optional additional field name that is also accepted when matching term fields.
46 private String defaultField;
// Backing field for the ExpandMultiTermQuery property; read by MustRewriteQuery.
47 private bool expandMultiTermQuery;
// Backing field for IsCachedTokenStream; set to true inside GetReaderForField.
48 private bool cachedTokenStream;
// Written by SetWrapIfNotCachingTokenFilter; starts out as true.
49 private bool wrapToCaching =
true;
57 if (defaultField != null)
/// Private helper that works on the readers cached in the <c>readers</c> map.
// NOTE(review): only the snapshot of the cached values is visible in this
// excerpt; the loop that presumably closes each reader is not shown — confirm
// against the full file.
63 private void CloseReaders()
65 ICollection<IndexReader> readerSet = readers.Values;
/// Walks <c>query</c> and fills <c>terms</c> with weighted span terms, dispatching on
/// the kind of query. The method is recursive: compound queries are unwrapped and
/// their parts are passed back into Extract.
// NOTE(review): the type tests that select each branch are not visible in this
// excerpt; the branch descriptions below are inferred from the casts and the
// variable names on the visible lines — confirm against the full file.
85 private void Extract(
Query query, IDictionary<String, WeightedSpanTerm> terms)
// Clause list: recurse into every clause that is not prohibited.
91 for (
int i = 0; i < queryClauses.Length; i++)
93 if (!queryClauses[i].IsProhibited)
95 Extract(queryClauses[i].
Query, terms);
// Phrase query: read its terms, slop and positions.
102 Term[] phraseQueryTerms = phraseQuery.GetTerms();
104 for (
int i = 0; i < phraseQueryTerms.Length; i++)
108 int slop = phraseQuery.Slop;
109 int[] positions = phraseQuery.GetPositions();
111 if (positions.Length > 0)
113 int lastPos = positions[0];
115 int sz = positions.Length;
// Track the largest increment between consecutive positions.
116 for (
int i = 1; i < sz; i++)
118 int pos = positions[i];
119 int inc = pos - lastPos;
120 if (inc > largestInc)
// Only a slop of exactly zero is treated as "in order".
132 bool inorder = slop == 0;
// The derived span query `sp` inherits the boost of the original query.
135 sp.
Boost = query.Boost;
136 ExtractWeightedSpanTerms(terms, sp);
// Queries handled without position information.
140 ExtractWeightedTerms(terms, query);
// Query that already is a SpanQuery: extract span terms directly.
144 ExtractWeightedSpanTerms(terms, (SpanQuery) query);
// FilteredQuery: recurse into the wrapped query.
148 Extract(((FilteredQuery) query).
Query, terms);
// DisjunctionMaxQuery: visit each sub-query (loop body not visible here).
152 foreach (var q
in ((DisjunctionMaxQuery) query))
// Multi-term query `mtq`: compare its rewrite method against the scoring
// boolean rewrite (the action taken on mismatch is not visible here).
160 if (mtq.RewriteMethod !=
MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)
// Run the scoring boolean rewrite against a FakeReader; afterwards
// fReader.Field is checked for null before the query is rewritten against
// `ir` and extracted.
166 FakeReader fReader =
new FakeReader();
167 MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.Rewrite(fReader, mtq);
168 if (fReader.Field != null)
171 Extract(query.Rewrite(ir), terms);
// Multi-phrase query `mpq`: term arrays with their positions.
177 IList<Term[]> termArrays = mpq.GetTermArrays();
178 int[] positions = mpq.GetPositions();
179 if (positions.Length > 0)
// Find the maximum position: start from the last element, scan the rest.
182 int maxPosition = positions[positions.Length - 1];
183 for (
int i = 0; i < positions.Length - 1; ++i)
185 if (positions[i] > maxPosition)
187 maxPosition = positions[i];
// One (lazily created) list of span queries per position index.
191 var disjunctLists =
new List<SpanQuery>[maxPosition + 1];
192 int distinctPositions = 0;
194 for (
int i = 0; i < termArrays.Count; ++i)
196 Term[] termArray = termArrays[i];
197 List<SpanQuery> disjuncts = disjunctLists[positions[i]];
198 if (disjuncts == null)
200 disjuncts = (disjunctLists[positions[i]] =
new List<SpanQuery>(termArray.Length));
203 for (
int j = 0; j < termArray.Length; ++j)
209 int positionGaps = 0;
// Each non-null bucket becomes one SpanOrQuery clause.
211 SpanQuery[] clauses =
new SpanQuery[distinctPositions];
212 for (
int i = 0; i < disjunctLists.Length; ++i)
214 List<SpanQuery> disjuncts = disjunctLists[i];
215 if (disjuncts != null)
217 clauses[position++] =
new SpanOrQuery(disjuncts.ToArray());
// Same convention as the phrase branch: in order only when slop is zero,
// and the derived span query inherits the original boost.
226 bool inorder = (slop == 0);
229 sp.
Boost = query.Boost;
230 ExtractWeightedSpanTerms(terms, sp);
/// Adds position-sensitive weighted terms for <c>spanQuery</c> to <c>terms</c>,
/// attaching the span positions gathered for the relevant fields.
// NOTE(review): several lines of this method (else keywords, the span iteration,
// the population of nonWeightedTerms) are not visible in this excerpt.
240 private void ExtractWeightedSpanTerms(IDictionary<String, WeightedSpanTerm> terms, SpanQuery spanQuery)
242 HashSet<String> fieldNames;
// With no field restriction, the field names are collected from the span query.
244 if (fieldName == null)
246 fieldNames =
new HashSet<String>();
247 CollectSpanQueryFields(spanQuery, fieldNames);
// Presumably the alternative branch: seed the set with the restricted field only.
251 fieldNames =
new HashSet<String>();
252 fieldNames.Add(fieldName);
// The default field, when set, is always included.
255 if (defaultField != null)
257 fieldNames.Add(defaultField);
// Per-field rewritten queries; only filled when a rewrite is required.
260 IDictionary<String, SpanQuery> queries =
new HashMap<String, SpanQuery>();
262 var nonWeightedTerms = Support.Compatibility.SetFactory.CreateHashSet<
Term>();
263 bool mustRewriteQuery = MustRewriteQuery(spanQuery);
264 if (mustRewriteQuery)
266 foreach (String field
in fieldNames)
// Rewrite against the reader obtained for this field and remember the result.
268 SpanQuery rewrittenQuery = (
SpanQuery) spanQuery.
Rewrite(GetReaderForField(field));
269 queries[field] = rewrittenQuery;
278 List<PositionSpan> spanPositions =
new List<PositionSpan>();
280 foreach (String field
in fieldNames)
// When rewritten, spans come from the per-field rewritten query.
285 if (mustRewriteQuery)
287 spans = queries[field].GetSpans(reader);
// Each span is recorded as (Start, End - 1).
298 spanPositions.Add(
new PositionSpan(spans.Start(), spans.End() - 1));
// No spans found (the action taken here is not visible in this excerpt).
303 if (spanPositions.Count == 0)
309 foreach (
Term queryTerm
in nonWeightedTerms)
// Only terms whose field passes FieldNameComparator are considered.
312 if (FieldNameComparator(queryTerm.
Field))
// Look up an existing entry by term text; a null result means "not present yet".
314 WeightedSpanTerm weightedSpanTerm = terms[queryTerm.
Text];
316 if (weightedSpanTerm == null)
// New entry: weighted with the span query's boost, marked position sensitive.
318 weightedSpanTerm =
new WeightedSpanTerm(spanQuery.
Boost, queryTerm.
Text);
319 weightedSpanTerm.AddPositionSpans(spanPositions);
320 weightedSpanTerm.SetPositionSensitive(
true);
321 terms[queryTerm.
Text] = weightedSpanTerm;
// Existing entry (presumably the else branch): append any spans found.
325 if (spanPositions.Count > 0)
327 weightedSpanTerm.AddPositionSpans(spanPositions);
/// <summary>
/// Collects the terms of <paramref name="query"/> and stores a
/// <c>WeightedSpanTerm</c> (weighted with the query's boost) in
/// <paramref name="terms"/> for every term whose field passes
/// <c>FieldNameComparator</c>. Entries are keyed by term text.
/// </summary>
/// <param name="terms">Map receiving one entry per accepted term text.</param>
/// <param name="query">Query whose terms are extracted.</param>
private void ExtractWeightedTerms(IDictionary<String, WeightedSpanTerm> terms, Query query)
{
    var collectedTerms = Support.Compatibility.SetFactory.CreateHashSet<Term>();
    query.ExtractTerms(collectedTerms);

    foreach (Term candidate in collectedTerms)
    {
        // Terms from fields this extractor is not looking at are ignored.
        if (!FieldNameComparator(candidate.Field))
        {
            continue;
        }

        WeightedSpanTerm weighted = new WeightedSpanTerm(query.Boost, candidate.Text);
        terms[candidate.Text] = weighted;
    }
}
/// Decides whether <c>fieldNameToCheck</c> is a field this extractor accepts.
// The computed flag is true when no field restriction is set (fieldName is null),
// or when the candidate equals either fieldName or defaultField.
// NOTE(review): the statement returning `rv` is not visible in this excerpt.
358 private bool FieldNameComparator(String fieldNameToCheck)
360 bool rv = fieldName == null || fieldNameToCheck == fieldName
361 || fieldNameToCheck == defaultField;
/// Produces the IndexReader used for <c>field</c> and caches it in <c>readers</c>.
// NOTE(review): the conditions guarding these statements and the declarations of
// `indexer`, `searcher` and `reader` are not visible in this excerpt.
365 private IndexReader GetReaderForField(String field)
// Records that the token stream is (now) cached; surfaced via IsCachedTokenStream.
370 cachedTokenStream =
true;
// Feed the current token stream into the indexer under this field name.
376 indexer.
AddField(field, tokenStream);
// Take the reader from the searcher and remember it for this field.
379 reader = searcher.IndexReader;
380 readers[field] = reader;
/// <summary>
/// Convenience overload: forwards to the three-argument
/// <c>GetWeightedSpanTerms</c>, passing <c>null</c> as the third argument.
/// </summary>
/// <param name="query">Query to extract terms from.</param>
/// <param name="tokenStream">Token stream handed through to the full overload.</param>
/// <returns>Whatever the three-argument overload returns.</returns>
public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTerms(Query query, TokenStream tokenStream)
{
    IDictionary<String, WeightedSpanTerm> extracted = GetWeightedSpanTerms(query, tokenStream, null);
    return extracted;
}
/// Full overload: resets the per-call state, then runs Extract over <c>query</c>
/// into a fresh PositionCheckingMap.
// NOTE(review): the remaining parameter(s) of the signature and the return
// statement are not visible in this excerpt.
405 public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTerms(
Query query,
TokenStream tokenStream,
// Field restriction: branch on whether a field name was supplied; the visible
// fallback clears this.fieldName.
408 if (fieldName != null)
414 this.fieldName = null;
// The result map is a PositionCheckingMap keyed by String.
417 IDictionary<String, WeightedSpanTerm> terms =
new PositionCheckingMap<String>();
// Remember the token stream on the instance before extracting.
418 this.tokenStream = tokenStream;
421 Extract(query, terms);
/// Like GetWeightedSpanTerms, but afterwards scales each term's weight by an
/// IDF-style factor computed from <c>reader</c>.
// NOTE(review): the remaining parameters of the signature, the lookup that
// produces `weightedSpanTerm` inside the loop, and the return statement are not
// visible in this excerpt.
440 public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTermsWithScores(
Query query,
TokenStream tokenStream,
// Field restriction: branch on whether a field name was supplied; the visible
// fallback clears this.fieldName.
443 if (fieldName != null)
449 this.fieldName = null;
451 this.tokenStream = tokenStream;
453 IDictionary<String, WeightedSpanTerm> terms =
new PositionCheckingMap<String>();
454 Extract(query, terms);
// Total document count, used as the numerator of the IDF factor.
456 int totalNumDocs = reader.
NumDocs();
457 var weightedTerms = terms.Keys;
461 foreach (var wt
in weightedTerms)
// Document frequency of this term in the field named by the fieldName argument.
464 int docFreq = reader.
DocFreq(
new Term(fieldName, weightedSpanTerm.
Term));
// Clamp docFreq so it never exceeds the total number of documents.
466 if (totalNumDocs < docFreq)
468 docFreq = totalNumDocs;
// idf = ln(totalNumDocs / (docFreq + 1)) + 1, multiplied into the term weight.
471 float idf = (float) (Math.Log((
float) totalNumDocs/(
double) (docFreq + 1)) + 1.0);
472 weightedSpanTerm.
Weight *= idf;
/// Recursively gathers the field names used by <c>spanQuery</c> into
/// <c>fieldNames</c>.
// NOTE(review): the type tests selecting each branch are not visible in this
// excerpt; the branches are identified by the casts on the visible lines.
484 private void CollectSpanQueryFields(SpanQuery spanQuery, HashSet<String> fieldNames)
// FieldMaskingSpanQuery: descend into the masked query.
488 CollectSpanQueryFields(((FieldMaskingSpanQuery) spanQuery).MaskedQuery, fieldNames);
// SpanFirstQuery: descend into the match query.
492 CollectSpanQueryFields(((SpanFirstQuery) spanQuery).Match, fieldNames);
// SpanNearQuery: descend into every clause.
496 foreach (SpanQuery clause
in ((SpanNearQuery) spanQuery).GetClauses())
498 CollectSpanQueryFields(clause, fieldNames);
// SpanNotQuery: only the include side is visited here.
503 CollectSpanQueryFields(((SpanNotQuery) spanQuery).Include, fieldNames);
// SpanOrQuery: descend into every clause.
507 foreach (SpanQuery clause
in ((SpanOrQuery) spanQuery).GetClauses())
509 CollectSpanQueryFields(clause, fieldNames);
// Any other span query: record its own field.
514 fieldNames.Add(spanQuery.
Field);
/// Decides whether <c>spanQuery</c> has to be rewritten before spans are taken
/// from it, by recursing through the composite span query types.
// NOTE(review): most return statements of this method are not visible in this
// excerpt; only the recursive calls and the branch conditions are shown.
518 private bool MustRewriteQuery(SpanQuery spanQuery)
// First check: the expandMultiTermQuery flag is switched off.
520 if (!expandMultiTermQuery)
// Wrapper types delegate the decision to the query they wrap.
524 else if (spanQuery is FieldMaskingSpanQuery)
526 return MustRewriteQuery(((FieldMaskingSpanQuery)spanQuery).MaskedQuery);
528 else if (spanQuery is SpanFirstQuery)
530 return MustRewriteQuery(((SpanFirstQuery)spanQuery).Match);
// SpanNearQuery: each clause is tested in turn.
532 else if (spanQuery is SpanNearQuery)
534 foreach (SpanQuery clause
in ((SpanNearQuery) spanQuery).GetClauses())
536 if (MustRewriteQuery(clause))
// SpanNotQuery: a rewrite is needed if either the include or the exclude side needs one.
543 else if (spanQuery is SpanNotQuery)
545 SpanNotQuery spanNotQuery = (SpanNotQuery) spanQuery;
546 return MustRewriteQuery(spanNotQuery.
Include) || MustRewriteQuery(spanNotQuery.
Exclude);
// SpanOrQuery: each clause is tested in turn.
548 else if (spanQuery is SpanOrQuery)
550 foreach (SpanQuery clause
in ((SpanOrQuery) spanQuery).GetClauses())
552 if (MustRewriteQuery(clause))
/// HashMap specialisation whose values are always WeightedSpanTerm; it is the
/// map type the extractor creates for its results.
575 private class PositionCheckingMap<K> : HashMap<K, WeightedSpanTerm>
// Creates an empty map. NOTE(review): constructor bodies are not visible in this excerpt.
577 public PositionCheckingMap()
// Creates a map from an existing sequence of key/value pairs.
582 public PositionCheckingMap(IEnumerable<KeyValuePair<K, WeightedSpanTerm>> m)
// Copies every pair of `m` into this map by routing it through Add.
587 public void PutAll(IEnumerable<KeyValuePair<K, WeightedSpanTerm>> m)
589 foreach (var entry
in m)
591 Add(entry.Key, entry.Value);
/// <summary>
/// Stores <paramref name="value"/> under <paramref name="key"/>. If the key
/// already held a term that was not position sensitive, the incoming term is
/// marked as not position sensitive as well.
/// </summary>
/// <param name="key">Key the term is stored under.</param>
/// <param name="value">Term to store.</param>
public override void Add(K key, WeightedSpanTerm value)
{
    // Read the existing entry BEFORE storing the new one. Reading this[key]
    // after base.Add only hands back `value` itself, so the comparison below
    // would test the new term against itself and never take effect.
    // The indexer yields null for an absent key, which the extractor already
    // relies on when it looks terms up by text.
    WeightedSpanTerm previous = this[key];

    base.Add(key, value);

    // Nothing to reconcile for a first insertion (or when a null value is stored).
    if (previous == null || value == null)
    {
        return;
    }

    if (!previous.IsPositionSensitive())
    {
        value.SetPositionSensitive(false);
    }
}
/// <summary>
/// Gets or sets the <c>expandMultiTermQuery</c> flag, which
/// <c>MustRewriteQuery</c> consults before inspecting a span query.
/// </summary>
public bool ExpandMultiTermQuery
{
    get
    {
        return expandMultiTermQuery;
    }
    set
    {
        expandMultiTermQuery = value;
    }
}
/// <summary>
/// Gets the current value of the <c>cachedTokenStream</c> flag, which
/// <c>GetReaderForField</c> sets to <c>true</c>.
/// </summary>
public bool IsCachedTokenStream
{
    get
    {
        return this.cachedTokenStream;
    }
}
625 get {
return tokenStream; }
/// <summary>
/// Stores <paramref name="wrap"/> in the <c>wrapToCaching</c> field
/// (which starts out as <c>true</c>).
/// </summary>
/// <param name="wrap">New value for the flag.</param>
public void SetWrapIfNotCachingTokenFilter(bool wrap)
{
    wrapToCaching = wrap;
}
/// Field name held by this reader. It can only be assigned from inside the
/// class; Extract checks it for null after running a rewrite against a FakeReader.
648 public String
Field {
get;
private set; }
/// Creates the reader on top of the shared EMPTY_MEMORY_INDEX_READER instance.
// NOTE(review): the constructor body is not visible in this excerpt.
650 protected internal FakeReader()
651 : base(EMPTY_MEMORY_INDEX_READER)
659 if (t != null &&
Field == null)
661 return base.Terms(t);