Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
FieldQuery.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using System.Text;
21 
22 using Lucene.Net.Search;
23 using Lucene.Net.Index;
24 using Lucene.Net.Support.Compatibility;
25 using TermInfo = Lucene.Net.Search.Vectorhighlight.FieldTermStack.TermInfo;
26 
27 namespace Lucene.Net.Search.Vectorhighlight
28 {
29  public class FieldQuery
30  {
// When true, all lookups are partitioned by field name; when false, a single null key is used.
private bool fieldMatch;

// Root of the term/phrase trie for each key:
//   fieldMatch == true  -> key is the field name
//   fieldMatch == false -> key is null
public HashMap<String, QueryPhraseMap> rootMaps = new HashMap<String, QueryPhraseMap>();

// Term texts collected from the flattened queries, per key:
//   fieldMatch == true  -> key is the field name
//   fieldMatch == false -> key is null
public HashMap<String, List<String>> termSetMap = new HashMap<String, List<String>>();

// Running counter handed out by NextTermOrPhraseNumber(); used for colored tag support.
private int termOrPhraseNumber;
42 
/*
 * Build the lookup structures for a query.
 *
 * The query is flattened, its terms are recorded in termSetMap, overlapping
 * phrases are expanded, and every resulting query is registered in the root
 * map selected by getRootMap().
 *
 * <param name="query">query to analyze</param>
 * <param name="phraseHighlight">when false, every term of a multi-term phrase
 * is additionally registered as a stand-alone term</param>
 * <param name="fieldMatch">when true, maps are keyed by field name; when false,
 * a single null key is used</param>
 */
public FieldQuery(Query query, bool phraseHighlight, bool fieldMatch)
{
    this.fieldMatch = fieldMatch;

    Dictionary<Query, Query> flattened = new Dictionary<Query, Query>();
    flatten(query, flattened);
    SaveTerms(flattened);

    foreach (Query candidate in expand(flattened).Keys)
    {
        QueryPhraseMap root = getRootMap(candidate);
        root.Add(candidate);

        // With phrase highlighting on, phrases are only matched as a whole.
        if (phraseHighlight) continue;

        PhraseQuery phrase = candidate as PhraseQuery;
        if (phrase == null) continue;

        Term[] phraseTerms = phrase.GetTerms();
        if (phraseTerms.Length <= 1) continue;

        // Phrase highlighting is off: each phrase term may match on its own.
        foreach (Term phraseTerm in phraseTerms)
            root.AddTerm(phraseTerm.Text, candidate.Boost);
    }
}
66 
/*
 * Recursively break sourceQuery down into TermQuery, PrefixQuery and
 * multi-term PhraseQuery leaves and collect them in flatQueries.
 *
 * flatQueries is used as a set (every entry maps a query to itself), so a
 * query equal to one already collected is skipped.
 *
 * - BooleanQuery: every non-prohibited clause is flattened.
 * - DisjunctionMaxQuery: every disjunct is flattened.
 * - PhraseQuery: kept when it has more than one term; a one-term phrase is
 *   stored as a TermQuery; a phrase without terms is dropped.
 * - Any other query: its extracted terms are flattened as TermQuery objects;
 *   a query that throws NotSupportedException from ExtractTerms is discarded.
 *
 * <param name="sourceQuery">query to decompose</param>
 * <param name="flatQueries">receives the flattened queries</param>
 */
public void flatten(Query sourceQuery, Dictionary<Query, Query> flatQueries)
{
    if (sourceQuery is BooleanQuery)
    {
        BooleanQuery bq = (BooleanQuery)sourceQuery;
        foreach (BooleanClause clause in bq.GetClauses())
        {
            if (!clause.IsProhibited)
                flatten(clause.Query, flatQueries);
        }
    }
    else if (sourceQuery is PrefixQuery)
    {
        if (!flatQueries.ContainsKey(sourceQuery))
            flatQueries.Add(sourceQuery, sourceQuery);
    }
    else if (sourceQuery is DisjunctionMaxQuery)
    {
        DisjunctionMaxQuery dmq = (DisjunctionMaxQuery)sourceQuery;
        foreach (Query query in dmq)
        {
            flatten(query, flatQueries);
        }
    }
    else if (sourceQuery is TermQuery)
    {
        if (!flatQueries.ContainsKey(sourceQuery))
            flatQueries.Add(sourceQuery, sourceQuery);
    }
    else if (sourceQuery is PhraseQuery)
    {
        if (!flatQueries.ContainsKey(sourceQuery))
        {
            PhraseQuery pq = (PhraseQuery)sourceQuery;
            Term[] phraseTerms = pq.GetTerms();
            if (phraseTerms.Length > 1)
                flatQueries.Add(pq, pq);
            else if (phraseTerms.Length == 1)
            {
                // The replacement TermQuery may equal one collected earlier;
                // Dictionary.Add would throw on the duplicate key, so check first.
                Query q = new TermQuery(phraseTerms[0]);
                if (!flatQueries.ContainsKey(q))
                    flatQueries.Add(q, q);
            }
        }
    }
    else
    {
        // Fallback to using extracted terms
        ISet<Term> terms = SetFactory.CreateHashSet<Term>();
        try
        {
            sourceQuery.ExtractTerms(terms);
        }
        catch (NotSupportedException)
        { // thrown by default impl
            return; // ignore error and discard query
        }

        foreach (var term in terms)
        {
            flatten(new TermQuery(term), flatQueries);
        }
    }
}
129 
130  /*
131  * Create expandQueries from flatQueries.
132  *
133  * expandQueries := flatQueries + overlapped phrase queries
134  *
135  * ex1) flatQueries={a,b,c}
136  * => expandQueries={a,b,c}
137  * ex2) flatQueries={a,"b c","c d"}
138  * => expandQueries={a,"b c","c d","b c d"}
139  */
/*
 * Build the expanded query set: every flattened query plus the phrases
 * produced by merging overlapping PhraseQuery pairs.
 *
 * Side effect: flatQueries is emptied, because each query is removed from it
 * as it is processed.
 *
 * <param name="flatQueries">flattened queries; drained by this call</param>
 * <returns>the expanded queries, each mapped to itself</returns>
 */
public Dictionary<Query, Query> expand(Dictionary<Query, Query> flatQueries)
{
    Dictionary<Query, Query> expandQueries = new Dictionary<Query, Query>();

    // Iterate over a snapshot of the keys: flatQueries is modified inside the loop.
    foreach (Query query in new List<Query>(flatQueries.Keys))
    {
        flatQueries.Remove(query);

        // An earlier overlap merge may already have produced a phrase equal to
        // this query; Dictionary.Add would throw on the duplicate key.
        if (!expandQueries.ContainsKey(query))
            expandQueries.Add(query, query);

        if (!(query is PhraseQuery)) continue;

        // Only the not-yet-processed queries remain in flatQueries, so each
        // phrase pair is examined once.
        foreach (Query qj in flatQueries.Keys)
        {
            if (!(qj is PhraseQuery)) continue;
            CheckOverlap(expandQueries, (PhraseQuery)query, (PhraseQuery)qj);
        }
    }
    return expandQueries;
}
157 
158  /*
159  * Check whether PhraseQuery A and PhraseQuery B overlap.
160  *
161  * ex1) A="a b", B="b c" => overlap; expandQueries={"a b c"}
162  * ex2) A="b c", B="a b" => overlap; expandQueries={"a b c"}
163  * ex3) A="a b", B="c d" => no overlap; expandQueries={}
164  */
/*
 * Try to merge two phrases in both directions (a followed by b, and b
 * followed by a). Nothing is done when the slops differ or, with fieldMatch
 * enabled, when the first terms belong to different fields.
 */
private void CheckOverlap(Dictionary<Query, Query> expandQueries, PhraseQuery a, PhraseQuery b)
{
    int slopOfA = a.Slop;
    int slopOfB = b.Slop;
    if (slopOfA != slopOfB) return;

    Term[] termsOfA = a.GetTerms();
    Term[] termsOfB = b.GetTerms();

    // Fields are only compared when field matching is enabled.
    if (fieldMatch)
    {
        bool sameField = termsOfA[0].Field.Equals(termsOfB[0].Field);
        if (!sameField) return;
    }

    CheckOverlap(expandQueries, termsOfA, termsOfB, slopOfA, a.Boost);
    CheckOverlap(expandQueries, termsOfB, termsOfA, slopOfB, b.Boost);
}
174 
175  /*
176  * Check whether the tail of src overlaps the head of dest and, if so, create the merged PhraseQuery and add it to expandQueries.
177  *
178  * ex1) src="a b", dest="c d" => no overlap
179  * ex2) src="a b", dest="a b c" => no overlap
180  * ex3) src="a b", dest="b c" => overlap; expandQueries={"a b c"}
181  * ex4) src="a b c", dest="b c d" => overlap; expandQueries={"a b c d"}
182  * ex5) src="a b c", dest="b c" => no overlap
183  * ex6) src="a b c", dest="b" => no overlap
184  * ex7) src="a a a a", dest="a a a" => overlap;
185  * expandQueries={"a a a a a","a a a a a a"}
186  * ex8) src="a b c d", dest="b c" => no overlap
187  */
/*
 * For every start offset >= 1 in src, test whether src[offset..] matches the
 * beginning of dest. When it does and dest is longer than that shared part,
 * add a phrase made of all src terms followed by the remaining dest terms
 * (created in src's field) to expandQueries, unless an equal phrase is
 * already present.
 */
private void CheckOverlap(Dictionary<Query, Query> expandQueries, Term[] src, Term[] dest, int slop, float boost)
{
    // Starting at 1 (not 0) is safe: flatten() guarantees that a PhraseQuery
    // has several terms (a one-term phrase is converted to a TermQuery).
    for (int start = 1; start < src.Length; start++)
    {
        bool tailMatchesHead = true;
        int destIndex = 0;
        for (int srcIndex = start; srcIndex < src.Length; srcIndex++, destIndex++)
        {
            // Positions beyond the end of dest are not compared.
            if (destIndex >= dest.Length) continue;

            if (!src[srcIndex].Text.Equals(dest[destIndex].Text))
            {
                tailMatchesHead = false;
                break;
            }
        }

        // Number of src terms from 'start' to the end, i.e. the shared part.
        int sharedCount = src.Length - start;
        if (!tailMatchesHead || sharedCount >= dest.Length) continue;

        PhraseQuery merged = new PhraseQuery();
        for (int s = 0; s < src.Length; s++)
            merged.Add(src[s]);
        for (int d = sharedCount; d < dest.Length; d++)
            merged.Add(new Term(src[0].Field, dest[d].Text));

        merged.Slop = slop;
        merged.Boost = boost;

        if (!expandQueries.ContainsKey(merged))
            expandQueries.Add(merged, merged);
    }
}
220 
/*
 * Return the root QueryPhraseMap stored under the key of the given query
 * (see GetKey), creating and registering an empty one on first use.
 */
public QueryPhraseMap getRootMap(Query query)
{
    String rootKey = GetKey(query);

    QueryPhraseMap existing = rootMaps.Get(rootKey);
    if (existing != null)
        return existing;

    QueryPhraseMap created = new QueryPhraseMap(this);
    rootMaps.Put(rootKey, created);
    return created;
}
232 
233  /*
234  * Return 'key' string. 'key' is the field name of the Query.
235  * If not fieldMatch, 'key' will be null.
236  */
/*
 * Map a flattened query to its lookup key: null when fieldMatch is off,
 * otherwise the field of the query's (first) term. Only TermQuery,
 * PrefixQuery and PhraseQuery are accepted; anything else throws
 * ApplicationException.
 */
private String GetKey(Query query)
{
    if (!fieldMatch) return null;

    TermQuery asTerm = query as TermQuery;
    if (asTerm != null)
        return asTerm.Term.Field;

    PrefixQuery asPrefix = query as PrefixQuery;
    if (asPrefix != null)
        return asPrefix.Prefix.Field;

    PhraseQuery asPhrase = query as PhraseQuery;
    if (asPhrase != null)
    {
        // The field of the first term stands for the whole phrase.
        Term[] phraseTerms = asPhrase.GetTerms();
        return phraseTerms[0].Field;
    }

    throw new ApplicationException("query \"" + query + "\" must be flatten first.");
}
255 
256  /*
257  * Save the set of terms in the queries to termSetMap.
258  *
259  * ex1) q=name:john
260  * - fieldMatch==true
261  * termSetMap=Map<"name",Set<"john">>
262  * - fieldMatch==false
263  * termSetMap=Map<null,Set<"john">>
264  *
265  * ex2) q=name:john title:manager
266  * - fieldMatch==true
267  * termSetMap=Map<"name",Set<"john">,
268  * "title",Set<"manager">>
269  * - fieldMatch==false
270  * termSetMap=Map<null,Set<"john","manager">>
271  *
272  * ex3) q=name:"john lennon"
273  * - fieldMatch==true
274  * termSetMap=Map<"name",Set<"john","lennon">>
275  * - fieldMatch==false
276  * termSetMap=Map<null,Set<"john","lennon">>
277  */
/*
 * Record the term texts of every flattened query in termSetMap, under the
 * key returned by GetKey for that query.
 *
 * - TermQuery: the term text.
 * - PrefixQuery: the prefix text followed by "*".
 * - PhraseQuery: the text of each phrase term.
 *
 * The per-key collection is a List but is treated as a set: a text that is
 * already present is not added again.
 *
 * Throws ApplicationException for any other query type.
 */
void SaveTerms(Dictionary<Query, Query> flatQueries)
{
    foreach (Query query in flatQueries.Keys)
    {
        List<String> termSet = GetTermSet(query);
        if (query is TermQuery)
            AddTermOnce(termSet, ((TermQuery)query).Term.Text);
        else if (query is PrefixQuery)
            AddTermOnce(termSet, ((PrefixQuery)query).Prefix.Text + "*");
        else if (query is PhraseQuery)
        {
            foreach (Term term in ((PhraseQuery)query).GetTerms())
                AddTermOnce(termSet, term.Text);
        }
        else
            throw new System.ApplicationException("query \"" + query.ToString() + "\" must be flatten first.");
    }
}

// Append text to termSet unless it is already there (set semantics on a List).
private static void AddTermOnce(List<String> termSet, String text)
{
    if (!termSet.Contains(text))
        termSet.Add(text);
}
296 
/*
 * Return the term collection stored in termSetMap under the key of the given
 * query (see GetKey), creating and registering an empty list on first use.
 */
private List<String> GetTermSet(Query query)
{
    String mapKey = GetKey(query);

    List<String> terms = termSetMap.Get(mapKey);
    if (terms != null)
        return terms;

    terms = new List<String>();
    termSetMap.Put(mapKey, terms);
    return terms;
}
308 
/*
 * Look up the collected term texts for a field. The field name is only used
 * as key when fieldMatch is on; otherwise the shared null-key entry is
 * returned. The result is whatever termSetMap.Get yields for that key.
 */
public List<String> getTermSet(String field)
{
    String lookupKey = null;
    if (fieldMatch)
        lookupKey = field;
    return termSetMap.Get(lookupKey);
}
313 
314  /*
315  *
316  * <param name="fieldName"></param>
317  * <param name="term"></param>
318  * <returns>QueryPhraseMap</returns>
319  */
/*
 * Find the first-level QueryPhraseMap for a term of the given field.
 * Returns null when the field has no root map; otherwise the result of
 * RetrieveQueryFromSubMap for that root map and term.
 */
public QueryPhraseMap GetFieldTermMap(String fieldName, String term)
{
    QueryPhraseMap fieldRoot = GetRootMap(fieldName);
    if (fieldRoot == null)
        return null;
    return RetrieveQueryFromSubMap(fieldRoot, term);
}
325 
/*
 * Scan the direct children of rootMap and return the first child whose key
 * is accepted by StringUtils.TermStringMatch(key, term); null when no key
 * matches.
 */
public QueryPhraseMap RetrieveQueryFromSubMap(QueryPhraseMap rootMap, String term)
{
    QueryPhraseMap found = null;
    foreach (var entry in rootMap.subMap)
    {
        if (!StringUtils.TermStringMatch(entry.Key, term))
            continue;

        // The first matching key wins.
        found = entry.Value;
        break;
    }
    return found;
}
335 
336  /*
337  *
338  * <param name="fieldName"></param>
339  * <param name="phraseCandidate"></param>
340  * <returns>QueryPhraseMap</returns>
341  */
/*
 * Search the root map of the given field for the phrase candidate.
 * Returns null when the field has no root map; otherwise the result of
 * QueryPhraseMap.SearchPhrase on that root map.
 */
public QueryPhraseMap SearchPhrase(String fieldName, List<TermInfo> phraseCandidate)
{
    QueryPhraseMap fieldRoot = GetRootMap(fieldName);
    return fieldRoot == null ? null : fieldRoot.SearchPhrase(phraseCandidate);
}
348 
/*
 * Fetch the root map for a field name. With fieldMatch off the field name is
 * ignored and the entry stored under the null key is returned.
 */
private QueryPhraseMap GetRootMap(String fieldName)
{
    String lookupKey = null;
    if (fieldMatch)
        lookupKey = fieldName;
    return rootMaps.Get(lookupKey);
}
353 
/*
 * Hand out the current term/phrase number and advance the counter by one.
 */
int NextTermOrPhraseNumber()
{
    int issued = termOrPhraseNumber;
    termOrPhraseNumber = issued + 1;
    return issued;
}
358 
/*
 * One node of a tree keyed by term text. A path from a root node through
 * subMap entries spells out a term or a phrase; the node where a registered
 * term or phrase ends is marked terminal and carries its slop, boost and
 * term/phrase number.
 */
public class QueryPhraseMap
{

    public bool terminal; // true once a term or phrase ends at this node
    int slop; // valid if terminal == true and phraseHighlight == true
    public float boost; // valid if terminal == true
    int termOrPhraseNumber; // valid if terminal == true
    FieldQuery fieldQuery; // owning FieldQuery; source of term/phrase numbers
    public HashMap<String, QueryPhraseMap> subMap = new HashMap<String, QueryPhraseMap>();

    /*
     * <param name="fieldQuery">the FieldQuery this node belongs to</param>
     */
    public QueryPhraseMap(FieldQuery fieldQuery)
    {
        this.fieldQuery = fieldQuery;
    }

    /*
     * Register a single term below this node and mark its node terminal
     * with slop 0 and the given boost.
     */
    public void AddTerm(String termText, float boost)
    {
        QueryPhraseMap map = GetOrNewMap(subMap, termText);
        map.MarkTerminal(boost);
    }

    /*
     * Return the child stored under term in subMap, creating and storing a
     * new node when there is none yet.
     */
    private QueryPhraseMap GetOrNewMap(HashMap<String, QueryPhraseMap> subMap, String term)
    {
        QueryPhraseMap map = subMap.Get(term);
        if (map == null)
        {
            map = new QueryPhraseMap(fieldQuery);
            subMap.Put(term, map);
        }
        return map;
    }

    /*
     * Register a flattened query below this node.
     *
     * - TermQuery: the term text.
     * - PrefixQuery: the prefix text followed by "*".
     * - PhraseQuery: one nested level per phrase term; the node of the last
     *   term is marked terminal with the phrase's slop and boost. A phrase
     *   without terms is ignored.
     *
     * Throws ApplicationException for any other query type.
     */
    public void Add(Query query)
    {
        if (query is TermQuery)
        {
            AddTerm(((TermQuery)query).Term.Text, query.Boost);
        }
        else if (query is PrefixQuery)
        {
            AddTerm(((PrefixQuery)query).Prefix.Text + "*", query.Boost);
        }
        else if (query is PhraseQuery)
        {
            PhraseQuery pq = (PhraseQuery)query;
            Term[] terms = pq.GetTerms();

            // An empty phrase has no node to mark; without this guard the
            // MarkTerminal call below would dereference null.
            if (terms.Length == 0) return;

            HashMap<String, QueryPhraseMap> map = subMap;
            QueryPhraseMap qpm = null;
            foreach (Term term in terms)
            {
                qpm = GetOrNewMap(map, term.Text);
                map = qpm.subMap;
            }
            qpm.MarkTerminal(pq.Slop, pq.Boost);
        }
        else
            throw new ApplicationException("query \"" + query.ToString() + "\" must be flatten first.");
    }

    /*
     * Return the direct child stored under term, as reported by subMap.Get.
     */
    public QueryPhraseMap GetTermMap(String term)
    {
        return subMap.Get(term);
    }

    // Mark this node as the end of a single term (slop 0).
    private void MarkTerminal(float boost)
    {
        MarkTerminal(0, boost);
    }

    // Mark this node as the end of a term or phrase. Every call draws a fresh
    // number from the owning FieldQuery, even if the node was already terminal.
    private void MarkTerminal(int slop, float boost)
    {
        this.terminal = true;
        this.slop = slop;
        this.boost = boost;
        this.termOrPhraseNumber = fieldQuery.NextTermOrPhraseNumber();
    }

    public bool IsTerminal
    {
        get { return terminal; }
    }

    public int Slop
    {
        get { return slop; }
    }

    public float Boost
    {
        get { return boost; }
    }

    public int TermOrPhraseNumber
    {
        get { return termOrPhraseNumber; }
    }

    /*
     * Walk down from this node along the texts of the candidate's terms.
     *
     * <param name="phraseCandidate">terms to look up, in order</param>
     * <returns>the node reached when every term has a child node and that
     * node accepts the candidate (see IsValidTermOrPhrase); otherwise null.
     * A null candidate yields null.</returns>
     */
    public QueryPhraseMap SearchPhrase(List<TermInfo> phraseCandidate)
    {
        if (phraseCandidate == null) return null;

        QueryPhraseMap currMap = this;
        foreach (TermInfo ti in phraseCandidate)
        {
            // Same lookup call as GetTermMap/GetOrNewMap, whose null result
            // for a missing term the rest of this class already relies on.
            currMap = currMap.subMap.Get(ti.Text);
            if (currMap == null) return null;
        }
        return currMap.IsValidTermOrPhrase(phraseCandidate) ? currMap : null;
    }

    /*
     * Decide whether the candidate is acceptable at this node.
     *
     * <param name="phraseCandidate">terms that led to this node</param>
     * <returns>false when the node is not terminal or the candidate is null
     * or empty; true for a single term; for several terms, true only when
     * |nextPosition - position - 1| of every adjacent pair is within slop.</returns>
     */
    public bool IsValidTermOrPhrase(List<TermInfo> phraseCandidate)
    {
        // check terminal
        if (!terminal) return false;

        // nothing to validate; also keeps the [0] access below in range
        if (phraseCandidate == null || phraseCandidate.Count == 0) return false;

        // if the candidate is a term, it is valid
        if (phraseCandidate.Count == 1) return true;

        // else check whether the candidate is valid phrase
        // compare position-gaps between terms to slop

        int pos = phraseCandidate[0].Position;
        for (int i = 1; i < phraseCandidate.Count; i++)
        {
            int nextPos = phraseCandidate[i].Position;
            if (Math.Abs(nextPos - pos - 1) > slop) return false;
            pos = nextPos;
        }
        return true;
    }
}
488  }
489 }