Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
MultiSearcher.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using System.Linq;
21 using Lucene.Net.Support;
22 using Lucene.Net.Util;
23 using Document = Lucene.Net.Documents.Document;
24 using FieldSelector = Lucene.Net.Documents.FieldSelector;
25 using CorruptIndexException = Lucene.Net.Index.CorruptIndexException;
26 using IndexReader = Lucene.Net.Index.IndexReader;
27 using Term = Lucene.Net.Index.Term;
28 using ReaderUtil = Lucene.Net.Util.ReaderUtil;
29 
30 namespace Lucene.Net.Search
31 {
32 
33  /// <summary>Implements search over a set of <c>Searchables</c>.
34  ///
35  /// <p/>Applications usually need only call the inherited <see cref="Searcher.Search(Query, int)" />
36  /// or <see cref="Searcher.Search(Query,Filter, int)" /> methods.
37  /// </summary>
38  public class MultiSearcher:Searcher
39  {
/// <summary>
/// Adapts a caller-supplied <see cref="Collector"/> so that per-reader doc
/// bases are shifted by the owning sub-searcher's starting offset, mapping
/// relative doc ids into the MultiSearcher's global doc-id space.
/// </summary>
private class AnonymousClassCollector : Collector
{
    private readonly Lucene.Net.Search.Collector wrapped;  // collector being adapted
    private readonly int baseOffset;                       // doc-id offset of the sub-searcher
    private readonly MultiSearcher enclosingInstance;

    public AnonymousClassCollector(Lucene.Net.Search.Collector collector, int start, MultiSearcher enclosingInstance)
    {
        this.wrapped = collector;
        this.baseOffset = start;
        this.enclosingInstance = enclosingInstance;
    }

    public MultiSearcher Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    public override void SetScorer(Scorer scorer)
    {
        wrapped.SetScorer(scorer);
    }

    public override void Collect(int doc)
    {
        wrapped.Collect(doc);
    }

    public override void SetNextReader(IndexReader reader, int docBase)
    {
        // Shift the reader's doc base into the multi-searcher's id space.
        wrapped.SetNextReader(reader, baseOffset + docBase);
    }

    public override bool AcceptsDocsOutOfOrder
    {
        get { return wrapped.AcceptsDocsOutOfOrder; }
    }
}
81 
/// <summary> Document Frequency cache acting as a Dummy-Searcher. This class is no
/// full-fledged Searcher, but only supports the methods necessary to
/// initialize Weights.
/// </summary>
private class CachedDfSource : Searcher
{
    private readonly Dictionary<Term, int> dfMap; // Map from Terms to corresponding doc freqs
    private readonly int maxDoc; // document count

    public CachedDfSource(Dictionary<Term, int> dfMap, int maxDoc, Similarity similarity)
    {
        this.dfMap = dfMap;
        this.maxDoc = maxDoc;
        Similarity = similarity;
    }

    /// <summary>Returns the cached document frequency for <paramref name="term"/>.</summary>
    /// <exception cref="System.ArgumentException">if the term was not among those
    /// aggregated when this cache was built (see CreateWeight).</exception>
    public override int DocFreq(Term term)
    {
        int df;
        // Use TryGetValue instead of the original indexer + catch of
        // KeyNotFoundException: same observable behavior, but without
        // exception-driven control flow.
        if (!dfMap.TryGetValue(term, out df))
        {
            throw new System.ArgumentException("df for term " + term.Text + " not available");
        }
        return df;
    }

    /// <summary>Looks up the cached doc freq of each term, in order.</summary>
    public override int[] DocFreqs(Term[] terms)
    {
        int[] result = new int[terms.Length];
        for (int i = 0; i < terms.Length; i++)
        {
            result[i] = DocFreq(terms[i]);
        }
        return result;
    }

    public override int MaxDoc
    {
        get { return maxDoc; }
    }

    public override Query Rewrite(Query query)
    {
        // this is a bit of a hack. We know that a query which
        // creates a Weight based on this Dummy-Searcher is
        // always already rewritten (see preparedWeight()).
        // Therefore we just return the unmodified query here
        return query;
    }

    // TODO: This probably shouldn't throw an exception?
    protected override void Dispose(bool disposing)
    {
        throw new System.NotSupportedException();
    }

    public override Document Doc(int i)
    {
        throw new System.NotSupportedException();
    }

    public override Document Doc(int i, FieldSelector fieldSelector)
    {
        throw new System.NotSupportedException();
    }

    public override Explanation Explain(Weight weight, int doc)
    {
        throw new System.NotSupportedException();
    }

    public override void Search(Weight weight, Filter filter, Collector results)
    {
        throw new System.NotSupportedException();
    }

    public override TopDocs Search(Weight weight, Filter filter, int n)
    {
        throw new System.NotSupportedException();
    }

    public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
    {
        throw new System.NotSupportedException();
    }
}
172 
private Searchable[] searchables;  // the sub-searchers being aggregated
private int[] starts;              // starts[i] = doc-id offset of searchable i; final entry = total doc count
private int maxDoc = 0;            // combined document count of all sub-searchers

private bool isDisposed;

/// <summary>Creates a searcher which searches <i>searchers</i>. </summary>
public MultiSearcher(params Searchable[] searchables)
{
    this.searchables = searchables;

    // Build the starts array as a running prefix sum of sub-searcher sizes,
    // terminated by a sentinel entry holding the combined maxDoc.
    starts = new int[searchables.Length + 1];
    for (int idx = 0; idx < searchables.Length; idx++)
    {
        starts[idx] = maxDoc;
        maxDoc += searchables[idx].MaxDoc;
    }
    starts[searchables.Length] = maxDoc;
}
192 
/// <summary>Return the array of <see cref="Searchable" />s this searches. </summary>
public virtual Searchable[] GetSearchables()
{
    return searchables;
}

/// <summary>Returns the array of per-searcher starting doc-id offsets built by
/// the constructor; the final entry equals the total document count.</summary>
protected internal virtual int[] GetStarts()
{
    return starts;
}
203 
/// <summary>Closes every sub-searcher exactly once; subsequent calls are no-ops.</summary>
protected override void Dispose(bool disposing)
{
    if (isDisposed) return;

    if (disposing)
    {
        foreach (Searchable searchable in searchables)
            searchable.Close();
    }

    isDisposed = true;
}
216 
/// <summary>A term's document frequency here is the sum of its frequency
/// across every sub-searcher.</summary>
public override int DocFreq(Term term)
{
    int total = 0;
    foreach (Searchable searchable in searchables)
        total += searchable.DocFreq(term);
    return total;
}
224 
// inherit javadoc
public override Document Doc(int n)
{
    int subIndex = SubSearcher(n); // locate the sub-searcher owning doc n
    return searchables[subIndex].Doc(n - starts[subIndex]); // dispatch with a relative doc id
}

// inherit javadoc
public override Document Doc(int n, FieldSelector fieldSelector)
{
    int subIndex = SubSearcher(n); // locate the sub-searcher owning doc n
    return searchables[subIndex].Doc(n - starts[subIndex], fieldSelector); // dispatch with a relative doc id
}
238 
/// <summary>Returns index of the searcher for document <c>n</c> in the array
/// used to construct this searcher.
/// </summary>
public virtual int SubSearcher(int n)
{
    // find searcher for doc n: maps the absolute doc id to a sub-searcher
    // via the starts offsets (delegated to ReaderUtil.SubIndex)
    return ReaderUtil.SubIndex(n, starts);
}

/// <summary>Returns the document number of document <c>n</c> within its
/// sub-index.
/// </summary>
public virtual int SubDoc(int n)
{
    // subtract the owning sub-searcher's starting offset
    return n - starts[SubSearcher(n)];
}
255 
/// <summary>Total document count across all sub-searchers, computed in the constructor.</summary>
public override int MaxDoc
{
    get { return maxDoc; }
}
260 
/// <summary>Searches every sub-searcher sequentially and merges the top
/// <paramref name="nDocs"/> hits into a single <see cref="TopDocs"/>.</summary>
public override TopDocs Search(Weight weight, Filter filter, int nDocs)
{
    var hitQueue = new HitQueue(nDocs, false);
    int hitCount = 0;

    object syncRoot = new object();
    for (int idx = 0; idx < searchables.Length; idx++)
    {
        // search each searcher
        // use NullLock, we don't care about synchronization for these
        TopDocs subDocs = MultiSearcherCallableNoSort(ThreadLock.NullLock, syncRoot, searchables[idx], weight, filter, nDocs, hitQueue, idx, starts);
        hitCount += subDocs.TotalHits; // accumulate the total hit count
    }

    // Drain the queue backwards: it pops worst-first, so filling from the
    // end leaves the best hit at index 0.
    var merged = new ScoreDoc[hitQueue.Size()];
    for (int slot = hitQueue.Size() - 1; slot >= 0; slot--)
        merged[slot] = hitQueue.Pop();

    float topScore;
    if (hitCount == 0)
        topScore = System.Single.NegativeInfinity;
    else
        topScore = merged[0].Score;

    return new TopDocs(hitCount, merged, topScore);
}
284 
/// <summary>Searches every sub-searcher sequentially with a <see cref="Sort"/>
/// and merges the top <paramref name="n"/> hits into one <see cref="TopFieldDocs"/>.</summary>
public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
{
    var hitQueue = new FieldDocSortedHitQueue(n);
    int hitCount = 0;
    float bestScore = System.Single.NegativeInfinity;

    object syncRoot = new object();
    for (int idx = 0; idx < searchables.Length; idx++)
    {
        // search each searcher
        // use NullLock, we don't care about synchronization for these
        TopFieldDocs subDocs = MultiSearcherCallableWithSort(ThreadLock.NullLock, syncRoot, searchables[idx], weight, filter, n, hitQueue, sort,
                                                             idx, starts);
        hitCount += subDocs.TotalHits;
        bestScore = System.Math.Max(bestScore, subDocs.MaxScore);
    }

    // Drain the queue backwards: it pops worst-first, so filling from the
    // end leaves the best hit at index 0.
    var merged = new ScoreDoc[hitQueue.Size()];
    for (int slot = hitQueue.Size() - 1; slot >= 0; slot--)
        merged[slot] = hitQueue.Pop();

    return new TopFieldDocs(hitCount, merged, hitQueue.GetFields(), bestScore);
}
310 
///<inheritdoc />
public override void Search(Weight weight, Filter filter, Collector collector)
{
    // Delegate to each sub-searcher, wrapping the caller's collector so that
    // per-reader doc bases are shifted into the global doc-id space.
    for (int idx = 0; idx < searchables.Length; idx++)
    {
        Collector shifted = new AnonymousClassCollector(collector, starts[idx], this);
        searchables[idx].Search(weight, filter, shifted);
    }
}
322 
/// <summary>Rewrites the query against every sub-searcher and combines the
/// per-searcher rewrites into a single query.</summary>
public override Query Rewrite(Query original)
{
    var rewritten = new Query[searchables.Length];
    for (int idx = 0; idx < searchables.Length; idx++)
        rewritten[idx] = searchables[idx].Rewrite(original);
    return rewritten[0].Combine(rewritten);
}
332 
/// <summary>Dispatches the explanation request to the sub-searcher that owns
/// <paramref name="doc"/>, translating to its relative doc id.</summary>
public override Explanation Explain(Weight weight, int doc)
{
    int subIndex = SubSearcher(doc); // locate the owning sub-searcher
    return searchables[subIndex].Explain(weight, doc - starts[subIndex]);
}
338 
/// <summary> Create weight in multiple index scenario.
///
/// Distributed query processing is done in the following steps:
/// 1. rewrite query
/// 2. extract necessary terms
/// 3. collect dfs for these terms from the Searchables
/// 4. create query weight using aggregate dfs.
/// 5. distribute that weight to Searchables
/// 6. merge results
///
/// Steps 1-4 are done here, 5+6 in the search() methods
///
/// </summary>
/// <returns> rewritten queries
/// </returns>
public /*protected internal*/ override Weight CreateWeight(Query original)
{
    // step 1: rewrite the query
    Query rewrittenQuery = Rewrite(original);

    // step 2: extract the terms the rewritten query uses
    ISet<Term> terms = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<Term>();
    rewrittenQuery.ExtractTerms(terms);

    // step 3: aggregate each term's document frequency over all sub-searchers
    Term[] termArray = terms.ToArray();
    var aggregatedDfs = new int[terms.Count];
    foreach (Searchable searchable in searchables)
    {
        int[] dfs = searchable.DocFreqs(termArray);
        for (int j = 0; j < aggregatedDfs.Length; j++)
            aggregatedDfs[j] += dfs[j];
    }

    var dfMap = new Dictionary<Term, int>();
    for (int idx = 0; idx < termArray.Length; idx++)
        dfMap[termArray[idx]] = aggregatedDfs[idx];

    // step 4: build the weight against a dummy searcher serving the
    // aggregated dfs and this searcher's combined document count
    int numDocs = MaxDoc;
    var cacheSim = new CachedDfSource(dfMap, numDocs, Similarity);

    return rewrittenQuery.Weight(cacheSim);
}
387 
// Searches one sub-searcher and merges its hits into the shared queue,
// translating relative doc ids into the multi-searcher's doc-id space.
// The ThreadLock abstraction guards queue access; callers that run
// single-threaded pass ThreadLock.NullLock.
internal Func<ThreadLock, object, Searchable, Weight, Filter, int, HitQueue, int, int[], TopDocs> MultiSearcherCallableNoSort =
    (threadLock, lockObj, searchable, weight, filter, nDocs, hq, i, starts) =>
    {
        TopDocs docs = searchable.Search(weight, filter, nDocs);
        ScoreDoc[] hits = docs.ScoreDocs;
        for (int idx = 0; idx < hits.Length; idx++) // merge hits into hq
        {
            ScoreDoc hit = hits[idx];
            hit.Doc += starts[i]; // convert to an absolute doc id
            //it would be so nice if we had a thread-safe insert
            try
            {
                threadLock.Enter(lockObj);
                // Getting our own hit back from InsertWithOverflow means it
                // was rejected; presumably hits arrive best-first, so no later
                // hit would be accepted either — TODO confirm against HitQueue.
                if (hit == hq.InsertWithOverflow(hit))
                    break;
            }
            finally
            {
                threadLock.Exit(lockObj);
            }
        }
        return docs;
    };
411 
// Searches one sub-searcher with a Sort and merges its hits into the shared
// field-sorted queue, translating relative doc ids into the multi-searcher's
// doc-id space. The ThreadLock abstraction guards queue access; callers that
// run single-threaded pass ThreadLock.NullLock.
internal Func<ThreadLock, object, Searchable, Weight, Filter, int, FieldDocSortedHitQueue, Sort, int, int[], TopFieldDocs>
    MultiSearcherCallableWithSort = (threadLock, lockObj, searchable, weight, filter, nDocs, hq, sort, i, starts) =>
    {
        TopFieldDocs docs = searchable.Search(weight, filter, nDocs, sort);
        // if one of the Sort fields is FIELD_DOC, need to fix its values, so that
        // it will break ties by doc Id properly. Otherwise, it will compare to
        // 'relative' doc Ids, that belong to two different searchables.
        for (int j = 0; j < docs.fields.Length; j++)
        {
            if (docs.fields[j].Type == SortField.DOC)
            {
                // iterate over the score docs and change their fields value
                for (int j2 = 0; j2 < docs.ScoreDocs.Length; j2++)
                {
                    FieldDoc fd = (FieldDoc) docs.ScoreDocs[j2];
                    fd.fields[j] = (int)fd.fields[j] + starts[i];
                }
                break;
            }
        }
        try
        {
            threadLock.Enter(lockObj);
            hq.SetFields(docs.fields);
        }
        finally
        {
            threadLock.Exit(lockObj);
        }

        ScoreDoc[] scoreDocs = docs.ScoreDocs;
        for (int j = 0; j < scoreDocs.Length; j++) // merge scoreDocs into hq
        {
            FieldDoc fieldDoc = (FieldDoc) scoreDocs[j];
            fieldDoc.Doc += starts[i]; //convert doc
            //it would be so nice if we had a thread-safe insert
            // Consistency fix: use the injected ThreadLock (as SetFields above
            // and MultiSearcherCallableNoSort do) instead of a hard
            // lock(lockObj) — callers passing ThreadLock.NullLock explicitly
            // opt out of synchronization, which the hard lock defeated.
            try
            {
                threadLock.Enter(lockObj);
                // Our own doc coming back from InsertWithOverflow means it was
                // rejected and no later (worse) hit would be accepted either.
                if (fieldDoc == hq.InsertWithOverflow(fieldDoc))
                    break;
            }
            finally
            {
                threadLock.Exit(lockObj);
            }
        }
        return docs;
    };
457  }
458 }