Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
SimpleFacetedSearch.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using System.Linq;
21 using System.Text;
22 using Lucene.Net.Analysis;
23 using Lucene.Net.Documents;
24 using Lucene.Net.Analysis.Standard;
25 using Lucene.Net.Index;
26 using Lucene.Net.Search;
27 using Lucene.Net.QueryParsers;
28 using Lucene.Net.Store;
29 using Lucene.Net.Util;
30 using System.Threading;
31 
32 #if !NET35
33 using System.Threading.Tasks;
34 #endif
35 
36 /*
37  Suppose we want a faceted search on the fields f1, f2 and f3,
38  and their values in the index are:
39 
40        f1  f2  f3
41        --  --  --
42  doc1  A   I   1
43  doc2  A   I   2
44  doc3  A   I   3
45  doc4  A   J   1
46  doc5  A   J   2
47  doc6  A   J   3
48  doc7  B   I   1
49 
50  Algorithm:
51  1- Find all possible values of each field: (A,B) for f1, (I,J) for f2 and (1,2,3) for f3.
52  2- Compute the Cartesian product (A,B)X(I,J)X(1,2,3), which gives 12 candidate groups.
53  3- Eliminate the combinations that are certain to produce 0 hits (e.g. B J 2, since no document has all three values).
54 */
55 
56 /*
57  TODO: Support for pre-built queries defining groups can be added
58 */
59 
60 namespace Lucene.Net.Search
61 {
/// <summary>
/// Groups the documents of an index by every combination of values found in a set of
/// fields, and runs queries against those groups ("facets").
/// </summary>
/// <remarks>
/// The constructor builds one bit set per non-empty combination of field values.
/// <see cref="Search(Query, int)"/> then creates one <c>HitsPerFacet</c> per group from
/// that bit set and the doc id set of the query.
/// </remarks>
public partial class SimpleFacetedSearch : IDisposable
{
    /// <summary>Per-group document limit used by <see cref="Search(Query)"/>.</summary>
    public const int DefaultMaxDocPerGroup = 25;

    /// <summary>
    /// Upper bound on the size of the Cartesian product of the field values; the
    /// constructor throws when the product exceeds it.
    /// </summary>
    public static int MAX_FACETS = 2048;

    IndexReader _Reader;

    // One entry per non-empty group: the field values that name the group and the
    // bit set produced by AND-ing the per-value bit sets of those fields.
    List<KeyValuePair<List<string>, OpenBitSetDISI>> _Groups = new List<KeyValuePair<List<string>, OpenBitSetDISI>>();

    /// <summary>
    /// Creates a faceted search that groups by a single field.
    /// </summary>
    /// <param name="reader">Reader over the index to group.</param>
    /// <param name="groupByField">Name of the field to group by.</param>
    public SimpleFacetedSearch(IndexReader reader, string groupByField)
        : this(reader, new string[] { groupByField })
    {
    }

    /// <summary>
    /// Creates a faceted search that groups by every combination of values of
    /// <paramref name="groupByFields"/>.
    /// </summary>
    /// <param name="reader">Reader over the index to group.</param>
    /// <param name="groupByFields">Names of the fields to group by.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="reader"/> or <paramref name="groupByFields"/> is null.
    /// </exception>
    /// <exception cref="Exception">
    /// The number of value combinations exceeds <see cref="MAX_FACETS"/>.
    /// </exception>
    public SimpleFacetedSearch(IndexReader reader, string[] groupByFields)
    {
        if (reader == null)
        {
            throw new ArgumentNullException("reader");
        }
        if (groupByFields == null)
        {
            throw new ArgumentNullException("groupByFields");
        }

        this._Reader = reader;

        List<FieldValuesBitSets> fieldValuesBitSets = new List<FieldValuesBitSets>();

        //STEP 1
        //f1 = A, B
        //f2 = I, J
        //f3 = 1, 2, 3
        // Accumulate in a long: an int product can wrap around in unchecked arithmetic
        // and slip past the MAX_FACETS guard below.
        long maxFacets = 1;
        IList<IList<string>> inputToCP = new List<IList<string>>();
        foreach (string field in groupByFields)
        {
            FieldValuesBitSets f = new FieldValuesBitSets(reader, field);
            maxFacets *= f.FieldValueBitSetPair.Count;
            if (maxFacets > MAX_FACETS)
            {
                throw new Exception("Facet count exceeded " + MAX_FACETS);
            }
            fieldValuesBitSets.Add(f);
            inputToCP.Add(f.FieldValueBitSetPair.Keys.ToList());
        }

        //STEP 2
        // comb1: A I 1
        // comb2: A I 2 etc.
        // Materialized so that every combination has a stable index; the guard above
        // keeps the number of combinations at or below MAX_FACETS.
        List<List<string>> combinations = inputToCP.CartesianProduct()
                                                   .Select(c => c.ToList())
                                                   .ToList();

        //STEP 3
        //create a single BitSet for each combination
        //BitSet1: A AND I AND 1
        //BitSet2: A AND I AND 2 etc.
        //and remove impossible comb's (for ex, B J 3) from list.
        // Each worker writes only to its own slot, so no locking is needed, and the
        // groups keep the Cartesian-product order in both the parallel and NET35 builds.
        OpenBitSetDISI[] groupBitSets = new OpenBitSetDISI[combinations.Count];
#if !NET35
        Parallel.For(0, combinations.Count, index =>
        {
            groupBitSets[index] = BuildGroupBitSet(fieldValuesBitSets, combinations[index]);
        });
#else
        for (int index = 0; index < combinations.Count; index++)
        {
            groupBitSets[index] = BuildGroupBitSet(fieldValuesBitSets, combinations[index]);
        }
#endif

        for (int k = 0; k < combinations.Count; k++)
        {
            // A null slot marks a combination that no document matches.
            if (groupBitSets[k] != null)
            {
                _Groups.Add(new KeyValuePair<List<string>, OpenBitSetDISI>(combinations[k], groupBitSets[k]));
            }
        }

        //For the example at the top of this file, _Groups now has 7 rows (as <List<string>, BitSet> pairs)
    }

    /// <summary>
    /// AND-s together the bit sets of one value per field.
    /// </summary>
    /// <param name="fieldValuesBitSets">Per-field value bit sets, in field order.</param>
    /// <param name="combination">One value per field, in the same order.</param>
    /// <returns>The intersection, or null when it contains no set bit.</returns>
    private OpenBitSetDISI BuildGroupBitSet(List<FieldValuesBitSets> fieldValuesBitSets, List<string> combination)
    {
        OpenBitSetDISI bitSet = new OpenBitSetDISI(_Reader.MaxDoc);

        // Start with every bit set so that the AND chain below can only narrow it down.
        bitSet.Set(0, bitSet.Size());

        for (int j = 0; j < combination.Count; j++)
        {
            bitSet.And(fieldValuesBitSets[j].FieldValueBitSetPair[combination[j]]);
        }

        // Empty intersections are dropped here so their bit sets are not kept alive.
        return bitSet.Cardinality() > 0 ? bitSet : null;
    }

    /// <summary>
    /// Runs <paramref name="query"/> against every group with the default
    /// per-group limit (<see cref="DefaultMaxDocPerGroup"/>).
    /// </summary>
    /// <param name="query">Query to run.</param>
    /// <returns>One <c>HitsPerFacet</c> entry per group.</returns>
    public Hits Search(Query query)
    {
        return Search(query, DefaultMaxDocPerGroup);
    }

    /// <summary>
    /// Runs <paramref name="query"/> against every group.
    /// </summary>
    /// <param name="query">Query to run.</param>
    /// <param name="maxDocPerGroup">Limit handed to each group's <c>HitsPerFacet</c>.</param>
    /// <returns>One <c>HitsPerFacet</c> entry per group, in group order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
    public Hits Search(Query query, int maxDocPerGroup)
    {
        if (query == null)
        {
            throw new ArgumentNullException("query");
        }

        // The same doc id set is handed to every group below.
        // NOTE(review): the CachingWrapperFilter wrapper is presumably there to obtain a
        // materialized, reusable DocIdSet - confirm against the Lucene.Net implementation.
        DocIdSet queryDocidSet = new CachingWrapperFilter(new QueryWrapperFilter(query)).GetDocIdSet(_Reader);

        int groupCount = _Groups.Count;
        var hitsPerGroup = new HitsPerFacet[groupCount];
        var actions = new Action[groupCount];
        for (int i = 0; i < groupCount; i++)
        {
            KeyValuePair<List<string>, OpenBitSetDISI> group = _Groups[i];
            var h = new HitsPerFacet(new FacetName(group.Key.ToArray()), _Reader, queryDocidSet, group.Value, maxDocPerGroup);
            hitsPerGroup[i] = h;
            actions[i] = h.Calculate;
        }

        // Each group's Calculate runs independently: in parallel when the Task
        // Parallel Library is available, sequentially on NET35.
#if !NET35
        Parallel.Invoke(actions);
#else
        foreach (var action in actions)
        {
            action();
        }
#endif

        return new Hits { HitsPerFacet = hitsPerGroup };
    }

    /// <summary>
    /// Does nothing; in particular, the reader passed to the constructor is not closed here.
    /// </summary>
    public void Dispose()
    {
    }
}
169 }