Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
SynExpand.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using System.IO;
21 using System.Linq;
22 using Lucene.Net.Analysis;
23 using Lucene.Net.Analysis.Standard;
24 using Lucene.Net.Analysis.Tokenattributes;
25 using Lucene.Net.Index;
26 using Lucene.Net.Search;
27 using Lucene.Net.Store;
28 
29 namespace WorldNet.Net
30 {
31 
32 
/// <summary>
/// Expands the words of a query with synonyms read from a Lucene index in which
/// each document carries a <see cref="Syns2Index.F_WORD"/> field and zero or more
/// <see cref="Syns2Index.F_SYN"/> fields.
/// </summary>
public sealed class SynExpand
{
    /// <summary>
    /// Command-line entry point. Expects exactly two arguments, the path of the
    /// synonym index and the query text, and prints the expanded query for the
    /// "contents" field. Prints a usage line and returns when the argument count
    /// is wrong.
    /// </summary>
    /// <param name="args">Index path followed by the query text.</param>
    [STAThread]
    public static void Main(String[] args)
    {
        if (args.Length != 2)
        {
            Console.Out.WriteLine(typeof(SynExpand) + " <index path> <query>");
            return;
        }

        var directory = FSDirectory.Open(new DirectoryInfo(args[0]));
        try
        {
            var searcher = new IndexSearcher(directory, true);
            try
            {
                String query = args[1];
                const string field = "contents";

                Query q = Expand(query, searcher, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), field, 0.9f);
                System.Console.Out.WriteLine("Query: " + q.ToString(field));
            }
            finally
            {
                // Release the searcher even when expansion or printing fails.
                searcher.Close();
            }
        }
        finally
        {
            // The directory is closed last, after the searcher that reads from it.
            directory.Close();
        }
    }

    /// <summary>
    /// Builds a <see cref="BooleanQuery"/> that contains every distinct token of
    /// <paramref name="query"/> plus every synonym found for those tokens, all
    /// added as optional (<c>Occur.SHOULD</c>) term clauses on <paramref name="field"/>.
    /// </summary>
    /// <param name="query">Text to tokenize and expand.</param>
    /// <param name="syns">Searcher over the synonym index.</param>
    /// <param name="a">Analyzer used to tokenize the query; a <see cref="StandardAnalyzer"/> is used when null.</param>
    /// <param name="field">Field the generated term clauses target; "contents" is used when null.</param>
    /// <param name="boost">Boost set on synonym clauses when greater than zero; otherwise synonym clauses keep their default boost.</param>
    /// <returns>The expanded query. Original words are never boosted by this method.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
    public static Query Expand(String query,
                               Searcher syns,
                               Analyzer a,
                               String field,
                               float boost)
    {
        if (query == null)
            throw new ArgumentNullException("query");

        if (field == null)
            field = "contents";

        if (a == null)
            a = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);

        // All state is local to this call so that concurrent calls cannot
        // interfere with each other.
        var seen = new HashSet<String>(); // every term already placed in the query (avoids dups)
        var top = new List<String>();     // the query's own words, in first-seen order

        // [1] Parse query into separate words so that when we expand we can avoid dups
        // NOTE(review): Lucene.Net 3.0.3's AddAttribute<T> may require the attribute
        // interface (ITermAttribute) rather than the concrete class - confirm.
        var ts = a.TokenStream(field, new StringReader(query));
        var termAtt = ts.AddAttribute<TermAttribute>();

        while (ts.IncrementToken())
        {
            var word = termAtt.Term;

            // HashSet.Add returns false for a word that was already recorded.
            if (seen.Add(word))
                top.Add(word);
        }

        var expanded = new BooleanQuery();

        // [2] form query
        foreach (var word in top)
        {
            // [2a] add the top-level word itself
            expanded.Add(new TermQuery(new Term(field, word)), Occur.SHOULD);

            // [2b] add its synonyms; the collector appends them to 'expanded'
            // right after the word, skipping anything already in 'seen'.
            var c = new CollectorImpl(field, boost, seen, expanded);
            syns.Search(new TermQuery(new Term(Syns2Index.F_WORD, word)), c);
        }

        return expanded;
    }

    /// <summary>
    /// Names of the synonym-index fields read by <see cref="SynExpand"/>.
    /// </summary>
    public class Syns2Index
    {
        /// <summary>Field whose values are added to the query as synonyms.</summary>
        public const String F_SYN = "syn";

        /// <summary>Field that is matched against each word of the query.</summary>
        public const String F_WORD = "word";
    }

    /// <summary>
    /// Collector that, for every matching synonym-index document, appends each
    /// not-yet-seen <see cref="Syns2Index.F_SYN"/> value to the target query.
    /// </summary>
    internal sealed class CollectorImpl : Collector
    {
        private IndexReader reader;
        private readonly string field;
        private readonly float boost;
        private readonly HashSet<String> seen;
        private readonly BooleanQuery target;

        /// <summary>Creates a collector bound to one expansion run.</summary>
        /// <param name="field">Field the synonym clauses target.</param>
        /// <param name="boost">Boost for synonym clauses when greater than zero.</param>
        /// <param name="seen">Terms already present in <paramref name="target"/>; updated as synonyms are added.</param>
        /// <param name="target">Query that receives the synonym clauses.</param>
        public CollectorImpl(string field, float boost, HashSet<String> seen, BooleanQuery target)
        {
            this.field = field;
            this.boost = boost;
            this.seen = seen;
            this.target = target;
        }

        public override void SetScorer(Scorer scorer)
        {
            // Scores are not used.
        }

        public override void Collect(int doc)
        {
            var d = reader.Document(doc);
            foreach (var syn in d.GetValues(Syns2Index.F_SYN))
            {
                // Skip synonyms that are already part of the query.
                if (!seen.Add(syn))
                    continue;

                var tq = new TermQuery(new Term(field, syn));
                if (boost > 0) // else keep normal 1.0
                    tq.Boost = boost;

                target.Add(tq, Occur.SHOULD);
            }
        }

        public override void SetNextReader(IndexReader reader, int docBase)
        {
            // Document ids passed to Collect are resolved against this reader.
            this.reader = reader;
        }

        public override bool AcceptsDocsOutOfOrder
        {
            get { return true; }
        }
    }
}
194 }