Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
SynLookup.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using System.IO;
21 using System.Linq;
22 using Lucene.Net.Analysis;
23 using Lucene.Net.Analysis.Tokenattributes;
24 using Lucene.Net.Index;
25 using Lucene.Net.Search;
26 using Lucene.Net.Store;
27 
28 namespace WorldNet.Net
29 {
31  public class SynLookup
32  {
// Scratch state shared between Expand and the nested CollectorImpl.
// Both fields are static and are re-assigned by every call to Expand.

// Query being assembled by Expand; CollectorImpl appends synonym clauses to it.
private static BooleanQuery tmp;

// Every word or synonym that already has a clause in tmp (used to skip duplicates).
private static List<string> already;
35 
/// <summary>
/// Command-line entry point: looks one word up in a synonym index and prints
/// every synonym stored for it, one per line.
/// </summary>
/// <param name="args">
/// Exactly two values: the index directory path and the word to look up.
/// With any other argument count a usage line is printed and nothing is searched.
/// </param>
[STAThread]
public static void Main(System.String[] args)
{
    if (args.Length != 2)
    {
        System.Console.Out.WriteLine(typeof(SynLookup) + " <index path> <word>");
        return;
    }

    using (var directory = FSDirectory.Open(new DirectoryInfo(args[0])))
    using (var searcher = new IndexSearcher(directory, true))
    {
        String word = args[1];
        Query query = new TermQuery(new Term(Syns2Index.F_WORD, word));

        // First pass only counts the matches so that the second search can
        // request exactly that many results.
        var countingCollector = new CountingCollector();
        searcher.Search(query, countingCollector);

        if (countingCollector.numHits == 0)
        {
            Console.Out.WriteLine("No synonyms found for " + word);

            // Stop here: there is nothing to list, and requesting zero results
            // from the searcher below would be rejected with an exception.
            return;
        }

        Console.Out.WriteLine("Synonyms found for \"" + word + "\":");

        var hits = searcher.Search(query, countingCollector.numHits).ScoreDocs;

        foreach (var hit in hits)
        {
            // A matching document may carry several values in the synonym field.
            foreach (var synonym in searcher.Doc(hit.Doc).GetValues(Syns2Index.F_SYN))
            {
                Console.Out.WriteLine(synonym);
            }
        }
    }
}
75 
/// <summary>
/// Builds a query that matches any distinct token of <paramref name="query"/>
/// or any synonym recorded for one of those tokens in <paramref name="syns"/>.
/// </summary>
/// <param name="query">Text to expand; it is tokenized with <paramref name="a"/>.</param>
/// <param name="syns">Searcher over the synonym index (fields Syns2Index.F_WORD and Syns2Index.F_SYN).</param>
/// <param name="a">Analyzer used to split <paramref name="query"/> into tokens.</param>
/// <param name="field">Field name passed to the analyzer and used for every term clause of the result.</param>
/// <param name="boost">Boost set on synonym clauses when greater than 0; otherwise their boost is left untouched.</param>
/// <returns>A BooleanQuery in which every token and every synonym is an Occur.SHOULD clause.</returns>
/// <remarks>
/// The working state lives in the static fields <c>already</c> and <c>tmp</c>, which
/// CollectorImpl reads and mutates while searching, so concurrent calls are not safe.
/// The returned query is the same object that remains referenced by <c>tmp</c>.
/// NOTE(review): AddAttribute is given the concrete TermAttribute class; confirm
/// against the AttributeSource API that a class (not an attribute interface) is accepted.
/// </remarks>
public static Query Expand(String query,
                           Searcher syns,
                           Analyzer a,
                           String field,
                           float boost)
{
    already = new List<String>(); // avoid dups
    var top = new List<String>(); // distinct query tokens, in order of first appearance

    // [1] tokenize the query; the stream (and the reader it wraps) is released when done
    using (var ts = a.TokenStream(field, new StringReader(query)))
    {
        var termAtt = ts.AddAttribute<TermAttribute>();

        while (ts.IncrementToken())
        {
            var word = termAtt.Term;

            if (!already.Contains(word))
            {
                already.Add(word);
                top.Add(word);
            }
        }
    }

    tmp = new BooleanQuery();

    // [2] form query
    foreach (var word in top)
    {
        // [2a] add the top-level word itself
        tmp.Add(new TermQuery(new Term(field, word)), Occur.SHOULD);

        // [2b] add its synonyms: the collector appends one clause to tmp for
        // each synonym that is not yet listed in already
        var c = new CollectorImpl(field, boost);
        syns.Search(new TermQuery(new Term(Syns2Index.F_WORD, word)), c);
    }

    return tmp;
}
124 
/// <summary>
/// Collector that only counts the documents it is handed; it keeps no
/// document ids and ignores scores.
/// </summary>
internal sealed class CountingCollector : Collector
{
    /// <summary>Number of times Collect has been called on this instance.</summary>
    public int numHits;

    /// <summary>Always true: the count does not depend on document order.</summary>
    public override bool AcceptsDocsOutOfOrder
    {
        get { return true; }
    }

    /// <summary>Does nothing; no per-reader state is needed for counting.</summary>
    public override void SetNextReader(IndexReader reader, int docBase)
    {
    }

    /// <summary>Does nothing; scores are not used.</summary>
    public override void SetScorer(Scorer scorer)
    {
    }

    /// <summary>Records one more hit.</summary>
    public override void Collect(int doc)
    {
        numHits += 1;
    }
}
145 
/// <summary>
/// Collector used by Expand: for every matching synonym-index document it adds
/// one SHOULD term clause per not-yet-seen synonym to the enclosing class's
/// static <c>tmp</c> query and records the synonym in the static <c>already</c> list.
/// </summary>
internal sealed class CollectorImpl : Collector
{
    private readonly string field;
    private readonly float boost;

    // Reader handed over by the most recent SetNextReader call; Collect loads documents from it.
    private IndexReader segmentReader;

    /// <param name="field">Field name used for every synonym term clause.</param>
    /// <param name="boost">Boost set on each synonym clause when greater than 0.</param>
    public CollectorImpl(string field, float boost)
    {
        this.boost = boost;
        this.field = field;
    }

    /// <summary>Always true: documents are handled independently of their order.</summary>
    public override bool AcceptsDocsOutOfOrder
    {
        get { return true; }
    }

    /// <summary>Remembers the reader that subsequent Collect calls read from.</summary>
    public override void SetNextReader(IndexReader reader, int docBase)
    {
        segmentReader = reader;
    }

    /// <summary>Scores are not used.</summary>
    public override void SetScorer(Scorer scorer)
    {
        // Ignore
    }

    /// <summary>Adds a clause for each new synonym stored in document <paramref name="doc"/>.</summary>
    public override void Collect(int doc)
    {
        var storedSynonyms = segmentReader.Document(doc).GetValues(Syns2Index.F_SYN);

        // The filter is evaluated lazily, one element at a time, so a synonym added
        // to 'already' in the loop body is also skipped if it repeats later in this document.
        var unseen = storedSynonyms.Where(s => !already.Contains(s));

        foreach (var synonym in unseen)
        {
            already.Add(synonym);

            var clause = new TermQuery(new Term(field, synonym));
            if (boost > 0) // else keep normal 1.0
            {
                clause.Boost = boost;
            }

            tmp.Add(clause, Occur.SHOULD);
        }
    }
}
193 
/// <summary>
/// Field names of the synonym index that SynLookup reads.
/// </summary>
public class Syns2Index
{
    /// <summary>Field that is searched with the word whose synonyms are wanted.</summary>
    public const string F_WORD = "word";

    /// <summary>Field whose stored values are read as the synonyms of a matching document.</summary>
    public const string F_SYN = "syn";
}
205 
206  }
207 
208 }