Lucene.Net 3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
SearchFiles.cs
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using System.IO;
using System.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.QueryParsers;
using Lucene.Net.Index;
using Lucene.Net.Search;
using FSDirectory = Lucene.Net.Store.FSDirectory;
using Version = Lucene.Net.Util.Version;

namespace Lucene.Net.Demo
{

    /// <summary>Simple command-line based search demo. </summary>
    public static class SearchFiles
    {
        private class AnonymousClassCollector : Collector
        {
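            // Note (added): the searcher drives a Collector one index segment at a
            // time; for each segment it calls SetNextReader (passing that segment's
            // doc-id base), then SetScorer, and then Collect once per matching doc.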
            private Scorer scorer;
            private int docBase;

            // simply print docId and score of every matching document
            public override void Collect(int doc)
            {
                Console.Out.WriteLine("doc=" + (doc + docBase) + " score=" + scorer.Score());
            }

            public override bool AcceptsDocsOutOfOrder
            {
                get { return true; }
            }

            public override void SetNextReader(IndexReader reader, int docBase)
            {
                this.docBase = docBase;
            }

            public override void SetScorer(Scorer scorer)
            {
                this.scorer = scorer;
            }
        }

        /// <summary>
        /// Use the norms from one field for all fields. Norms are read into memory,
        /// using a byte of memory per document per searched field. This can cause
        /// a search of large collections with a large number of fields to run out of
        /// memory. If all of the fields contain only a single token, then the norms
        /// are all identical and a single norm vector may be shared.
        /// </summary>
        private class OneNormsReader : FilterIndexReader
        {
            private readonly String field;

            public OneNormsReader(IndexReader in_Renamed, String field) : base(in_Renamed)
            {
                this.field = field;
            }

            public override byte[] Norms(String field)
            {
                return in_Renamed.Norms(this.field);
            }
        }

        /// <summary>Simple command-line based search demo. </summary>
        [STAThread]
        public static void Main(String[] args)
        {
            String usage = "Usage:\t" + typeof(SearchFiles) + " [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field] [-paging hitsPerPage]";
            usage += "\n\tSpecify 'false' for hitsPerPage to use streaming instead of paging search.";
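            // Example invocation (hypothetical index directory and query file):
            //   SearchFiles -index C:\demo\index -queries queries.txt -paging 20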
            if (args.Length > 0 && ("-h".Equals(args[0]) || "-help".Equals(args[0])))
            {
                Console.Out.WriteLine(usage);
                Environment.Exit(0);
            }

            String index = "index";
            String field = "contents";
            String queries = null;
            int repeat = 0;
            bool raw = false;
            String normsField = null;
            bool paging = true;
            int hitsPerPage = 10;

            for (int i = 0; i < args.Length; i++)
            {
                if ("-index".Equals(args[i]))
                {
                    index = args[i + 1];
                    i++;
                }
                else if ("-field".Equals(args[i]))
                {
                    field = args[i + 1];
                    i++;
                }
                else if ("-queries".Equals(args[i]))
                {
                    queries = args[i + 1];
                    i++;
                }
                else if ("-repeat".Equals(args[i]))
                {
                    repeat = Int32.Parse(args[i + 1]);
                    i++;
                }
                else if ("-raw".Equals(args[i]))
                {
                    raw = true;
                }
                else if ("-norms".Equals(args[i]))
                {
                    normsField = args[i + 1];
                    i++;
                }
                else if ("-paging".Equals(args[i]))
                {
                    if (args[i + 1].Equals("false"))
                    {
                        paging = false;
                    }
                    else
                    {
                        hitsPerPage = Int32.Parse(args[i + 1]);
                        if (hitsPerPage == 0)
                        {
                            paging = false;
                        }
                    }
                    i++;
                }
            }

            IndexReader indexReader = null;
            try
            {
                // only searching, so read-only=true
                indexReader = IndexReader.Open(FSDirectory.Open(new System.IO.DirectoryInfo(index)), true);

                if (normsField != null)
                    indexReader = new OneNormsReader(indexReader, normsField);

                Searcher searcher = new IndexSearcher(indexReader);
                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
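                // Note (added): the Version passed here and to the QueryParser below
                // should match the version used when the index was built (see the
                // IndexFiles demo), so query-time analysis matches index-time analysis.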

                StreamReader queryReader;
                if (queries != null)
                {
                    queryReader = new StreamReader(queries, Encoding.Default);
                }
                else
                {
                    queryReader = new StreamReader(Console.OpenStandardInput(), Encoding.UTF8);
                }

                var parser = new QueryParser(Version.LUCENE_30, field, analyzer);
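                // The parser accepts standard Lucene query syntax; illustrative examples:
                //   apache              - a single term in the default field
                //   +lucene -solr       - required and prohibited terms
                //   "query parser"~2    - a phrase query with slop
                //   title:demo          - a term in another field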
                while (true)
                {
                    if (queries == null)
                        // prompt the user
                        Console.Out.WriteLine("Enter query: ");

                    String line = queryReader.ReadLine();

                    if (line == null)
                        break;

                    line = line.Trim();
                    if (line.Length == 0)
                        break;

                    Query query = parser.Parse(line);
                    Console.Out.WriteLine("Searching for: " + query.ToString(field));

                    if (repeat > 0)
                    {
                        // repeat & time as benchmark
                        DateTime start = DateTime.Now;
                        for (int i = 0; i < repeat; i++)
                        {
                            searcher.Search(query, null, 100);
                        }
                        DateTime end = DateTime.Now;
                        Console.Out.WriteLine("Time: " + (end - start).TotalMilliseconds + "ms");
                    }

                    if (paging)
                    {
                        DoPagingSearch(queryReader, searcher, query, hitsPerPage, raw, queries == null);
                    }
                    else
                    {
                        DoStreamingSearch(searcher, query);
                    }
                }
                queryReader.Close();
            }
            finally
            {
                if (indexReader != null)
                {
                    indexReader.Dispose();
                }
            }
        }

        /// <summary>
        /// This method uses a custom Collector implementation which simply prints out
        /// the docId and score of every matching document.
        ///
        /// This simulates the streaming search use case, where all hits are supposed to
        /// be processed, regardless of their relevance.
        /// </summary>
        public static void DoStreamingSearch(Searcher searcher, Query query)
        {
            Collector streamingHitCollector = new AnonymousClassCollector();
            searcher.Search(query, streamingHitCollector);
        }

        /// <summary> This demonstrates a typical paging search scenario, where the search engine presents
        /// pages of size n to the user. The user can then go to the next page if interested in
        /// the next hits.
        ///
        /// When the query is executed for the first time, only enough results are collected
        /// to fill 5 result pages. If the user wants to page beyond this limit, the query
        /// is executed another time and all hits are collected.
        ///
        /// </summary>
        public static void DoPagingSearch(StreamReader input, Searcher searcher, Query query, int hitsPerPage, bool raw, bool interactive)
        {
            // Collect enough docs to show 5 pages
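            // Note (added): the second argument to Create is docsScoredInOrder; false
            // means the collector must not assume hits arrive in increasing doc-id order.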
            var collector = TopScoreDocCollector.Create(5 * hitsPerPage, false);
            searcher.Search(query, collector);
            var hits = collector.TopDocs().ScoreDocs;

            int numTotalHits = collector.TotalHits;
            Console.Out.WriteLine(numTotalHits + " total matching documents");

            int start = 0;
            int end = Math.Min(numTotalHits, hitsPerPage);

            while (true)
            {
                if (end > hits.Length)
                {
                    Console.Out.WriteLine("Only results 1 - " + hits.Length + " of " + numTotalHits + " total matching documents collected.");
                    Console.Out.WriteLine("Collect more (y/n) ?");
                    String line = input.ReadLine();
                    if (String.IsNullOrEmpty(line) || line[0] == 'n')
                    {
                        break;
                    }

                    collector = TopScoreDocCollector.Create(numTotalHits, false);
                    searcher.Search(query, collector);
                    hits = collector.TopDocs().ScoreDocs;
                }

                end = Math.Min(hits.Length, start + hitsPerPage);

                for (int i = start; i < end; i++)
                {
                    if (raw)
                    {
                        // output raw format
                        Console.Out.WriteLine("doc=" + hits[i].Doc + " score=" + hits[i].Score);
                        continue;
                    }

                    Document doc = searcher.Doc(hits[i].Doc);
                    String path = doc.Get("path");
                    if (path != null)
                    {
                        Console.Out.WriteLine((i + 1) + ". " + path);
                        String title = doc.Get("title");
                        if (title != null)
                        {
                            Console.Out.WriteLine(" Title: " + title);
                        }
                    }
                    else
                    {
                        Console.Out.WriteLine((i + 1) + ". " + "No path for this document");
                    }
                }

                if (!interactive)
                {
                    break;
                }

                if (numTotalHits >= end)
                {
                    bool quit = false;
                    while (true)
                    {
                        Console.Out.Write("Press ");
                        if (start - hitsPerPage >= 0)
                        {
                            Console.Out.Write("(p)revious page, ");
                        }
                        if (start + hitsPerPage < numTotalHits)
                        {
                            Console.Out.Write("(n)ext page, ");
                        }
                        Console.Out.WriteLine("(q)uit or enter number to jump to a page.");

                        String line = input.ReadLine();
                        if (String.IsNullOrEmpty(line) || line[0] == 'q')
                        {
                            quit = true;
                            break;
                        }
                        if (line[0] == 'p')
                        {
                            start = Math.Max(0, start - hitsPerPage);
                            break;
                        }
                        else if (line[0] == 'n')
                        {
                            if (start + hitsPerPage < numTotalHits)
                            {
                                start += hitsPerPage;
                            }
                            break;
                        }
                        else
                        {
                            // parse defensively so non-numeric input does not crash the demo
                            int page;
                            if (Int32.TryParse(line, out page) && (page - 1) * hitsPerPage < numTotalHits)
                            {
                                start = (page - 1) * hitsPerPage;
                                break;
                            }
                            else
                            {
                                Console.Out.WriteLine("No such page");
                            }
                        }
                    }
                    if (quit)
                        break;
                    end = Math.Min(numTotalHits, start + hitsPerPage);
                }
            }
        }
    }
}
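
For reference, a minimal programmatic sketch of the same search flow, using only APIs already shown in the listing above. It assumes an index built by the IndexFiles demo exists in a local "index" directory; the path and the query string "apache" are illustrative only.

    using (var reader = IndexReader.Open(FSDirectory.Open(new DirectoryInfo("index")), true))
    {
        var searcher = new IndexSearcher(reader);
        var analyzer = new StandardAnalyzer(Version.LUCENE_30);
        var parser = new QueryParser(Version.LUCENE_30, "contents", analyzer);

        // Run the query and print the top hits in the demo's raw output format.
        TopDocs topDocs = searcher.Search(parser.Parse("apache"), null, 10);
        foreach (ScoreDoc hit in topDocs.ScoreDocs)
        {
            Console.WriteLine("doc=" + hit.Doc + " score=" + hit.Score);
        }
    }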