Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
IndexFiles.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.IO;
20 using Lucene.Net.Analysis.Standard;
21 using Lucene.Net.Index;
22 
23 using FSDirectory = Lucene.Net.Store.FSDirectory;
24 using Version = Lucene.Net.Util.Version;
25 
namespace Lucene.Net.Demo
{

    /// <summary>
    /// Command-line demo that walks a directory tree and adds every file it
    /// finds to a new Lucene index stored in <see cref="INDEX_DIR"/>.
    /// </summary>
    public static class IndexFiles
    {
        /// <summary>Directory the index is written to (the relative path "index").</summary>
        internal static readonly DirectoryInfo INDEX_DIR = new DirectoryInfo("index");

        /// <summary>
        /// Program entry point. Validates the arguments, refuses to run if the
        /// index location already exists, then indexes the given root path and
        /// prints the elapsed time.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments; <c>args[0]</c> is the root directory (or a
        /// single file) to index.
        /// </param>
        [STAThread]
        public static void Main(String[] args)
        {
            var usage = typeof(IndexFiles) + " <root_directory>";
            if (args.Length == 0)
            {
                Console.Error.WriteLine("Usage: " + usage);
                Environment.Exit(1);
            }

            // Never write into an existing location: the caller must remove it first.
            if (File.Exists(INDEX_DIR.FullName) || Directory.Exists(INDEX_DIR.FullName))
            {
                Console.Out.WriteLine("Cannot save index to '" + INDEX_DIR + "' directory, please delete it first");
                Environment.Exit(1);
            }

            var docDir = new DirectoryInfo(args[0]);
            var rootIsFile = File.Exists(docDir.FullName);
            var rootIsDirectory = Directory.Exists(docDir.FullName);
            if (!rootIsFile && !rootIsDirectory)
            {
                Console.Out.WriteLine("Document directory '" + docDir.FullName + "' does not exist or is not readable, please check the path");
                Environment.Exit(1);
            }

            // A stopwatch measures the real elapsed time. Subtracting the
            // Millisecond components of two DateTime values only compares the
            // 0-999 fractional part and can even be negative.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();
            try
            {
                using (var writer = new IndexWriter(FSDirectory.Open(INDEX_DIR), new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.LIMITED))
                {
                    Console.Out.WriteLine("Indexing to directory '" + INDEX_DIR + "'...");
                    if (rootIsFile)
                    {
                        // The existence check accepts a plain file, but a file
                        // cannot be enumerated as a directory; index it directly.
                        IndexDocs(writer, new FileInfo(docDir.FullName));
                    }
                    else
                    {
                        IndexDirectory(writer, docDir);
                    }
                    Console.Out.WriteLine("Optimizing...");
                    writer.Optimize();
                    writer.Commit();
                }
                stopwatch.Stop();
                Console.Out.WriteLine(stopwatch.ElapsedMilliseconds + " total milliseconds");
            }
            catch (IOException e)
            {
                Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
            }
        }

        /// <summary>
        /// Recursively indexes a directory: first every sub-directory, then
        /// every file directly inside <paramref name="directory"/>.
        /// </summary>
        /// <param name="writer">Index writer that receives the documents.</param>
        /// <param name="directory">Directory whose contents are indexed.</param>
        internal static void IndexDirectory(IndexWriter writer, DirectoryInfo directory)
        {
            foreach (var subDirectory in directory.GetDirectories())
            {
                IndexDirectory(writer, subDirectory);
            }

            foreach (var file in directory.GetFiles())
            {
                IndexDocs(writer, file);
            }
        }

        /// <summary>
        /// Adds one file to the index. Files that cannot be read are reported
        /// on standard error and skipped so that a single bad file does not
        /// abort the whole run.
        /// </summary>
        /// <param name="writer">Index writer that receives the document.</param>
        /// <param name="file">File to turn into a document and add.</param>
        internal static void IndexDocs(IndexWriter writer, FileInfo file)
        {
            Console.Out.WriteLine("adding " + file);

            try
            {
                writer.AddDocument(FileDocument.Document(file));
            }
            catch (UnauthorizedAccessException e)
            {
                // Access-denied errors raised while reading the file.
                Console.Error.WriteLine("skipping " + file + ": " + e.Message);
            }
            catch (IOException e)
            {
                // Covers FileNotFoundException too: at least on Windows, some
                // temporary files raise it with an "access denied" message, and
                // checking beforehand whether the file can be read doesn't help.
                Console.Error.WriteLine("skipping " + file + ": " + e.Message);
            }
        }
    }
}