19 using System.Diagnostics;
21 using Lucene.Net.Analysis.Standard;
22 using Lucene.Net.Index;
27 namespace Lucene.Net.Demo
31 public static class IndexHTML
/// <summary>
/// Command-line entry point: reads the options, runs the indexing passes over the root
/// directory and prints how long the run took.
/// </summary>
/// <param name="argv">
/// Arguments in the form given by the usage string:
/// <c>[-create] [-index &lt;index&gt;] &lt;root_directory&gt;</c>.
/// </param>
/// <remarks>
/// This listing is sparse: braces and several statements (for example the declarations of
/// <c>create</c> and <c>writer</c>) are on lines that are not shown here.
/// </remarks>
36 public static void Main(System.String[] argv)
// The index location defaults to a directory named "index"; "-index" below overrides it.
40 var index =
new DirectoryInfo(
"index");
42 DirectoryInfo root = null;
44 var usage =
"IndexHTML [-create] [-index <index>] <root_directory>";
// NOTE(review): the guard for this usage message is not visible — presumably it fires when no arguments are given; confirm.
48 Console.Error.WriteLine(
"Usage: " + usage);
52 for (
int i = 0; i < argv.Length; i++)
54 if (argv[i].Equals(
"-index"))
// "-index" consumes the following argument as the index directory.
// NOTE(review): no visible bounds check before argv[++i]; a trailing "-index" would index past the array — confirm.
57 index =
new DirectoryInfo(argv[++i]);
59 else if (argv[i].Equals(
"-create"))
// Any other argument that is not the last one is rejected with the usage message.
64 else if (i != argv.Length - 1)
66 Console.Error.WriteLine(
"Usage: " + usage);
// The remaining case — the last argument — is taken as the root directory to index.
70 root =
new DirectoryInfo(argv[i]);
// NOTE(review): presumably reached when root is still null after parsing; the condition is not visible.
75 Console.Error.WriteLine(
"Specify directory to index");
76 Console.Error.WriteLine(
"Usage: " + usage);
80 var start = DateTime.Now;
// NOTE(review): the condition guarding this stale-removal pass is not visible in the listing.
88 IndexDocs(writer, root, index, Operation.RemoveStale);
// "create" selects a complete reindex; otherwise the pass is incremental.
91 var operation = create
92 ? Operation.CompleteReindex
93 : Operation.IncrementalReindex;
94 IndexDocs(writer, root, index, operation);
96 Console.Out.WriteLine(
"Optimizing index...");
101 var end = DateTime.Now;
// DateTime.Millisecond is only the 0-999 millisecond component of a timestamp, so subtracting
// the components does not give elapsed time (and can be negative). Subtract the timestamps
// and report the total; the cast keeps the output an integer as before.
103 Console.Out.Write((long)(end - start).TotalMilliseconds);
104 Console.Out.WriteLine(
" total milliseconds");
// NOTE(review): "e" is presumably the exception caught by a handler that is not visible here.
108 Console.Error.WriteLine(e.StackTrace);
/// <summary>
/// Runs one indexing pass for <paramref name="operation"/> by walking
/// <paramref name="file"/> with IndexDirectory.
/// </summary>
/// <param name="writer">Index writer handed through to IndexDirectory.</param>
/// <param name="file">Directory passed to IndexDirectory as the start of the walk.</param>
/// <param name="index">Index directory; not referenced on any line visible in this listing.</param>
/// <param name="operation">The pass to run; it decides whether a uid term enumerator is used.</param>
/// <remarks>
/// This listing is sparse: braces, the declaration of <c>reader</c> and part of the loop body
/// are on lines that are not shown here.
/// </remarks>
118 private static void IndexDocs(
IndexWriter writer, DirectoryInfo file, DirectoryInfo index, Operation operation)
// A complete reindex walks the tree without a uid enumerator (null is passed).
120 if (operation == Operation.CompleteReindex)
123 IndexDirectory(writer, null, file, operation);
// Otherwise a term enumerator is requested from "reader", starting from the term ("uid", ""),
// and handed to the walk; the using statement disposes it afterwards.
130 using (var uidIter = reader.Terms(
new Term(
"uid",
"")))
132 IndexDirectory(writer, uidIter, file, operation);
134 if (operation == Operation.RemoveStale) {
// After the walk, every remaining term whose field is "uid" is reported as deleted.
// NOTE(review): the statements that delete the document and advance the enumerator are not visible — confirm.
140 while (uidIter.Term != null && uidIter.Term.Field ==
"uid") {
141 Console.Out.WriteLine(
"deleting " + HTMLDocument.Uid2url(uidIter.Term.Text));
/// <summary>
/// Walks the entries of <paramref name="dir"/>, recursing into sub-directories through
/// IndexDirectory and passing existing files to IndexFile.
/// </summary>
/// <param name="writer">Index writer handed through unchanged.</param>
/// <param name="uidIter">Uid term enumerator handed through unchanged; IndexDocs passes null for a complete reindex.</param>
/// <param name="dir">Directory whose file system entries are listed.</param>
/// <param name="operation">The pass being run, handed through unchanged.</param>
/// <remarks>
/// This listing is sparse: the original lines 152-157 and 160 are not shown here.
/// </remarks>
150 private static void IndexDirectory(
IndexWriter writer,
TermEnum uidIter, DirectoryInfo dir, Operation operation) {
151 var entries =
Directory.GetFileSystemEntries(dir.FullName);
158 foreach (var entry
in entries) {
// NOTE(review): GetFileSystemEntries is given a full path, so each entry presumably already
// carries that path and Path.Combine would return the entry unchanged — confirm.
159 var path = Path.Combine(dir.FullName, entry);
// NOTE(review): the condition selecting this recursive call is not visible — presumably a directory check; confirm.
161 IndexDirectory(writer, uidIter,
new DirectoryInfo(path), operation);
162 }
else if (File.Exists(path)) {
163 IndexFile(writer, uidIter,
new FileInfo(path), operation);
/// <summary>
/// Handles one file for the given <paramref name="operation"/>. Only files whose full name
/// ends in ".html", ".htm" or ".txt" pass the visible extension check.
/// </summary>
/// <param name="writer">Index writer; not referenced on any line visible in this listing.</param>
/// <param name="uidIter">
/// Uid term enumerator. Asserted non-null for IncrementalReindex and RemoveStale, and asserted
/// null on the other path.
/// </param>
/// <param name="file">The file to examine.</param>
/// <param name="operation">The pass being run.</param>
/// <remarks>
/// This listing is sparse: braces, the enumerator advance and most branch bodies are on lines
/// that are not shown here.
/// </remarks>
168 private static void IndexFile(
IndexWriter writer,
TermEnum uidIter, FileInfo file, Operation operation)
// NOTE(review): EndsWith is called without a StringComparison and the suffixes are lower-case only,
// so names such as "PAGE.HTML" would not match — confirm this is intended.
170 if (file.FullName.EndsWith(
".html") || file.FullName.EndsWith(
".htm") || file.FullName.EndsWith(
".txt"))
174 if (operation == Operation.IncrementalReindex ||
175 operation == Operation.RemoveStale)
178 Debug.Assert(uidIter != null,
"Expected uidIter != null for operation " + operation);
180 var uid = HTMLDocument.Uid(file);
// Handle every "uid" term that sorts (ordinal comparison) before this file's uid.
// NOTE(review): the statement advancing the enumerator is not visible — confirm.
182 while (uidIter.
Term != null && uidIter.
Term.
Field ==
"uid" && String.CompareOrdinal(uidIter.
Term.
Text, uid) < 0)
// Only the stale-removal pass reports these earlier terms as deleted.
184 if (operation == Operation.RemoveStale)
186 Console.Out.WriteLine(
"deleting " + HTMLDocument.Uid2url(uidIter.
Term.
Text));
// The enumerator's current term has exactly this file's uid.
// NOTE(review): the body of this branch is not visible in the listing.
196 if (uidIter.
Term != null && uidIter.
Term.
Field ==
"uid" && String.CompareOrdinal(uidIter.
Term.
Text, uid) == 0)
// No term with this uid: the incremental pass builds a document for the file and reports it.
// NOTE(review): the call that adds the document to the writer is not visible — confirm.
202 else if (operation == Operation.IncrementalReindex)
206 var doc = HTMLDocument.Document(file);
207 Console.Out.WriteLine(
"adding " + doc.Get(
"path"));
// Remaining path: no enumerator is expected here, and the document is built and reported directly.
215 Debug.Assert(uidIter == null,
"Expected uidIter == null for operation == " + operation);
217 var doc = HTMLDocument.Document(file);
218 Console.Out.WriteLine(
"adding " + doc.Get(
"path"));
224 private enum Operation {