19 using System.Collections.Generic;
22 using Lucene.Net.Store;
26 using Document = Lucene.Net.Documents.Document;
27 using Field = Lucene.Net.Documents.Field;
30 namespace WorldNet.Net
// Writer this tool uses for progress and status messages (see the o.WriteLine calls in Main, Index and Usage).
// NOTE(review): declared static readonly with no initializer visible in this excerpt;
// presumably assigned in a static constructor elsewhere in the class - confirm.
60 private static readonly System.IO.StreamWriter o;
// Writer this tool uses for error messages (see the err.WriteLine calls in Main).
// NOTE(review): same as above, its initialization is not visible here.
63 private static readonly System.IO.StreamWriter err;
// Constant with the value "syn". Its use is not visible in this excerpt;
// presumably the document field name under which synonyms are stored - confirm.
66 public const System.String F_SYN =
"syn";
// Document field name under which Index stores the word itself.
69 public const System.String F_WORD =
"word";
/// <summary>
/// Entry point. Reads the Prolog file named by args[0] line by line, collects two
/// lookup tables from the lines that start with "s(" (word to list of nums, and
/// num to list of words), then passes both tables and the index directory to Index.
/// </summary>
/// <param name="args">Command-line arguments; args[0] is used as the Prolog file name.</param>
78 public static void Main(System.String[] args)
81 String prologFilename = null;
// NOTE(review): the assignment of indexDir is not visible in this excerpt
// (presumably taken from args[1], matching the Usage text) - confirm.
82 String indexDir = null;
85 prologFilename = args[0];
// Report an error when the Prolog file does not exist.
// What happens after the message (exit/return) is not visible in this excerpt.
95 if (!(
new FileInfo(prologFilename)).Exists)
97 err.WriteLine(
"Error: cannot read Prolog file: " + prologFilename);
// Report an error when a directory already exists at the full path of indexDir.
101 if (
Directory.Exists((
new FileInfo(indexDir)).FullName))
103 err.WriteLine(
"Error: index directory already exists: " + indexDir);
104 err.WriteLine(
"Please specify a name of a non-existent directory");
108 o.WriteLine(
"Opening Prolog file " + prologFilename);
// NOTE(review): no using/Dispose/Close for fis or br is visible in this excerpt -
// confirm that both are released.
109 var fis =
new FileStream(prologFilename, FileMode.Open, FileAccess.Read);
// NOTE(review): two temporary StreamReader instances are created over fis only to
// read back BaseStream and CurrentEncoding; a single
// new StreamReader(fis, System.Text.Encoding.Default) looks equivalent - confirm.
110 var br =
new StreamReader(
new StreamReader(fis, System.Text.Encoding.Default).BaseStream,
new StreamReader(fis, System.Text.Encoding.Default).CurrentEncoding);
// word2Nums: word -> list of nums that the word was seen with.
114 System.Collections.IDictionary word2Nums =
new System.Collections.SortedList();
// num2Words: num -> list of words that were seen with that num.
116 System.Collections.IDictionary num2Words =
new System.Collections.SortedList();
// Phase 1: parse the Prolog file.
// line, row, mod and ndecent are declared outside this excerpt.
124 o.WriteLine(
"[1/2] Parsing " + prologFilename);
125 while ((line = br.ReadLine()) != null)
// Print a progress line every mod-th row.
128 if ((++row) % mod == 0)
131 o.WriteLine(
"\t" + row +
" " + line +
" " + word2Nums.Count +
" " + num2Words.Count +
" ndecent=" + ndecent);
// Lines that do not start with "s(" are reported on the error writer.
// What happens after the message is not visible in this excerpt.
135 if (!line.StartsWith(
"s("))
137 err.WriteLine(
"OUCH: " + line);
// Drop the first two characters (the "s(" prefix checked above).
142 line = line.Substring(2);
// num is the text before the first comma.
143 var comma = line.IndexOf(
',');
144 var num = line.Substring(0, comma);
// Skip past the first single quote.
145 var q1 = line.IndexOf(
'\'');
146 line = line.Substring(q1 + 1);
// word is the text up to the next single quote, lower-cased.
// NOTE(review): Substring(0, q2) ends at the first quote, so it cannot contain "''";
// the Replace below therefore looks like a no-op and a word with a doubled quote
// would be cut at that quote - confirm against the input format.
// NOTE(review): ToLower() without arguments uses the current culture - confirm that is intended.
147 var q2 = line.IndexOf(
'\'');
148 var word = line.Substring(0, q2).ToLower().Replace(
"''",
"'");
// Record num under word: fetch the list stored for word; a new one-element list
// is created and stored here (the guarding condition and any append to an
// existing list are not visible in this excerpt).
159 var lis = (System.Collections.IList) word2Nums[word];
162 lis =
new List<String> {num};
163 word2Nums[word] = lis;
// Same bookkeeping in the other direction: record word under num.
169 lis = (System.Collections.IList) num2Words[num];
172 lis =
new List<String> { word };
173 num2Words[num] = lis;
// Phase 2: hand both tables to Index to build the index in indexDir.
184 o.WriteLine(
"[2/2] Building index to store synonyms, " +
" map sizes are " + word2Nums.Count +
" and " + num2Words.Count);
185 Index(indexDir, word2Nums, num2Words);
/// <summary>
/// Walks the characters of <paramref name="s"/> and tests each one with Char.IsLetter.
/// NOTE(review): the return statements are not visible in this excerpt; presumably
/// false is returned at the first non-letter and true otherwise - confirm.
/// </summary>
/// <param name="s">The string whose characters are tested.</param>
194 private static bool IsDecent(String s)
// len is declared outside this excerpt; presumably s.Length - confirm.
197 for (var i = 0; i < len; i++)
199 if (!Char.IsLetter(s[i]))
/// <summary>
/// Builds the index from the two lookup tables. For each key of word2Nums it calls
/// the four-argument Index overload with a document, adds the key to the document
/// as a stored, non-analyzed F_WORD field, and adds the document to the writer.
/// NOTE(review): creation of writer and doc, the declarations of row and mod, and
/// any condition on n guarding these steps are not visible in this excerpt.
/// </summary>
/// <param name="indexDir">Index directory passed in by Main; its use is not visible in this excerpt.</param>
/// <param name="word2Nums">Table whose keys are enumerated; each key is cast to String.</param>
/// <param name="num2Words">Table passed through to the four-argument Index overload.</param>
213 private static void Index(String indexDir, System.Collections.IDictionary word2Nums, System.Collections.IDictionary num2Words)
221 writer.UseCompoundFile =
true;
// Enumerate every key (word) of word2Nums.
223 var i1 = word2Nums.Keys.GetEnumerator();
224 while (i1.MoveNext())
226 var g = (String)i1.Current;
// n is the value returned by the four-argument overload; how n is used
// afterwards is not visible in this excerpt.
229 var n = Index(word2Nums, num2Words, g, doc);
// Store the word itself under F_WORD, stored and not analyzed.
232 doc.Add(
new Field(F_WORD, g,
Field.Store.YES,
Field.Index.NOT_ANALYZED));
// Print a progress line every mod-th row.
233 if ((++row % mod) == 0)
235 o.WriteLine(
"\trow=" + row +
"/" + word2Nums.Count +
" doc= " + doc);
238 writer.AddDocument(doc);
// Only the status message is visible here; the optimize/close calls that
// presumably follow are outside this excerpt.
241 o.WriteLine(
"Optimizing..");
/// <summary>
/// Collects, without repeats, the items found in num2Words for every num listed
/// under <paramref name="g"/> in word2Nums, then enumerates the collected items.
/// NOTE(review): what is done with each collected item, how doc is filled, and the
/// returned int are not visible in this excerpt.
/// </summary>
/// <param name="word2Nums">Table from which the list stored under g is read.</param>
/// <param name="num2Words">Table from which the list stored under each num is read.</param>
/// <param name="g">Key looked up in word2Nums.</param>
/// <param name="doc">Document passed in by the caller; its use is not visible in this excerpt.</param>
251 private static int Index(System.Collections.IDictionary word2Nums, System.Collections.IDictionary num2Words, System.String g,
Document doc)
// keys is the list stored under g in word2Nums.
253 var keys = (System.Collections.IList) word2Nums[g];
254 var i2 = keys.GetEnumerator();
// already holds the items collected so far; each is added as both key and value.
256 var already =
new System.Collections.SortedList();
259 while (i2.MoveNext())
// Items of the list stored under the current num, restricted to those not yet
// in already. The loop header that iterates this sequence (presumably a
// foreach over item) is not visible in this excerpt.
262 ((System.Collections.IList) num2Words[i2.Current]).Cast<
object>().Where(item => already.Contains(item) ==
false))
264 already.Add(item, item);
// Walk the collected items; cur is the current key cast to String.
270 var it = already.GetEnumerator();
271 while (it.MoveNext())
273 var cur = (String) it.Key;
/// <summary>
/// Writes a usage message to the o writer: the Syns2Index type followed by the
/// two argument placeholders (prolog file, index dir), surrounded by blank lines.
/// </summary>
286 private static void Usage()
288 o.WriteLine(
"\n\n" + typeof(Syns2Index) +
" <prolog file> <index dir>\n\n");