19 using System.Collections.Generic;
21 namespace SpellChecker.Net.Search.Spell
25 using Lucene.Net.Search;
26 using Lucene.Net.Store;
30 using Document = Lucene.Net.Documents.Document;
31 using Field = Lucene.Net.Documents.Field;
35 using Query = Lucene.Net.Search.Query;
36 using Term = Lucene.Net.Index.Term;
// Name of the index field that stores each dictionary word; CreateDocument writes it
// and SuggestSimilar reads suggestions back from it.
65 public const System.String F_WORD =
"word";
// Term built on the word field; Exist() derives a per-word term from it via CreateTerm.
66 private readonly
Term F_WORD_TERM =
new Term(F_WORD);
// Boost handed to Add() for the "start" + ng clause built in SuggestSimilar.
72 private const float bStart = 2.0f;
// Boost handed to Add() for the "end" + ng clause built in SuggestSimilar.
73 private const float bEnd = 1.0f;
// Static lock object, hence shared by every instance of the enclosing type.
// Its lock sites are not visible in this excerpt -- presumably guards the searcher field (TODO confirm).
81 private static readonly System.Object searcherLock =
new System.Object();
// Static lock object taken by SetSpellIndex, ClearIndex and IndexDictionary;
// being static, it is shared by every instance of the enclosing type.
89 private static readonly System.Object modifyCurrentIndexLock =
new System.Object();
// Starts out false; the code that reads or sets it is not visible in this excerpt.
90 private volatile bool closed =
false;
// Score threshold used by SuggestSimilar; defaults to 0.5 and is overwritten by SetAccuracy.
92 internal float minScore = 0.5f;
// Fragment: the enclosing member's header is not visible in this excerpt.
// Installs the spell index and the string-distance implementation.
// NOTE(review): SetSpellIndex is declared virtual in this file; if these lines sit in a
// constructor, a subclass override would run before the subclass is fully constructed -- confirm.
104 this.SetSpellIndex(spellIndex);
105 this.setStringDistance(sd);
// Points this spell checker at the index held in spellIndexDir.
// NOTE(review): several original lines of this method (braces, conditions, the remaining
// IndexWriter arguments) are missing from this excerpt.
126 virtual public void SetSpellIndex(
Directory spellIndexDir)
// Takes the same lock as the other index-modifying methods (ClearIndex, IndexDictionary).
130 lock (modifyCurrentIndexLock)
// Opens an IndexWriter on the directory. The guarding condition is not visible --
// presumably this only runs to create an empty index when none exists (TODO confirm).
135 var writer =
new IndexWriter(spellIndexDir, null,
true,
// Hands the directory to SwapSearcher, presumably to open a searcher over it (TODO confirm).
139 SwapSearcher(spellIndexDir);
// Sets the score threshold that SuggestSimilar compares candidate scores against.
// The parameter shadows the field of the same name, hence the explicit "this.".
169 virtual public void SetAccuracy(
float minScore)
171 this.minScore = minScore;
// Convenience overload: suggests up to num_sug words similar to "word".
// Forwards to the five-argument overload with no index reader, no field and
// morePopular disabled.
182 public virtual System.String[] SuggestSimilar(System.String word,
int num_sug)
184 return this.SuggestSimilar(word, num_sug, null, null,
false);
// Suggests up to numSug indexed words that resemble "word".
//   word        - the word to find suggestions for
//   numSug      - number of suggestions wanted
//   ir, field   - optional reader/field used to look up document frequencies; may both be null
//   morePopular - when set (and ir/field are given), candidate frequency is compared with the input word's
// Returns the suggestions as a string array.
// NOTE(review): many original lines (braces, query/search setup, branch bodies) are absent
// from this excerpt; the comments below describe only what is visible.
206 public virtual System.String[] SuggestSimilar(System.String word,
int numSug,
IndexReader ir, System.String field,
bool morePopular)
// Local copy of the threshold configured through SetAccuracy.
211 float min = this.minScore;
212 int lengthWord = word.Length;
// Document frequency of the input word in the caller's reader/field; 0 when either is missing.
214 int freq = (ir != null && field != null) ? ir.DocFreq(
new Term(field, word)) : 0;
// Frequency a candidate is measured against; only non-zero in morePopular mode.
215 int goalFreq = (morePopular && ir != null && field != null) ? freq : 0;
// The word already occurs in the caller's index and popularity is not requested:
// the word itself is returned as the only suggestion.
217 if (!morePopular && freq > 0)
219 return new String[] { word };
// Tracks candidate strings already handled so duplicates can be detected below.
226 var alreadySeen =
new HashSet<string>();
// One pass per n-gram size between GetMin and GetMax of the word length.
227 for (var ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++)
231 grams = FormGrams(word, ng);
// No grams of this size; the action taken is not visible -- presumably skips this size (TODO confirm).
233 if (grams.Length == 0)
// First gram goes into the "start" + ng clause with the bStart boost.
240 Add(query,
"start" + ng, grams[0], bStart);
// Last gram goes into the "end" + ng clause with the bEnd boost.
245 Add(query,
"end" + ng, grams[grams.Length - 1], bEnd);
// Every gram is added under "key" (its definition is not visible here).
248 for (
int i = 0; i < grams.Length; i++)
250 Add(query, key, grams[i]);
// Examine at most ten hits per requested suggestion.
254 int maxHits = 10 * numSug;
262 int stop = Math.Min(hits.Length, maxHits);
264 for (
int i = 0; i < stop; i++)
// Candidate text is the stored word field of the hit document.
266 sugWord.
termString = indexSearcher.
Doc(hits[i].Doc).Get(F_WORD);
// Candidate scores below the threshold; the action is not visible -- presumably skipped (TODO confirm).
276 if (sugWord.
score < min)
// Frequency checks apply only when the caller supplied a reader and a field.
281 if (ir != null && field != null)
// Candidate is rarer than the input word (morePopular mode) or has a frequency below 1;
// the action is not visible -- presumably skipped (TODO confirm).
285 if ((morePopular && goalFreq > sugWord.
freq) || sugWord.
freq < 1)
// HashSet.Add returns false when the string was already present, i.e. a duplicate candidate.
291 if (alreadySeen.Add(sugWord.
termString) ==
false)
294 sugQueue.InsertWithOverflow(sugWord);
// Queue has reached the requested size; the action taken here is not visible in this excerpt.
295 if (sugQueue.Size() == numSug)
// Result array is filled from the last slot backwards as entries are popped off the queue.
304 String[] list =
new String[sugQueue.Size()];
305 for (
int i = sugQueue.Size() - 1; i >= 0; i--)
307 list[i] = ((
SuggestWord)sugQueue.Pop()).termString;
// Gives the searcher back; presumably inside a finally block (not visible, TODO confirm).
314 ReleaseSearcher(indexSearcher);
// Boosted variant of Add: takes the query q, a field name k, a value v and a boost.
// SuggestSimilar calls it for the "start"/"end" gram clauses.
// The body is not visible in this excerpt.
321 private static void Add(
BooleanQuery q, System.String k, System.String v,
float boost)
// Unboosted variant of Add: takes the query q, a field name k and a value v.
// SuggestSimilar calls it once per gram. The body is not visible in this excerpt.
330 private static void Add(
BooleanQuery q, System.String k, System.String v)
// Splits "text" into its overlapping substrings of length ng, in left-to-right order.
//   text - the string to split
//   ng   - length of each gram
// The result array has (text.Length - ng + 1) slots, so it is empty when ng == text.Length + 1.
// NOTE(review): a larger ng would request a negative array size; callers appear to bound
// ng through GetMin/GetMax -- confirm. The return statement is not visible in this excerpt.
343 private static System.String[] FormGrams(System.String text,
int ng)
345 int len = text.Length;
346 System.String[] res =
new System.String[len - ng + 1];
347 for (
int i = 0; i < len - ng + 1; i++)
// (i + ng) - (i) is simply ng: Substring takes (start, length).
349 res[i] = text.Substring(i, (i + ng) - (i));
// Index-modifying operation: runs under the same lock as SetSpellIndex and IndexDictionary.
// The body inside the lock is not visible in this excerpt.
357 public virtual void ClearIndex()
359 lock (modifyCurrentIndexLock)
// Reports whether "word" is already present in the spell index.
// Returns true when at least one document carries the word in the F_WORD field.
// How indexSearcher is obtained is not visible in this excerpt.
376 public virtual bool Exist(System.String word)
// Term on the word field with "word" as its text; a document frequency above 0 means it is indexed.
382 return indexSearcher.
DocFreq(F_WORD_TERM.CreateTerm(word)) > 0;
// Gives the searcher back; presumably inside a finally block (not visible, TODO confirm).
386 ReleaseSearcher(indexSearcher);
// Adds the words of a dictionary to the spell index.
//   dict        - source of words
//   mergeFactor - assigned to the writer's MergeFactor
//   ramMB       - passed to the writer (see review note below)
// NOTE(review): writer creation, iterator setup, braces and cleanup are not visible in this excerpt.
397 public virtual void IndexDictionary(
IDictionary dict,
int mergeFactor,
int ramMB)
// Takes the same lock as SetSpellIndex and ClearIndex.
399 lock (modifyCurrentIndexLock)
404 writer.MergeFactor = mergeFactor;
// NOTE(review): a parameter named ramMB is passed to SetMaxBufferedDocs, which in Lucene's
// IndexWriter is a document-count threshold rather than a RAM size; SetRAMBufferSizeMB may
// have been intended -- confirm before changing.
405 writer.SetMaxBufferedDocs(ramMB);
408 while (iter.MoveNext())
410 System.String word = (System.String)iter.Current;
412 int len = word.Length;
// Word is already indexed; the action is not visible -- presumably it is skipped (TODO confirm).
418 if (this.Exist(word))
// Builds the document with n-gram sizes bounded by GetMin/GetMax of the word length.
425 Document doc = CreateDocument(word, GetMin(len), GetMax(len));
426 writer.AddDocument(doc);
// Fragment of a convenience overload (header not visible): forwards to
// IndexDictionary with mergeFactor = 300 and ramMB = 10.
443 IndexDictionary(dict, 300, 10);
// Called with a word length; its result is used as the smallest n-gram size in
// SuggestSimilar and IndexDictionary. The body is not visible in this excerpt.
446 private int GetMin(
int l)
// Called with a word length; its result is used as the largest n-gram size in
// SuggestSimilar and IndexDictionary. The body is not visible in this excerpt.
460 private int GetMax(
int l)
// Builds the index document for one dictionary word.
//   text     - the word
//   ng1, ng2 - smallest and largest n-gram size to index
// Creation of "doc" and the return statement are not visible in this excerpt.
474 private static Document CreateDocument(System.String text,
int ng1,
int ng2)
// The word itself is stored (so it can be read back as a suggestion) and indexed as a single token.
477 doc.Add(
new Field(F_WORD, text,
Field.Store.YES,
Field.Index.NOT_ANALYZED));
// Adds the gram fields for every size from ng1 to ng2.
478 AddGram(text, doc, ng1, ng2);
// Adds the n-gram fields of "text" to "doc" for every gram size from ng1 to ng2 inclusive.
// Field names are "gram" + ng, "start" + ng and "end" + ng; none of them is stored.
// NOTE(review): braces and the conditions around the start/end fields are not visible in this excerpt.
483 private static void AddGram(System.String text,
Document doc,
int ng1,
int ng2)
485 int len = text.Length;
486 for (
int ng = ng1; ng <= ng2; ng++)
488 System.String key =
"gram" + ng;
// Starts as null; its assignment is not visible -- presumably set to the last gram of this size (TODO confirm).
489 System.String end = null;
// One gram per start position; there are (len - ng + 1) of them.
490 for (
int i = 0; i < len - ng + 1; i++)
// (i + ng) - (i) is simply ng: Substring takes (start, length).
492 System.String gram = text.Substring(i, (i + ng) - (i));
493 doc.Add(
new Field(key, gram,
Field.Store.NO,
Field.Index.NOT_ANALYZED));
// Guarding condition not visible -- presumably only the first gram is added as "start" (TODO confirm).
496 doc.Add(
new Field(
"start" + ng, gram,
Field.Store.NO,
Field.Index.NOT_ANALYZED));
// Guarding condition not visible -- presumably added only when "end" was assigned (TODO confirm).
503 doc.Add(
new Field(
"end" + ng, end,
Field.Store.NO,
Field.Index.NOT_ANALYZED));
// Fragment (enclosing member not visible): increments the reference count on the
// current searcher's IndexReader; ReleaseSearcher callers rely on a matching DecRef.
513 searcher.IndexReader.IncRef();
// Fragment (enclosing member not visible -- presumably ReleaseSearcher, TODO confirm):
// decrements the reference count on the given searcher's IndexReader.
522 aSearcher.IndexReader.DecRef();
// Private helper with no parameters; its body is not visible in this excerpt --
// presumably checks the "closed" flag (TODO confirm).
525 private void EnsureOpen()
// NOTE(review): this condition comes from a later point in the original file and may
// belong to a different member whose header is not visible here.
539 if (searcher != null)
// Fragment of the searcher-swap logic (header not visible -- presumably SwapSearcher(dir), TODO confirm).
// Closes the freshly opened searcher; the guarding condition is not visible here.
559 indexSearcher.
Close();
// A previous searcher exists; what is done with it is not visible -- presumably it is closed (TODO confirm).
562 if (searcher != null)
// Publishes the new searcher and remembers the directory it was opened on.
567 searcher = indexSearcher;
568 this.spellindex = dir;
// Public dispose entry point. Only the finalizer suppression is visible in this excerpt;
// presumably Dispose(true) is called first (TODO confirm).
599 public void Dispose()
// Tells the GC that finalization is no longer needed for this instance.
602 GC.SuppressFinalize(
this);
// Dispose worker. The cleanup performed inside the branch is not visible in this excerpt.
// NOTE(review): declared protected but not virtual, so subclasses cannot extend disposal -- confirm intent.
605 protected void Dispose(
bool disposeOfManagedResources)
// Branch taken when managed resources should be released.
607 if (disposeOfManagedResources)