23 using System.Collections.Generic;
27 using Lucene.Net.Analysis;
28 using Lucene.Net.Analysis.Tokenattributes;
29 using Lucene.Net.Documents;
30 using Lucene.Net.Search;
31 using Lucene.Net.Support;
33 namespace Lucene.Net.Index.Memory
156 public partial class MemoryIndex
159 private HashMap<String, Info> fields =
new HashMap<String, Info>();
162 [NonSerialized]
private KeyValuePair<String, Info>[] sortedFields;
168 private static float docBoost = 1.0f;
170 private static long serialVersionUID = 2782195016849084649L;
172 private static bool DEBUG =
false;
196 this.stride = storeOffsets ? 3 : 1;
216 public void AddField(String fieldName, String text,
Analyzer analyzer)
218 if (fieldName == null)
219 throw new ArgumentException(
"fieldName must not be null");
221 throw new ArgumentException(
"text must not be null");
222 if (analyzer == null)
223 throw new ArgumentException(
"analyzer must not be null");
227 AddField(fieldName, stream);
/// <summary>
/// Wraps the given keyword collection in a <c>KeywordTokenStream&lt;T&gt;</c>.
/// </summary>
/// <param name="keywords">The keywords to expose as a token stream; must not be null.</param>
/// <returns>A new <c>KeywordTokenStream&lt;T&gt;</c> built over <paramref name="keywords"/>.</returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="keywords"/> is null.</exception>
public TokenStream CreateKeywordTokenStream<T>(ICollection<T> keywords)
{
    if (keywords != null)
    {
        return new KeywordTokenStream<T>(keywords);
    }

    throw new ArgumentException("keywords must not be null");
}
261 AddField(fieldName, stream, 1.0f);
279 public void AddField(String fieldName,
TokenStream stream,
float boost)
283 if (fieldName == null)
284 throw new ArgumentException(
"fieldName must not be null");
286 throw new ArgumentException(
"token stream must not be null");
288 throw new ArgumentException(
"boost factor must be greater than 0.0");
289 if (fields[fieldName] != null)
290 throw new ArgumentException(
"field must not be added more than once");
292 var terms =
new HashMap<String, ArrayIntList>();
294 int numOverlapTokens = 0;
304 String term = termAtt.Term;
305 if (term.Length == 0)
continue;
308 int posIncr = posIncrAttribute.PositionIncrement;
313 ArrayIntList positions = terms[term];
314 if (positions == null)
317 positions =
new ArrayIntList(stride);
318 terms[term] = positions;
326 positions.Add(pos, offsetAtt.StartOffset, offsetAtt.EndOffset);
334 boost = boost*docBoost;
335 fields[fieldName] =
new Info(terms, numTokens, numOverlapTokens, boost);
339 catch (IOException e)
342 throw new SystemException(
string.Empty, e);
348 if (stream != null) stream.
Close();
350 catch (IOException e2)
352 throw new SystemException(
string.Empty, e2);
366 MemoryIndexReader reader =
new MemoryIndexReader(
this);
368 reader.SetSearcher(searcher);
387 throw new ArgumentException(
"query must not be null");
389 Searcher searcher = CreateSearcher();
392 float[] scores =
new float[1];
393 searcher.
Search(query,
new FillingCollector(scores));
394 float score = scores[0];
397 catch (IOException e)
400 throw new SystemException(
string.Empty, e);
428 public int GetMemorySize()
434 size += VM.SizeOfObject(2*PTR + INT);
435 if (sortedFields != null) size += VM.SizeOfObjectArray(sortedFields.Length);
437 size += VM.SizeOfHashMap(fields.Count);
438 foreach (var entry
in fields)
441 Info info = entry.Value;
442 size += VM.SizeOfObject(2*INT + 3*PTR);
443 if (info.SortedTerms != null) size += VM.SizeOfObjectArray(info.SortedTerms.Length);
445 int len = info.Terms.Count;
446 size += VM.SizeOfHashMap(len);
448 var iter2 = info.Terms.GetEnumerator();
453 KeyValuePair<String, ArrayIntList> e = iter2.Current;
454 size += VM.SizeOfObject(PTR + 3*INT);
456 ArrayIntList positions = e.Value;
457 size += VM.SizeOfArrayIntList(positions.Size());
/// <summary>
/// Returns the number of stride-sized groups stored in <paramref name="positions"/>,
/// i.e. its element count divided (integer division) by the index stride.
/// </summary>
/// <param name="positions">The list whose grouped entries are counted.</param>
private int NumPositions(ArrayIntList positions)
{
    int storedInts = positions.Size();
    return storedInts / stride;
}
/// <summary>
/// Lazily populates <c>sortedFields</c> from <c>fields</c> via <c>Sort</c>;
/// does nothing when the sorted array has already been built.
/// </summary>
private void SortFields()
{
    if (sortedFields != null)
    {
        return;
    }

    sortedFields = Sort(fields);
}
478 where TKey :
class, IComparable<TKey>
480 int size = map.Count;
482 var entries = map.ToArray();
484 if (size > 1) Array.
Sort(entries, TermComparer.KeyComparer);
494 public override String ToString()
496 StringBuilder result =
new StringBuilder(256);
499 int sumPositions = 0;
502 for (
int i = 0; i < sortedFields.Length; i++)
504 KeyValuePair<String, Info> entry = sortedFields[i];
505 String fieldName = entry.Key;
506 Info info = entry.Value;
508 result.Append(fieldName +
":\n");
512 for (
int j = 0; j < info.SortedTerms.Length; j++)
514 KeyValuePair<String, ArrayIntList> e = info.SortedTerms[j];
516 ArrayIntList positions = e.Value;
517 result.Append(
"\t'" + term +
"':" + NumPositions(positions) +
":");
518 result.Append(positions.ToString(stride));
520 numPos += NumPositions(positions);
521 numChars += term.Length;
524 result.Append(
"\tterms=" + info.SortedTerms.Length);
525 result.Append(
", positions=" + numPos);
526 result.Append(
", Kchars=" + (numChars/1000.0f));
528 sumPositions += numPos;
529 sumChars += numChars;
530 sumTerms += info.SortedTerms.Length;
533 result.Append(
"\nfields=" + sortedFields.Length);
534 result.Append(
", terms=" + sumTerms);
535 result.Append(
", positions=" + sumPositions);
536 result.Append(
", Kchars=" + (sumChars/1000.0f));
537 return result.ToString();
550 private sealed
class Info
552 public static readonly IComparer<KeyValuePair<string, Info>> InfoComparer =
new TermComparer<Info>();
558 private HashMap<String, ArrayIntList> terms;
561 [NonSerialized]
private KeyValuePair<String, ArrayIntList>[] sortedTerms;
564 private int numTokens;
567 private int numOverlapTokens;
573 [NonSerialized]
public Term template;
575 private static long serialVersionUID = 2882195016849084649L;
577 public Info(HashMap<String, ArrayIntList> terms,
int numTokens,
int numOverlapTokens,
float boost)
580 this.numTokens = numTokens;
581 this.NumOverlapTokens = numOverlapTokens;
/// <summary>Gets the term-to-positions map held in the <c>terms</c> field.</summary>
public HashMap<string, ArrayIntList> Terms
{
    get
    {
        return terms;
    }
}
592 get {
return numTokens; }
/// <summary>Gets or sets the value of the <c>numOverlapTokens</c> backing field.</summary>
public int NumOverlapTokens
{
    get
    {
        return numOverlapTokens;
    }
    set
    {
        numOverlapTokens = value;
    }
}
603 get {
return boost; }
/// <summary>
/// Gets the <c>sortedTerms</c> array; this is null until <c>SortTerms</c> has populated it.
/// </summary>
public KeyValuePair<string, ArrayIntList>[] SortedTerms
{
    get
    {
        return sortedTerms;
    }
}
/// <summary>
/// Lazily builds the sorted term array from <c>Terms</c> via <c>Sort</c>;
/// a no-op once the array exists.
/// </summary>
public void SortTerms()
{
    if (SortedTerms != null)
    {
        return;
    }

    sortedTerms = Sort(Terms);
}
627 public ArrayIntList GetPositions(String term)
/// <summary>
/// Returns the positions list stored at index <paramref name="pos"/> of <c>SortedTerms</c>.
/// </summary>
/// <param name="pos">Index into the sorted term array.</param>
public ArrayIntList GetPositions(int pos)
{
    KeyValuePair<string, ArrayIntList> sortedEntry = SortedTerms[pos];
    return sortedEntry.Value;
}
650 private sealed
class ArrayIntList
653 private int[] elements;
654 private int size = 0;
656 private static long serialVersionUID = 2282195016849084649L;
658 private ArrayIntList()
/// <summary>
/// Creates a list whose backing array is allocated with the given capacity.
/// </summary>
/// <param name="initialCapacity">Length of the initial backing array.</param>
public ArrayIntList(int initialCapacity)
{
    this.elements = new int[initialCapacity];
}
/// <summary>
/// Appends a single value, growing the backing array first when it is full.
/// </summary>
/// <param name="elem">The value to append.</param>
public void Add(int elem)
{
    bool backingArrayFull = size == elements.Length;
    if (backingArrayFull)
    {
        EnsureCapacity(size + 1);
    }

    elements[size] = elem;
    size++;
}
675 public void Add(
int pos,
int start,
int end)
677 if (size + 3 > elements.Length) EnsureCapacity(size + 3);
678 elements[size] = pos;
679 elements[size + 1] = start;
680 elements[size + 2] = end;
/// <summary>
/// Returns the value stored at <paramref name="index"/>.
/// </summary>
/// <param name="index">Zero-based index; must satisfy 0 &lt;= index &lt; size.</param>
/// <returns>The element at the given index.</returns>
/// <exception cref="IndexOutOfRangeException">
/// Thrown (via <c>ThrowIndex</c>) when the index is outside the logical size of the list.
/// </exception>
public int Get(int index)
{
    // Reject negative indices here as well, so they are reported with the same
    // "index/size" diagnostic instead of a bare runtime array-bounds failure.
    if (index < 0 || index >= size)
    {
        ThrowIndex(index);
    }

    return elements[index];
}
695 public int[] ToArray(
int stride)
697 int[] arr =
new int[Size()/stride];
700 Array.Copy(elements, 0, arr, 0, size);
704 for (
int i = 0, j = 0; j < size; i++, j += stride) arr[i] = elements[j];
/// <summary>
/// Replaces the backing array with a larger one and copies the first <c>size</c> values over.
/// The new length is the greater of <paramref name="minCapacity"/> and
/// one and a half times the current length plus one.
/// </summary>
/// <param name="minCapacity">Smallest acceptable length for the new backing array.</param>
private void EnsureCapacity(int minCapacity)
{
    int grownLength = (elements.Length * 3) / 2 + 1;
    int targetLength = Math.Max(minCapacity, grownLength);

    int[] resized = new int[targetLength];
    Array.Copy(elements, 0, resized, 0, size);
    elements = resized;
}
/// <summary>
/// Always throws an <see cref="IndexOutOfRangeException"/> whose message reports the
/// offending index and the current size.
/// </summary>
/// <param name="index">The index that failed the bounds check.</param>
private void ThrowIndex(int index)
{
    string message = "index: " + index + ", size: " + size;
    throw new IndexOutOfRangeException(message);
}
725 public string ToString(
int stride)
727 int s = Size()/stride;
728 int len = Math.Min(10, s);
729 StringBuilder buf =
new StringBuilder(4*len);
731 for (
int i = 0; i < len; i++)
733 buf.Append(Get(i*stride));
734 if (i < len - 1) buf.Append(
", ");
736 if (len != s) buf.Append(
", ...");
738 return buf.ToString();
746 private static readonly
Term MATCH_ALL_TERM =
new Term(
"");
753 private sealed
partial class MemoryIndexReader :
IndexReader
755 private readonly MemoryIndex _index;
759 internal MemoryIndexReader(MemoryIndex index)
/// <summary>
/// Looks up the <c>Info</c> registered for <paramref name="fieldName"/> in the owning index's field map.
/// </summary>
/// <param name="fieldName">Name of the field to look up.</param>
private Info GetInfo(String fieldName)
{
    Info fieldInfo = _index.fields[fieldName];
    return fieldInfo;
}
/// <summary>
/// Returns the <c>Info</c> stored at index <paramref name="pos"/> of the owning index's sorted field array.
/// </summary>
/// <param name="pos">Index into <c>sortedFields</c>.</param>
private Info GetInfo(int pos)
{
    KeyValuePair<String, Info> sortedEntry = _index.sortedFields[pos];
    return sortedEntry.Value;
}
774 public override int DocFreq(
Term term)
776 Info info = GetInfo(term.Field);
778 if (info != null) freq = info.GetPositions(term.Text) != null ? 1 : 0;
779 if (DEBUG) System.Diagnostics.Debug.WriteLine(
"MemoryIndexReader.docFreq: " + term +
", freq:" + freq);
785 if (DEBUG) System.Diagnostics.Debug.WriteLine(
"MemoryIndexReader.terms()");
786 return Terms(MATCH_ALL_TERM);
791 if (DEBUG) System.Diagnostics.Debug.WriteLine(
"MemoryIndexReader.terms: " + term);
797 if (_index.sortedFields.Length == 1 && _index.sortedFields[0].Key == term.Field)
803 j = Array.BinarySearch(_index.sortedFields,
new KeyValuePair<string, Info>(term.Field, null), Info.InfoComparer);
811 if (j < _index.sortedFields.Length) GetInfo(j).SortTerms();
816 Info info = GetInfo(j);
818 i = Array.BinarySearch(info.SortedTerms,
new KeyValuePair<string, ArrayIntList>(term.Text, null), Info.ArrayIntListComparer);
823 if (i >= info.SortedTerms.Length)
828 if (j < _index.sortedFields.Length) GetInfo(j).SortTerms();
835 return new MemoryTermEnum(_index,
this, ix, jx);
840 if (DEBUG) System.Diagnostics.Debug.WriteLine(
"MemoryIndexReader.termPositions");
842 return new MemoryTermPositions(_index,
this);
848 if (DEBUG) System.Diagnostics.Debug.WriteLine(
"MemoryIndexReader.termDocs");
/// <summary>
/// Builds one term frequency vector per field key of the owning index, in key enumeration order,
/// by delegating to <c>GetTermFreqVector(docNumber, fieldName)</c>.
/// </summary>
/// <param name="docNumber">Document number forwarded to the per-field lookup.</param>
/// <returns>An array with one entry per field key (empty when there are no fields).</returns>
public override ITermFreqVector[] GetTermFreqVectors(int docNumber)
{
    if (DEBUG)
    {
        System.Diagnostics.Debug.WriteLine("MemoryIndexReader.getTermFreqVectors");
    }

    var vectors = new List<ITermFreqVector>();
    foreach (String fieldName in _index.fields.Keys)
    {
        vectors.Add(GetTermFreqVector(docNumber, fieldName));
    }

    return vectors.ToArray();
}
/// <summary>
/// Feeds every field of the owning index to <paramref name="mapper"/> by calling the
/// per-field overload once for each field key.
/// </summary>
/// <param name="docNumber">Document number forwarded to the per-field overload.</param>
/// <param name="mapper">Mapper forwarded to the per-field overload.</param>
public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper)
{
    if (DEBUG)
    {
        System.Diagnostics.Debug.WriteLine("MemoryIndexReader.getTermFreqVectors");
    }

    var fieldNames = _index.fields.Keys;
    foreach (String name in fieldNames)
    {
        GetTermFreqVector(docNumber, name, mapper);
    }
}
870 public override void GetTermFreqVector(
int docNumber, String field, TermVectorMapper mapper)
872 if (DEBUG) System.Diagnostics.Debug.WriteLine(
"MemoryIndexReader.getTermFreqVector");
873 Info info = GetInfo(field);
879 mapper.SetExpectations(field, info.SortedTerms.Length, _index.stride != 1,
true);
880 for (
int i = info.SortedTerms.Length; --i >= 0;)
883 ArrayIntList positions = info.SortedTerms[i].Value;
884 int size = positions.Size();
885 var offsets =
new TermVectorOffsetInfo[size/_index.stride];
887 for (
int k = 0, j = 1; j < size; k++, j += _index.stride)
889 int start = positions.Get(j);
890 int end = positions.Get(j + 1);
891 offsets[k] =
new TermVectorOffsetInfo(start, end);
893 mapper.Map(info.SortedTerms[i].Key, _index.NumPositions(info.SortedTerms[i].Value), offsets,
894 (info.SortedTerms[i].Value).ToArray(_index.stride));
898 public override ITermFreqVector GetTermFreqVector(
int docNumber, String fieldName)
900 if (DEBUG) System.Diagnostics.Debug.WriteLine(
"MemoryIndexReader.getTermFreqVector");
901 Info info = GetInfo(fieldName);
902 if (info == null)
return null;
905 return new MemoryTermPositionVector(_index, info, fieldName);
910 if (searcher != null)
return searcher.
Similarity;
/// <summary>Stores the given searcher in this reader's <c>searcher</c> field.</summary>
/// <param name="searcher">The searcher to remember.</param>
internal void SetSearcher(Searcher searcher)
{
    this.searcher = searcher;
}
920 private byte[] cachedNorms;
921 private String cachedFieldName;
924 public override byte[] Norms(String fieldName)
926 byte[] norms = cachedNorms;
928 if (fieldName != cachedFieldName || sim != cachedSimilarity)
931 Info info = GetInfo(fieldName);
932 int numTokens = info != null ? info.NumTokens : 0;
933 int numOverlapTokens = info != null ? info.NumOverlapTokens : 0;
934 float boost = info != null ? info.Boost : 1.0f;
938 norms =
new byte[] {norm};
942 cachedFieldName = fieldName;
943 cachedSimilarity = sim;
945 System.Diagnostics.Debug.WriteLine(
"MemoryIndexReader.norms: " + fieldName +
":" + n +
":" +
946 norm +
":" + numTokens);
/// <summary>
/// Copies the norms of <paramref name="fieldName"/> (as returned by <c>Norms(fieldName)</c>)
/// into <paramref name="bytes"/>, starting at <paramref name="offset"/>.
/// </summary>
/// <param name="fieldName">Field whose norms are copied.</param>
/// <param name="bytes">Destination buffer.</param>
/// <param name="offset">Start position in the destination buffer.</param>
public override void Norms(String fieldName, byte[] bytes, int offset)
{
    if (DEBUG)
    {
        System.Diagnostics.Debug.WriteLine("MemoryIndexReader.norms*: " + fieldName);
    }

    byte[] source = Norms(fieldName);
    int byteCount = source.Length;
    Buffer.BlockCopy(source, 0, bytes, offset, byteCount);
}
/// <summary>Not supported by this reader; always throws.</summary>
/// <exception cref="NotSupportedException">Always thrown.</exception>
protected override void DoSetNorm(int doc, String fieldName, byte value)
{
    throw new NotSupportedException();
}
/// <summary>
/// Reports 1 when the owning index has at least one field, otherwise 0.
/// </summary>
public override int NumDocs()
{
    if (DEBUG)
    {
        System.Diagnostics.Debug.WriteLine("MemoryIndexReader.numDocs");
    }

    if (_index.fields.Count > 0)
    {
        return 1;
    }

    return 0;
}
969 public override int MaxDoc
973 if (DEBUG) System.Diagnostics.Debug.WriteLine(
"MemoryIndexReader.maxDoc");
980 if (DEBUG) System.Diagnostics.Debug.WriteLine(
"MemoryIndexReader.document");
987 if (DEBUG) System.Diagnostics.Debug.WriteLine(
"MemoryIndexReader.document");
991 public override bool IsDeleted(
int n)
993 if (DEBUG) System.Diagnostics.Debug.WriteLine(
"MemoryIndexReader.isDeleted");
997 public override bool HasDeletions
1001 if (DEBUG) System.Diagnostics.Debug.WriteLine(
"MemoryIndexReader.hasDeletions");
/// <summary>Not supported by this reader; always throws.</summary>
/// <exception cref="NotSupportedException">Always thrown.</exception>
protected override void DoDelete(int docNum)
{
    throw new NotSupportedException();
}
/// <summary>Not supported by this reader; always throws.</summary>
/// <exception cref="NotSupportedException">Always thrown.</exception>
protected override void DoUndeleteAll()
{
    throw new NotSupportedException();
}
/// <summary>
/// Emits a debug trace when <c>DEBUG</c> is set; the visible body performs no other work.
/// </summary>
/// <param name="commitUserData">Not used by this implementation.</param>
protected override void DoCommit(IDictionary<String, String> commitUserData)
{
    if (DEBUG)
    {
        System.Diagnostics.Debug.WriteLine("MemoryIndexReader.doCommit");
    }
}
/// <summary>
/// Emits a debug trace when <c>DEBUG</c> is set; the visible body performs no other work.
/// </summary>
protected override void DoClose()
{
    if (DEBUG)
    {
        System.Diagnostics.Debug.WriteLine("MemoryIndexReader.doClose");
    }
}
1028 public override ICollection<String> GetFieldNames(
FieldOption fieldOption)
1030 if (DEBUG) System.Diagnostics.Debug.WriteLine(
"MemoryIndexReader.getFieldNamesOption");
1033 if (fieldOption ==
FieldOption.INDEXED_NO_TERMVECTOR)
1035 if (fieldOption ==
FieldOption.TERMVECTOR_WITH_OFFSET && _index.stride == 1)
1037 if (fieldOption ==
FieldOption.TERMVECTOR_WITH_POSITION_OFFSET && _index.stride == 1)
1040 return _index.fields.Keys.AsReadOnly();
1048 private static class VM
1051 public static readonly
int PTR = Is64BitVM() ? 8 : 4;
1054 public static readonly
int BOOLEAN = 1;
1055 public static readonly
int BYTE = 1;
1056 public static readonly
int CHAR = 2;
1057 public static readonly
int SHORT = 2;
1058 public static readonly
int INT = 4;
1059 public static readonly
int LONG = 8;
1060 public static readonly
int FLOAT = 4;
1061 public static readonly
int DOUBLE = 8;
1063 private static readonly
int LOG_PTR = (int) Math.Round(Log2(PTR));
1069 private static readonly
int OBJECT_HEADER = 2*PTR;
/// <summary>
/// Rounds <paramref name="n"/> up to the next multiple of 2^<c>LOG_PTR</c>
/// using shift arithmetic.
/// </summary>
/// <param name="n">The raw size to align.</param>
private static int SizeOf(int n)
{
    // Number of 2^LOG_PTR-sized units needed to hold n (ceiling division by shifting).
    int alignedUnits = ((n - 1) >> LOG_PTR) + 1;
    return alignedUnits << LOG_PTR;
}
/// <summary>
/// Returns the aligned size of <c>OBJECT_HEADER</c> plus <paramref name="n"/> payload bytes.
/// </summary>
/// <param name="n">Payload size added on top of the object header.</param>
public static int SizeOfObject(int n)
{
    int rawSize = OBJECT_HEADER + n;
    return SizeOf(rawSize);
}
/// <summary>
/// Returns the object size for a payload of one <c>INT</c> plus <paramref name="len"/> slots of <c>PTR</c> bytes each.
/// </summary>
/// <param name="len">Number of array slots.</param>
public static int SizeOfObjectArray(int len)
{
    int payload = INT + PTR * len;
    return SizeOfObject(payload);
}
/// <summary>
/// Returns the object size for a payload of one <c>INT</c> plus <paramref name="len"/> slots of <c>CHAR</c> bytes each.
/// </summary>
/// <param name="len">Number of array slots.</param>
public static int SizeOfCharArray(int len)
{
    int payload = INT + CHAR * len;
    return SizeOfObject(payload);
}
/// <summary>
/// Returns the object size for a payload of one <c>INT</c> plus <paramref name="len"/> slots of <c>INT</c> bytes each.
/// </summary>
/// <param name="len">Number of array slots.</param>
public static int SizeOfIntArray(int len)
{
    int payload = INT + INT * len;
    return SizeOfObject(payload);
}
/// <summary>
/// Returns the sum of an object of payload <c>3*INT + PTR</c> and a char array of
/// <paramref name="len"/> elements.
/// </summary>
/// <param name="len">Number of characters.</param>
public static int SizeOfString(int len)
{
    int wrapperSize = SizeOfObject(3 * INT + PTR);
    int charDataSize = SizeOfCharArray(len);
    return wrapperSize + charDataSize;
}
/// <summary>
/// Returns the sum of three parts: an object of payload <c>4*PTR + 4*INT</c>, an object array of
/// <paramref name="len"/> slots, and <paramref name="len"/> objects of payload <c>3*PTR + INT</c>.
/// </summary>
/// <param name="len">Number of map entries.</param>
public static int SizeOfHashMap(int len)
{
    int mapObjectSize = SizeOfObject(4 * PTR + 4 * INT);
    int tableSize = SizeOfObjectArray(len);
    int perEntrySize = SizeOfObject(3 * PTR + INT);

    return mapObjectSize + tableSize + len * perEntrySize;
}
/// <summary>
/// Returns the sum of an object of payload <c>PTR + 2*INT</c> and an object array of
/// <paramref name="len"/> slots.
/// </summary>
/// <param name="len">Number of list slots.</param>
public static int SizeOfArrayList(int len)
{
    int listObjectSize = SizeOfObject(PTR + 2 * INT);
    int backingArraySize = SizeOfObjectArray(len);
    return listObjectSize + backingArraySize;
}
/// <summary>
/// Returns the sum of an object of payload <c>PTR + INT</c> and an int array of
/// <paramref name="len"/> elements.
/// </summary>
/// <param name="len">Number of ints held by the list.</param>
public static int SizeOfArrayIntList(int len)
{
    int listObjectSize = SizeOfObject(PTR + INT);
    int backingArraySize = SizeOfIntArray(len);
    return listObjectSize + backingArraySize;
}
/// <summary>
/// Reports whether the native pointer size of the current process is eight bytes.
/// </summary>
private static bool Is64BitVM()
{
    int pointerBytes = IntPtr.Size;
    return pointerBytes == 8;
}
/// <summary>Returns the base-2 logarithm of <paramref name="value"/>.</summary>
/// <param name="value">The number whose logarithm is taken.</param>
private static double Log2(double value)
{
    const double binaryBase = 2;
    return Math.Log(value, binaryBase);
}