Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
FieldTermStack.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using System.Text;
21 
22 using Lucene.Net.Analysis;
23 using Lucene.Net.Documents;
24 using Lucene.Net.Search;
25 using Lucene.Net.Index;
26 using Lucene.Net.QueryParsers;
27 using Lucene.Net.Store;
28 
29 
30 namespace Lucene.Net.Search.Vectorhighlight
31 {
32 
33  /// <summary>
34  /// <c>FieldTermStack</c> is a stack that keeps query terms in the specified field
35  /// of the document to be highlighted.
36  /// </summary>
37  public class FieldTermStack
38  {
// Name of the field this stack was built for. It is assigned only by the constructor,
// hence readonly.
private readonly string fieldName;

// Storage behind the stack. The constructor appends one TermInfo per matching term
// occurrence and sorts the list by position; Pop() removes from the front and
// Push() inserts at the front.
public LinkedList<TermInfo> termList = new LinkedList<TermInfo>();
41 
/// <summary>
/// Entry point that indexes a single document (two values of field "f") into a
/// <c>RAMDirectory</c> and constructs a <c>FieldTermStack</c> for it using the
/// query "a x:b". Nothing is printed or returned.
/// </summary>
/// <param name="args">command line arguments (not used)</param>
public static void Main(String[] args)
{
    Analyzer analyzer = new WhitespaceAnalyzer();
    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "f", analyzer);
    Query query = parser.Parse("a x:b");
    FieldQuery fieldQuery = new FieldQuery(query, true, false);

    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
    try
    {
        Document doc = new Document();
        doc.Add(new Field("f", "a a a b b c a b b c d e f", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
        doc.Add(new Field("f", "b a b a f", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
        writer.AddDocument(doc);
    }
    finally
    {
        // Close the writer even when adding the document throws.
        writer.Close();
    }

    IndexReader reader = IndexReader.Open(dir, true);
    try
    {
        // Built only to exercise the constructor; the result is not inspected.
        FieldTermStack ftl = new FieldTermStack(reader, 0, "f", fieldQuery);
    }
    finally
    {
        // Close the reader even when building the stack throws.
        reader.Close();
    }
}
61 
62  /// <summary>
63  /// a constructor.
64  /// </summary>
65  /// <param name="reader">IndexReader of the index</param>
66  /// <param name="docId">document id to be highlighted</param>
67  /// <param name="fieldName">field of the document to be highlighted</param>
68  /// <param name="fieldQuery">FieldQuery object</param>
69 #if LUCENENET_350 //Lucene.Net specific code. See https://issues.apache.org/jira/browse/LUCENENET-350
// LUCENENET_350 variant: the term vector is read through a VectorHighlightMapper
// that is handed the query's term set, instead of being fetched as a TermFreqVector.
public FieldTermStack(IndexReader reader, int docId, String fieldName, FieldQuery fieldQuery)
{
    this.fieldName = fieldName;

    List<string> queryTerms = fieldQuery.getTermSet(fieldName);
    if (queryTerms == null)
    {
        // un-matched fieldName specified when fieldMatch == true:
        // leave the stack empty so that a null snippet is produced
        return;
    }

    VectorHighlightMapper mapper = new VectorHighlightMapper(queryTerms);
    reader.GetTermFreqVector(docId, fieldName, mapper);
    if (mapper.Size == 0)
    {
        // nothing was mapped: leave the stack empty (null snippets)
        return;
    }

    foreach (String term in mapper.GetTerms())
    {
        if (StringUtils.AnyTermMatch(queryTerms, term))
        {
            int termIndex = mapper.IndexOf(term);

            TermVectorOffsetInfo[] offsets = mapper.GetOffsets(termIndex);
            if (offsets == null)
            {
                return; // no offsets available: give up (null snippets)
            }

            int[] positions = mapper.GetTermPositions(termIndex);
            if (positions == null)
            {
                return; // no positions available: give up (null snippets)
            }

            // one TermInfo per occurrence of the term
            for (int occurrence = 0; occurrence < offsets.Length; occurrence++)
            {
                TermVectorOffsetInfo offset = offsets[occurrence];
                termList.AddLast(new TermInfo(term, offset.StartOffset, offset.EndOffset, positions[occurrence]));
            }
        }
    }

    // put the collected occurrences in position order
    Sort(termList);
}
101 #else //Original Port
// Original-port variant: reads the field's term vector directly and keeps every
// occurrence of each term that is also in the query's term set for the field.
public FieldTermStack(IndexReader reader, int docId, String fieldName, FieldQuery fieldQuery)
{
    this.fieldName = fieldName;

    // A single "as" conversion covers both the missing vector (null) and a vector that
    // is not a TermPositionVector, without using InvalidCastException for control flow.
    TermPositionVector tpv = reader.GetTermFreqVector(docId, fieldName) as TermPositionVector;
    if (tpv == null)
    {
        return; // just return to make null snippets
    }

    List<String> termSet = fieldQuery.getTermSet(fieldName);
    // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
    if (termSet == null)
    {
        return;
    }

    foreach (String term in tpv.GetTerms())
    {
        if (!termSet.Contains(term))
        {
            continue;
        }

        int index = tpv.IndexOf(term);
        TermVectorOffsetInfo[] tvois = tpv.GetOffsets(index);
        if (tvois == null)
        {
            return; // just return to make null snippets
        }

        int[] poss = tpv.GetTermPositions(index);
        if (poss == null)
        {
            return; // just return to make null snippets
        }

        // one TermInfo per occurrence of the term
        for (int i = 0; i < tvois.Length; i++)
        {
            termList.AddLast(new TermInfo(term, tvois[i].GetStartOffset(), tvois[i].GetEndOffset(), poss[i]));
        }
    }

    // sort by position
    Sort(termList);
}
138 #endif
139 
/// <summary>
/// Sorts the given list in place by ascending term position, keeping elements with
/// equal positions in their original relative order.
/// </summary>
/// <remarks>
/// This stands in for Java's <c>Collections.sort</c>, which is stable.
/// <c>Array.Sort</c> is not, so the sort is performed on an array of indexes and the
/// original index is used as the tie-breaker.
/// </remarks>
/// <param name="linkList">the list to reorder</param>
void Sort(LinkedList<TermInfo> linkList)
{
    if (linkList.Count < 2)
    {
        return; // already ordered
    }

    TermInfo[] items = new TermInfo[linkList.Count];
    linkList.CopyTo(items, 0);

    int[] order = new int[items.Length];
    for (int i = 0; i < order.Length; i++)
    {
        order[i] = i;
    }

    Array.Sort(order, delegate(int a, int b)
    {
        int byPosition = PosComparer(items[a], items[b]);
        // equal positions: fall back to insertion order to make the sort stable
        return byPosition != 0 ? byPosition : a.CompareTo(b);
    });

    linkList.Clear();
    foreach (int index in order)
    {
        linkList.AddLast(items[index]);
    }
}
149 
/// <summary>
/// Comparison used for sorting: orders two TermInfo objects by ascending position.
/// </summary>
/// <param name="t1">left operand</param>
/// <param name="t2">right operand</param>
/// <returns>a negative value, zero or a positive value when t1's position is
/// smaller than, equal to or greater than t2's position</returns>
int PosComparer(TermInfo t1, TermInfo t2)
{
    // int.CompareTo cannot overflow, unlike subtracting the two positions.
    return t1.Position.CompareTo(t2.Position);
}
154 
/// <summary>
/// Gets the name of the field that was passed to the constructor.
/// </summary>
/// <value> field name </value>
public string FieldName
{
    get
    {
        return this.fieldName;
    }
}
163 
/// <summary>
/// Removes the first element of the underlying list and returns it.
/// </summary>
/// <returns>the top TermInfo object of the stack, or null when the stack holds no elements</returns>
public TermInfo Pop()
{
    // First is null exactly when the list is empty.
    LinkedListNode<TermInfo> head = termList.First;
    if (head == null)
    {
        return null;
    }

    termList.RemoveFirst();
    return head.Value;
}
176 
/// <summary>
/// Puts a TermInfo back on the stack; it becomes the next element returned by Pop().
/// </summary>
/// <param name="termInfo">the TermInfo object to be put on the top of the stack</param>
public void Push(TermInfo termInfo)
{
    // The front of the list is the top of the stack.
    this.termList.AddFirst(termInfo);
}
186 
/// <summary>
/// Tells whether the stack currently holds no TermInfo objects.
/// </summary>
/// <returns>true if the stack is empty (or the list is null), false if not</returns>
public bool IsEmpty()
{
    if (termList == null)
    {
        return true;
    }

    return termList.Count == 0;
}
195 
/// <summary>
/// One occurrence of a term: its text, its start and end offsets and its position,
/// as supplied to the constructor. Instances compare by position.
/// </summary>
public class TermInfo : IComparable<TermInfo>
{
    // All four values are fixed at construction time.
    private readonly String text;
    private readonly int startOffset;
    private readonly int endOffset;
    private readonly int position;

    /// <summary>
    /// Creates a TermInfo from the given values.
    /// </summary>
    /// <param name="text">the term text</param>
    /// <param name="startOffset">start offset of the occurrence</param>
    /// <param name="endOffset">end offset of the occurrence</param>
    /// <param name="position">position of the occurrence</param>
    public TermInfo(String text, int startOffset, int endOffset, int position)
    {
        this.text = text;
        this.startOffset = startOffset;
        this.endOffset = endOffset;
        this.position = position;
    }

    /// <value> the term text </value>
    public string Text
    {
        get { return text; }
    }

    /// <value> start offset of the occurrence </value>
    public int StartOffset
    {
        get { return startOffset; }
    }

    /// <value> end offset of the occurrence </value>
    public int EndOffset
    {
        get { return endOffset; }
    }

    /// <value> position of the occurrence </value>
    public int Position
    {
        get { return position; }
    }

    /// <summary>
    /// Formats the instance as <c>text(startOffset,endOffset,position)</c>.
    /// </summary>
    /// <returns>the formatted string</returns>
    public override string ToString()
    {
        StringBuilder sb = new StringBuilder();
        sb.Append(text).Append('(').Append(startOffset).Append(',').Append(endOffset).Append(',').Append(position).Append(')');
        return sb.ToString();
    }

    /// <summary>
    /// Compares this instance with another one by position.
    /// </summary>
    /// <param name="o">the TermInfo to compare with; may be null</param>
    /// <returns>a negative value, zero or a positive value when this position is
    /// smaller than, equal to or greater than the other position; a positive value
    /// when <paramref name="o"/> is null</returns>
    public int CompareTo(TermInfo o)
    {
        // By the IComparable<T> convention, every instance sorts after null.
        if (o == null)
        {
            return 1;
        }

        // int.CompareTo cannot overflow, unlike subtracting the two positions.
        return position.CompareTo(o.position);
    }
}
244  }
245 }