Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
FieldPhraseList.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using System.Text;
21 
22 using Lucene.Net.Documents;
23 using Lucene.Net.Search;
24 using Lucene.Net.Index;
25 
26 using TermInfo = Lucene.Net.Search.Vectorhighlight.FieldTermStack.TermInfo;
27 using QueryPhraseMap = Lucene.Net.Search.Vectorhighlight.FieldQuery.QueryPhraseMap;
28 
29 namespace Lucene.Net.Search.Vectorhighlight
30 {
35  public class FieldPhraseList
36  {
37  public LinkedList<WeightedPhraseInfo> phraseList = new LinkedList<WeightedPhraseInfo>();
38 
/// <summary>
/// Creates a phrase list for the given term stack and query with the phrase
/// limit set to <see cref="int.MaxValue"/>.
/// </summary>
/// <param name="fieldTermStack">Term stack forwarded to the three-argument constructor.</param>
/// <param name="fieldQuery">Query forwarded to the three-argument constructor.</param>
public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery)
    : this(fieldTermStack, fieldQuery, int.MaxValue)
{
}
47 
48 
/// <summary>
/// Creates a phrase list by popping terms from <paramref name="fieldTermStack"/> and
/// matching them against <paramref name="fieldQuery"/>, preferring the longest phrase
/// that can be built from consecutive stack entries.
/// </summary>
/// <param name="fieldTermStack">
/// Source of <c>TermInfo</c> entries. It is consumed: terms are popped from it, and terms
/// that turn out not to belong to a phrase are pushed back.
/// </param>
/// <param name="fieldQuery">Query used to look up term maps and phrases for the field.</param>
/// <param name="phraseLimit">
/// Collection stops once <c>phraseList</c> holds this many entries.
/// </param>
public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit)
{
    string fieldName = fieldTermStack.FieldName;
    LinkedList<TermInfo> candidate = new LinkedList<TermInfo>();

    while (!fieldTermStack.IsEmpty() && phraseList.Count < phraseLimit)
    {
        candidate.Clear();

        TermInfo head = fieldTermStack.Pop();
        QueryPhraseMap matched = fieldQuery.GetFieldTermMap(fieldName, head.Text);

        // Not found: the popped term is discarded and the next stack entry is tried.
        if (matched == null)
        {
            continue;
        }

        // Found: keep extending the candidate to search for the longest phrase.
        candidate.AddLast(head);
        for (;;)
        {
            TermInfo next = fieldTermStack.Pop();
            QueryPhraseMap deeper = (next == null) ? null : matched.GetTermMap(next.Text);

            if (deeper != null)
            {
                // The next term continues the current match; descend and keep going.
                candidate.AddLast(next);
                matched = deeper;
                continue;
            }

            // The candidate cannot grow any further. A term that was popped but did
            // not continue the match goes back on the stack for the outer loop.
            if (next != null)
            {
                fieldTermStack.Push(next);
            }

            if (matched.IsValidTermOrPhrase(new List<TermInfo>(candidate)))
            {
                AddIfNoOverlap(new WeightedPhraseInfo(candidate, matched.Boost, matched.TermOrPhraseNumber));
                break;
            }

            // The full candidate is not valid: give back trailing terms one at a time
            // and look the shortened candidate up again until a phrase is found or
            // only the first term is left.
            while (candidate.Count > 1)
            {
                TermInfo dropped = candidate.Last.Value;
                candidate.RemoveLast();
                fieldTermStack.Push(dropped);

                matched = fieldQuery.SearchPhrase(fieldName, new List<TermInfo>(candidate));
                if (matched != null)
                {
                    AddIfNoOverlap(new WeightedPhraseInfo(candidate, matched.Boost, matched.TermOrPhraseNumber));
                    break;
                }
            }
            break;
        }
    }
}
114 
/// <summary>
/// Appends <paramref name="wpi"/> to <c>phraseList</c> unless an entry already in the
/// list reports an offset overlap with it, in which case nothing is added.
/// </summary>
/// <param name="wpi">The phrase to add.</param>
void AddIfNoOverlap(WeightedPhraseInfo wpi)
{
    for (LinkedListNode<WeightedPhraseInfo> node = phraseList.First; node != null; node = node.Next)
    {
        if (node.Value.IsOffsetOverlap(wpi))
        {
            return;
        }
    }

    phraseList.AddLast(wpi);
}
123 
/// <summary>
/// A matched term or phrase: its concatenated text, the character offset ranges it
/// covers, the boost it was created with and a sequence number.
/// </summary>
public class WeightedPhraseInfo
{
    internal String text; // unnecessary member, just exists for debugging purpose
    internal List<Toffs> termsOffsets; // usually termsOffsets.Count == 1,
                                       // but if position-gap > 1 and slop > 0 then Count could be greater than 1
    internal float boost; // query boost
    internal int seqnum;

    /// <summary>
    /// Creates a phrase info with sequence number 0.
    /// </summary>
    /// <param name="terms">Terms of the phrase; must contain at least one element.</param>
    /// <param name="boost">Query boost stored for this phrase.</param>
    public WeightedPhraseInfo(LinkedList<TermInfo> terms, float boost) : this(terms, boost, 0)
    {
    }

    /// <summary>
    /// Creates a phrase info from <paramref name="terms"/>. Terms whose positions are
    /// exactly one apart are merged into a single offset range; any other position
    /// difference starts a new range.
    /// </summary>
    /// <param name="terms">Terms of the phrase; must contain at least one element.</param>
    /// <param name="boost">Query boost stored for this phrase.</param>
    /// <param name="number">Sequence number stored for this phrase.</param>
    public WeightedPhraseInfo(LinkedList<TermInfo> terms, float boost, int number)
    {
        this.boost = boost;
        this.seqnum = number;
        termsOffsets = new List<Toffs>(terms.Count);

        LinkedListNode<TermInfo> node = terms.First;
        TermInfo ti = node.Value;
        termsOffsets.Add(new Toffs(ti.StartOffset, ti.EndOffset));
        if (terms.Count == 1)
        {
            text = ti.Text;
            return;
        }

        StringBuilder sb = new StringBuilder();
        sb.Append(ti.Text);
        int pos = ti.Position;

        // Walk the remaining terms, starting from the second node.
        for (node = node.Next; node != null; node = node.Next)
        {
            ti = node.Value;
            sb.Append(ti.Text);
            if (ti.Position - pos == 1)
            {
                // Directly follows the previous term: extend the current range.
                termsOffsets[termsOffsets.Count - 1].SetEndOffset(ti.EndOffset);
            }
            else
            {
                termsOffsets.Add(new Toffs(ti.StartOffset, ti.EndOffset));
            }
            pos = ti.Position;
        }
        text = sb.ToString();
    }

    /// <summary>Start offset of the first offset range.</summary>
    public int StartOffset
    {
        get { return termsOffsets[0].startOffset; }
    }

    /// <summary>End offset of the last offset range.</summary>
    public int EndOffset
    {
        get { return termsOffsets[termsOffsets.Count - 1].endOffset; }
    }

    /// <summary>
    /// Tests whether the span from <see cref="StartOffset"/> to <see cref="EndOffset"/>
    /// of this phrase overlaps the corresponding span of <paramref name="other"/>.
    /// </summary>
    /// <param name="other">The phrase to compare against.</param>
    /// <returns><c>true</c> if any of the four boundary checks below holds.</returns>
    public bool IsOffsetOverlap(WeightedPhraseInfo other)
    {
        int so = StartOffset;
        int eo = EndOffset;
        int oso = other.StartOffset;
        int oeo = other.EndOffset;
        if (so <= oso && oso < eo) return true; // other starts inside this span
        if (so < oeo && oeo <= eo) return true; // other ends inside this span
        if (oso <= so && so < oeo) return true; // this starts inside the other span
        if (oso < eo && eo <= oeo) return true; // this ends inside the other span
        return false;
    }

    /// <summary>
    /// Returns <c>text(boost)(offsets)</c>, where <c>boost</c> is formatted with the
    /// <c>".0"</c> pattern and <c>offsets</c> is the concatenation of every range in
    /// <c>termsOffsets</c>.
    /// </summary>
    public override string ToString()
    {
        StringBuilder sb = new StringBuilder();

        // Format with the invariant culture so the output does not depend on the
        // thread's current culture (decimal separator, negative sign, NaN/Infinity symbols).
        sb.Append(text)
          .Append('(')
          .Append(boost.ToString(".0", System.Globalization.CultureInfo.InvariantCulture))
          .Append(")(");
        foreach (Toffs to in termsOffsets)
        {
            sb.Append(to);
        }
        sb.Append(')');
        return sb.ToString();
    }

    /// <summary>
    /// A single start/end character offset pair.
    /// </summary>
    public class Toffs
    {
        internal int startOffset;
        internal int endOffset;

        /// <summary>Creates an offset pair from the given values.</summary>
        /// <param name="startOffset">Start offset of the range.</param>
        /// <param name="endOffset">End offset of the range.</param>
        public Toffs(int startOffset, int endOffset)
        {
            this.startOffset = startOffset;
            this.endOffset = endOffset;
        }

        /// <summary>Overwrites the end offset of this range.</summary>
        /// <param name="endOffset">The new end offset.</param>
        internal void SetEndOffset(int endOffset)
        {
            this.endOffset = endOffset;
        }

        /// <summary>Returns the pair as <c>(start,end)</c>.</summary>
        public override string ToString()
        {
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            return "(" + startOffset.ToString(invariant) + "," + endOffset.ToString(invariant) + ")";
        }
    }
}
232  }
233 }