Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
BaseFragmentsBuilder.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using System.Text;
21 
22 using Lucene.Net.Documents;
23 using Lucene.Net.Search;
24 using Lucene.Net.Index;
25 
26 using WeightedFragInfo = Lucene.Net.Search.Vectorhighlight.FieldFragList.WeightedFragInfo;
27 using SubInfo = Lucene.Net.Search.Vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
28 using Toffs = Lucene.Net.Search.Vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs;
29 
30 namespace Lucene.Net.Search.Vectorhighlight
31 {
32  public abstract class BaseFragmentsBuilder : FragmentsBuilder
33  {
/// <summary>
/// Tags written before (<c>preTags</c>) and after (<c>postTags</c>) each highlighted term.
/// Both are set by the constructor and indexed modulo their length by
/// <c>GetPreTag</c> / <c>GetPostTag</c>.
/// </summary>
protected String[] preTags, postTags;

/// <summary>
/// Twenty ready-made opening tags, each a bold element with a different background colour.
/// Declared <c>readonly</c> so the shared static reference cannot be replaced;
/// note that the array elements themselves are still writable.
/// </summary>
public static readonly String[] COLORED_PRE_TAGS = {
    "<b style=\"background:yellow\">", "<b style=\"background:lawngreen\">", "<b style=\"background:aquamarine\">",
    "<b style=\"background:magenta\">", "<b style=\"background:palegreen\">", "<b style=\"background:coral\">",
    "<b style=\"background:wheat\">", "<b style=\"background:khaki\">", "<b style=\"background:lime\">",
    "<b style=\"background:deepskyblue\">", "<b style=\"background:deeppink\">", "<b style=\"background:salmon\">",
    "<b style=\"background:peachpuff\">", "<b style=\"background:violet\">", "<b style=\"background:mediumpurple\">",
    "<b style=\"background:palegoldenrod\">", "<b style=\"background:darkkhaki\">", "<b style=\"background:springgreen\">",
    "<b style=\"background:turquoise\">", "<b style=\"background:powderblue\">"
};

/// <summary>
/// Closing tag matching every entry of <see cref="COLORED_PRE_TAGS"/>.
/// </summary>
public static readonly String[] COLORED_POST_TAGS = { "</b>" };
46 
/// <summary>
/// Creates a builder that wraps every highlighted term in a plain
/// <c>&lt;b&gt;</c> ... <c>&lt;/b&gt;</c> pair.
/// </summary>
protected BaseFragmentsBuilder()
    : this(new String[] { "<b>" }, new String[] { "</b>" })
{
}
52 
/// <summary>
/// Creates a builder that uses the supplied tag arrays.
/// </summary>
/// <param name="preTags">Tags written before a highlighted term.</param>
/// <param name="postTags">Tags written after a highlighted term.</param>
protected BaseFragmentsBuilder(String[] preTags, String[] postTags)
{
    // The arrays are stored by reference, not copied.
    this.postTags = postTags;
    this.preTags = preTags;
}
58 
/// <summary>
/// Validates that a tags argument is either a single <c>String</c> or a <c>String[]</c>.
/// </summary>
/// <param name="tags">The value to validate.</param>
/// <returns>The same <paramref name="tags"/> instance when it has an accepted type.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="tags"/> is null or of any other type.
/// </exception>
static Object CheckTagsArgument(Object tags)
{
    bool supported = tags is String || tags is String[];
    if (!supported)
    {
        throw new ArgumentException("type of preTags/postTags must be a String or String[]");
    }
    return tags;
}
65 
/// <summary>
/// Produces the list of fragment infos that CreateFragments will render.
/// CreateFragments passes in <c>fieldFragList.fragInfos</c> and renders the returned
/// entries in list order, stopping once the requested maximum is reached.
/// </summary>
/// <param name="src">The fragment infos taken from the field's frag list.</param>
/// <returns>The fragment infos to render, in the order they should be emitted.</returns>
66  public abstract List<WeightedFragInfo> GetWeightedFragInfoList(List<WeightedFragInfo> src);
67 
/// <summary>
/// Builds a single highlighted fragment for a field of a document.
/// </summary>
/// <param name="reader">Index reader used to load the document.</param>
/// <param name="docId">Id of the document to highlight.</param>
/// <param name="fieldName">Name of the field to highlight.</param>
/// <param name="fieldFragList">Fragment list computed for the field.</param>
/// <returns>
/// The first fragment produced by <c>CreateFragments</c>, or null when that call
/// yields null or an empty array.
/// </returns>
public virtual String CreateFragment(IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList)
{
    // Ask for at most one fragment and unwrap it.
    String[] candidates = CreateFragments(reader, docId, fieldName, fieldFragList, 1);
    bool hasFragment = candidates != null && candidates.Length > 0;
    return hasFragment ? candidates[0] : null;
}
74 
/// <summary>
/// Builds up to <paramref name="maxNumFragments"/> highlighted fragments for a field of a document.
/// </summary>
/// <param name="reader">Index reader used to load the document.</param>
/// <param name="docId">Id of the document to highlight.</param>
/// <param name="fieldName">Name of the field to highlight.</param>
/// <param name="fieldFragList">Fragment list computed for the field.</param>
/// <param name="maxNumFragments">Upper bound on the number of fragments; zero is accepted.</param>
/// <returns>
/// The rendered fragments in the order given by <c>GetWeightedFragInfoList</c>,
/// or null when <c>GetFields</c> returns no fields for the document.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="maxNumFragments"/> is negative.</exception>
public virtual String[] CreateFragments(IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList, int maxNumFragments)
{
    if (maxNumFragments < 0)
    {
        throw new ArgumentException("maxNumFragments(" + maxNumFragments + ") must be positive number.");
    }

    List<WeightedFragInfo> fragInfos = GetWeightedFragInfoList(fieldFragList.fragInfos);

    Field[] values = GetFields(reader, docId, fieldName);
    if (values.Length == 0)
    {
        return null;
    }

    // Size the result by what can actually be produced. Pre-allocating maxNumFragments
    // slots would exhaust memory when a caller passes a huge bound such as int.MaxValue.
    // The zero case is short-circuited so fragInfos is not touched, as in the loop it replaces.
    int fragmentCount = maxNumFragments == 0 ? 0 : Math.Min(maxNumFragments, fragInfos.Count);
    List<String> fragments = new List<String>(fragmentCount);

    // buffer and nextValueIndex are shared across iterations so field values are
    // concatenated only once, however many fragments are rendered.
    StringBuilder buffer = new StringBuilder();
    int[] nextValueIndex = { 0 };
    for (int n = 0; n < fragmentCount; n++)
    {
        fragments.Add(MakeFragment(buffer, nextValueIndex, values, fragInfos[n]));
    }
    return fragments.ToArray();
}
94 
/// <summary>
/// Loads the string values of one field of a document.
/// </summary>
/// <param name="reader">Index reader used to load the document.</param>
/// <param name="docId">Id of the document to load.</param>
/// <param name="fieldName">Name of the only field to load.</param>
/// <returns>The values of <paramref name="fieldName"/> as returned by <c>Document.GetValues</c>.</returns>
[Obsolete]
protected virtual String[] GetFieldValues(IndexReader reader, int docId, String fieldName)
{
    // Restrict loading to the requested field only.
    String[] wantedFields = new String[] { fieldName };
    MapFieldSelector selector = new MapFieldSelector(wantedFields);
    Document loaded = reader.Document(docId, selector);

    // According to the Document class javadoc, this never returns null.
    return loaded.GetValues(fieldName);
}
101 
/// <summary>
/// Loads the <c>Field</c> instances of one field of a document.
/// </summary>
/// <param name="reader">Index reader used to load the document.</param>
/// <param name="docId">Id of the document to load.</param>
/// <param name="fieldName">Name of the only field to load.</param>
/// <returns>The fields named <paramref name="fieldName"/> as returned by <c>Document.GetFields</c>.</returns>
protected virtual Field[] GetFields(IndexReader reader, int docId, String fieldName)
{
    // NOTE(review): the original author questioned whether, per the javadoc,
    // doc.getFields(fieldName) can be used with lazily loaded fields - unresolved.
    String[] wantedFields = new String[] { fieldName };
    MapFieldSelector selector = new MapFieldSelector(wantedFields);
    Document loaded = reader.Document(docId, selector);

    // According to the Document class javadoc, this never returns null.
    return loaded.GetFields(fieldName);
}
108 
/// <summary>
/// Renders one fragment from string field values.
/// </summary>
/// <param name="buffer">Accumulates the concatenated field values across calls.</param>
/// <param name="index">One-element array holding the index of the next value to append to <paramref name="buffer"/>.</param>
/// <param name="values">String values of the field.</param>
/// <param name="fragInfo">Offsets and matched terms of the fragment to render.</param>
/// <returns>The fragment text with pre/post tags inserted around the matched terms.</returns>
[Obsolete]
protected virtual String MakeFragment(StringBuilder buffer, int[] index, String[] values, WeightedFragInfo fragInfo)
{
    int fragmentStart = fragInfo.startOffset;
    String source = GetFragmentSource(buffer, index, values, fragmentStart, fragInfo.endOffset);
    return MakeFragment(fragInfo, source, fragmentStart);
}
115 
/// <summary>
/// Renders one fragment from the <c>Field</c> instances of a document field.
/// </summary>
/// <param name="buffer">Accumulates the concatenated field values across calls.</param>
/// <param name="index">One-element array holding the index of the next value to append to <paramref name="buffer"/>.</param>
/// <param name="values">Fields whose string values make up the source text.</param>
/// <param name="fragInfo">Offsets and matched terms of the fragment to render.</param>
/// <returns>The fragment text with pre/post tags inserted around the matched terms.</returns>
protected virtual String MakeFragment(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo)
{
    int fragmentStart = fragInfo.startOffset;
    String source = GetFragmentSource(buffer, index, values, fragmentStart, fragInfo.endOffset);
    return MakeFragment(fragInfo, source, fragmentStart);
}
121 
/// <summary>
/// Inserts pre/post tags around every term offset of a fragment.
/// </summary>
/// <param name="fragInfo">Fragment whose sub-infos list the term offsets to tag.</param>
/// <param name="src">Fragment text; its first character corresponds to offset <paramref name="s"/>.</param>
/// <param name="s">Start offset of the fragment, subtracted from every term offset.</param>
/// <returns>The fragment text with tags inserted.</returns>
private String MakeFragment(WeightedFragInfo fragInfo, String src, int s)
{
    StringBuilder tagged = new StringBuilder();

    // Position in src up to which text has already been copied.
    int copiedUpTo = 0;

    // NOTE(review): assumes term offsets are visited in ascending, non-overlapping
    // order; otherwise a Substring length below becomes negative - confirm with callers.
    foreach (SubInfo subInfo in fragInfo.subInfos)
    {
        foreach (Toffs termOffsets in subInfo.termsOffsets)
        {
            // Translate offsets into positions relative to src.
            int termStart = termOffsets.startOffset - s;
            int termEnd = termOffsets.endOffset - s;

            // Untagged text between the previous term and this one.
            tagged.Append(src.Substring(copiedUpTo, termStart - copiedUpTo));

            // The term itself, wrapped in the tags chosen by the sub-info's sequence number.
            tagged.Append(GetPreTag(subInfo.seqnum));
            tagged.Append(src.Substring(termStart, termEnd - termStart));
            tagged.Append(GetPostTag(subInfo.seqnum));

            copiedUpTo = termEnd;
        }
    }

    // Remaining text after the last term.
    tagged.Append(src.Substring(copiedUpTo));
    return tagged.ToString();
}
138 
139  /*
140  [Obsolete]
141  protected String MakeFragment(StringBuilder buffer, int[] index, String[] values, WeightedFragInfo fragInfo)
142  {
143  StringBuilder fragment = new StringBuilder();
144  int s = fragInfo.startOffset;
145  String src = GetFragmentSource(buffer, index, values, s, fragInfo.endOffset);
146  int srcIndex = 0;
147  foreach (SubInfo subInfo in fragInfo.subInfos)
148  {
149  foreach (Toffs to in subInfo.termsOffsets)
150  {
151  fragment.Append(src.Substring(srcIndex, to.startOffset - s - srcIndex)).Append(GetPreTag(subInfo.seqnum))
152  .Append(src.Substring(to.startOffset - s, to.endOffset - s - (to.startOffset - s))).Append(GetPostTag(subInfo.seqnum));
153  srcIndex = to.endOffset - s;
154  }
155  }
156  fragment.Append(src.Substring(srcIndex));
157  return fragment.ToString();
158  }
159  */
160 
161 
/// <summary>
/// Returns the slice of the concatenated field values that covers a fragment,
/// appending further values to <paramref name="buffer"/> only as far as needed.
/// </summary>
/// <param name="buffer">Concatenated values appended so far; extended by this call.</param>
/// <param name="index">One-element array holding the index of the next value to append; advanced by this call.</param>
/// <param name="values">String values of the field.</param>
/// <param name="startOffset">Start of the slice within the concatenated text.</param>
/// <param name="endOffset">End (exclusive) of the slice; clamped to the buffer length.</param>
/// <returns>The text between <paramref name="startOffset"/> and the clamped end offset.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="startOffset"/> is negative or lies beyond the clamped end offset.
/// </exception>
[Obsolete]
protected virtual String GetFragmentSource(StringBuilder buffer, int[] index, String[] values, int startOffset, int endOffset)
{
    while (buffer.Length < endOffset && index[0] < values.Length)
    {
        String value = values[index[0]];
        buffer.Append(value);

        // A single space separates a non-empty value from the value that follows it.
        if (value.Length > 0 && index[0] + 1 < values.Length)
        {
            buffer.Append(' ');
        }
        index[0]++;
    }

    int eo = Math.Min(buffer.Length, endOffset);

    // Copy only the requested slice; buffer.ToString().Substring(...) would first
    // materialise the whole buffer on every call.
    return buffer.ToString(startOffset, eo - startOffset);
}
175 
/// <summary>
/// Returns the slice of the concatenated field values that covers a fragment,
/// appending further field values to <paramref name="buffer"/> only as far as needed.
/// </summary>
/// <param name="buffer">Concatenated values appended so far; extended by this call.</param>
/// <param name="index">One-element array holding the index of the next field to append; advanced by this call.</param>
/// <param name="values">Fields whose string values make up the source text.</param>
/// <param name="startOffset">Start of the slice within the concatenated text.</param>
/// <param name="endOffset">End (exclusive) of the slice; clamped to the buffer length.</param>
/// <returns>The text between <paramref name="startOffset"/> and the clamped end offset.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="startOffset"/> is negative or lies beyond the clamped end offset.
/// </exception>
protected virtual String GetFragmentSource(StringBuilder buffer, int[] index, Field[] values, int startOffset, int endOffset)
{
    while (buffer.Length < endOffset && index[0] < values.Length)
    {
        Field field = values[index[0]];
        String text = field.StringValue;
        buffer.Append(text);

        // Only tokenized, non-empty values are followed by a separating space,
        // and only when another value comes after them.
        if (field.IsTokenized && text.Length > 0 && index[0] + 1 < values.Length)
        {
            buffer.Append(' ');
        }
        index[0]++;
    }

    int eo = Math.Min(buffer.Length, endOffset);

    // Copy only the requested slice; buffer.ToString().Substring(...) would first
    // materialise the whole buffer on every call.
    return buffer.ToString(startOffset, eo - startOffset);
}
188 
/// <summary>
/// Selects the opening tag for a sequence number, cycling through <c>preTags</c>.
/// </summary>
/// <param name="num">Sequence number; reduced modulo the number of pre tags.</param>
/// <returns>The entry of <c>preTags</c> at index <c>num % preTags.Length</c>.</returns>
protected virtual String GetPreTag(int num)
{
    return preTags[num % preTags.Length];
}
194 
/// <summary>
/// Selects the closing tag for a sequence number, cycling through <c>postTags</c>.
/// </summary>
/// <param name="num">Sequence number; reduced modulo the number of post tags.</param>
/// <returns>The entry of <c>postTags</c> at index <c>num % postTags.Length</c>.</returns>
protected virtual String GetPostTag(int num)
{
    return postTags[num % postTags.Length];
}
200  }
201 }