Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
PayloadSpanUtil.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using System.Linq;
21 using IndexReader = Lucene.Net.Index.IndexReader;
22 using Term = Lucene.Net.Index.Term;
23 using BooleanClause = Lucene.Net.Search.BooleanClause;
24 using BooleanQuery = Lucene.Net.Search.BooleanQuery;
25 using DisjunctionMaxQuery = Lucene.Net.Search.DisjunctionMaxQuery;
26 using FilteredQuery = Lucene.Net.Search.FilteredQuery;
27 using MultiPhraseQuery = Lucene.Net.Search.MultiPhraseQuery;
28 using PhraseQuery = Lucene.Net.Search.PhraseQuery;
29 using Query = Lucene.Net.Search.Query;
30 using TermQuery = Lucene.Net.Search.TermQuery;
31 using SpanNearQuery = Lucene.Net.Search.Spans.SpanNearQuery;
32 using SpanOrQuery = Lucene.Net.Search.Spans.SpanOrQuery;
33 using SpanQuery = Lucene.Net.Search.Spans.SpanQuery;
34 using SpanTermQuery = Lucene.Net.Search.Spans.SpanTermQuery;
35 
36 namespace Lucene.Net.Search.Payloads
37 {
38 
/// <summary> Experimental class to get the set of payloads for most standard Lucene queries.
/// Operates like Highlighter - the IndexReader should only contain the doc of interest,
/// so it is best to use MemoryIndex.
///
/// <para>
/// WARNING: The status of the <b>Payloads</b> feature is experimental.
/// The APIs introduced here might change in the future and will not be
/// supported anymore in such a case.
/// </para>
/// </summary>
public class PayloadSpanUtil
{
    // Only ever assigned in the constructor.
    private readonly IndexReader reader;

    /// <summary> Creates a utility that reads payloads through the given reader.</summary>
    /// <param name="reader">that contains doc with payloads to extract
    /// </param>
    public PayloadSpanUtil(IndexReader reader)
    {
        this.reader = reader;
    }

    /// <summary> Collects the payloads found at every span matched by the given query.
    /// Query should be rewritten for wild/fuzzy support.
    /// </summary>
    /// <param name="query">the query to walk; query types this class does not
    /// recognize contribute no payloads
    /// </param>
    /// <returns> a new collection holding the payloads that were found (possibly empty)
    /// </returns>
    /// <exception cref="System.IO.IOException">may be raised by the underlying index access</exception>
    public virtual ICollection<byte[]> GetPayloadsForQuery(Query query)
    {
        ICollection<byte[]> payloads = new List<byte[]>();
        QueryToSpanQuery(query, payloads);
        return payloads;
    }

    /// <summary> Recursively translates <paramref name="query"/> into span queries and
    /// appends the payloads of their matches to <paramref name="payloads"/>.
    /// </summary>
    private void QueryToSpanQuery(Query query, ICollection<byte[]> payloads)
    {
        if (query is BooleanQuery)
        {
            foreach (BooleanClause clause in ((BooleanQuery) query).GetClauses())
            {
                // Prohibited (MUST_NOT) clauses are skipped entirely.
                if (!clause.IsProhibited)
                {
                    QueryToSpanQuery(clause.Query, payloads);
                }
            }
        }
        else if (query is PhraseQuery)
        {
            GetPayloads(payloads, PhraseToSpanNear((PhraseQuery) query));
        }
        else if (query is TermQuery)
        {
            SpanTermQuery stq = new SpanTermQuery(((TermQuery) query).Term);
            stq.Boost = query.Boost;
            GetPayloads(payloads, stq);
        }
        else if (query is SpanQuery)
        {
            GetPayloads(payloads, (SpanQuery) query);
        }
        else if (query is FilteredQuery)
        {
            // Only the wrapped query is inspected; the filter itself is ignored.
            QueryToSpanQuery(((FilteredQuery) query).Query, payloads);
        }
        else if (query is DisjunctionMaxQuery)
        {
            // foreach (rather than a hand-driven enumerator) guarantees the enumerator is disposed.
            foreach (Query disjunct in (DisjunctionMaxQuery) query)
            {
                QueryToSpanQuery(disjunct, payloads);
            }
        }
        else if (query is MultiPhraseQuery)
        {
            SpanNearQuery sp = MultiPhraseToSpanNear((MultiPhraseQuery) query);
            if (sp != null)
            {
                GetPayloads(payloads, sp);
            }
        }
    }

    /// <summary> Builds a SpanNearQuery with one SpanTermQuery per phrase term, carrying
    /// over the phrase's slop and boost. The near query is in-order only when the slop is zero.
    /// </summary>
    private static SpanNearQuery PhraseToSpanNear(PhraseQuery phrase)
    {
        Term[] phraseQueryTerms = phrase.GetTerms();
        SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.Length];
        for (int i = 0; i < phraseQueryTerms.Length; i++)
        {
            clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
        }

        int slop = phrase.Slop;
        bool inorder = (slop == 0);

        SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
        sp.Boost = phrase.Boost;
        return sp;
    }

    /// <summary> Builds a SpanNearQuery from a MultiPhraseQuery: the terms that share a
    /// position are OR-ed together in a SpanOrQuery, and each unused position below the
    /// maximum widens the slop by one.
    /// </summary>
    /// <returns> the span query, or null when the multi-phrase query has no positions
    /// </returns>
    private static SpanNearQuery MultiPhraseToSpanNear(MultiPhraseQuery mpq)
    {
        IList<Term[]> termArrays = mpq.GetTermArrays();
        int[] positions = mpq.GetPositions();
        if (positions.Length == 0)
        {
            return null;
        }

        int maxPosition = positions[0];
        foreach (int candidate in positions)
        {
            if (candidate > maxPosition)
            {
                maxPosition = candidate;
            }
        }

        // Typed as SpanQuery so that ToArray() below yields a genuine SpanQuery[]:
        // an array created as Query[] cannot be down-cast to SpanQuery[] and would
        // fail with InvalidCastException at runtime.
        List<SpanQuery>[] disjunctLists = new List<SpanQuery>[maxPosition + 1];
        int distinctPositions = 0;

        for (int i = 0; i < termArrays.Count; ++i)
        {
            Term[] termArray = termArrays[i];
            List<SpanQuery> disjuncts = disjunctLists[positions[i]];
            if (disjuncts == null)
            {
                disjuncts = new List<SpanQuery>(termArray.Length);
                disjunctLists[positions[i]] = disjuncts;
                ++distinctPositions;
            }
            foreach (Term term in termArray)
            {
                disjuncts.Add(new SpanTermQuery(term));
            }
        }

        int positionGaps = 0;
        int position = 0;
        SpanQuery[] clauses = new SpanQuery[distinctPositions];
        for (int i = 0; i < disjunctLists.Length; ++i)
        {
            List<SpanQuery> disjuncts = disjunctLists[i];
            if (disjuncts != null)
            {
                clauses[position++] = new SpanOrQuery(disjuncts.ToArray());
            }
            else
            {
                ++positionGaps;
            }
        }

        int slop = mpq.Slop;
        bool inorder = (slop == 0);

        SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
        sp.Boost = mpq.Boost;
        return sp;
    }

    /// <summary> Iterates over all spans of <paramref name="query"/> in the reader and
    /// appends every available payload to <paramref name="payloads"/>.
    /// </summary>
    private void GetPayloads(ICollection<byte[]> payloads, SpanQuery query)
    {
        Spans.Spans spans = query.GetSpans(reader);

        while (spans.Next())
        {
            if (!spans.IsPayloadAvailable())
            {
                continue;
            }

            foreach (byte[] bytes in spans.GetPayload())
            {
                payloads.Add(bytes);
            }
        }
    }
}
211 }