Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
PrefixAwareTokenStream.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using Lucene.Net.Analysis.Tokenattributes;
19 using Lucene.Net.Index;
20 
21 namespace Lucene.Net.Analysis.Miscellaneous
22 {
23  /// <summary>
24  /// Joins two token streams and leaves the last token of the first stream available
25  /// to be used when updating the token values in the second stream based on that token.
26  ///
27  /// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
28  /// <p/>
29  /// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
30  /// the ones located in Lucene.Net.Analysis.Tokenattributes.
31  /// </summary>
33  {
// Attributes obtained from this stream via AddAttribute in the constructor.
// They are written when a token is emitted and read after the suffix stream advances.
private readonly ITermAttribute _termAtt;
private readonly IPositionIncrementAttribute _posIncrAtt;
private readonly IPayloadAttribute _payloadAtt;
private readonly IOffsetAttribute _offsetAtt;
private readonly ITypeAttribute _typeAtt;
private readonly IFlagsAttribute _flagsAtt;

// Matching attributes obtained from the prefix stream in the constructor.
private readonly ITermAttribute _pTermAtt;
private readonly IPositionIncrementAttribute _pPosIncrAtt;
private readonly IPayloadAttribute _pPayloadAtt;
private readonly IOffsetAttribute _pOffsetAtt;
private readonly ITypeAttribute _pTypeAtt;
private readonly IFlagsAttribute _pFlagsAtt;

// Copy of the most recently read prefix token; passed to UpdateSuffixToken.
private readonly Token _previousPrefixToken = new Token();

// Scratch token that the current input stream's attribute values are copied into.
private readonly Token _reusableToken = new Token();

// Set to true once the prefix stream has returned false from IncrementToken.
private bool _prefixExhausted;
52 
/// <summary>
/// Creates a stream that first emits every token of <paramref name="prefix"/> and then
/// every token of <paramref name="suffix"/>.
/// </summary>
/// <param name="prefix">the stream whose tokens are emitted first; must not be null</param>
/// <param name="suffix">the stream whose tokens are emitted afterwards; it is also handed to the base constructor</param>
/// <exception cref="System.ArgumentNullException">if <paramref name="prefix"/> is null</exception>
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
{
    // Fail with a descriptive exception instead of a NullReferenceException at the
    // first prefix.AddAttribute call further down.
    if (prefix == null)
    {
        throw new System.ArgumentNullException("prefix");
    }

    Suffix = suffix;
    Prefix = prefix;
    _prefixExhausted = false;

    // ReSharper disable DoNotCallOverridableMethodsInConstructor
    _termAtt = AddAttribute<ITermAttribute>();
    _posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    _payloadAtt = AddAttribute<IPayloadAttribute>();
    _offsetAtt = AddAttribute<IOffsetAttribute>();
    _typeAtt = AddAttribute<ITypeAttribute>();
    _flagsAtt = AddAttribute<IFlagsAttribute>();
    // ReSharper restore DoNotCallOverridableMethodsInConstructor

    // Cache the prefix stream's own attributes so its tokens can be copied out later.
    _pTermAtt = prefix.AddAttribute<ITermAttribute>();
    _pPosIncrAtt = prefix.AddAttribute<IPositionIncrementAttribute>();
    _pPayloadAtt = prefix.AddAttribute<IPayloadAttribute>();
    _pOffsetAtt = prefix.AddAttribute<IOffsetAttribute>();
    _pTypeAtt = prefix.AddAttribute<ITypeAttribute>();
    _pFlagsAtt = prefix.AddAttribute<IFlagsAttribute>();
}
75 
/// <summary>
/// The stream whose tokens are emitted first.
/// NOTE: the prefix attribute references used when reading tokens are captured from the
/// constructor argument only; assigning a different stream here does not re-capture them.
/// </summary>
public TokenStream Prefix
{
    get;
    set;
}
77 
/// <summary>
/// The stream whose tokens are emitted once the prefix stream has no more tokens.
/// </summary>
public TokenStream Suffix
{
    get;
    set;
}
79 
/// <summary>
/// Advances to the next token: prefix tokens are emitted until the prefix stream is
/// exhausted, after which suffix tokens are emitted once they have been passed through
/// <see cref="UpdateSuffixToken"/>.
/// </summary>
/// <returns>true if a token was produced; false once the suffix stream has no more tokens</returns>
public override sealed bool IncrementToken()
{
    if (!_prefixExhausted)
    {
        Token prefixToken = GetNextPrefixInputToken(_reusableToken);
        if (prefixToken != null)
        {
            // Remember this token so the suffix tokens can later be adjusted against it.
            _previousPrefixToken.Reinit(prefixToken);

            // Clone the payload so the remembered token holds its own copy.
            Payload payload = _previousPrefixToken.Payload;
            if (payload != null)
            {
                _previousPrefixToken.Payload = (Payload) payload.Clone();
            }

            SetCurrentToken(prefixToken);
            return true;
        }

        // No more prefix tokens: fall through to the suffix stream from now on.
        _prefixExhausted = true;
    }

    Token suffixToken = GetNextSuffixInputToken(_reusableToken);
    if (suffixToken == null)
    {
        return false;
    }

    SetCurrentToken(UpdateSuffixToken(suffixToken, _previousPrefixToken));
    return true;
}
113 
/// <summary>
/// Publishes <paramref name="token"/> through this stream's attributes.
/// Does nothing when <paramref name="token"/> is null.
/// </summary>
/// <param name="token">the token whose values are copied into the attributes</param>
private void SetCurrentToken(Token token)
{
    if (token == null)
    {
        return;
    }

    ClearAttributes();

    // Term text.
    var termChars = token.TermBuffer();
    var termLength = token.TermLength();
    _termAtt.SetTermBuffer(termChars, 0, termLength);

    // Position and offsets.
    _posIncrAtt.PositionIncrement = token.PositionIncrement;
    _offsetAtt.SetOffset(token.StartOffset, token.EndOffset);

    // Remaining metadata.
    _flagsAtt.Flags = token.Flags;
    _typeAtt.Type = token.Type;
    _payloadAtt.Payload = token.Payload;
}
125 
/// <summary>
/// Advances the prefix stream and copies its current attribute values into
/// <paramref name="token"/>.
/// </summary>
/// <param name="token">the token to fill</param>
/// <returns><paramref name="token"/>, or null when the prefix stream has no more tokens</returns>
private Token GetNextPrefixInputToken(Token token)
{
    if (Prefix.IncrementToken())
    {
        token.SetTermBuffer(_pTermAtt.TermBuffer(), 0, _pTermAtt.TermLength());
        token.PositionIncrement = _pPosIncrAtt.PositionIncrement;
        token.Flags = _pFlagsAtt.Flags;
        token.SetOffset(_pOffsetAtt.StartOffset, _pOffsetAtt.EndOffset);
        token.Type = _pTypeAtt.Type;
        token.Payload = _pPayloadAtt.Payload;
        return token;
    }

    return null;
}
137 
/// <summary>
/// Advances the suffix stream and copies the current values of this stream's
/// attributes into <paramref name="token"/>.
/// </summary>
/// <param name="token">the token to fill</param>
/// <returns><paramref name="token"/>, or null when the suffix stream has no more tokens</returns>
private Token GetNextSuffixInputToken(Token token)
{
    if (Suffix.IncrementToken())
    {
        token.SetTermBuffer(_termAtt.TermBuffer(), 0, _termAtt.TermLength());
        token.PositionIncrement = _posIncrAtt.PositionIncrement;
        token.Flags = _flagsAtt.Flags;
        token.SetOffset(_offsetAtt.StartOffset, _offsetAtt.EndOffset);
        token.Type = _typeAtt.Type;
        token.Payload = _payloadAtt.Payload;
        return token;
    }

    return null;
}
149 
/// <summary>
/// Adjusts a suffix token relative to the last prefix token. This default implementation
/// shifts the suffix token's start and end offsets by the last prefix token's end offset.
/// </summary>
/// <param name="suffixToken">a token from the suffix stream</param>
/// <param name="lastPrefixToken">the last token from the prefix stream</param>
/// <returns>consumer token</returns>
public virtual Token UpdateSuffixToken(Token suffixToken, Token lastPrefixToken)
{
    int shift = lastPrefixToken.EndOffset;
    suffixToken.StartOffset += shift;
    suffixToken.EndOffset += shift;
    return suffixToken;
}
162 
/// <summary>
/// Disposes the prefix and suffix streams.
/// </summary>
/// <param name="disposing">
/// true when managed resources should be released. Following the standard .NET dispose
/// pattern, nothing is done when this is false, because other managed objects must not
/// be touched on that path.
/// </param>
protected override void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    try
    {
        // Prefix has a public setter, so it may be null (Reset guards against this as well).
        if (Prefix != null)
        {
            Prefix.Dispose();
        }
    }
    finally
    {
        // Release the suffix stream even if disposing the prefix stream threw.
        if (Suffix != null)
        {
            Suffix.Dispose();
        }
    }
}
168 
/// <summary>
/// Resets this stream and whichever of the prefix and suffix streams are set. When a
/// prefix stream is present, token production starts over from the prefix.
/// </summary>
public override void Reset()
{
    base.Reset();

    TokenStream prefixStream = Prefix;
    if (prefixStream != null)
    {
        _prefixExhausted = false;
        prefixStream.Reset();
    }

    TokenStream suffixStream = Suffix;
    if (suffixStream != null)
    {
        suffixStream.Reset();
    }
}
182  }
183 }