Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
DelimitedPayloadTokenFilter.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using Lucene.Net.Analysis.Tokenattributes;
19 
namespace Lucene.Net.Analysis.Payloads
{
    /// <summary>
    /// Splits each incoming term at a delimiter character: the characters before the
    /// delimiter remain the token, and the characters after it become the token's payload.
    /// <para>
    /// For example, if the delimiter is '|', then for the string "foo|bar", "foo" is the
    /// token and "bar" is the payload.
    /// </para>
    /// <para>
    /// A <see cref="PayloadEncoder"/> converts the payload characters into the stored
    /// payload (from characters to bytes).
    /// </para>
    /// <para>
    /// Note: make sure your Tokenizer doesn't split on the delimiter, or this won't work.
    /// </para>
    /// </summary>
    /// <seealso cref="PayloadEncoder"/>
    // NOTE(review): the class declaration and the signature of the single-argument
    // constructor were missing from this listing and have been reconstructed from the
    // remaining members (constructor name, ": base(input)", ": this(input, ...)").
    // Confirm the exact modifiers (e.g. "sealed") against the upstream file.
    public class DelimitedPayloadTokenFilter : TokenFilter
    {
        /// <summary>Delimiter used when none is supplied explicitly.</summary>
        public static readonly char DEFAULT_DELIMITER = '|';

        /// <summary>Character that separates the token text from its payload text.</summary>
        internal char delimiter = DEFAULT_DELIMITER;

        /// <summary>Term attribute of this stream; holds the character buffer that is scanned and trimmed.</summary>
        internal ITermAttribute termAtt;

        /// <summary>Payload attribute of this stream; receives the encoded payload, or null.</summary>
        internal IPayloadAttribute payAtt;

        /// <summary>Encoder that turns the characters after the delimiter into a payload.</summary>
        internal PayloadEncoder encoder;

        /// <summary>
        /// Construct a token stream filtering the given input, using
        /// <see cref="DEFAULT_DELIMITER"/> and a new <see cref="IdentityEncoder"/>.
        /// </summary>
        /// <param name="input">The token stream to filter.</param>
        public DelimitedPayloadTokenFilter(TokenStream input)
            : this(input, DEFAULT_DELIMITER, new IdentityEncoder())
        {
        }

        /// <summary>
        /// Construct a token stream filtering the given input with an explicit
        /// delimiter and payload encoder.
        /// </summary>
        /// <param name="input">The token stream to filter.</param>
        /// <param name="delimiter">Character separating the token from its payload.</param>
        /// <param name="encoder">Encoder applied to the characters following the delimiter.</param>
        public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder)
            : base(input)
        {
            termAtt = AddAttribute<ITermAttribute>();
            payAtt = AddAttribute<IPayloadAttribute>();
            this.delimiter = delimiter;
            this.encoder = encoder;
        }

        /// <summary>
        /// Advances the wrapped stream by one token and splits that token at the first
        /// occurrence of the delimiter.
        /// </summary>
        /// <returns>
        /// <c>true</c> if the wrapped stream produced a token; <c>false</c> otherwise.
        /// </returns>
        public override bool IncrementToken()
        {
            if (!input.IncrementToken())
            {
                return false;
            }

            char[] buffer = termAtt.TermBuffer();
            int length = termAtt.TermLength();

            // Look for the delimiter. Only the first occurrence matters: once it is found
            // the whole token/payload split is known, so we can stop scanning.
            for (int i = 0; i < length; i++)
            {
                if (buffer[i] == delimiter)
                {
                    // Characters [0, i) stay as the term; characters after the delimiter,
                    // [i + 1, length), are handed to the encoder.
                    termAtt.SetTermBuffer(buffer, 0, i);
                    payAtt.Payload = encoder.Encode(buffer, i + 1, length - (i + 1));
                    return true;
                }
            }

            // No delimiter: the term is left unchanged and the payload is cleared.
            payAtt.Payload = null;
            return true;
        }
    }
}