Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
ReverseStringFilter.cs
Go to the documentation of this file.
1 /*
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements. See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership. The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License. You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied. See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20 */
21 
22 using System;
23 using System.Collections.Generic;
24 using System.Linq;
25 using System.Text;
26 using Lucene.Net.Analysis.Tokenattributes;
27 
namespace Lucene.Net.Analysis.Reverse
{
    /// <summary>
    /// Reverses the term text of every token, for example "country" =&gt; "yrtnuoc".
    /// <para>
    /// If a <c>marker</c> is supplied, each reversed token is additionally
    /// prefixed with that character. For example, with a marker of &#x5C;u0001,
    /// "country" =&gt; "&#x5C;u0001yrtnuoc". This is useful when implementing
    /// efficient leading wildcard search.
    /// </para>
    /// </summary>
    /// <remarks>
    /// Reversal operates on individual UTF-16 <c>char</c> values; the two halves
    /// of a surrogate pair are therefore swapped like any other characters.
    /// </remarks>
    public sealed class ReverseStringFilter : TokenFilter
    {
        private readonly ITermAttribute termAtt;
        private readonly char marker;

        // Sentinel meaning "do not mark". Because IncrementToken compares the
        // configured marker against this value, passing U+FFFF as a marker
        // yields unmarked tokens.
        private const char NOMARKER = '\uFFFF';

        /// <summary>
        /// Example marker character: U+0001 (START OF HEADING).
        /// </summary>
        public const char START_OF_HEADING_MARKER = '\u0001';

        /// <summary>
        /// Example marker character: U+001F (INFORMATION SEPARATOR ONE).
        /// </summary>
        public const char INFORMATION_SEPARATOR_MARKER = '\u001F';

        /// <summary>
        /// Example marker character: U+EC00 (PRIVATE USE AREA: EC00).
        /// </summary>
        public const char PUA_EC00_MARKER = '\uEC00';

        /// <summary>
        /// Example marker character: U+200F (RIGHT-TO-LEFT MARK).
        /// </summary>
        public const char RTL_DIRECTION_MARKER = '\u200F';

        /// <summary>
        /// Creates a new <see cref="ReverseStringFilter"/> that reverses all
        /// tokens in the supplied <see cref="TokenStream"/>.
        /// <para>
        /// The reversed tokens will not be marked.
        /// </para>
        /// </summary>
        /// <param name="_in"><see cref="TokenStream"/> to filter.</param>
        public ReverseStringFilter(TokenStream _in)
            : this(_in, NOMARKER)
        {
        }

        /// <summary>
        /// Creates a new <see cref="ReverseStringFilter"/> that reverses and
        /// marks all tokens in the supplied <see cref="TokenStream"/>.
        /// <para>
        /// The reversed tokens will be prepended (marked) by the
        /// <paramref name="marker"/> character.
        /// </para>
        /// </summary>
        /// <param name="_in"><see cref="TokenStream"/> to filter.</param>
        /// <param name="marker">A character used to mark reversed tokens.</param>
        public ReverseStringFilter(TokenStream _in, char marker)
            : base(_in)
        {
            this.marker = marker;
            termAtt = AddAttribute<ITermAttribute>();
        }

        /// <summary>
        /// Advances the wrapped stream by one token and reverses that token's
        /// term text in place, adding the marker when one is configured.
        /// </summary>
        /// <returns>
        /// <c>true</c> if the wrapped stream produced a token; otherwise <c>false</c>.
        /// </returns>
        public override bool IncrementToken()
        {
            if (!input.IncrementToken())
            {
                return false;
            }

            int len = termAtt.TermLength();
            if (marker != NOMARKER)
            {
                // Append the marker after the last character; the reversal
                // below moves it to the front of the term.
                len++;
                termAtt.ResizeTermBuffer(len);
                termAtt.TermBuffer()[len - 1] = marker;
            }

            Reverse(termAtt.TermBuffer(), len);
            termAtt.SetTermLength(len);
            return true;
        }

        /// <summary>
        /// Returns a new string containing the characters of
        /// <paramref name="input"/> in reverse order.
        /// </summary>
        /// <param name="input">The string to reverse.</param>
        /// <returns>The reversed string.</returns>
        public static string Reverse(string input)
        {
            char[] charInput = input.ToCharArray();
            Reverse(charInput);
            return new string(charInput);
        }

        /// <summary>
        /// Reverses the whole of <paramref name="buffer"/> in place.
        /// </summary>
        /// <param name="buffer">The characters to reverse.</param>
        public static void Reverse(char[] buffer)
        {
            Reverse(buffer, buffer.Length);
        }

        /// <summary>
        /// Reverses the first <paramref name="len"/> characters of
        /// <paramref name="buffer"/> in place.
        /// </summary>
        /// <param name="buffer">The characters to reverse.</param>
        /// <param name="len">Number of characters, counted from index 0, to reverse.</param>
        public static void Reverse(char[] buffer, int len)
        {
            Reverse(buffer, 0, len);
        }

        /// <summary>
        /// Reverses <paramref name="len"/> characters of
        /// <paramref name="buffer"/> in place, beginning at index
        /// <paramref name="start"/>. Nothing happens when
        /// <paramref name="len"/> is 1 or less.
        /// </summary>
        /// <param name="buffer">The characters to reverse.</param>
        /// <param name="start">Index of the first character of the range.</param>
        /// <param name="len">Number of characters in the range.</param>
        public static void Reverse(char[] buffer, int start, int len)
        {
            if (len <= 1)
            {
                return;
            }

            // Walk two indices towards each other. This avoids computing
            // "start * 2 + len - i - 1", which can overflow int for large
            // start values because C# arithmetic is unchecked by default.
            int lo = start;
            int hi = start + len - 1;
            while (lo < hi)
            {
                char c = buffer[lo];
                buffer[lo] = buffer[hi];
                buffer[hi] = c;
                lo++;
                hi--;
            }
        }
    }
}