Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
MappingCharFilter.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System.Collections.Generic;
19 
20 namespace Lucene.Net.Analysis
21 {
22 
23  /// <summary> Simplistic <see cref="CharFilter" /> that applies the mappings
24  /// contained in a <see cref="NormalizeCharMap" /> to the character
25  /// stream and corrects the resulting changes to the
26  /// offsets.
27  /// </summary>
29  {
30  private readonly NormalizeCharMap normMap;
31  private LinkedList<char> buffer;
32  private System.String replacement;
33  private int charPointer;
34  private int nextCharCounter;
35 
36  /// Default constructor that takes a <see cref="CharStream" />.
38  : base(@in)
39  {
40  this.normMap = normMap;
41  }
42 
/// <summary>
/// Convenience constructor that accepts a plain <see cref="System.IO.TextReader" />.
/// The reader is passed through <c>CharReader.Get</c> before being handed to the base class.
/// </summary>
/// <param name="normMap">Mapping table stored for later use by this filter.</param>
/// <param name="in">Reader supplying the characters to be filtered.</param>
public MappingCharFilter(NormalizeCharMap normMap, System.IO.TextReader @in)
    : base(CharReader.Get(@in))
{
    this.normMap = normMap;
}
49 
/// <summary>
/// Returns the next character of the mapped stream, or -1 once
/// <c>NextChar</c> reports -1.
/// </summary>
/// <remarks>
/// Characters of a pending replacement string are handed out first. Otherwise the
/// next source character is looked up in the top-level submap of the mapping table
/// and, when a match is found, the match's replacement string becomes pending and
/// the loop starts over. A non-zero <c>diff</c> on the match is reported through
/// <c>AddOffCorrectMap</c>.
/// </remarks>
/// <returns>The next character as an <c>int</c>, or -1.</returns>
public override int Read()
{
    for (;;)
    {
        // Drain any replacement text that is still pending.
        bool hasPending = replacement != null && charPointer < replacement.Length;
        if (hasPending)
        {
            char mapped = replacement[charPointer];
            charPointer++;
            return mapped;
        }

        int head = NextChar();
        if (head == -1)
        {
            return -1;
        }

        // Without a top-level submap nothing can match; pass the character through.
        NormalizeCharMap candidate = null;
        if (normMap.submap != null)
        {
            candidate = normMap.submap[(char) head];
        }
        if (candidate == null)
        {
            return head;
        }

        NormalizeCharMap matched = Match(candidate);
        if (matched == null)
        {
            return head;
        }

        // Start emitting the replacement on the next loop iteration.
        replacement = matched.normStr;
        charPointer = 0;

        int delta = matched.diff;
        if (delta == 0)
        {
            continue;
        }

        // Snapshot taken once: the calls below must all use the value from before
        // any of this match's corrections were registered.
        int cumulative = LastCumulativeDiff;
        if (delta > 0)
        {
            AddOffCorrectMap(nextCharCounter - delta - cumulative, cumulative + delta);
        }
        else
        {
            // Negative diff: one correction entry per unit of the difference.
            for (int step = 0; step < -delta; step++)
            {
                AddOffCorrectMap(nextCharCounter + step - cumulative, cumulative - 1 - step);
            }
        }
    }
}
87 
/// <summary>
/// Fetches the next raw character, taking it from the front of the push-back
/// buffer when that buffer holds anything and from <c>input</c> otherwise.
/// The character counter is incremented on every call.
/// </summary>
/// <returns>The character as an <c>int</c>, or whatever <c>input.Read()</c> returns.</returns>
private int NextChar()
{
    nextCharCounter++;

    bool nothingBuffered = buffer == null || buffer.Count == 0;
    if (nothingBuffered)
    {
        return input.Read();
    }

    char front = buffer.First.Value;
    buffer.RemoveFirst();
    return front;
}
99 
/// <summary>
/// Puts a character back at the front of the push-back buffer and decrements
/// the character counter. The buffer is created on first use.
/// </summary>
/// <param name="c">Character to put back; it is narrowed to <c>char</c>.</param>
private void PushChar(int c)
{
    --nextCharCounter;
    buffer = buffer ?? new LinkedList<char>();
    buffer.AddFirst((char) c);
}
109 
/// <summary>
/// Appends a character to the end of the push-back buffer, creating the buffer
/// on first use. The character counter is left untouched.
/// </summary>
/// <param name="c">Character to append; it is narrowed to <c>char</c>.</param>
private void PushLastChar(int c)
{
    buffer = buffer ?? new LinkedList<char>();
    buffer.AddLast((char) c);
}
118 
/// <summary>
/// Recursively follows the submaps of <paramref name="map" /> using upcoming
/// characters and returns the deepest node reached that has a non-null
/// <c>normStr</c>.
/// </summary>
/// <param name="map">Node of the mapping table to continue matching from.</param>
/// <returns>
/// The node found through a deeper match when there is one; otherwise
/// <paramref name="map" /> itself if its <c>normStr</c> is non-null; otherwise <c>null</c>.
/// </returns>
private NormalizeCharMap Match(NormalizeCharMap map)
{
    if (map.submap != null)
    {
        int next = NextChar();
        if (next != -1)
        {
            NormalizeCharMap child = map.submap[(char) next];
            NormalizeCharMap deeper = child != null ? Match(child) : null;
            if (deeper != null)
            {
                return deeper;
            }

            // The character did not extend the match, so hand it back.
            PushChar(next);
        }
    }

    return map.normStr != null ? map : null;
}
144 
/// <summary>
/// Fills part of <paramref name="cbuf" /> with mapped characters.
/// </summary>
/// <remarks>
/// Up to <paramref name="len" /> characters are first read from <c>input</c> and
/// appended to the push-back buffer. Mapped characters are then pulled one at a
/// time through <see cref="Read()" /> until <paramref name="len" /> characters have
/// been stored or <see cref="Read()" /> returns -1.
/// </remarks>
/// <param name="cbuf">Destination array.</param>
/// <param name="off">Index in <paramref name="cbuf" /> of the first character written.</param>
/// <param name="len">Maximum number of characters to store.</param>
/// <returns>The number of characters stored, or -1 when none were stored.</returns>
public override int Read(System.Char[] cbuf, int off, int len)
{
    char[] prefetched = new char[len];
    int fetched = input.Read(prefetched, 0, len);

    // Loop body is skipped when nothing was fetched.
    for (int k = 0; k < fetched; k++)
    {
        PushLastChar(prefetched[k]);
    }

    int end = off + len;
    int cursor = off;
    while (cursor < end)
    {
        int c = Read();
        if (c == -1)
        {
            break;
        }
        cbuf[cursor] = (char) c;
        cursor++;
    }

    int stored = cursor - off;
    if (stored == 0)
    {
        return -1;
    }
    return stored;
}
165  }
166 }