Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
SnowballProgram.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 using System;
18 using System.Text;
19 
20 namespace SF.Snowball
21 {
22  /// <summary>
23  /// This is rev 500 of the Snowball SVN trunk,
24  /// with one modification:
25  /// the class was made abstract and an abstract Stem() method was introduced to avoid expensive reflection in the filter class.
26  /// </summary>
27  public abstract class SnowballProgram
28  {
/// <summary>
/// Creates the program with a fresh working buffer and then resets the
/// cursor, limits and slice markers by calling SetCurrent with an empty string.
/// </summary>
protected internal SnowballProgram()
{
    current = new StringBuilder();
    SetCurrent(System.String.Empty);
}
34 
35  public abstract bool Stem(); // Stemming entry point supplied by each concrete subclass; declared abstract so callers can invoke it directly instead of through reflection. NOTE(review): presumably stems the text set via SetCurrent and returns a success flag - confirm against the generated stemmers.
36 
/// <summary>
/// Replaces the working text with <paramref name="value"/> and resets every
/// position marker: the cursor and backward limit go to 0, the forward limit
/// goes to the new length, and the slice markers span the whole text.
/// </summary>
/// <param name="value">The new text to process.</param>
public virtual void SetCurrent(System.String value)
{
    // Empty the existing builder in place rather than allocating a new one.
    current.Length = 0;
    current.Append(value);

    int length = current.Length;
    cursor = 0;
    limit = length;
    limit_backward = 0;
    bra = 0;
    ket = length;
}
49 
/// <summary>
/// Returns the working text as a string and swaps in a brand-new, empty
/// working buffer.
/// </summary>
/// <returns>A snapshot of the text held before the buffer was replaced.</returns>
public virtual System.String GetCurrent()
{
    string snapshot = current.ToString();

    // Deliberately drop the old builder instead of reusing it: if a caller
    // keeps a reference to the returned buffer (for example through a
    // string conversion that does not force a copy), a reused buffer would
    // never shrink and could waste a large amount of memory.
    // Thanks to Wolfram Esser for spotting this problem.
    current = new StringBuilder();

    return snapshot;
}
63 
/// <summary>The text currently being processed.</summary>
protected internal StringBuilder current;

/// <summary>Position in <c>current</c> at which matching takes place.</summary>
protected internal int cursor;

/// <summary>Upper bound for forward matching; forward tests fail once the cursor reaches it.</summary>
protected internal int limit;

/// <summary>Lower bound for backward matching; backward tests fail once the cursor reaches it.</summary>
protected internal int limit_backward;

/// <summary>Start index of the current slice.</summary>
protected internal int bra;

/// <summary>End index (exclusive) of the current slice.</summary>
protected internal int ket;
72 
/// <summary>
/// Adopts the complete state of <paramref name="other"/>. The text buffer is
/// taken over by reference, so both programs share the same builder
/// instance afterwards.
/// </summary>
/// <param name="other">The program whose state is copied.</param>
protected internal virtual void copy_from(SnowballProgram other)
{
    // Shared buffer (reference copy, not a clone).
    current = other.current;

    // Matching position and bounds.
    cursor = other.cursor;
    limit = other.limit;
    limit_backward = other.limit_backward;

    // Slice markers.
    bra = other.bra;
    ket = other.ket;
}
82 
/// <summary>
/// Forward test: succeeds when the character at the cursor lies within
/// [min, max] and its bit is set in the bitmap <paramref name="s"/>.
/// On success the cursor advances by one; otherwise it is left untouched.
/// </summary>
/// <param name="s">Bitmap with one bit per character, indexed relative to <paramref name="min"/>.</param>
/// <param name="min">Lowest character code covered by the bitmap.</param>
/// <param name="max">Highest character code covered by the bitmap.</param>
/// <returns>true if the character belongs to the grouping.</returns>
protected internal virtual bool in_grouping(char[] s, int min, int max)
{
    if (cursor < limit)
    {
        char ch = current[cursor];
        if (ch <= max && ch >= min)
        {
            // Offset of the character inside the bitmap (8 bits per entry).
            char offset = (char) (ch - (char) (min));
            int bit = 0x1 << (offset & 0x7);
            if ((s[offset >> 3] & bit) != 0)
            {
                cursor++;
                return true;
            }
        }
    }
    return false;
}
96 
/// <summary>
/// Backward test: succeeds when the character just before the cursor lies
/// within [min, max] and its bit is set in the bitmap <paramref name="s"/>.
/// On success the cursor moves back by one; otherwise it is left untouched.
/// </summary>
/// <param name="s">Bitmap with one bit per character, indexed relative to <paramref name="min"/>.</param>
/// <param name="min">Lowest character code covered by the bitmap.</param>
/// <param name="max">Highest character code covered by the bitmap.</param>
/// <returns>true if the character belongs to the grouping.</returns>
protected internal virtual bool in_grouping_b(char[] s, int min, int max)
{
    if (cursor > limit_backward)
    {
        char ch = current[cursor - 1];
        if (ch <= max && ch >= min)
        {
            // Offset of the character inside the bitmap (8 bits per entry).
            char offset = (char) (ch - (char) (min));
            int bit = 0x1 << (offset & 0x7);
            if ((s[offset >> 3] & bit) != 0)
            {
                cursor--;
                return true;
            }
        }
    }
    return false;
}
110 
/// <summary>
/// Forward test: succeeds when the character at the cursor is NOT part of
/// the grouping, i.e. it lies outside [min, max] or its bit in the bitmap
/// <paramref name="s"/> is clear. On success the cursor advances by one.
/// </summary>
/// <param name="s">Bitmap with one bit per character, indexed relative to <paramref name="min"/>.</param>
/// <param name="min">Lowest character code covered by the bitmap.</param>
/// <param name="max">Highest character code covered by the bitmap.</param>
/// <returns>true if the character is outside the grouping.</returns>
protected internal virtual bool out_grouping(char[] s, int min, int max)
{
    if (cursor >= limit)
    {
        return false;
    }

    char ch = current[cursor];
    bool member = false;
    if (ch <= max && ch >= min)
    {
        // Only characters inside the covered range can be looked up.
        char offset = (char) (ch - (char) (min));
        member = (s[offset >> 3] & (0x1 << (offset & 0x7))) != 0;
    }

    if (member)
    {
        return false;
    }
    cursor++;
    return true;
}
129 
/// <summary>
/// Backward test: succeeds when the character just before the cursor is NOT
/// part of the grouping, i.e. it lies outside [min, max] or its bit in the
/// bitmap <paramref name="s"/> is clear. On success the cursor moves back by one.
/// </summary>
/// <param name="s">Bitmap with one bit per character, indexed relative to <paramref name="min"/>.</param>
/// <param name="min">Lowest character code covered by the bitmap.</param>
/// <param name="max">Highest character code covered by the bitmap.</param>
/// <returns>true if the character is outside the grouping.</returns>
protected internal virtual bool out_grouping_b(char[] s, int min, int max)
{
    if (cursor <= limit_backward)
    {
        return false;
    }

    char ch = current[cursor - 1];
    bool member = false;
    if (ch <= max && ch >= min)
    {
        // Only characters inside the covered range can be looked up.
        char offset = (char) (ch - (char) (min));
        member = (s[offset >> 3] & (0x1 << (offset & 0x7))) != 0;
    }

    if (member)
    {
        return false;
    }
    cursor--;
    return true;
}
148 
/// <summary>
/// Forward test: succeeds when the character at the cursor lies within
/// [min, max]; on success the cursor advances by one.
/// </summary>
/// <param name="min">Lowest accepted character code.</param>
/// <param name="max">Highest accepted character code.</param>
/// <returns>true if the character is inside the range.</returns>
protected internal virtual bool in_range(int min, int max)
{
    if (cursor < limit)
    {
        char ch = current[cursor];
        if (ch <= max && ch >= min)
        {
            cursor++;
            return true;
        }
    }
    return false;
}
159 
/// <summary>
/// Backward test: succeeds when the character just before the cursor lies
/// within [min, max]; on success the cursor moves back by one.
/// </summary>
/// <param name="min">Lowest accepted character code.</param>
/// <param name="max">Highest accepted character code.</param>
/// <returns>true if the character is inside the range.</returns>
protected internal virtual bool in_range_b(int min, int max)
{
    if (cursor > limit_backward)
    {
        char ch = current[cursor - 1];
        if (ch <= max && ch >= min)
        {
            cursor--;
            return true;
        }
    }
    return false;
}
170 
/// <summary>
/// Forward test: succeeds when the character at the cursor lies outside
/// [min, max]; on success the cursor advances by one.
/// </summary>
/// <param name="min">Lowest character code of the excluded range.</param>
/// <param name="max">Highest character code of the excluded range.</param>
/// <returns>true if the character is outside the range.</returns>
protected internal virtual bool out_range(int min, int max)
{
    if (cursor >= limit)
    {
        return false;
    }

    char ch = current[cursor];
    bool inside = ch <= max && ch >= min;
    if (inside)
    {
        return false;
    }
    cursor++;
    return true;
}
181 
/// <summary>
/// Backward test: succeeds when the character just before the cursor lies
/// outside [min, max]; on success the cursor moves back by one.
/// </summary>
/// <param name="min">Lowest character code of the excluded range.</param>
/// <param name="max">Highest character code of the excluded range.</param>
/// <returns>true if the character is outside the range.</returns>
protected internal virtual bool out_range_b(int min, int max)
{
    if (cursor <= limit_backward)
    {
        return false;
    }

    char ch = current[cursor - 1];
    bool inside = ch <= max && ch >= min;
    if (inside)
    {
        return false;
    }
    cursor--;
    return true;
}
192 
/// <summary>
/// Forward test: checks whether the first <paramref name="s_size"/> characters
/// of <paramref name="s"/> occur at the cursor. On success the cursor is
/// advanced past the matched text.
/// </summary>
/// <param name="s_size">Number of characters of <paramref name="s"/> to compare.</param>
/// <param name="s">The text to look for.</param>
/// <returns>true if the text matches.</returns>
protected internal virtual bool eq_s(int s_size, System.String s)
{
    // Not enough room left before the forward limit.
    if (limit - cursor < s_size)
    {
        return false;
    }

    int offset = 0;
    while (offset != s_size)
    {
        if (current[cursor + offset] != s[offset])
        {
            return false;
        }
        offset++;
    }

    cursor += s_size;
    return true;
}
206 
/// <summary>
/// Backward test: checks whether the first <paramref name="s_size"/> characters
/// of <paramref name="s"/> occur immediately before the cursor. On success
/// the cursor is moved back to the start of the matched text.
/// </summary>
/// <param name="s_size">Number of characters of <paramref name="s"/> to compare.</param>
/// <param name="s">The text to look for.</param>
/// <returns>true if the text matches.</returns>
protected internal virtual bool eq_s_b(int s_size, System.String s)
{
    // Not enough room left before the backward limit.
    if (cursor - limit_backward < s_size)
    {
        return false;
    }

    int start = cursor - s_size;
    int offset = 0;
    while (offset != s_size)
    {
        if (current[start + offset] != s[offset])
        {
            return false;
        }
        offset++;
    }

    cursor = start;
    return true;
}
220 
/// <summary>
/// Forward test against the full contents of a builder; delegates to eq_s.
/// </summary>
/// <param name="s">Builder holding the text to look for.</param>
/// <returns>true if the text matches at the cursor.</returns>
protected internal virtual bool eq_v(System.Text.StringBuilder s)
{
    int length = s.Length;
    string text = s.ToString();
    return eq_s(length, text);
}
225 
/// <summary>
/// Backward test against the full contents of a builder; delegates to eq_s_b.
/// </summary>
/// <param name="s">Builder holding the text to look for.</param>
/// <returns>true if the text matches immediately before the cursor.</returns>
protected internal virtual bool eq_v_b(System.Text.StringBuilder s)
{
    int length = s.Length;
    string text = s.ToString();
    return eq_s_b(length, text);
}
230 
/// <summary>
/// Forward lookup: searches the first <paramref name="v_size"/> entries of
/// <paramref name="v"/> for an entry whose text matches at the cursor.
/// </summary>
/// <remarks>
/// A binary search narrows the table down to one candidate. That candidate,
/// followed by the chain reached through each entry's <c>substring_i</c>,
/// is then tried in turn. An entry is accepted when its whole text was
/// matched and either it has no attached method or the attached method
/// returns true.
/// NOTE(review): the binary search presumes the entries are ordered by
/// their text; the tables are built elsewhere - confirm against the
/// generated stemmers.
/// </remarks>
/// <param name="v">Table of candidate entries.</param>
/// <param name="v_size">Number of entries of <paramref name="v"/> to search.</param>
/// <returns>
/// The <c>result</c> of the accepted entry, with the cursor placed just after
/// the matched text; 0 when no entry is accepted (the cursor may then have
/// been moved by a rejected candidate).
/// </returns>
protected internal virtual int find_among(Among[] v, int v_size)
{
    int i = 0;
    int j = v_size;

    int c = cursor;
    int l = limit;

    // Number of characters already known to match at the lower (i) and
    // upper (j) ends of the search interval.
    int common_i = 0;
    int common_j = 0;

    bool first_key_inspected = false;

    while (true)
    {
        int k = i + ((j - i) >> 1);
        int diff = 0;
        int common = common_i < common_j ? common_i : common_j; // smaller
        Among w = v[k];
        for (int i2 = common; i2 < w.s_size; i2++)
        {
            if (c + common == l)
            {
                // Ran out of input: the input sorts before this entry.
                diff = -1;
                break;
            }
            diff = current[c + common] - w.s[i2];
            if (diff != 0)
            {
                break;
            }
            common++;
        }
        if (diff < 0)
        {
            j = k;
            common_j = common;
        }
        else
        {
            i = k;
            common_i = common;
        }
        if (j - i <= 1)
        {
            if (i > 0)
            {
                break; // v->s has been inspected
            }
            if (j == i)
            {
                break; // only one item in v
            }

            // - but now we need to go round once more to get
            // v->s inspected. This looks messy, but is actually
            // the optimal approach.

            if (first_key_inspected)
            {
                break;
            }
            first_key_inspected = true;
        }
    }

    while (true)
    {
        Among w = v[i];
        if (common_i >= w.s_size)
        {
            cursor = c + w.s_size;
            if (w.method == null)
            {
                return w.result;
            }

            bool res;
            try
            {
                System.Object resobj = w.method.Invoke(w.methodobject, new System.Object[0]);
                // The Java original compared toString() with "true". In .NET
                // Boolean.ToString() yields "True", so that comparison could
                // never succeed and every guarded entry was rejected. Test
                // the boxed boolean directly instead; a null or non-boolean
                // result counts as a rejection.
                res = resobj is bool && (bool) resobj;
            }
            catch (System.Reflection.TargetInvocationException)
            {
                res = false;
                // FIXME - debug message
            }
            catch (System.UnauthorizedAccessException)
            {
                res = false;
                // FIXME - debug message
            }

            // The invoked method may have moved the cursor; put it back
            // just after the matched text.
            cursor = c + w.s_size;
            if (res)
            {
                return w.result;
            }
        }

        // Fall back to the next candidate in the chain, if there is one.
        i = w.substring_i;
        if (i < 0)
        {
            return 0;
        }
    }
}
323 
/// <summary>
/// Backward lookup: searches the first <paramref name="v_size"/> entries of
/// <paramref name="v"/> for an entry whose text matches immediately before
/// the cursor, comparing characters from the end of each entry backwards.
/// </summary>
/// <remarks>
/// A binary search narrows the table down to one candidate. That candidate,
/// followed by the chain reached through each entry's <c>substring_i</c>,
/// is then tried in turn. An entry is accepted when its whole text was
/// matched and either it has no attached method or the attached method
/// returns true.
/// NOTE(review): the binary search presumes the entries are ordered by
/// their reversed text; the tables are built elsewhere - confirm against
/// the generated stemmers.
/// </remarks>
/// <param name="v">Table of candidate entries.</param>
/// <param name="v_size">Number of entries of <paramref name="v"/> to search.</param>
/// <returns>
/// The <c>result</c> of the accepted entry, with the cursor placed at the
/// start of the matched text; 0 when no entry is accepted (the cursor may
/// then have been moved by a rejected candidate).
/// </returns>
protected internal virtual int find_among_b(Among[] v, int v_size)
{
    int i = 0;
    int j = v_size;

    int c = cursor;
    int lb = limit_backward;

    // Number of characters already known to match at the lower (i) and
    // upper (j) ends of the search interval.
    int common_i = 0;
    int common_j = 0;

    bool first_key_inspected = false;

    while (true)
    {
        int k = i + ((j - i) >> 1);
        int diff = 0;
        int common = common_i < common_j ? common_i : common_j; // smaller
        Among w = v[k];
        for (int i2 = w.s_size - 1 - common; i2 >= 0; i2--)
        {
            if (c - common == lb)
            {
                // Ran out of input: the input sorts before this entry.
                diff = -1;
                break;
            }
            diff = current[c - 1 - common] - w.s[i2];
            if (diff != 0)
            {
                break;
            }
            common++;
        }
        if (diff < 0)
        {
            j = k;
            common_j = common;
        }
        else
        {
            i = k;
            common_i = common;
        }
        if (j - i <= 1)
        {
            if (i > 0)
            {
                break; // the first entry has been inspected
            }
            if (j == i)
            {
                break; // only one item in v
            }

            // One extra round is needed so that the first entry is
            // inspected as well.
            if (first_key_inspected)
            {
                break;
            }
            first_key_inspected = true;
        }
    }

    while (true)
    {
        Among w = v[i];
        if (common_i >= w.s_size)
        {
            cursor = c - w.s_size;
            if (w.method == null)
            {
                return w.result;
            }

            bool res;
            try
            {
                System.Object resobj = w.method.Invoke(w.methodobject, new System.Object[0]);
                // The Java original compared toString() with "true". In .NET
                // Boolean.ToString() yields "True", so that comparison could
                // never succeed and every guarded entry was rejected. Test
                // the boxed boolean directly instead; a null or non-boolean
                // result counts as a rejection.
                res = resobj is bool && (bool) resobj;
            }
            catch (System.Reflection.TargetInvocationException)
            {
                res = false;
                // FIXME - debug message
            }
            catch (System.UnauthorizedAccessException)
            {
                res = false;
                // FIXME - debug message
            }

            // The invoked method may have moved the cursor; put it back
            // at the start of the matched text.
            cursor = c - w.s_size;
            if (res)
            {
                return w.result;
            }
        }

        // Fall back to the next candidate in the chain, if there is one.
        i = w.substring_i;
        if (i < 0)
        {
            return 0;
        }
    }
}
413 
/// <summary>
/// Replaces the characters between <paramref name="c_bra"/> and
/// <paramref name="c_ket"/> in the working text with <paramref name="s"/>,
/// then adjusts the forward limit and the cursor for the change in length.
/// </summary>
/// <param name="c_bra">Start index of the range to replace.</param>
/// <param name="c_ket">End index (exclusive) of the range to replace.</param>
/// <param name="s">Replacement text.</param>
/// <returns>The change in length: <c>s.Length - (c_ket - c_bra)</c>.</returns>
protected internal virtual int replace_s(int c_bra, int c_ket, System.String s)
{
    int adjustment = s.Length - (c_ket - c_bra);

    // Edit the range named by the parameters. The previous code used the
    // bra/ket fields instead, so insert() with a different range modified
    // the wrong span, and StringBuilder.Replace throws for an empty range
    // (a pure insertion) because its search string may not be empty.
    // Clamping to the buffer keeps the earlier fallback in which a range
    // starting at or beyond the end of the text simply appends.
    int start = System.Math.Min(c_bra, current.Length);
    int end = System.Math.Min(System.Math.Max(c_ket, start), current.Length);
    current.Remove(start, end - start);
    current.Insert(start, s);

    limit += adjustment;
    if (cursor >= c_ket)
    {
        // Cursor was at or after the replaced range: shift it with the text.
        cursor += adjustment;
    }
    else if (cursor > c_bra)
    {
        // Cursor was inside the replaced range: park it at the range start.
        cursor = c_bra;
    }
    return adjustment;
}
431 
/// <summary>
/// Sanity-checks the slice markers: 0 &lt;= bra &lt;= ket &lt;= limit &lt;= length of
/// the working text. A violation is only reported on standard error;
/// execution continues.
/// </summary>
protected internal virtual void slice_check()
{
    bool wellFormed = bra >= 0 && bra <= ket && ket <= limit && limit <= current.Length;
    if (wellFormed)
    {
        return;
    }

    System.Console.Error.WriteLine("faulty slice operation");
    // FIXME: report error somehow. (The C original printed the message,
    // dumped the program state via debug(z, -1, 0) and called exit(1).)
}
446 
/// <summary>
/// Replaces the current slice (bra..ket) with <paramref name="s"/> after
/// running the slice sanity check.
/// </summary>
/// <param name="s">Replacement text for the slice.</param>
protected internal virtual void slice_from(System.String s)
{
    slice_check();

    // The length adjustment returned by replace_s is not needed here.
    replace_s(bra, ket, s);
}
452 
/// <summary>
/// Replaces the current slice with the contents of a builder; delegates to
/// the string overload.
/// </summary>
/// <param name="s">Builder holding the replacement text.</param>
protected internal virtual void slice_from(System.Text.StringBuilder s)
{
    string replacement = s.ToString();
    slice_from(replacement);
}
457 
/// <summary>
/// Deletes the current slice by replacing it with an empty string.
/// </summary>
protected internal virtual void slice_del()
{
    slice_from(System.String.Empty);
}
462 
/// <summary>
/// Replaces the range <paramref name="c_bra"/>..<paramref name="c_ket"/> with
/// <paramref name="s"/> via replace_s and shifts the slice markers that lie
/// at or after <paramref name="c_bra"/> by the resulting length change.
/// </summary>
/// <param name="c_bra">Start index of the range to replace.</param>
/// <param name="c_ket">End index (exclusive) of the range to replace.</param>
/// <param name="s">Text to put in place of the range.</param>
protected internal virtual void insert(int c_bra, int c_ket, System.String s)
{
    int delta = replace_s(c_bra, c_ket, s);

    // Decide for both markers first; each comparison only involves its own marker.
    bool shiftBra = c_bra <= bra;
    bool shiftKet = c_bra <= ket;

    if (shiftBra)
    {
        bra += delta;
    }
    if (shiftKet)
    {
        ket += delta;
    }
}
471 
/// <summary>
/// Builder-based variant of insert; delegates to the string overload.
/// </summary>
/// <param name="c_bra">Start index of the range to replace.</param>
/// <param name="c_ket">End index (exclusive) of the range to replace.</param>
/// <param name="s">Builder holding the text to put in place of the range.</param>
protected internal virtual void insert(int c_bra, int c_ket, System.Text.StringBuilder s)
{
    string text = s.ToString();
    insert(c_bra, c_ket, text);
}
476 
/// <summary>
/// Copies the current slice (bra..ket) into the supplied builder, replacing
/// whatever the builder held before.
/// </summary>
/// <param name="s">Builder that receives the slice.</param>
/// <returns>The same builder instance, <paramref name="s"/>.</returns>
protected internal virtual System.Text.StringBuilder slice_to(System.Text.StringBuilder s)
{
    slice_check();
    int sliceLength = ket - bra;

    // Empty the target first, then read the slice (same order as before).
    s.Length = 0;
    s.Append(current.ToString(bra, sliceLength));
    return s;
}
487 
/// <summary>
/// Copies the working text from index 0 up to the forward limit into the
/// supplied builder, replacing whatever the builder held before.
/// </summary>
/// <param name="s">Builder that receives the text.</param>
/// <returns>The same builder instance, <paramref name="s"/>.</returns>
protected internal virtual System.Text.StringBuilder assign_to(System.Text.StringBuilder s)
{
    // Empty the target first, then read the text (same order as before).
    s.Length = 0;
    s.Append(current.ToString(0, limit));
    return s;
}
495 
496  /*
497  extern void debug(struct SN_env * z, int number, int line_count)
498  { int i;
499  int limit = SIZE(z->p);
500  //if (number >= 0) printf("%3d (line %4d): '", number, line_count);
501  if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
502  for (i = 0; i <= limit; i++)
503  { if (z->lb == i) printf("{");
504  if (z->bra == i) printf("[");
505  if (z->c == i) printf("|");
506  if (z->ket == i) printf("]");
507  if (z->l == i) printf("}");
508  if (i < limit)
509  { int ch = z->p[i];
510  if (ch == 0) ch = '#';
511  printf("%c", ch);
512  }
513  }
514  printf("'\n");
515  }*/
516  }
517 
518 }