Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
RussianStemmer.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 // This file was generated automatically by the Snowball to Java compiler
19 using System;
20 using Among = SF.Snowball.Among;
21 using SnowballProgram = SF.Snowball.SnowballProgram;
22 namespace SF.Snowball.Ext
23 {
24 #pragma warning disable 162,164
25 
28  {
/// <summary>
/// Creates a stemmer and populates its suffix tables by calling <c>InitBlock</c>.
/// </summary>
public RussianStemmer()
{
    this.InitBlock();
}
/// <summary>
/// Builds the eight suffix tables (<c>a_0</c> .. <c>a_7</c>) that the
/// <c>r_*</c> routines pass to <c>find_among_b</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the string literals look like KOI8-R byte values stored as
/// chars (e.g. D7 DB C9 would spell "вши"), not Unicode Cyrillic -- confirm
/// against the callers that feed text to this stemmer.
/// NOTE(review): the meaning of the Among constructor arguments is defined in
/// SF.Snowball.Among, which is not visible here; values are kept verbatim.
/// </remarks>
private void InitBlock()
{
    // Table searched by r_perfective_gerund (9 entries).
    a_0 = new Among[]
    {
        new Among("\u00D7\u00DB\u00C9", -1, 1, "", this),
        new Among("\u00C9\u00D7\u00DB\u00C9", 0, 2, "", this),
        new Among("\u00D9\u00D7\u00DB\u00C9", 0, 2, "", this),
        new Among("\u00D7", -1, 1, "", this),
        new Among("\u00C9\u00D7", 3, 2, "", this),
        new Among("\u00D9\u00D7", 3, 2, "", this),
        new Among("\u00D7\u00DB\u00C9\u00D3\u00D8", -1, 1, "", this),
        new Among("\u00C9\u00D7\u00DB\u00C9\u00D3\u00D8", 6, 2, "", this),
        new Among("\u00D9\u00D7\u00DB\u00C9\u00D3\u00D8", 6, 2, "", this)
    };

    // Table searched by r_adjective (26 entries).
    a_1 = new Among[]
    {
        new Among("\u00C0\u00C0", -1, 1, "", this),
        new Among("\u00C5\u00C0", -1, 1, "", this),
        new Among("\u00CF\u00C0", -1, 1, "", this),
        new Among("\u00D5\u00C0", -1, 1, "", this),
        new Among("\u00C5\u00C5", -1, 1, "", this),
        new Among("\u00C9\u00C5", -1, 1, "", this),
        new Among("\u00CF\u00C5", -1, 1, "", this),
        new Among("\u00D9\u00C5", -1, 1, "", this),
        new Among("\u00C9\u00C8", -1, 1, "", this),
        new Among("\u00D9\u00C8", -1, 1, "", this),
        new Among("\u00C9\u00CD\u00C9", -1, 1, "", this),
        new Among("\u00D9\u00CD\u00C9", -1, 1, "", this),
        new Among("\u00C5\u00CA", -1, 1, "", this),
        new Among("\u00C9\u00CA", -1, 1, "", this),
        new Among("\u00CF\u00CA", -1, 1, "", this),
        new Among("\u00D9\u00CA", -1, 1, "", this),
        new Among("\u00C5\u00CD", -1, 1, "", this),
        new Among("\u00C9\u00CD", -1, 1, "", this),
        new Among("\u00CF\u00CD", -1, 1, "", this),
        new Among("\u00D9\u00CD", -1, 1, "", this),
        new Among("\u00C5\u00C7\u00CF", -1, 1, "", this),
        new Among("\u00CF\u00C7\u00CF", -1, 1, "", this),
        new Among("\u00C1\u00D1", -1, 1, "", this),
        new Among("\u00D1\u00D1", -1, 1, "", this),
        new Among("\u00C5\u00CD\u00D5", -1, 1, "", this),
        new Among("\u00CF\u00CD\u00D5", -1, 1, "", this)
    };

    // Table searched by r_adjectival (8 entries).
    a_2 = new Among[]
    {
        new Among("\u00C5\u00CD", -1, 1, "", this),
        new Among("\u00CE\u00CE", -1, 1, "", this),
        new Among("\u00D7\u00DB", -1, 1, "", this),
        new Among("\u00C9\u00D7\u00DB", 2, 2, "", this),
        new Among("\u00D9\u00D7\u00DB", 2, 2, "", this),
        new Among("\u00DD", -1, 1, "", this),
        new Among("\u00C0\u00DD", 5, 1, "", this),
        new Among("\u00D5\u00C0\u00DD", 6, 2, "", this)
    };

    // Table searched by r_reflexive (2 entries).
    a_3 = new Among[]
    {
        new Among("\u00D3\u00D1", -1, 1, "", this),
        new Among("\u00D3\u00D8", -1, 1, "", this)
    };

    // Table searched by r_verb (46 entries).
    a_4 = new Among[]
    {
        new Among("\u00C0", -1, 2, "", this),
        new Among("\u00D5\u00C0", 0, 2, "", this),
        new Among("\u00CC\u00C1", -1, 1, "", this),
        new Among("\u00C9\u00CC\u00C1", 2, 2, "", this),
        new Among("\u00D9\u00CC\u00C1", 2, 2, "", this),
        new Among("\u00CE\u00C1", -1, 1, "", this),
        new Among("\u00C5\u00CE\u00C1", 5, 2, "", this),
        new Among("\u00C5\u00D4\u00C5", -1, 1, "", this),
        new Among("\u00C9\u00D4\u00C5", -1, 2, "", this),
        new Among("\u00CA\u00D4\u00C5", -1, 1, "", this),
        new Among("\u00C5\u00CA\u00D4\u00C5", 9, 2, "", this),
        new Among("\u00D5\u00CA\u00D4\u00C5", 9, 2, "", this),
        new Among("\u00CC\u00C9", -1, 1, "", this),
        new Among("\u00C9\u00CC\u00C9", 12, 2, "", this),
        new Among("\u00D9\u00CC\u00C9", 12, 2, "", this),
        new Among("\u00CA", -1, 1, "", this),
        new Among("\u00C5\u00CA", 15, 2, "", this),
        new Among("\u00D5\u00CA", 15, 2, "", this),
        new Among("\u00CC", -1, 1, "", this),
        new Among("\u00C9\u00CC", 18, 2, "", this),
        new Among("\u00D9\u00CC", 18, 2, "", this),
        new Among("\u00C5\u00CD", -1, 1, "", this),
        new Among("\u00C9\u00CD", -1, 2, "", this),
        new Among("\u00D9\u00CD", -1, 2, "", this),
        new Among("\u00CE", -1, 1, "", this),
        new Among("\u00C5\u00CE", 24, 2, "", this),
        new Among("\u00CC\u00CF", -1, 1, "", this),
        new Among("\u00C9\u00CC\u00CF", 26, 2, "", this),
        new Among("\u00D9\u00CC\u00CF", 26, 2, "", this),
        new Among("\u00CE\u00CF", -1, 1, "", this),
        new Among("\u00C5\u00CE\u00CF", 29, 2, "", this),
        new Among("\u00CE\u00CE\u00CF", 29, 1, "", this),
        new Among("\u00C0\u00D4", -1, 1, "", this),
        new Among("\u00D5\u00C0\u00D4", 32, 2, "", this),
        new Among("\u00C5\u00D4", -1, 1, "", this),
        new Among("\u00D5\u00C5\u00D4", 34, 2, "", this),
        new Among("\u00C9\u00D4", -1, 2, "", this),
        new Among("\u00D1\u00D4", -1, 2, "", this),
        new Among("\u00D9\u00D4", -1, 2, "", this),
        new Among("\u00D4\u00D8", -1, 1, "", this),
        new Among("\u00C9\u00D4\u00D8", 39, 2, "", this),
        new Among("\u00D9\u00D4\u00D8", 39, 2, "", this),
        new Among("\u00C5\u00DB\u00D8", -1, 1, "", this),
        new Among("\u00C9\u00DB\u00D8", -1, 2, "", this),
        new Among("\u00CE\u00D9", -1, 1, "", this),
        new Among("\u00C5\u00CE\u00D9", 44, 2, "", this)
    };

    // Table searched by r_noun (36 entries).
    a_5 = new Among[]
    {
        new Among("\u00C0", -1, 1, "", this),
        new Among("\u00C9\u00C0", 0, 1, "", this),
        new Among("\u00D8\u00C0", 0, 1, "", this),
        new Among("\u00C1", -1, 1, "", this),
        new Among("\u00C5", -1, 1, "", this),
        new Among("\u00C9\u00C5", 4, 1, "", this),
        new Among("\u00D8\u00C5", 4, 1, "", this),
        new Among("\u00C1\u00C8", -1, 1, "", this),
        new Among("\u00D1\u00C8", -1, 1, "", this),
        new Among("\u00C9\u00D1\u00C8", 8, 1, "", this),
        new Among("\u00C9", -1, 1, "", this),
        new Among("\u00C5\u00C9", 10, 1, "", this),
        new Among("\u00C9\u00C9", 10, 1, "", this),
        new Among("\u00C1\u00CD\u00C9", 10, 1, "", this),
        new Among("\u00D1\u00CD\u00C9", 10, 1, "", this),
        new Among("\u00C9\u00D1\u00CD\u00C9", 14, 1, "", this),
        new Among("\u00CA", -1, 1, "", this),
        new Among("\u00C5\u00CA", 16, 1, "", this),
        new Among("\u00C9\u00C5\u00CA", 17, 1, "", this),
        new Among("\u00C9\u00CA", 16, 1, "", this),
        new Among("\u00CF\u00CA", 16, 1, "", this),
        new Among("\u00C1\u00CD", -1, 1, "", this),
        new Among("\u00C5\u00CD", -1, 1, "", this),
        new Among("\u00C9\u00C5\u00CD", 22, 1, "", this),
        new Among("\u00CF\u00CD", -1, 1, "", this),
        new Among("\u00D1\u00CD", -1, 1, "", this),
        new Among("\u00C9\u00D1\u00CD", 25, 1, "", this),
        new Among("\u00CF", -1, 1, "", this),
        new Among("\u00D1", -1, 1, "", this),
        new Among("\u00C9\u00D1", 28, 1, "", this),
        new Among("\u00D8\u00D1", 28, 1, "", this),
        new Among("\u00D5", -1, 1, "", this),
        new Among("\u00C5\u00D7", -1, 1, "", this),
        new Among("\u00CF\u00D7", -1, 1, "", this),
        new Among("\u00D8", -1, 1, "", this),
        new Among("\u00D9", -1, 1, "", this)
    };

    // Table searched by r_derivational (2 entries).
    a_6 = new Among[]
    {
        new Among("\u00CF\u00D3\u00D4", -1, 1, "", this),
        new Among("\u00CF\u00D3\u00D4\u00D8", -1, 1, "", this)
    };

    // Table searched by r_tidy_up (4 entries).
    a_7 = new Among[]
    {
        new Among("\u00C5\u00CA\u00DB\u00C5", -1, 1, "", this),
        new Among("\u00CE", -1, 2, "", this),
        new Among("\u00D8", -1, 3, "", this),
        new Among("\u00C5\u00CA\u00DB", -1, 1, "", this)
    };
}
45 
// Suffix tables, filled by InitBlock() and searched with find_among_b():
//   a_0 -> r_perfective_gerund   a_1 -> r_adjective   a_2 -> r_adjectival
//   a_3 -> r_reflexive           a_4 -> r_verb        a_5 -> r_noun
//   a_6 -> r_derivational        a_7 -> r_tidy_up
46  private Among[] a_0;
47  private Among[] a_1;
48  private Among[] a_2;
49  private Among[] a_3;
50  private Among[] a_4;
51  private Among[] a_5;
52  private Among[] a_6;
53  private Among[] a_7;
// Grouping data passed to in_grouping/out_grouping together with the bounds 192 and 220
// in r_mark_regions. NOTE(review): presumably a bitmap of the vowel characters in that
// code range -- confirm against SnowballProgram.in_grouping.
54  private static readonly char[] g_v = new char[]{(char) (35), (char) (130), (char) (34), (char) (18)};
55 
// Region marks: both are set by r_mark_regions(); I_p2 is read by r_R2() and
// I_pV is read by Stem() to restrict the backward pass.
56  private int I_p2;
57  private int I_pV;
58 
/// <summary>
/// Copies the two region marks from <paramref name="other"/> and then lets the
/// base class copy its own state.
/// </summary>
/// <param name="other">Stemmer whose state is copied into this instance.</param>
protected internal virtual void copy_from(RussianStemmer other)
{
    this.I_pV = other.I_pV;
    this.I_p2 = other.I_p2;
    base.copy_from(other);
}
65 
/// <summary>
/// Computes the region marks <c>I_pV</c> and <c>I_p2</c> by scanning forward
/// from the current cursor, then restores the cursor.
/// </summary>
/// <remarks>
/// Both marks start at <c>limit</c>. Each "gopast" step advances the cursor one
/// position at a time until the grouping test succeeds; if <c>limit</c> is
/// reached first, the scan stops and the marks keep whatever was set so far.
/// </remarks>
/// <returns>Always <c>true</c>.</returns>
private bool r_mark_regions()
{
    int savedCursor = cursor;

    // (, line 96
    I_pV = limit;
    I_p2 = limit;

    // gopast, line 101
    while (!in_grouping(g_v, 192, 220))
    {
        if (cursor >= limit)
        {
            goto finished;
        }
        cursor++;
    }

    // setmark pV, line 101
    I_pV = cursor;

    // gopast, line 101
    while (!out_grouping(g_v, 192, 220))
    {
        if (cursor >= limit)
        {
            goto finished;
        }
        cursor++;
    }

    // gopast, line 102
    while (!in_grouping(g_v, 192, 220))
    {
        if (cursor >= limit)
        {
            goto finished;
        }
        cursor++;
    }

    // gopast, line 102
    while (!out_grouping(g_v, 192, 220))
    {
        if (cursor >= limit)
        {
            goto finished;
        }
        cursor++;
    }

    // setmark p2, line 102
    I_p2 = cursor;

finished:
    // The enclosing "do" (line 100) always restores the cursor.
    cursor = savedCursor;
    return true;
}
185 
/// <summary>Tests whether the cursor is at or beyond the <c>I_p2</c> mark.</summary>
/// <returns><c>true</c> when <c>I_p2 &lt;= cursor</c>; otherwise <c>false</c>.</returns>
private bool r_R2()
{
    return I_p2 <= cursor;
}
194 
/// <summary>
/// Looks backwards for one of the endings in <c>a_0</c> and deletes it.
/// </summary>
/// <remarks>
/// Result 1 endings are deleted only when preceded by "\u00C1" or "\u00D1";
/// result 2 endings are deleted unconditionally.
/// Fix: the previous code jumped to the same label whether or not "\u00C1"
/// matched, so a successful first alternative still fell into the "\u00D1"
/// test and the routine returned false. The first alternative now
/// short-circuits, matching how r_adjectival handles the same "or".
/// </remarks>
/// <returns><c>true</c> if an ending was recognised (and removed); otherwise <c>false</c>.</returns>
private bool r_perfective_gerund()
{
    int among_var;
    int v_1;

    // [, line 111
    ket = cursor;
    // substring, line 111
    among_var = find_among_b(a_0, 9);
    if (among_var == 0)
    {
        return false;
    }
    // ], line 111
    bra = cursor;

    switch (among_var)
    {
        case 1:
            // or, line 115: accept "\u00C1"; only if that fails, rewind and try "\u00D1".
            v_1 = limit - cursor;
            if (!eq_s_b(1, "\u00C1"))
            {
                cursor = limit - v_1;
                if (!eq_s_b(1, "\u00D1"))
                {
                    return false;
                }
            }
            // delete, line 115
            slice_del();
            break;

        case 2:
            // delete, line 122
            slice_del();
            break;
    }
    return true;
}
256 
/// <summary>
/// Looks backwards for one of the endings in <c>a_1</c> and deletes it.
/// </summary>
/// <returns><c>true</c> if an ending was found; otherwise <c>false</c>.</returns>
private bool r_adjective()
{
    // [, line 127
    ket = cursor;

    // substring, line 127
    int match = find_among_b(a_1, 26);
    if (match == 0)
    {
        return false;
    }

    // ], line 127
    bra = cursor;

    if (match == 1)
    {
        // delete, line 136
        slice_del();
    }
    return true;
}
285 
/// <summary>
/// Removes an ending via <c>r_adjective</c> and then optionally removes one
/// further ending from <c>a_2</c>.
/// </summary>
/// <remarks>
/// The second removal is a "try": when it does not apply, the cursor is put
/// back to where it was after <c>r_adjective</c> and the routine still succeeds.
/// </remarks>
/// <returns><c>false</c> only when <c>r_adjective</c> fails; otherwise <c>true</c>.</returns>
private bool r_adjectival()
{
    // call adjective, line 141
    if (!r_adjective())
    {
        return false;
    }

    // try, line 148
    int tryMark = limit - cursor;

    // [, line 149
    ket = cursor;
    // substring, line 149
    int match = find_among_b(a_2, 8);
    if (match == 0)
    {
        cursor = limit - tryMark;
        return true;
    }
    // ], line 149
    bra = cursor;

    if (match == 1)
    {
        // or, line 154: "\u00C1" first, otherwise rewind and require "\u00D1".
        int altMark = limit - cursor;
        if (!eq_s_b(1, "\u00C1"))
        {
            cursor = limit - altMark;
            if (!eq_s_b(1, "\u00D1"))
            {
                cursor = limit - tryMark;
                return true;
            }
        }
        // delete, line 154
        slice_del();
    }
    else if (match == 2)
    {
        // delete, line 161
        slice_del();
    }
    return true;
}
368 
/// <summary>
/// Looks backwards for one of the two endings in <c>a_3</c> and deletes it.
/// </summary>
/// <returns><c>true</c> if an ending was found; otherwise <c>false</c>.</returns>
private bool r_reflexive()
{
    // [, line 168
    ket = cursor;

    // substring, line 168
    int match = find_among_b(a_3, 2);
    if (match == 0)
    {
        return false;
    }

    // ], line 168
    bra = cursor;

    if (match == 1)
    {
        // delete, line 171
        slice_del();
    }
    return true;
}
397 
/// <summary>
/// Looks backwards for one of the endings in <c>a_4</c> and deletes it.
/// </summary>
/// <remarks>
/// Result 1 endings are deleted only when preceded by "\u00C1" or "\u00D1";
/// result 2 endings are deleted unconditionally.
/// Fix: the previous code jumped to the same label whether or not "\u00C1"
/// matched, so a successful first alternative still fell into the "\u00D1"
/// test and the routine returned false. The first alternative now
/// short-circuits, matching how r_adjectival handles the same "or".
/// </remarks>
/// <returns><c>true</c> if an ending was recognised (and removed); otherwise <c>false</c>.</returns>
private bool r_verb()
{
    int among_var;
    int v_1;

    // [, line 176
    ket = cursor;
    // substring, line 176
    among_var = find_among_b(a_4, 46);
    if (among_var == 0)
    {
        return false;
    }
    // ], line 176
    bra = cursor;

    switch (among_var)
    {
        case 1:
            // or, line 182: accept "\u00C1"; only if that fails, rewind and try "\u00D1".
            v_1 = limit - cursor;
            if (!eq_s_b(1, "\u00C1"))
            {
                cursor = limit - v_1;
                if (!eq_s_b(1, "\u00D1"))
                {
                    return false;
                }
            }
            // delete, line 182
            slice_del();
            break;

        case 2:
            // delete, line 190
            slice_del();
            break;
    }
    return true;
}
459 
/// <summary>
/// Looks backwards for one of the endings in <c>a_5</c> and deletes it.
/// </summary>
/// <returns><c>true</c> if an ending was found; otherwise <c>false</c>.</returns>
private bool r_noun()
{
    // [, line 199
    ket = cursor;

    // substring, line 199
    int match = find_among_b(a_5, 36);
    if (match == 0)
    {
        return false;
    }

    // ], line 199
    bra = cursor;

    if (match == 1)
    {
        // delete, line 206
        slice_del();
    }
    return true;
}
488 
/// <summary>
/// Looks backwards for one of the two endings in <c>a_6</c> and deletes it,
/// provided the <c>r_R2</c> test passes at the start of the ending.
/// </summary>
/// <returns>
/// <c>false</c> when no ending matches or <c>r_R2</c> fails; otherwise <c>true</c>.
/// </returns>
private bool r_derivational()
{
    // [, line 215
    ket = cursor;

    // substring, line 215
    int match = find_among_b(a_6, 2);
    if (match == 0)
    {
        return false;
    }

    // ], line 215
    bra = cursor;

    // call R2, line 215
    if (!r_R2())
    {
        return false;
    }

    if (match == 1)
    {
        // delete, line 218
        slice_del();
    }
    return true;
}
522 
/// <summary>
/// Final clean-up step: looks backwards for one of the four entries in
/// <c>a_7</c> and acts on the result code of the match.
/// </summary>
/// <remarks>
/// Result 1: delete the match, then require "\u00CE" twice in a row and delete
/// the first of them. Result 2: require a further "\u00CE" before the match and
/// delete the match. Result 3: delete the match.
/// </remarks>
/// <returns>
/// <c>false</c> when nothing matches or a required "\u00CE" is missing;
/// otherwise <c>true</c>.
/// </returns>
private bool r_tidy_up()
{
    // [, line 223
    ket = cursor;

    // substring, line 223
    int match = find_among_b(a_7, 4);
    if (match == 0)
    {
        return false;
    }

    // ], line 223
    bra = cursor;

    if (match == 1)
    {
        // delete, line 227
        slice_del();

        // [, line 228
        ket = cursor;
        // literal, line 228
        if (!eq_s_b(1, "\u00CE"))
        {
            return false;
        }
        // ], line 228
        bra = cursor;
        // literal, line 228
        if (!eq_s_b(1, "\u00CE"))
        {
            return false;
        }
        // delete, line 228
        slice_del();
    }
    else if (match == 2)
    {
        // literal, line 231
        if (!eq_s_b(1, "\u00CE"))
        {
            return false;
        }
        // delete, line 231
        slice_del();
    }
    else if (match == 3)
    {
        // delete, line 233
        slice_del();
    }
    return true;
}
584 
/// <summary>
/// Runs the stemming program on the current buffer.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. <c>r_mark_regions</c> sets <c>I_pV</c> and <c>I_p2</c>.
/// 2. Processing switches to backward mode with <c>limit_backward</c> set to <c>I_pV</c>.
/// 3. Either <c>r_perfective_gerund</c> succeeds, or an optional
///    <c>r_reflexive</c> is followed by the first of <c>r_adjectival</c>,
///    <c>r_verb</c>, <c>r_noun</c> that succeeds.
/// 4. An optional trailing "\u00C9" is deleted.
/// 5. <c>r_derivational</c> and <c>r_tidy_up</c> run; their results are ignored.
/// Fix: a successful <c>r_perfective_gerund</c> used to jump to the label that
/// starts the second alternative, so the reflexive/adjectival/verb/noun branch
/// always ran as well. That branch is now taken only when
/// <c>r_perfective_gerund</c> fails.
/// </remarks>
/// <returns>
/// <c>false</c> if, after moving the cursor to <c>limit</c>, it lies before
/// <c>I_pV</c>; otherwise <c>true</c>.
/// </returns>
public override bool Stem()
{
    // do mark_regions, line 240 (result ignored, cursor restored afterwards)
    int v_1 = cursor;
    r_mark_regions();
    cursor = v_1;

    // backwards, line 241
    limit_backward = cursor;
    cursor = limit;

    // setlimit tomark pV, line 241
    int v_2 = limit - cursor;
    if (cursor < I_pV)
    {
        return false;
    }
    cursor = I_pV;
    int v_3 = limit_backward;   // restored just before returning
    limit_backward = cursor;
    cursor = limit - v_2;

    // do, line 242
    int v_4 = limit - cursor;

    // or, line 243: perfective_gerund, else (try reflexive; adjectival or verb or noun)
    int v_5 = limit - cursor;
    if (!r_perfective_gerund())
    {
        cursor = limit - v_5;

        // try reflexive, line 244
        int v_6 = limit - cursor;
        if (!r_reflexive())
        {
            cursor = limit - v_6;
        }

        // or, line 245
        int v_7 = limit - cursor;
        if (!r_adjectival())
        {
            cursor = limit - v_7;
            if (!r_verb())
            {
                cursor = limit - v_7;
                // Last alternative; the enclosing "do" ignores its result.
                r_noun();
            }
        }
    }
    cursor = limit - v_4;

    // try, line 248
    int v_8 = limit - cursor;
    // [, line 248
    ket = cursor;
    // literal, line 248
    if (eq_s_b(1, "\u00C9"))
    {
        // ], line 248
        bra = cursor;
        // delete, line 248
        slice_del();
    }
    else
    {
        cursor = limit - v_8;
    }

    // do derivational, line 251
    int v_9 = limit - cursor;
    r_derivational();
    cursor = limit - v_9;

    // do tidy_up, line 252
    int v_10 = limit - cursor;
    r_tidy_up();
    cursor = limit - v_10;

    limit_backward = v_3;
    cursor = limit_backward;
    return true;
}
774  }
775 }