25 namespace Lucene.Net.Analysis.Ru
// Region offsets for the word currently being stemmed. Stem() splits the input at RV
// (only the text from RV onward is stemmed) and derivational() compares R2 - RV with
// the position of a candidate ending. R1 is not read anywhere in the visible code.
// NOTE(review): presumably all three are assigned in markPositions() — confirm there.
33 private int RV, R1, R2;
// Lowercase Cyrillic letters used to spell the ending tables below.
// Each comment gives the Unicode name of the code point.

// CYRILLIC SMALL LETTER A
36 private const char A =
'\u0430';
// CYRILLIC SMALL LETTER VE
38 private const char V =
'\u0432';
// CYRILLIC SMALL LETTER GHE
39 private const char G =
'\u0433';
// CYRILLIC SMALL LETTER IE
41 private const char E =
'\u0435';
// CYRILLIC SMALL LETTER I
44 private const char I =
'\u0438';
// CYRILLIC SMALL LETTER SHORT I
45 private const char I_ =
'\u0439';
// CYRILLIC SMALL LETTER EL
47 private const char L =
'\u043B';
// CYRILLIC SMALL LETTER EM
48 private const char M =
'\u043C';
// CYRILLIC SMALL LETTER EN
49 private const char N =
'\u043D';
// CYRILLIC SMALL LETTER O
50 private const char O =
'\u043E';
// CYRILLIC SMALL LETTER ES
53 private const char S =
'\u0441';
// CYRILLIC SMALL LETTER TE
54 private const char T =
'\u0442';
// CYRILLIC SMALL LETTER U
55 private const char U =
'\u0443';
// CYRILLIC SMALL LETTER HA
57 private const char X =
'\u0445';
// CYRILLIC SMALL LETTER SHA
60 private const char SH =
'\u0448';
// CYRILLIC SMALL LETTER SHCHA
61 private const char SHCH =
'\u0449';
// CYRILLIC SMALL LETTER YERU
63 private const char Y =
'\u044B';
// CYRILLIC SMALL LETTER SOFT SIGN
64 private const char SOFT =
'\u044C';
// CYRILLIC SMALL LETTER E
65 private const char AE =
'\u044D';
// CYRILLIC SMALL LETTER YU
66 private const char IU =
'\u044E';
// CYRILLIC SMALL LETTER YA
67 private const char IA =
'\u044F';
// The nine letters that isVowel() treats as vowels; every other character is a non-vowel.
70 private static char[] vowels = { A, E, I, O, U, Y, AE, IU, IA };
// Ending tables. Each entry is one ending spelled with the letter constants above;
// findEnding() compares an entry against the tail of the stemming zone.
// NOTE(review): most entries and the closing braces of these initialisers are not
// visible here, so the comments below describe only how each table is used.

// Endings tried first by perfectiveGerund(), together with perfectiveGerund1Predessors.
72 private static char[][] perfectiveGerundEndings1 = {
75 new[] {V, SH, I, S, SOFT}
// Passed as the predecessor table alongside perfectiveGerundEndings1.
78 private static char[][] perfectiveGerund1Predessors = {
// Endings tried by perfectiveGerund() when the first group does not match; no predecessor table is passed.
83 private static char[][] perfectiveGerundEndings2 = {
88 new[] {I, V, SH, I, S, SOFT},
89 new[] {Y, V, SH, I, S, SOFT}
// Endings removed by adjectival().
92 private static char[][] adjectiveEndings = {
// Participle endings passed by adjectival() together with participle1Predessors.
121 private static char[][] participleEndings1 = {
// Participle endings passed by adjectival() without a predecessor table.
129 private static char[][] participleEndings2 = {
// Predecessor table passed alongside participleEndings1.
135 private static char[][] participle1Predessors = {
// Endings removed by reflexive().
140 private static char[][] reflexiveEndings = {
// NOTE(review): presumably passed by Verb() together with verb1Predessors; that call's
// arguments are not visible — confirm.
145 private static char[][] verbEndings1 = {
// Endings passed by Verb() without a predecessor table.
165 private static char[][] verbEndings2 = {
// NOTE(review): presumably the predecessor table for verbEndings1 — confirm in Verb().
197 private static char[][] verb1Predessors = {
// Endings removed by noun().
202 private static char[][] nounEndings = {
// Endings removed by Superlative().
241 private static char[][] superlativeEndings = {
// Endings looked up by derivational().
246 private static char[][] derivationalEndings = {
248 new[] {O, S, T, SOFT}
/// <summary>
/// Tries to strip an adjectival ending from <paramref name="stemmingZone"/>.
/// </summary>
/// <param name="stemmingZone">Buffer holding the part of the word being stemmed; modified in place.</param>
// NOTE(review): the return statements and the control flow joining the calls below are
// not visible here; presumably false is returned when no adjective ending matches and
// the participle tables are tried only after one was removed — confirm.
264 private bool adjectival(StringBuilder stemmingZone)
// An adjective ending is looked for first.
267 if (!findAndRemoveEnding(stemmingZone, adjectiveEndings))
// Group-1 participle endings are removed only when a listed predecessor precedes them.
273 findAndRemoveEnding(stemmingZone, participleEndings1, participle1Predessors)
// Group-2 participle endings have no predecessor requirement.
275 findAndRemoveEnding(stemmingZone, participleEndings2);
/// <summary>
/// Looks for an ending from derivationalEndings at the tail of
/// <paramref name="stemmingZone"/> and shortens the buffer by that ending's length.
/// </summary>
/// <param name="stemmingZone">Buffer holding the part of the word being stemmed; modified in place.</param>
// NOTE(review): the return statements are not visible; presumably true when an ending
// was removed and false otherwise — confirm.
284 private bool derivational(StringBuilder stemmingZone)
286 int endingLength = findEnding(stemmingZone, derivationalEndings);
// A length of 0 means no ending matched.
287 if (endingLength == 0)
// Stem() builds the zone from input.Substring(RV), so R2 - RV is R2 expressed as an
// offset inside the zone; the test holds when the ending starts at or after that offset.
293 if (R2 - RV <= stemmingZone.Length - endingLength)
// Drop the ending by truncating the buffer.
295 stemmingZone.Length = stemmingZone.Length - endingLength;
/// <summary>
/// Searches <paramref name="theEndingClass"/> for an ending whose characters match
/// <paramref name="stemmingZone"/> when read backwards from <paramref name="startIndex"/>.
/// </summary>
/// <param name="stemmingZone">Buffer to inspect; it is only read here.</param>
/// <param name="startIndex">Index of the last character that may belong to the ending.</param>
/// <param name="theEndingClass">Candidate endings; scanned from the last entry to the first.</param>
/// <returns>The length of the matching ending.</returns>
// NOTE(review): the no-match return is not visible; callers compare the result with 0,
// so presumably 0 is returned when nothing matches — confirm.
309 private int findEnding(StringBuilder stemmingZone,
int startIndex,
char[][] theEndingClass)
// Candidates are visited from the end of the table towards its start.
312 for (
int i = theEndingClass.Length - 1; i >= 0; i--)
314 char[] theEnding = theEndingClass[i];
// True when the candidate is longer than the startIndex + 1 characters available.
// NOTE(review): presumably such candidates are skipped — the branch body is not visible.
316 if (startIndex < theEnding.Length - 1)
// Compare right to left: last character of the candidate against stemmingZone[startIndex], and so on.
322 int stemmingIndex = startIndex;
323 for (
int j = theEnding.Length - 1; j >= 0; j--)
325 if (stemmingZone[stemmingIndex--] != theEnding[j])
334 return theEndingClass[i].Length;
/// <summary>
/// Convenience overload: searches for an ending at the very end of
/// <paramref name="stemmingZone"/> by delegating with the index of its last character.
/// </summary>
/// <param name="stemmingZone">Buffer to inspect.</param>
/// <param name="theEndingClass">Candidate endings.</param>
/// <returns>Whatever the three-argument overload returns for that start index.</returns>
340 private int findEnding(StringBuilder stemmingZone,
char[][] theEndingClass)
342 return findEnding(stemmingZone, stemmingZone.Length - 1, theEndingClass);
/// <summary>
/// Looks for an ending from <paramref name="theEndingClass"/> at the tail of
/// <paramref name="stemmingZone"/> and removes it by shortening the buffer.
/// </summary>
/// <param name="stemmingZone">Buffer holding the part of the word being stemmed; modified in place.</param>
/// <param name="theEndingClass">Candidate endings.</param>
// NOTE(review): the return statements are not visible; presumably false when no ending
// matched and true after a removal — confirm.
349 private bool findAndRemoveEnding(StringBuilder stemmingZone,
char[][] theEndingClass)
351 int endingLength = findEnding(stemmingZone, theEndingClass);
// A length of 0 means no ending matched.
352 if (endingLength == 0)
// Truncate the buffer by the length of the matched ending.
357 stemmingZone.Length = stemmingZone.Length - endingLength;
/// <summary>
/// Like the two-argument overload, but after finding an ending it also looks for a
/// second match ending immediately before that ending.
/// Only the ending itself is cut from the buffer; the preceding characters stay.
/// </summary>
/// <param name="stemmingZone">Buffer holding the part of the word being stemmed; modified in place.</param>
/// <param name="theEndingClass">Candidate endings.</param>
/// <param name="thePredessors">Candidate predecessors.</param>
// NOTE(review): the last argument of the inner findEnding call and the return
// statements are not visible; presumably thePredessors is passed and false is returned
// when either lookup yields 0 — confirm.
368 private bool findAndRemoveEnding(StringBuilder stemmingZone,
369 char[][] theEndingClass,
char[][] thePredessors)
371 int endingLength = findEnding(stemmingZone, theEndingClass);
// A length of 0 means no ending matched.
372 if (endingLength == 0)
// Search backwards from the character just before the matched ending.
377 int predessorLength =
378 findEnding(stemmingZone,
379 stemmingZone.Length - endingLength - 1,
// A length of 0 means nothing from the second lookup precedes the ending.
381 if (predessorLength == 0)
// Only endingLength characters are dropped.
385 stemmingZone.Length = stemmingZone.Length - endingLength;
/// <summary>
/// Walks <paramref name="word"/> from left to right through alternating runs of
/// non-vowels and vowels, as classified by isVowel.
/// </summary>
/// <param name="word">The word to scan.</param>
// NOTE(review): the declaration of i, the assignments to RV, R1 and R2 and the early
// exits are not visible here; presumably each boundary test below is followed by one of
// them — confirm.
397 private void markPositions(String word)
// Skip the leading run of non-vowels.
404 while (word.Length > i && !isVowel(word[i]))
// Step past the character the scan stopped on; the test is true when none remain after it.
408 if (word.Length - 1 < ++i)
// Skip the following run of vowels.
412 while (word.Length > i && isVowel(word[i]))
// Same boundary test after the vowel run.
416 if (word.Length - 1 < ++i)
// Skip the next run of non-vowels.
420 while (word.Length > i && !isVowel(word[i]))
// Same boundary test after the non-vowel run.
424 if (word.Length - 1 < ++i)
// Skip the next run of vowels.
426 while (word.Length > i && isVowel(word[i]))
// Final boundary test.
430 if (word.Length - 1 < ++i)
/// <summary>
/// Checks <paramref name="letter"/> against the entries of the vowels array with a
/// linear scan.
/// </summary>
/// <param name="letter">The character to classify.</param>
// NOTE(review): the return statements are not visible; presumably true on a match and
// false after the loop — confirm.
441 private bool isVowel(
char letter)
443 for (
int i = 0; i < vowels.Length; i++)
445 if (letter == vowels[i])
/// <summary>
/// Tries to remove an ending from nounEndings at the tail of <paramref name="stemmingZone"/>.
/// </summary>
/// <param name="stemmingZone">Buffer holding the part of the word being stemmed; modified in place.</param>
/// <returns>The result of findAndRemoveEnding for the noun endings table.</returns>
456 private bool noun(StringBuilder stemmingZone)
458 return findAndRemoveEnding(stemmingZone, nounEndings);
/// <summary>
/// Tries to remove a perfective gerund ending: first a group-1 ending together with
/// the group-1 predecessor table, and, if that call returns false, a group-2 ending.
/// </summary>
/// <param name="stemmingZone">Buffer holding the part of the word being stemmed; modified in place.</param>
/// <returns>True when either findAndRemoveEnding call returns true.</returns>
// NOTE(review): the first argument of the first call is not visible; presumably it is
// stemmingZone — confirm.
466 private bool perfectiveGerund(StringBuilder stemmingZone)
468 return findAndRemoveEnding(
470 perfectiveGerundEndings1,
471 perfectiveGerund1Predessors)
// || short-circuits: the group-2 table is consulted only when the first call returned false.
472 || findAndRemoveEnding(stemmingZone, perfectiveGerundEndings2);
/// <summary>
/// Tries to remove an ending from reflexiveEndings at the tail of <paramref name="stemmingZone"/>.
/// </summary>
/// <param name="stemmingZone">Buffer holding the part of the word being stemmed; modified in place.</param>
/// <returns>The result of findAndRemoveEnding for the reflexive endings table.</returns>
480 private bool reflexive(StringBuilder stemmingZone)
482 return findAndRemoveEnding(stemmingZone, reflexiveEndings);
/// <summary>
/// Drops the last character of <paramref name="stemmingZone"/> when the buffer is
/// non-empty and that character is the letter I (U+0438).
/// </summary>
/// <param name="stemmingZone">Buffer holding the part of the word being stemmed; modified in place.</param>
// NOTE(review): the return statements are not visible; presumably true when a character
// was removed and false otherwise — confirm.
490 private bool removeI(StringBuilder stemmingZone)
// The length test comes first so the indexer is never used on an empty buffer.
492 if (stemmingZone.Length > 0
493 && stemmingZone[stemmingZone.Length - 1] == I)
495 stemmingZone.Length = stemmingZone.Length - 1;
/// <summary>
/// Drops the last character of <paramref name="stemmingZone"/> when the buffer is
/// non-empty and that character is the soft sign (U+044C).
/// </summary>
/// <param name="stemmingZone">Buffer holding the part of the word being stemmed; modified in place.</param>
// NOTE(review): the return statements are not visible; presumably true when a character
// was removed and false otherwise — confirm.
509 private bool removeSoft(StringBuilder stemmingZone)
// The length test comes first so the indexer is never used on an empty buffer.
511 if (stemmingZone.Length > 0
512 && stemmingZone[stemmingZone.Length - 1] == SOFT)
514 stemmingZone.Length = stemmingZone.Length - 1;
/// <summary>
/// Stems <paramref name="input"/>: the text before offset RV is kept unchanged and the
/// remainder is run through the ending-removal steps below.
/// </summary>
/// <param name="input">The word to stem.</param>
/// <returns>The untouched prefix <c>input.Substring(0, RV)</c> followed by the reduced remainder.</returns>
// NOTE(review): braces and any lines between the visible statements are missing, so
// which statements sit inside the perfectiveGerund branch cannot be told from here;
// presumably the reflexive and adjectival/Verb/noun steps do — confirm.
529 public virtual String Stem(String input)
// Runs before RV is read below.
531 markPositions(input);
// Work on a mutable copy of everything from RV onward.
534 StringBuilder stemmingZone =
new StringBuilder(input.Substring(RV));
538 if (!perfectiveGerund(stemmingZone))
540 reflexive(stemmingZone);
// || short-circuits: later steps in this chain run only if the earlier ones returned false.
545 adjectival(stemmingZone)
546 || Verb(stemmingZone)
547 || noun(stemmingZone);
// Clean-up steps; their boolean results are ignored.
550 removeI(stemmingZone);
552 derivational(stemmingZone);
554 Superlative(stemmingZone);
555 UndoubleN(stemmingZone);
556 removeSoft(stemmingZone);
// Re-attach the prefix that was never part of the stemming zone.
558 return input.Substring(0, RV) + stemmingZone.ToString();
/// <summary>
/// Tries to remove an ending from superlativeEndings at the tail of <paramref name="stemmingZone"/>.
/// </summary>
/// <param name="stemmingZone">Buffer holding the part of the word being stemmed; modified in place.</param>
/// <returns>The result of findAndRemoveEnding for the superlative endings table.</returns>
566 private bool Superlative(StringBuilder stemmingZone)
568 return findAndRemoveEnding(stemmingZone, superlativeEndings);
/// <summary>
/// Shortens <paramref name="stemmingZone"/> by one character when it ends with an
/// entry of the doubleN table.
/// </summary>
/// <param name="stemmingZone">Buffer holding the part of the word being stemmed; modified in place.</param>
// NOTE(review): the declaration of doubleN and the return statements are not visible;
// presumably doubleN holds the single ending {N, N} — confirm.
576 private bool UndoubleN(StringBuilder stemmingZone)
// findEnding only reports the match; nothing has been removed yet.
581 if (findEnding(stemmingZone, doubleN) != 0)
// Exactly one character is dropped.
583 stemmingZone.Length = stemmingZone.Length - 1;
/// <summary>
/// Tries to remove a verb ending using two findAndRemoveEnding calls joined by ||;
/// the second uses verbEndings2.
/// </summary>
/// <param name="stemmingZone">Buffer holding the part of the word being stemmed; modified in place.</param>
/// <returns>True when either findAndRemoveEnding call returns true.</returns>
// NOTE(review): the arguments of the first call are not visible; presumably
// stemmingZone, verbEndings1 and verb1Predessors — confirm.
597 private bool Verb(StringBuilder stemmingZone)
599 return findAndRemoveEnding(
// Consulted only when the first call returned false.
603 || findAndRemoveEnding(stemmingZone, verbEndings2);
/// <summary>
/// Static convenience entry point: stems <paramref name="theWord"/> by calling Stem on
/// a stemmer instance.
/// </summary>
/// <param name="theWord">The word to stem.</param>
/// <returns>The value returned by Stem for <paramref name="theWord"/>.</returns>
// NOTE(review): the declaration of stemmer is not visible; presumably a new instance is
// created for each call — confirm.
609 public static String StemWord(String theWord)
612 return stemmer.
Stem(theWord);