Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
BrazilianStemmer.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /*
19  * A stemmer for Brazilian words.
20  */
21 namespace Lucene.Net.Analysis.BR
22 {
23 
24  public class BrazilianStemmer
25  {
26 
// Working state for the term currently being stemmed. All of these are
// overwritten by Stem() and read by the step methods and by Log().

/// <summary>The original term and the changed term, joined by ';' (kept for Log()).</summary>
private string TERM;

/// <summary>The changed term: the normalised word that the stemming steps shorten.</summary>
private string CT;

/// <summary>Region R1 of the changed term, as computed by getR1(CT); may be null.</summary>
private string R1;

/// <summary>Region R2 of the changed term, as computed by getR1(R1); may be null.</summary>
private string R2;

/// <summary>Region RV of the changed term, as computed by getRV(CT); may be null.</summary>
private string RV;

/// <summary>
/// Creates a stemmer with no term loaded.
/// </summary>
public BrazilianStemmer()
{
}
40 
/// <summary>
/// Stems the given term to a unique <c>discriminator</c>.
/// </summary>
/// <param name="term">The term that should be stemmed.</param>
/// <returns>
/// The discriminator for <c>term</c>; the normalised term unchanged when it
/// contains non-letter characters; or null when the term is null or its
/// normalised form is not indexable (see isIndexable).
/// </returns>
public string Stem(string term)
{
    // A null term cannot be normalised. Report it the same way as any other
    // non-indexable input instead of failing with a NullReferenceException.
    if (term == null)
    {
        return null;
    }

    // Normalise the term into CT (lower case, no accents, no edge punctuation).
    createCT(term);

    if (!isIndexable(CT))
    {
        return null;
    }
    if (!isStemmable(CT))
    {
        return CT;
    }

    R1 = getR1(CT);
    R2 = getR1(R1);
    RV = getRV(CT);
    TERM = term + ";" + CT;

    // Verb suffixes (step 2) are only tried when no standard suffix (step 1) matched.
    bool altered = step1() || step2();

    if (altered)
    {
        step3();
    }
    else
    {
        step4();
    }

    step5();

    return CT;
}
87 
/// <summary>
/// Checks whether a term can be processed correctly.
/// </summary>
/// <param name="term">The term to inspect.</param>
/// <returns>true if, and only if, every character of the term is a letter.</returns>
private bool isStemmable(string term)
{
    foreach (char letter in term)
    {
        // A single non-letter character disqualifies the whole term.
        if (!char.IsLetter(letter))
        {
            return false;
        }
    }
    return true;
}
105 
/// <summary>
/// Checks whether a term is of a length that can be indexed.
/// </summary>
/// <param name="term">The term to inspect.</param>
/// <returns>true if the term is longer than 2 and shorter than 30 characters.</returns>
private bool isIndexable(string term)
{
    int length = term.Length;
    return (length > 2) && (length < 30);
}
115 
/// <summary>
/// Tells whether a character is one of 'a', 'e', 'i', 'o', 'u'.
/// </summary>
/// <param name="value">The character to test; only these five lower-case letters match.</param>
/// <returns>true if the character is a vowel.</returns>
private bool isVowel(char value)
{
    switch (value)
    {
        case 'a':
        case 'e':
        case 'i':
        case 'o':
        case 'u':
            return true;
        default:
            return false;
    }
}
129 
/// <summary>
/// Gets R1: the region after the first non-vowel following a vowel.
/// </summary>
/// <remarks>
/// The last character of the word is never inspected, so the region found
/// always starts inside the word; when no such position exists the result
/// is null rather than an empty string.
/// </remarks>
/// <param name="value">The word to scan; may be null.</param>
/// <returns>null, or a string representing R1.</returns>
private string getR1(string value)
{
    if (value == null)
    {
        return null;
    }

    int last = value.Length - 1;
    int pos = 0;

    // Skip leading non-vowels to reach the first vowel.
    while (pos < last && !isVowel(value[pos]))
    {
        pos++;
    }
    if (pos >= last)
    {
        return null;
    }

    // Skip the run of vowels to reach the first non-vowel after it.
    while (pos < last && isVowel(value[pos]))
    {
        pos++;
    }
    if (pos >= last)
    {
        return null;
    }

    return value.Substring(pos + 1);
}
181 
/// <summary>
/// Gets RV.
/// </summary>
/// <remarks>
/// If the second letter is a consonant, RV is the region after the next
/// following vowel. If the first two letters are vowels, RV is the region
/// after the next consonant. Otherwise, or when that position is not found
/// before the last letter, RV is the region after the third letter,
/// provided the word has more than three letters.
/// </remarks>
/// <param name="value">The word to scan; may be null.</param>
/// <returns>null, or a string representing RV.</returns>
private string getRV(string value)
{
    if (value == null)
    {
        return null;
    }

    int last = value.Length - 1;
    int pos;

    if ((last > 0) && !isVowel(value[1]))
    {
        // Second letter is a consonant: look for the next vowel.
        pos = 2;
        while (pos < last && !isVowel(value[pos]))
        {
            pos++;
        }
        if (pos < last)
        {
            return value.Substring(pos + 1);
        }
    }
    else if ((last > 1) && isVowel(value[0]) && isVowel(value[1]))
    {
        // First two letters are vowels: look for the next consonant.
        pos = 2;
        while (pos < last && isVowel(value[pos]))
        {
            pos++;
        }
        if (pos < last)
        {
            return value.Substring(pos + 1);
        }
    }

    // Fallback: the region after the third letter, when there is one.
    return (last > 2) ? value.Substring(3) : null;
}
262 
/// <summary>
/// Normalises a term: turns it to lower case and replaces accented letters
/// (á â ã, é ê, í, ó ô õ, ú ü) with their base vowel, 'ç' with 'c' and
/// 'ñ' with 'n'. All other characters are kept as they are.
/// </summary>
/// <param name="value">The term to normalise; may be null.</param>
/// <returns>null if <paramref name="value"/> is null, otherwise the transformed string.</returns>
private string changeTerm(string value)
{
    if (value == null)
    {
        return null;
    }

    // Invariant casing keeps the result independent of the current thread
    // culture (e.g. Turkish, where a culture-sensitive lower-casing of 'I'
    // does not yield 'i').
    char[] folded = value.ToLowerInvariant().ToCharArray();

    // Every replacement is one character for one character, so the buffer
    // can be rewritten in place.
    for (int i = 0; i < folded.Length; i++)
    {
        switch (folded[i])
        {
            case 'á':
            case 'â':
            case 'ã':
                folded[i] = 'a';
                break;
            case 'é':
            case 'ê':
                folded[i] = 'e';
                break;
            case 'í':
                folded[i] = 'i';
                break;
            case 'ó':
            case 'ô':
            case 'õ':
                folded[i] = 'o';
                break;
            case 'ú':
            case 'ü':
                folded[i] = 'u';
                break;
            case 'ç':
                folded[i] = 'c';
                break;
            case 'ñ':
                folded[i] = 'n';
                break;
        }
    }

    return new string(folded);
}
325 
/// <summary>
/// Checks whether a string ends with a suffix (exact, character-by-character match).
/// </summary>
/// <param name="value">The string to inspect; may be null.</param>
/// <param name="suffix">The ending to look for; may be null.</param>
/// <returns>
/// true if the string ends with the specified suffix; false if either
/// argument is null or the suffix is longer than the string.
/// </returns>
private bool suffix(string value, string suffix)
{
    if (value == null || suffix == null)
    {
        return false;
    }

    int start = value.Length - suffix.Length;
    if (start < 0)
    {
        return false;
    }

    // Compare the tail of the value against the whole suffix.
    return string.CompareOrdinal(value, start, suffix, 0, suffix.Length) == 0;
}
347 
/// <summary>
/// Replaces a string suffix with another one.
/// </summary>
/// <param name="value">The string to change.</param>
/// <param name="toReplace">The ending to take off.</param>
/// <param name="changeTo">The text to put in its place.</param>
/// <returns>
/// The string with the suffix replaced; <paramref name="value"/> itself when
/// any argument is null or when removing the suffix leaves it unchanged.
/// </returns>
private string replaceSuffix(string value, string toReplace, string changeTo)
{
    if (value == null || toReplace == null || changeTo == null)
    {
        return value;
    }

    string stem = removeSuffix(value, toReplace);

    // Nothing was removed, so there is nothing to replace.
    return value.Equals(stem) ? value : stem + changeTo;
}
376 
/// <summary>
/// Removes a string suffix.
/// </summary>
/// <param name="value">The string to shorten.</param>
/// <param name="toRemove">The ending to take off.</param>
/// <returns>
/// The string without the suffix, or <paramref name="value"/> unchanged when
/// an argument is null or the string does not end with the suffix.
/// </returns>
private string removeSuffix(string value, string toRemove)
{
    bool removable = (value != null) && (toRemove != null) && suffix(value, toRemove);
    if (!removable)
    {
        return value;
    }

    int keep = value.Length - toRemove.Length;
    return value.Substring(0, keep);
}
394 
/// <summary>
/// Checks whether a suffix is immediately preceded by a given string.
/// </summary>
/// <param name="value">The string to inspect.</param>
/// <param name="_suffix">The ending the string must have.</param>
/// <param name="preceded">The text expected right before that ending.</param>
/// <returns>
/// true if the string ends with <paramref name="_suffix"/> and the part
/// before it ends with <paramref name="preceded"/>; false otherwise,
/// including when any argument is null.
/// </returns>
private bool suffixPreceded(string value, string _suffix, string preceded)
{
    if (value == null || _suffix == null || preceded == null)
    {
        return false;
    }
    if (!suffix(value, _suffix))
    {
        return false;
    }

    string head = removeSuffix(value, _suffix);
    return suffix(head, preceded);
}
413 
414 
415 
416 
/// <summary>
/// Creates CT (the changed term): the normalised form of the term with at
/// most one punctuation character stripped from each end.
/// </summary>
/// <remarks>
/// Stripping is skipped whenever the term is shorter than two characters.
/// </remarks>
/// <param name="term">The term to normalise.</param>
private void createCT(string term)
{
    // Characters removed when they appear as the first or last character.
    const string edgePunctuation = "\"'-,;.?!";

    CT = changeTerm(term);

    if (CT.Length < 2)
    {
        return;
    }

    if (edgePunctuation.IndexOf(CT[0]) >= 0)
    {
        CT = CT.Substring(1);
    }

    if (CT.Length < 2)
    {
        return;
    }

    int lastIndex = CT.Length - 1;
    if (edgePunctuation.IndexOf(CT[lastIndex]) >= 0)
    {
        CT = CT.Substring(0, lastIndex);
    }
}
456 
457 
/// <summary>
/// Applies one step-1 rule: when both CT and the given region end with
/// <paramref name="ending"/>, replaces that ending in CT.
/// </summary>
/// <param name="region">The region (R1, R2 or RV) the ending must lie in; may be null.</param>
/// <param name="ending">The suffix to look for.</param>
/// <param name="replacement">The text that replaces the suffix; empty to delete it.</param>
/// <returns>true if CT was changed.</returns>
private bool step1Replace(string region, string ending, string replacement)
{
    if (!suffix(CT, ending) || !suffix(region, ending))
    {
        return false;
    }

    CT = replaceSuffix(CT, ending, replacement);
    return true;
}

/// <summary>
/// Standard suffix removal. Tries the suffixes below from the longest group
/// to the shortest and applies the first rule that matches.
/// </summary>
/// <returns>false if no ending was removed.</returns>
private bool step1()
{
    if (CT == null)
    {
        return false;
    }

    // The rules are evaluated in order and evaluation stops at the first
    // one that changes CT.
    return
        // suffix length = 7
        step1Replace(R2, "uciones", "u")
        || step1Replace(R2, "imentos", "")
        || step1Replace(R2, "amentos", "")

        // suffix length = 6
        || step1Replace(R2, "adores", "")
        || step1Replace(R2, "adoras", "")
        || step1Replace(R2, "logias", "log")
        || step1Replace(R2, "encias", "ente")
        || step1Replace(R1, "amente", "")
        || step1Replace(R2, "idades", "")

        // suffix length = 5 (plus the singular forms "imento" / "amento")
        || step1Replace(R2, "acoes", "")
        || step1Replace(R2, "imento", "")
        || step1Replace(R2, "amento", "")
        || step1Replace(R2, "adora", "")
        || step1Replace(R2, "ismos", "")
        || step1Replace(R2, "istas", "")
        || step1Replace(R2, "logia", "log")
        || step1Replace(R2, "ucion", "u")
        || step1Replace(R2, "encia", "ente")
        || step1Replace(R2, "mente", "")
        || step1Replace(R2, "idade", "")

        // suffix length = 4
        || step1Replace(R2, "acao", "")
        || step1Replace(R2, "ezas", "")
        || step1Replace(R2, "icos", "")
        || step1Replace(R2, "icas", "")
        || step1Replace(R2, "ismo", "")
        || step1Replace(R2, "avel", "")
        || step1Replace(R2, "ivel", "")
        || step1Replace(R2, "ista", "")
        || step1Replace(R2, "osos", "")
        || step1Replace(R2, "osas", "")
        || step1Replace(R2, "ador", "")
        || step1Replace(R2, "ivas", "")
        || step1Replace(R2, "ivos", "")
        // "iras" is only shortened when it lies in RV and follows an 'e'
        || (suffixPreceded(CT, "iras", "e") && step1Replace(RV, "iras", "ir"))

        // suffix length = 3
        || step1Replace(R2, "eza", "")
        || step1Replace(R2, "ico", "")
        || step1Replace(R2, "ica", "")
        || step1Replace(R2, "oso", "")
        || step1Replace(R2, "osa", "")
        || step1Replace(R2, "iva", "")
        || step1Replace(R2, "ivo", "")
        // "ira" is only shortened when it lies in RV and follows an 'e'
        || (suffixPreceded(CT, "ira", "e") && step1Replace(RV, "ira", "ir"));
}
666 
667 
// Verb endings tried by step2, longest first. Within one length group the
// order is irrelevant because two different endings of the same length
// cannot both match.
private static readonly string[] Step2VerbEndings =
{
    // suffix length = 7
    "issemos", "essemos", "assemos", "ariamos", "eriamos", "iriamos",

    // suffix length = 6
    "iremos", "eremos", "aremos", "avamos", "iramos", "eramos", "aramos",
    "asseis", "esseis", "isseis", "arieis", "erieis", "irieis",

    // suffix length = 5
    "irmos", "iamos", "armos", "ermos", "areis", "ereis", "ireis",
    "asses", "esses", "isses", "astes", "assem", "essem", "issem",
    "ardes", "erdes", "irdes", "ariam", "eriam", "iriam",
    "arias", "erias", "irias", "estes", "istes", "aveis",

    // suffix length = 4
    "aria", "eria", "iria", "asse", "esse", "isse", "aste", "este", "iste",
    "arei", "erei", "irei", "aram", "eram", "iram", "avam",
    "arem", "erem", "irem", "ando", "endo", "indo", "arao", "erao", "irao",
    "adas", "idas", "aras", "eras", "iras", "avas", "ares", "eres", "ires",
    "ados", "idos", "amos", "emos", "imos", "ieis",

    // suffix length = 3
    "ada", "ida", "ara", "era", "ava", "iam", "ado", "ido",
    "ias", "ais", "eis", "ira", "ear",

    // suffix length = 2
    "ia", "ei", "am", "em", "ar", "er", "ir", "as", "es", "is", "eu", "iu", "ou"
};

/// <summary>
/// Verb suffixes. Searches for the longest of the known verb endings in RV
/// and, if one is found, deletes it from CT.
/// </summary>
/// <returns>false if no ending was removed.</returns>
private bool step2()
{
    if (RV == null)
    {
        return false;
    }

    foreach (string ending in Step2VerbEndings)
    {
        if (suffix(RV, ending))
        {
            CT = removeSuffix(CT, ending);
            return true;
        }
    }

    // no ending was removed by step2
    return false;
}
1170 
/// <summary>
/// Deletes the suffix 'i' from CT if RV ends with it and it is preceded by 'c'.
/// </summary>
private void step3()
{
    // suffixPreceded already verifies that RV ends with "i".
    bool endsWithCi = (RV != null) && suffixPreceded(RV, "i", "c");
    if (endsWithCi)
    {
        CT = removeSuffix(CT, "i");
    }
}
1185 
/// <summary>
/// Residual suffix. If RV ends with one of the suffixes "os", "a", "i" or
/// "o", the first one that matches (in that order) is deleted from CT.
/// </summary>
private void step4()
{
    if (RV == null)
    {
        return;
    }

    string[] residualEndings = new string[] { "os", "a", "i", "o" };
    foreach (string ending in residualEndings)
    {
        if (suffix(RV, ending))
        {
            CT = removeSuffix(CT, ending);
            return;
        }
    }
}
1215 
/// <summary>
/// If RV ends with 'e', deletes that 'e' from CT; if the 'e' is preceded in
/// RV by "gu" (or "ci"), the 'u' (or 'i') is deleted as well.
/// </summary>
private void step5()
{
    if (RV == null || !suffix(RV, "e"))
    {
        return;
    }

    // Decide on the extra letter first; "gu" takes precedence over "ci".
    string extraLetter = null;
    if (suffixPreceded(RV, "e", "gu"))
    {
        extraLetter = "u";
    }
    else if (suffixPreceded(RV, "e", "ci"))
    {
        extraLetter = "i";
    }

    CT = removeSuffix(CT, "e");
    if (extraLetter != null)
    {
        CT = removeSuffix(CT, extraLetter);
    }
}
1247 
/// <summary>
/// Describes the current state, for logging and debugging purposes.
/// </summary>
/// <returns>A string listing TERM, CT, RV, R1 and R2 (null values show as empty).</returns>
public string Log()
{
    string[] pieces = new string[]
    {
        " (TERM = ", TERM, ")",
        " (CT = ", CT, ")",
        " (RV = ", RV, ")",
        " (R1 = ", R1, ")",
        " (R2 = ", R2, ")"
    };
    return string.Concat(pieces);
}
1261 
1262  }
1263 
1264 }