21 namespace Lucene.Net.Analysis.BR
/// <summary>
/// Public entry point of the stemmer: takes a term and returns a string.
/// Only the signature and one assignment are visible in this excerpt; the rest of the
/// body is elided, so the exact processing order is not documented here.
/// </summary>
/// <param name="term">The input term.</param>
47 public string Stem(
string term)
// Stores the raw input and the working value CT, joined by a semicolon, in TERM.
66 TERM = term +
";" + CT;
/// <summary>
/// Scans <paramref name="term"/> one character at a time and tests each character with
/// <c>char.IsLetter</c>.
/// </summary>
/// <param name="term">Term to inspect; it is dereferenced without a visible null check.</param>
/// <returns>
/// NOTE(review): both return statements are elided from this excerpt; presumably false on
/// the first non-letter and true otherwise - confirm against the full file.
/// </returns>
93 private bool isStemmable(
string term)
95 for (
int c = 0; c < term.Length; c++)
// Branch taken for any character that is not classified as a letter.
98 if (!
char.IsLetter(term[c]))
/// <summary>
/// Decides whether a term has an acceptable length for processing.
/// </summary>
/// <param name="term">Term to measure; must not be null.</param>
/// <returns>True when the term is longer than 2 and shorter than 30 characters.</returns>
private bool isIndexable(string term)
{
    int length = term.Length;
    bool longEnough = length > 2;
    bool shortEnough = length < 30;
    return shortEnough && longEnough;
}
/// <summary>
/// Tests a single character against a chain of literal comparisons joined with ||.
/// </summary>
/// <param name="value">Character to classify.</param>
/// <returns>True if the character equals one of the compared literals.</returns>
121 private bool isVowel(
char value)
// Only the first comparison (the letter a) is visible; the remaining alternatives of the
// chain are elided from this excerpt.
123 return (value ==
'a') ||
/// <summary>
/// Derives a trailing region of <paramref name="value"/>. The visible code tests positions
/// for a vowel, then for a non-vowel, and returns the text after index j.
/// </summary>
/// <param name="value">Word to analyse.</param>
/// <returns>
/// The substring starting at index j + 1. NOTE(review): the loop exits, the second loop
/// header and the fallback returns are elided from this excerpt - confirm what is returned
/// when no such position is found.
/// </returns>
139 private string getR1(
string value)
// i is the index of the last character; the scan below stops before reaching it.
151 i = value.Length - 1;
152 for (j = 0; j < i; j++)
// Vowel test on the current position.
154 if (isVowel(value[j]))
// Non-vowel test; presumably j has already advanced to the first vowel here - TODO confirm.
168 if (!(isVowel(value[j])))
// The region begins immediately after position j.
179 return value.Substring(j + 1);
/// <summary>
/// Derives the RV region of <paramref name="value"/>. Three outcomes are visible: the text
/// after a vowel found from index 2 onward, the text after a non-vowel found from index 2
/// onward, or the text after the third character.
/// </summary>
/// <param name="value">Word to analyse.</param>
/// <returns>
/// One of the substrings described above. NOTE(review): several guards and the final
/// fallback are elided from this excerpt - confirm the result when none of the cases apply.
/// </returns>
199 private string getRV(
string value)
// i is the index of the last character.
210 i = value.Length - 1;
// Case 1: the word has a second character and it is not a vowel.
214 if ((i > 0) && !isVowel(value[1]))
// Look for a vowel starting at the third character.
217 for (j = 2; j < i; j++)
219 if (isVowel(value[j]))
227 return value.Substring(j + 1);
// Case 2 (its guard condition is elided): look for a non-vowel starting at the third character.
239 for (j = 2; j < i; j++)
241 if (!isVowel(value[j]))
249 return value.Substring(j + 1);
// Case 3 (its guard condition is elided): the region after the third character.
257 return value.Substring(3);
/// <summary>
/// Normalises a term: lower-cases it, then walks it character by character and appends a
/// plain letter to r for each accented character matched by the tests below.
/// </summary>
/// <param name="value">Term to normalise.</param>
/// <returns>
/// NOTE(review): the return statement and the handling of unmatched characters are elided
/// from this excerpt; presumably r is returned - confirm.
/// </returns>
271 private string changeTerm(
string value)
// NOTE(review): the parameterless ToLower uses the current culture - confirm that is intended.
282 value = value.ToLower();
283 for (j = 0; j < value.Length; j++)
// Accented forms of a (only the first alternative is visible) are replaced by a.
285 if ((value[j] ==
'á') ||
289 r = r +
"a";
continue;
// Accented forms of e are replaced by e.
291 if ((value[j] ==
'é') ||
294 r = r +
"e";
continue;
// The condition guarding this branch is elided; the branch appends i.
298 r = r +
"i";
continue;
// Accented forms of o are replaced by o.
300 if ((value[j] ==
'ó') ||
304 r = r +
"o";
continue;
// Accented forms of u are replaced by u.
306 if ((value[j] ==
'ú') ||
309 r = r +
"u";
continue;
// The condition guarding this branch is elided; the branch appends c.
313 r = r +
"c";
continue;
// The condition guarding this branch is elided; the branch appends n.
317 r = r +
"n";
continue;
/// <summary>
/// Tests whether <paramref name="value"/> ends with <paramref name="suffix"/>.
/// </summary>
/// <param name="value">String to inspect.</param>
/// <param name="suffix">Candidate ending.</param>
/// <returns>
/// The result of comparing the tail of value with suffix. NOTE(review): the results of the
/// two guards below are elided from this excerpt; presumably false - confirm.
/// </returns>
331 private bool suffix(
string value,
string suffix)
// Guard: either argument is null.
335 if ((value == null) || (suffix == null))
// Guard: the candidate ending is longer than the string itself.
340 if (suffix.Length > value.Length)
// Compare the last suffix.Length characters of value with suffix.
345 return value.Substring(value.Length - suffix.Length).Equals(suffix);
/// <summary>
/// Replaces the ending <paramref name="toReplace"/> of <paramref name="value"/> with
/// <paramref name="changeTo"/>, using removeSuffix to strip the old ending.
/// </summary>
/// <param name="value">String to rewrite.</param>
/// <param name="toReplace">Ending to strip.</param>
/// <param name="changeTo">Text appended in place of the stripped ending.</param>
/// <returns>
/// The stripped value followed by changeTo. NOTE(review): the results of the guard and of
/// the nothing-was-stripped branch are elided from this excerpt; presumably the original
/// value - confirm.
/// </returns>
353 private string replaceSuffix(
string value,
string toReplace,
string changeTo)
// Guard on null arguments (the last alternative of the chain is elided from this excerpt).
358 if ((value == null) ||
359 (toReplace == null) ||
365 vvalue = removeSuffix(value, toReplace);
// removeSuffix handed back an equal string, so nothing was stripped.
367 if (value.Equals(vvalue))
373 return vvalue + changeTo;
/// <summary>
/// Strips the ending <paramref name="toRemove"/> from <paramref name="value"/>.
/// </summary>
/// <param name="value">String to shorten.</param>
/// <param name="toRemove">Ending to strip.</param>
/// <returns>
/// value without its last toRemove.Length characters. NOTE(review): the result of the guard
/// below is elided from this excerpt; presumably value unchanged - confirm.
/// </returns>
382 private string removeSuffix(
string value,
string toRemove)
// Guard: null arguments, or value does not end with toRemove.
385 if ((value == null) ||
386 (toRemove == null) ||
387 !suffix(value, toRemove))
// Keep everything before the ending.
392 return value.Substring(0, value.Length - toRemove.Length);
/// <summary>
/// Tests whether <paramref name="value"/> ends with <paramref name="_suffix"/> and the text
/// immediately before that ending itself ends with <paramref name="preceded"/>.
/// </summary>
/// <param name="value">String to inspect.</param>
/// <param name="_suffix">Ending that must be present.</param>
/// <param name="preceded">Text that must come directly before the ending.</param>
/// <returns>
/// The result of the suffix test on the shortened value. NOTE(review): the result of the
/// guard below is elided from this excerpt; presumably false - confirm.
/// </returns>
400 private bool suffixPreceded(
string value,
string _suffix,
string preceded)
// Guard: null arguments (one alternative is elided from this excerpt) or value does not end with _suffix.
403 if ((value == null) ||
405 (preceded == null) ||
406 !suffix(value, _suffix))
// Strip the ending, then check what the remainder ends with.
411 return suffix(removeSuffix(value, _suffix), preceded);
/// <summary>
/// Initialises the working term CT from <paramref name="term"/>: normalises it with
/// changeTerm, then trims one leading and one trailing punctuation character when present.
/// </summary>
/// <param name="term">Raw input term.</param>
420 private void createCT(
string term)
// NOTE(review): CT is dereferenced right after this call - confirm changeTerm cannot hand back null here.
422 CT = changeTerm(term);
// Shorter than two characters: nothing to trim.
424 if (CT.Length < 2)
return;
// Leading-character test. Only the double-quote alternative is visible in this excerpt;
// the other alternatives of the chain are elided. On a match the first character is dropped.
427 if ((CT[0] ==
'"') ||
437 CT = CT.Substring(1);
// Re-check the length after the possible trim.
440 if (CT.Length < 2)
return;
// Trailing-character test: hyphen, comma, semicolon, period, question mark, exclamation
// mark, single quote or double quote. On a match the last character is dropped.
443 if ((CT[CT.Length - 1] ==
'-') ||
444 (CT[CT.Length - 1] ==
',') ||
445 (CT[CT.Length - 1] ==
';') ||
446 (CT[CT.Length - 1] ==
'.') ||
447 (CT[CT.Length - 1] ==
'?') ||
448 (CT[CT.Length - 1] ==
'!') ||
449 (CT[CT.Length - 1] ==
'\'') ||
450 (CT[CT.Length - 1] ==
'"')
453 CT = CT.Substring(0, CT.Length - 1);
// Standard-suffix removal pass. The enclosing method header is elided from this excerpt;
// presumably this is step1, the counterpart of step3, step4 and step5.
// Each rule fires when the working term CT ends with the suffix and the same suffix also
// ends the required region (R2 for most rules, R1 for amente, RV for iras). The first rule
// that fires rewrites CT and returns true. Lines elided from the excerpt are left untouched.
467 if (CT == null)
return false;
// Seven-letter suffixes.
470 if (suffix(CT,
"uciones") && suffix(R2,
"uciones"))
472 CT = replaceSuffix(CT,
"uciones",
"u");
return true;
478 if (suffix(CT,
"imentos") && suffix(R2,
"imentos"))
480 CT = removeSuffix(CT,
"imentos");
return true;
482 if (suffix(CT,
"amentos") && suffix(R2,
"amentos"))
484 CT = removeSuffix(CT,
"amentos");
return true;
// Six-letter suffixes.
486 if (suffix(CT,
"adores") && suffix(R2,
"adores"))
488 CT = removeSuffix(CT,
"adores");
return true;
490 if (suffix(CT,
"adoras") && suffix(R2,
"adoras"))
492 CT = removeSuffix(CT,
"adoras");
return true;
// Fix: the result of replaceSuffix used to be discarded here, so CT stayed unchanged even
// though the rule reported success. It is now assigned like every other replace rule.
494 if (suffix(CT,
"logias") && suffix(R2,
"logias"))
496 CT = replaceSuffix(CT,
"logias",
"log");
return true;
498 if (suffix(CT,
"encias") && suffix(R2,
"encias"))
500 CT = replaceSuffix(CT,
"encias",
"ente");
return true;
// amente is the only rule in this pass that is checked against R1 instead of R2.
502 if (suffix(CT,
"amente") && suffix(R1,
"amente"))
504 CT = removeSuffix(CT,
"amente");
return true;
506 if (suffix(CT,
"idades") && suffix(R2,
"idades"))
508 CT = removeSuffix(CT,
"idades");
return true;
// Five-letter suffixes.
515 if (suffix(CT,
"acoes") && suffix(R2,
"acoes"))
517 CT = removeSuffix(CT,
"acoes");
return true;
519 if (suffix(CT,
"imento") && suffix(R2,
"imento"))
521 CT = removeSuffix(CT,
"imento");
return true;
523 if (suffix(CT,
"amento") && suffix(R2,
"amento"))
525 CT = removeSuffix(CT,
"amento");
return true;
527 if (suffix(CT,
"adora") && suffix(R2,
"adora"))
529 CT = removeSuffix(CT,
"adora");
return true;
531 if (suffix(CT,
"ismos") && suffix(R2,
"ismos"))
533 CT = removeSuffix(CT,
"ismos");
return true;
535 if (suffix(CT,
"istas") && suffix(R2,
"istas"))
537 CT = removeSuffix(CT,
"istas");
return true;
539 if (suffix(CT,
"logia") && suffix(R2,
"logia"))
541 CT = replaceSuffix(CT,
"logia",
"log");
return true;
543 if (suffix(CT,
"ucion") && suffix(R2,
"ucion"))
545 CT = replaceSuffix(CT,
"ucion",
"u");
return true;
547 if (suffix(CT,
"encia") && suffix(R2,
"encia"))
549 CT = replaceSuffix(CT,
"encia",
"ente");
return true;
551 if (suffix(CT,
"mente") && suffix(R2,
"mente"))
553 CT = removeSuffix(CT,
"mente");
return true;
555 if (suffix(CT,
"idade") && suffix(R2,
"idade"))
557 CT = removeSuffix(CT,
"idade");
return true;
// Four-letter suffixes.
564 if (suffix(CT,
"acao") && suffix(R2,
"acao"))
566 CT = removeSuffix(CT,
"acao");
return true;
568 if (suffix(CT,
"ezas") && suffix(R2,
"ezas"))
570 CT = removeSuffix(CT,
"ezas");
return true;
572 if (suffix(CT,
"icos") && suffix(R2,
"icos"))
574 CT = removeSuffix(CT,
"icos");
return true;
576 if (suffix(CT,
"icas") && suffix(R2,
"icas"))
578 CT = removeSuffix(CT,
"icas");
return true;
580 if (suffix(CT,
"ismo") && suffix(R2,
"ismo"))
582 CT = removeSuffix(CT,
"ismo");
return true;
584 if (suffix(CT,
"avel") && suffix(R2,
"avel"))
586 CT = removeSuffix(CT,
"avel");
return true;
588 if (suffix(CT,
"ivel") && suffix(R2,
"ivel"))
590 CT = removeSuffix(CT,
"ivel");
return true;
592 if (suffix(CT,
"ista") && suffix(R2,
"ista"))
594 CT = removeSuffix(CT,
"ista");
return true;
596 if (suffix(CT,
"osos") && suffix(R2,
"osos"))
598 CT = removeSuffix(CT,
"osos");
return true;
600 if (suffix(CT,
"osas") && suffix(R2,
"osas"))
602 CT = removeSuffix(CT,
"osas");
return true;
604 if (suffix(CT,
"ador") && suffix(R2,
"ador"))
606 CT = removeSuffix(CT,
"ador");
return true;
608 if (suffix(CT,
"ivas") && suffix(R2,
"ivas"))
610 CT = removeSuffix(CT,
"ivas");
return true;
612 if (suffix(CT,
"ivos") && suffix(R2,
"ivos"))
614 CT = removeSuffix(CT,
"ivos");
return true;
// iras is rewritten to ir only when it lies in RV and is directly preceded by e.
616 if (suffix(CT,
"iras") &&
617 suffix(RV,
"iras") &&
618 suffixPreceded(CT,
"iras",
"e"))
620 CT = replaceSuffix(CT,
"iras",
"ir");
return true;
// Three-letter suffixes.
627 if (suffix(CT,
"eza") && suffix(R2,
"eza"))
629 CT = removeSuffix(CT,
"eza");
return true;
631 if (suffix(CT,
"ico") && suffix(R2,
"ico"))
633 CT = removeSuffix(CT,
"ico");
return true;
635 if (suffix(CT,
"ica") && suffix(R2,
"ica"))
637 CT = removeSuffix(CT,
"ica");
return true;
639 if (suffix(CT,
"oso") && suffix(R2,
"oso"))
641 CT = removeSuffix(CT,
"oso");
return true;
643 if (suffix(CT,
"osa") && suffix(R2,
"osa"))
645 CT = removeSuffix(CT,
"osa");
return true;
647 if (suffix(CT,
"iva") && suffix(R2,
"iva"))
649 CT = removeSuffix(CT,
"iva");
return true;
651 if (suffix(CT,
"ivo") && suffix(R2,
"ivo"))
653 CT = removeSuffix(CT,
"ivo");
return true;
// NOTE(review): one source line between the two visible conditions of this rule is elided
// from the excerpt; presumably a region test mirroring the iras rule - confirm.
655 if (suffix(CT,
"ira") &&
657 suffixPreceded(CT,
"ira",
"e"))
659 CT = replaceSuffix(CT,
"ira",
"ir");
return true;
// Verb-suffix removal pass. The enclosing method header is elided from this excerpt;
// presumably this is step2.
// Rules are tried from the longest suffix to the shortest. The first suffix found at the
// end of RV is stripped from CT and the pass returns true. Lines elided from the excerpt
// are left untouched.
678 if (RV == null)
return false;
// Seven-letter suffixes.
683 if (suffix(RV,
"issemos"))
685 CT = removeSuffix(CT,
"issemos");
return true;
687 if (suffix(RV,
"essemos"))
689 CT = removeSuffix(CT,
"essemos");
return true;
691 if (suffix(RV,
"assemos"))
693 CT = removeSuffix(CT,
"assemos");
return true;
695 if (suffix(RV,
"ariamos"))
697 CT = removeSuffix(CT,
"ariamos");
return true;
699 if (suffix(RV,
"eriamos"))
701 CT = removeSuffix(CT,
"eriamos");
return true;
703 if (suffix(RV,
"iriamos"))
705 CT = removeSuffix(CT,
"iriamos");
return true;
// Six-letter suffixes.
712 if (suffix(RV,
"iremos"))
714 CT = removeSuffix(CT,
"iremos");
return true;
716 if (suffix(RV,
"eremos"))
718 CT = removeSuffix(CT,
"eremos");
return true;
720 if (suffix(RV,
"aremos"))
722 CT = removeSuffix(CT,
"aremos");
return true;
724 if (suffix(RV,
"avamos"))
726 CT = removeSuffix(CT,
"avamos");
return true;
728 if (suffix(RV,
"iramos"))
730 CT = removeSuffix(CT,
"iramos");
return true;
732 if (suffix(RV,
"eramos"))
734 CT = removeSuffix(CT,
"eramos");
return true;
736 if (suffix(RV,
"aramos"))
738 CT = removeSuffix(CT,
"aramos");
return true;
740 if (suffix(RV,
"asseis"))
742 CT = removeSuffix(CT,
"asseis");
return true;
744 if (suffix(RV,
"esseis"))
746 CT = removeSuffix(CT,
"esseis");
return true;
748 if (suffix(RV,
"isseis"))
750 CT = removeSuffix(CT,
"isseis");
return true;
752 if (suffix(RV,
"arieis"))
754 CT = removeSuffix(CT,
"arieis");
return true;
756 if (suffix(RV,
"erieis"))
758 CT = removeSuffix(CT,
"erieis");
return true;
760 if (suffix(RV,
"irieis"))
762 CT = removeSuffix(CT,
"irieis");
return true;
// Five-letter suffixes.
770 if (suffix(RV,
"irmos"))
772 CT = removeSuffix(CT,
"irmos");
return true;
774 if (suffix(RV,
"iamos"))
776 CT = removeSuffix(CT,
"iamos");
return true;
778 if (suffix(RV,
"armos"))
780 CT = removeSuffix(CT,
"armos");
return true;
782 if (suffix(RV,
"ermos"))
784 CT = removeSuffix(CT,
"ermos");
return true;
786 if (suffix(RV,
"areis"))
788 CT = removeSuffix(CT,
"areis");
return true;
790 if (suffix(RV,
"ereis"))
792 CT = removeSuffix(CT,
"ereis");
return true;
794 if (suffix(RV,
"ireis"))
796 CT = removeSuffix(CT,
"ireis");
return true;
798 if (suffix(RV,
"asses"))
800 CT = removeSuffix(CT,
"asses");
return true;
802 if (suffix(RV,
"esses"))
804 CT = removeSuffix(CT,
"esses");
return true;
806 if (suffix(RV,
"isses"))
808 CT = removeSuffix(CT,
"isses");
return true;
810 if (suffix(RV,
"astes"))
812 CT = removeSuffix(CT,
"astes");
return true;
814 if (suffix(RV,
"assem"))
816 CT = removeSuffix(CT,
"assem");
return true;
818 if (suffix(RV,
"essem"))
820 CT = removeSuffix(CT,
"essem");
return true;
822 if (suffix(RV,
"issem"))
824 CT = removeSuffix(CT,
"issem");
return true;
826 if (suffix(RV,
"ardes"))
828 CT = removeSuffix(CT,
"ardes");
return true;
830 if (suffix(RV,
"erdes"))
832 CT = removeSuffix(CT,
"erdes");
return true;
834 if (suffix(RV,
"irdes"))
836 CT = removeSuffix(CT,
"irdes");
return true;
838 if (suffix(RV,
"ariam"))
840 CT = removeSuffix(CT,
"ariam");
return true;
842 if (suffix(RV,
"eriam"))
844 CT = removeSuffix(CT,
"eriam");
return true;
846 if (suffix(RV,
"iriam"))
848 CT = removeSuffix(CT,
"iriam");
return true;
850 if (suffix(RV,
"arias"))
852 CT = removeSuffix(CT,
"arias");
return true;
854 if (suffix(RV,
"erias"))
856 CT = removeSuffix(CT,
"erias");
return true;
858 if (suffix(RV,
"irias"))
860 CT = removeSuffix(CT,
"irias");
return true;
862 if (suffix(RV,
"estes"))
864 CT = removeSuffix(CT,
"estes");
return true;
866 if (suffix(RV,
"istes"))
868 CT = removeSuffix(CT,
"istes");
return true;
// NOTE(review): duplicate of the areis rule earlier in this group; it can never fire
// because the earlier rule already returned for the same suffix.
870 if (suffix(RV,
"areis"))
872 CT = removeSuffix(CT,
"areis");
return true;
874 if (suffix(RV,
"aveis"))
876 CT = removeSuffix(CT,
"aveis");
return true;
// Four-letter suffixes.
883 if (suffix(RV,
"aria"))
885 CT = removeSuffix(CT,
"aria");
return true;
887 if (suffix(RV,
"eria"))
889 CT = removeSuffix(CT,
"eria");
return true;
891 if (suffix(RV,
"iria"))
893 CT = removeSuffix(CT,
"iria");
return true;
895 if (suffix(RV,
"asse"))
897 CT = removeSuffix(CT,
"asse");
return true;
899 if (suffix(RV,
"esse"))
901 CT = removeSuffix(CT,
"esse");
return true;
903 if (suffix(RV,
"isse"))
905 CT = removeSuffix(CT,
"isse");
return true;
907 if (suffix(RV,
"aste"))
909 CT = removeSuffix(CT,
"aste");
return true;
911 if (suffix(RV,
"este"))
913 CT = removeSuffix(CT,
"este");
return true;
915 if (suffix(RV,
"iste"))
917 CT = removeSuffix(CT,
"iste");
return true;
919 if (suffix(RV,
"arei"))
921 CT = removeSuffix(CT,
"arei");
return true;
923 if (suffix(RV,
"erei"))
925 CT = removeSuffix(CT,
"erei");
return true;
927 if (suffix(RV,
"irei"))
929 CT = removeSuffix(CT,
"irei");
return true;
931 if (suffix(RV,
"aram"))
933 CT = removeSuffix(CT,
"aram");
return true;
935 if (suffix(RV,
"eram"))
937 CT = removeSuffix(CT,
"eram");
return true;
939 if (suffix(RV,
"iram"))
941 CT = removeSuffix(CT,
"iram");
return true;
943 if (suffix(RV,
"avam"))
945 CT = removeSuffix(CT,
"avam");
return true;
947 if (suffix(RV,
"arem"))
949 CT = removeSuffix(CT,
"arem");
return true;
951 if (suffix(RV,
"erem"))
953 CT = removeSuffix(CT,
"erem");
return true;
955 if (suffix(RV,
"irem"))
957 CT = removeSuffix(CT,
"irem");
return true;
959 if (suffix(RV,
"ando"))
961 CT = removeSuffix(CT,
"ando");
return true;
963 if (suffix(RV,
"endo"))
965 CT = removeSuffix(CT,
"endo");
return true;
967 if (suffix(RV,
"indo"))
969 CT = removeSuffix(CT,
"indo");
return true;
971 if (suffix(RV,
"arao"))
973 CT = removeSuffix(CT,
"arao");
return true;
975 if (suffix(RV,
"erao"))
977 CT = removeSuffix(CT,
"erao");
return true;
979 if (suffix(RV,
"irao"))
981 CT = removeSuffix(CT,
"irao");
return true;
983 if (suffix(RV,
"adas"))
985 CT = removeSuffix(CT,
"adas");
return true;
987 if (suffix(RV,
"idas"))
989 CT = removeSuffix(CT,
"idas");
return true;
991 if (suffix(RV,
"aras"))
993 CT = removeSuffix(CT,
"aras");
return true;
995 if (suffix(RV,
"eras"))
997 CT = removeSuffix(CT,
"eras");
return true;
999 if (suffix(RV,
"iras"))
1001 CT = removeSuffix(CT,
"iras");
return true;
1003 if (suffix(RV,
"avas"))
1005 CT = removeSuffix(CT,
"avas");
return true;
1007 if (suffix(RV,
"ares"))
1009 CT = removeSuffix(CT,
"ares");
return true;
1011 if (suffix(RV,
"eres"))
1013 CT = removeSuffix(CT,
"eres");
return true;
1015 if (suffix(RV,
"ires"))
1017 CT = removeSuffix(CT,
"ires");
return true;
1019 if (suffix(RV,
"ados"))
1021 CT = removeSuffix(CT,
"ados");
return true;
1023 if (suffix(RV,
"idos"))
1025 CT = removeSuffix(CT,
"idos");
return true;
1027 if (suffix(RV,
"amos"))
1029 CT = removeSuffix(CT,
"amos");
return true;
1031 if (suffix(RV,
"emos"))
1033 CT = removeSuffix(CT,
"emos");
return true;
1035 if (suffix(RV,
"imos"))
1037 CT = removeSuffix(CT,
"imos");
return true;
// NOTE(review): duplicate of the iras rule earlier in this group; it can never fire.
1039 if (suffix(RV,
"iras"))
1041 CT = removeSuffix(CT,
"iras");
return true;
1043 if (suffix(RV,
"ieis"))
1045 CT = removeSuffix(CT,
"ieis");
return true;
// Three-letter suffixes.
1052 if (suffix(RV,
"ada"))
1054 CT = removeSuffix(CT,
"ada");
return true;
1056 if (suffix(RV,
"ida"))
1058 CT = removeSuffix(CT,
"ida");
return true;
1060 if (suffix(RV,
"ara"))
1062 CT = removeSuffix(CT,
"ara");
return true;
1064 if (suffix(RV,
"era"))
1066 CT = removeSuffix(CT,
"era");
return true;
// Fix: this rule used to test for ira but strip ava. It therefore never changed CT,
// returned true for every ira word before the real ira rule further down could run, and
// left ava without any rule. The condition now matches the suffix that is removed.
1068 if (suffix(RV,
"ava"))
1070 CT = removeSuffix(CT,
"ava");
return true;
1072 if (suffix(RV,
"iam"))
1074 CT = removeSuffix(CT,
"iam");
return true;
1076 if (suffix(RV,
"ado"))
1078 CT = removeSuffix(CT,
"ado");
return true;
1080 if (suffix(RV,
"ido"))
1082 CT = removeSuffix(CT,
"ido");
return true;
1084 if (suffix(RV,
"ias"))
1086 CT = removeSuffix(CT,
"ias");
return true;
1088 if (suffix(RV,
"ais"))
1090 CT = removeSuffix(CT,
"ais");
return true;
1092 if (suffix(RV,
"eis"))
1094 CT = removeSuffix(CT,
"eis");
return true;
1096 if (suffix(RV,
"ira"))
1098 CT = removeSuffix(CT,
"ira");
return true;
1100 if (suffix(RV,
"ear"))
1102 CT = removeSuffix(CT,
"ear");
return true;
// Two-letter suffixes.
1109 if (suffix(RV,
"ia"))
1111 CT = removeSuffix(CT,
"ia");
return true;
1113 if (suffix(RV,
"ei"))
1115 CT = removeSuffix(CT,
"ei");
return true;
1117 if (suffix(RV,
"am"))
1119 CT = removeSuffix(CT,
"am");
return true;
1121 if (suffix(RV,
"em"))
1123 CT = removeSuffix(CT,
"em");
return true;
1125 if (suffix(RV,
"ar"))
1127 CT = removeSuffix(CT,
"ar");
return true;
1129 if (suffix(RV,
"er"))
1131 CT = removeSuffix(CT,
"er");
return true;
1133 if (suffix(RV,
"ir"))
1135 CT = removeSuffix(CT,
"ir");
return true;
1137 if (suffix(RV,
"as"))
1139 CT = removeSuffix(CT,
"as");
return true;
1141 if (suffix(RV,
"es"))
1143 CT = removeSuffix(CT,
"es");
return true;
1145 if (suffix(RV,
"is"))
1147 CT = removeSuffix(CT,
"is");
return true;
1149 if (suffix(RV,
"eu"))
1151 CT = removeSuffix(CT,
"eu");
return true;
1153 if (suffix(RV,
"iu"))
1155 CT = removeSuffix(CT,
"iu");
return true;
// NOTE(review): duplicate of the iu rule directly above; it can never fire.
1157 if (suffix(RV,
"iu"))
1159 CT = removeSuffix(CT,
"iu");
return true;
1161 if (suffix(RV,
"ou"))
1163 CT = removeSuffix(CT,
"ou");
return true;
/// <summary>
/// Drops a trailing i from the working term CT when RV ends with i and that i is
/// directly preceded by c inside RV. Does nothing when RV is null.
/// </summary>
private void step3()
{
    if (RV == null)
    {
        return;
    }

    // Both tests are kept, in the same order, so the second one is only evaluated
    // when the first succeeds.
    bool endsWithI = suffix(RV, "i");
    if (!endsWithI)
    {
        return;
    }

    if (!suffixPreceded(RV, "i", "c"))
    {
        return;
    }

    CT = removeSuffix(CT, "i");
}
/// <summary>
/// Residual-suffix pass: strips the first of os, a, i, o (tried in that order) that
/// ends RV from the working term CT. Does nothing when RV is null or no ending matches.
/// </summary>
private void step4()
{
    if (RV == null)
    {
        return;
    }

    // Order matters: os must be tried before the single-letter endings.
    string[] endings = new string[] { "os", "a", "i", "o" };
    foreach (string ending in endings)
    {
        if (suffix(RV, ending))
        {
            CT = removeSuffix(CT, ending);
            return;
        }
    }
}
/// <summary>
/// Final clean-up pass: when RV ends with e, strips that e from the working term CT and,
/// if the e is preceded by gu or ci inside RV, also strips the u or i in front of it.
/// Does nothing when RV is null or does not end with e.
/// </summary>
private void step5()
{
    if (RV == null || !suffix(RV, "e"))
    {
        return;
    }

    // Pick the companion letter with the same precedence as before: gu is tested first,
    // and ci is only consulted when gu does not apply.
    string companion = null;
    if (suffixPreceded(RV, "e", "gu"))
    {
        companion = "u";
    }
    else if (suffixPreceded(RV, "e", "ci"))
    {
        companion = "i";
    }

    CT = removeSuffix(CT, "e");
    if (companion != null)
    {
        CT = removeSuffix(CT, companion);
    }
}
// Builds a one-line diagnostic dump of the stemmer state: TERM, CT, RV, R1 and R2, each
// rendered as a parenthesised name = value pair.
// (The enclosing method header is elided from this excerpt.)
1255 return " (TERM = " + TERM +
")" +
1256 " (CT = " + CT +
")" +
1257 " (RV = " + RV +
")" +
1258 " (R1 = " + R1 +
")" +
1259 " (R2 = " + R2 +
")";