25 namespace Lucene.Net.Analysis.Fr
// Working buffer for the term being stemmed: Stem() inserts the term into it and the
// delete/replace helpers shorten it by assigning sb.Length.
43 private StringBuilder sb =
new StringBuilder();
// Secondary buffer; SetStrings() passes it to RetrieveR to compute R2.
48 private StringBuilder tb =
new StringBuilder();
// Flag tested by Stem() (together with `suite`) in its branch conditions.
86 private bool modified;
// Stems `term` and returns the content of the working buffer `sb` as a string.
// The term is checked with IsStemmable, lower-cased, inserted at the start of `sb`
// and passed through TreatVowels before the `modified` / `suite` flags are tested.
// NOTE(review): String.ToLower() without an explicit culture uses the current culture
// in .NET (e.g. Turkish dotless i); ToLowerInvariant() may be what is intended — confirm.
95 protected internal String Stem( String term ) {
96 if ( !IsStemmable( term ) ) {
101 term = term.ToLower();
105 sb.Insert( 0, term );
111 sb = TreatVowels( sb );
// NOTE(review): these two conditions are not complements of each other
// (`!modified || suite` vs `modified || suite`); both hold whenever `suite` is true.
117 if (!modified || suite)
127 if (modified || suite)
136 return sb.ToString();
// Recomputes the region strings: RV and R1 are derived from `sb`, R2 from `tb`.
// NOTE(review): presumably `tb` is filled from R1 before R2 is computed — confirm.
143 private void SetStrings() {
146 RV = RetrieveRV( sb );
147 R1 = RetrieveR( sb );
152 R2 = RetrieveR( tb );
// First stemming step: runs a fixed sequence of suffix deletions and replacements,
// each tested against one of the region strings (R0, R1, R2 or RV) through the
// DeleteFrom / ReplaceFrom / DeleteButSuffixFrom* / DeleteFromIfTestVowelBeforeIn helpers.
// Within every suffix table the longer (plural) form is listed before its shorter form.
// The upper-case 'U' / 'I' inside some suffixes ("iqUe", "iqU") are literal characters
// matched against the buffer; presumably they are markers produced by TreatVowels — confirm.
162 private void Step1( ) {
163 String[] suffix = {
"ances",
"iqUes",
"ismes",
"ables",
"istes",
"ance",
"iqUe",
"isme",
"able",
"iste" };
164 DeleteFrom( R2, suffix );
166 ReplaceFrom( R2,
new String[] {
"logies",
"logie" },
"log" );
167 ReplaceFrom( R2,
new String[] {
"usions",
"utions",
"usion",
"ution" },
"u" );
168 ReplaceFrom( R2,
new String[] {
"ences",
"ence" },
"ent" );
170 String[] search = {
"atrices",
"ateurs",
"ations",
"atrice",
"ateur",
"ation"};
171 DeleteButSuffixFromElseReplace( R2, search,
"ic",
true, R0,
"iqU" );
173 DeleteButSuffixFromElseReplace( R2,
new String[] {
"ements",
"ement" },
"eus",
false, R0,
"eux" );
174 DeleteButSuffixFrom( R2,
new String[] {
"ements",
"ement" },
"ativ", false );
175 DeleteButSuffixFrom( R2,
new String[] {
"ements",
"ement" },
"iv", false );
176 DeleteButSuffixFrom( R2,
new String[] {
"ements",
"ement" },
"abl", false );
177 DeleteButSuffixFrom( R2,
new String[] {
"ements",
"ement" },
"iqU", false );
179 DeleteFromIfTestVowelBeforeIn( R1,
new String[] {
"issements",
"issement" },
false, R0 );
180 DeleteFrom( RV,
new String[] {
"ements",
"ement" } );
182 DeleteButSuffixFromElseReplace(R2,
new [] {
"it\u00e9s",
"it\u00e9" },
"abil",
false, R0,
"abl");
183 DeleteButSuffixFromElseReplace(R2,
new [] {
"it\u00e9s",
"it\u00e9" },
"ic",
false, R0,
"iqU");
184 DeleteButSuffixFrom(R2,
new [] {
"it\u00e9s",
"it\u00e9" },
"iv",
true);
186 String[] autre = {
"ifs",
"ives",
"if",
"ive" };
187 DeleteButSuffixFromElseReplace( R2, autre,
"icat",
false, R0,
"iqU" );
// NOTE(review): `from` is R2 here, the same string as `source`. The helper tests
// source.EndsWith(prefix + suffix) before from.EndsWith(prefix + suffix), so the
// "iqU" replace branch cannot be reached for this call — confirm whether R0 was intended.
188 DeleteButSuffixFromElseReplace( R2, autre,
"at",
true, R2,
"iqU" );
190 ReplaceFrom( R0,
new String[] {
"eaux" },
"eau" );
192 ReplaceFrom( R1,
new String[] {
"aux" },
"al" );
// Empty prefix: the "delete" tests reduce to "R2 ends with the suffix", and the
// replace branch applies when only R1 ends with it.
194 DeleteButSuffixFromElseReplace( R2,
new String[] {
"euses",
"euse" },
"",
true, R1,
"eux" );
196 DeleteFrom( R2,
new String[] {
"eux" } );
// `temp` receives the bool result of each of the following calls.
// NOTE(review): its declaration and how it is used afterwards are not visible here.
200 temp = ReplaceFrom( RV,
new String[] {
"amment" },
"ant" );
203 temp = ReplaceFrom( RV,
new String[] {
"emment" },
"ent" );
206 temp = DeleteFromIfTestVowelBeforeIn( RV,
new String[] {
"ments",
"ment" },
true, RV );
// Step 2a: passes a table of "i"-initial / "î"-initial endings to
// DeleteFromIfTestVowelBeforeIn with RV as both source and `from` and vowel = false,
// and returns that helper's result.
// NOTE(review): "issais" appears twice in the table; the second entry is redundant.
220 private bool Step2A() {
221 String[] search = {
"\u00eemes",
"\u00eetes",
"iraIent",
"irait",
"irais",
"irai",
"iras",
"ira",
222 "irent",
"iriez",
"irez",
"irions",
"irons",
"iront",
223 "issaIent",
"issais",
"issantes",
"issante",
"issants",
"issant",
224 "issait",
"issais",
"issions",
"issons",
"issiez",
"issez",
"issent",
225 "isses",
"isse",
"ir",
"is",
"\u00eet",
"it",
"ies",
"ie",
"i" };
226 return DeleteFromIfTestVowelBeforeIn( RV, search,
false, RV );
// Step 2b: three suffix passes —
//   1. DeleteFrom on RV with the "e"/"é"-initial endings in `suffix`;
//   2. DeleteButSuffixFrom on RV with the "a"-initial endings in `search`, prefix "e",
//      and without = true (so the bare ending is also removed);
//   3. DeleteFrom on R2 for "ions".
// NOTE(review): "aIent" appears twice in `search`; the second entry is redundant.
234 private void Step2B() {
235 String[] suffix = {
"eraIent",
"erais",
"erait",
"erai",
"eras",
"erions",
"eriez",
236 "erons",
"eront",
"erez",
"\u00e8rent",
"era",
"\u00e9es",
"iez",
237 "\u00e9e",
"\u00e9s",
"er",
"ez",
"\u00e9" };
238 DeleteFrom( RV, suffix );
240 String[] search = {
"assions",
"assiez",
"assent",
"asses",
"asse",
"aIent",
241 "antes",
"aIent",
"Aient",
"ante",
"\u00e2mes",
"\u00e2tes",
"ants",
"ant",
242 "ait",
"a\u00eet",
"ais",
"Ait",
"A\u00eet",
"Ais",
"\u00e2t",
"as",
"ai",
"Ai",
"a" };
243 DeleteButSuffixFrom( RV, search,
"e",
true );
245 DeleteFrom( R2,
new String[] {
"ions" } );
// Step 3: reads the last character of `sb` and overwrites it with 'i' or 'c'.
// NOTE(review): the conditions selecting each replacement are not visible here —
// presumably 'Y' -> 'i' and 'ç' -> 'c'; confirm. sb[sb.Length - 1] also requires a
// non-empty buffer; verify a length guard precedes it.
252 private void Step3() {
255 char ch = sb[ sb.Length -1];
258 sb[sb.Length -1] =
'i' ;
263 sb[sb.Length -1] =
'c';
// Step 4: residual-suffix cleanup.
// First inspects the last two characters of `sb`; when the second-to-last character is
// none of a, i, o, u, è, s the buffer is shortened by one character.
// NOTE(review): the test applied to `ch` (the last character) is not visible here —
// presumably it must be 's'; confirm. Indexing sb.Length - 2 needs at least two characters.
// Then removes "ion" from R2 when RV ends in "sion" / "tion", replaces (I)ère / (I)er
// endings in RV with "i", deletes a final "e" in RV, and deletes "ë" in RV when R0 ends in "guë".
273 private void Step4() {
276 char ch = sb[ sb.Length -1];
279 char b = sb[ sb.Length -2];
// NOTE(review): 'è' is a raw non-ASCII literal here while the suffix tables use \u escapes;
// the comparison depends on the file being read with the right encoding.
280 if (b !=
'a' && b !=
'i' && b !=
'o' && b !=
'u' && b !=
'è' && b !=
's')
282 sb.Length = sb.Length - 1;
287 bool found = DeleteFromIfPrecededIn( R2,
new String[] {
"ion" }, RV,
"s" );
289 found = DeleteFromIfPrecededIn( R2,
new String[] {
"ion" }, RV,
"t" );
291 ReplaceFrom(RV,
new String[] {
"I\u00e8re",
"i\u00e8re",
"Ier",
"ier" },
"i");
292 DeleteFrom( RV,
new String[] {
"e" } );
293 DeleteFromIfPrecededIn(RV,
new String[] {
"\u00eb" }, R0,
"gu");
// Step 5: tests whether R0 ends in one of the doubled-consonant endings
// "enn", "onn", "ett", "ell" or "eill"; the buffer `sb` is then shortened by one character.
// NOTE(review): R0 is dereferenced directly; a null guard, if any, is not visible here.
300 private void Step5() {
303 if (R0.EndsWith(
"enn") || R0.EndsWith(
"onn") || R0.EndsWith(
"ett") || R0.EndsWith(
"ell") || R0.EndsWith(
"eill"))
305 sb.Length = sb.Length - 1;
// Step 6: when R0 is non-null and non-empty, walks R0 from its last character backwards,
// tracking whether a vowel / consonant has been seen and looking for 'é' or 'è'.
// Acts only when a position was recorded (pos > -1), a consonant was seen and no vowel was.
// NOTE(review): the action taken is not visible here — presumably the accented
// character at `pos` is replaced by a plain 'e'; confirm.
315 private void Step6() {
316 if (R0!=null && R0.Length>0)
318 bool seenVowel =
false;
319 bool seenConson =
false;
321 for (
int i = R0.Length-1; i > -1; i--)
328 if (ch ==
'é' || ch ==
'è')
344 if (pos > -1 && seenConson && !seenVowel)
// Removes a suffix from `sb` when it ends `source` AND `from` ends with prefix + suffix.
//   source - region string that must end with one of the `search` suffixes
//   search - candidate suffixes, tried in array order
//   from   - region string in which `prefix` must directly precede the suffix (may be null)
//   prefix - text required immediately before the suffix in `from`
// Only the suffix is removed from `sb`; the prefix stays.
// NOTE(review): the bool result presumably reports whether a deletion happened — confirm.
// `source` is dereferenced without a visible null check.
358 private bool DeleteFromIfPrecededIn( String source, String[] search, String from, String prefix ) {
362 for (
int i = 0; i < search.Length; i++) {
363 if ( source.EndsWith( search[i] ))
365 if (from!=null && from.EndsWith( prefix + search[i] ))
367 sb.Length = sb.Length - search[i].Length;
// Removes a suffix from `sb` depending on whether the character just before it is a vowel.
//   source - region string that must end with one of the `search` suffixes
//   search - candidate suffixes, tried in array order
//   vowel  - expected vowel-ness of the preceding character
//   from   - region string that must be at least one character longer than the suffix
// Does nothing when `source` or `from` is null. The preceding character is read from `sb`.
// NOTE(review): the comparison between `test` and `vowel` is not visible here —
// presumably the suffix is removed when they are equal; confirm.
387 private bool DeleteFromIfTestVowelBeforeIn( String source, String[] search,
bool vowel, String from ) {
389 if (source!=null && from!=null)
391 for (
int i = 0; i < search.Length; i++) {
392 if ( source.EndsWith( search[i] ))
// Ensures there is room for one character before the suffix inside `from`.
394 if ((search[i].Length + 1) <= from.Length)
396 bool test = IsVowel(sb[sb.Length -(search[i].Length+1)]);
399 sb.Length = sb.Length - search[i].Length;
// Removes prefix + suffix from `sb` when `source` ends with that combination; otherwise,
// if `without` is true and `source` ends with the bare suffix, removes only the suffix.
//   source  - region string tested with EndsWith
//   search  - candidate suffixes, tried in array order
//   prefix  - text that may precede the suffix and is removed together with it
//   without - also allow deletion of the suffix when the prefix is absent
// NOTE(review): `source` is dereferenced without a visible null check.
420 private void DeleteButSuffixFrom( String source, String[] search, String prefix,
bool without ) {
423 for (
int i = 0; i < search.Length; i++) {
424 if ( source.EndsWith( prefix + search[i] ))
426 sb.Length = sb.Length - (prefix.Length + search[i].Length);
431 else if ( without && source.EndsWith( search[i] ))
433 sb.Length = sb.Length - search[i].Length;
// Three-way variant of DeleteButSuffixFrom. For each suffix, in order:
//   1. `source` ends with prefix + suffix          -> remove prefix + suffix from `sb`;
//   2. else `from` (non-null) ends with prefix + suffix -> remove prefix + suffix from `sb`
//      (presumably followed by appending `replace` — that statement is not visible here; confirm);
//   3. else `without` and `source` ends with suffix -> remove the suffix only.
// Because test 1 precedes test 2, passing the same string as `source` and `from`
// makes branch 2 unreachable.
452 private void DeleteButSuffixFromElseReplace( String source, String[] search, String prefix,
bool without, String from, String replace ) {
455 for (
int i = 0; i < search.Length; i++) {
456 if ( source.EndsWith( prefix + search[i] ))
458 sb.Length = sb.Length - (prefix.Length + search[i].Length);
463 else if ( from!=null && from.EndsWith( prefix + search[i] ))
466 sb.Length = sb.Length - (prefix.Length + search[i].Length);
473 else if ( without && source.EndsWith( search[i] ))
475 sb.Length = sb.Length - search[i].Length;
// For each suffix in `search` (array order): when `source` ends with it, the suffix is
// cut from the end of `sb`.
// NOTE(review): appending `replace` and the bool result are not visible here —
// presumably `replace` is inserted after the cut and the result reports success; confirm.
// `source` is dereferenced without a visible null check.
491 private bool ReplaceFrom( String source, String[] search, String replace ) {
495 for (
int i = 0; i < search.Length; i++) {
496 if ( source.EndsWith( search[i] ))
499 sb.Length = sb.Length - search[i].Length;
// For each entry of `suffix` (array order): when `source` ends with it, that many
// characters are cut from the end of `sb`.
// NOTE(review): `source` is dereferenced without a visible null check; whether the
// loop stops after the first match is not visible here.
518 private void DeleteFrom(String source, String[] suffix ) {
521 for (
int i = 0; i < suffix.Length; i++) {
522 if (source.EndsWith( suffix[i] ))
524 sb.Length = sb.Length - suffix[i].Length;
// Character predicate used throughout the stemmer to classify buffer characters.
// NOTE(review): presumably returns true for French vowels (including accented ones) —
// the character set is not visible here; confirm.
539 private bool IsVowel(
char ch) {
// Extracts a region string from `buffer`: scans forward for a vowel, then from `pos`
// for a non-vowel, and returns the text after that consonant index when it is not the
// last character.
// NOTE(review): the assignments to `pos` / `consonne` and the fallback return value
// are not visible here — presumably null when no such region exists; confirm.
573 private String RetrieveR( StringBuilder buffer ) {
574 int len = buffer.Length;
576 for (
int c = 0; c < len; c++) {
577 if (IsVowel( buffer[ c ] ))
586 for (
int c = pos; c < len; c++) {
587 if (!IsVowel(buffer[ c ] ))
// Only return a region when at least one character follows the consonant.
593 if (consonne > -1 && (consonne+1) < len)
594 return buffer.ToString(consonne + 1, len - (consonne+1));
// Extracts the RV region from `buffer`, considered only when it is longer than 3 characters:
//   - if the first two characters are both vowels, RV is everything from index 3;
//   - otherwise the buffer is scanned from index 1 for a vowel and RV is the text after `pos`.
// NOTE(review): the assignment to `pos` and the fallback return value are not visible here.
610 private String RetrieveRV( StringBuilder buffer ) {
611 int len = buffer.Length;
612 if ( buffer.Length > 3)
614 if ( IsVowel(buffer[ 0 ] ) && IsVowel(buffer[ 1 ] )) {
615 return buffer.ToString(3, len - 3);
620 for (
int c = 1; c < len; c++) {
621 if (IsVowel( buffer[ c ] ))
628 return buffer.ToString(pos + 1, len - (pos+1));
// Walks `buffer` character by character and examines 'u' / 'y' (and other characters)
// relative to their neighbours: a 'y' next to a vowel, a 'u' after 'q', or a character
// between / beside vowels.
// NOTE(review): the modifications performed are not visible here — presumably the
// matched letters are upper-cased so they are treated as consonants (the suffix tables
// contain 'U' / 'I' markers such as "iqUe", "aIent"); confirm.
// NOTE(review): buffer[c + 1] and buffer[c - 1] need bounds protection; the first/last
// position branches appear to provide it, but the c == 0 test is not visible here and a
// one-character buffer should be verified.
647 private StringBuilder TreatVowels( StringBuilder buffer ) {
648 for (
int c = 0; c < buffer.Length; c++ ) {
649 char ch = buffer[ c ] ;
655 if (ch ==
'y' && IsVowel(buffer[ c + 1 ] ))
// Last position: only the preceding character can be inspected.
659 else if (c == buffer.Length-1)
661 if (ch ==
'u' && buffer[ c - 1 ] ==
'q')
663 if (ch ==
'y' && IsVowel(buffer[ c - 1 ] ))
670 if (buffer[ c - 1] ==
'q')
672 else if (IsVowel(buffer[ c - 1 ] ) && IsVowel(buffer[ c + 1 ] ))
677 if (IsVowel(buffer[ c - 1 ] ) && IsVowel(buffer[ c + 1 ] ))
682 if (IsVowel(buffer[ c - 1 ] ) || IsVowel(buffer[ c + 1 ] ))
// Decides whether `term` should be stemmed by inspecting every character: each one is
// tested with char.IsLetter and char.IsUpper.
// NOTE(review): the outcomes of those tests are not visible here — presumably a
// non-letter rejects the term and upper-case letters are counted/limited; confirm.
696 private bool IsStemmable( String term ) {
699 for (
int c = 0; c < term.Length; c++ ) {
701 if ( !
char.IsLetter( term[c] ) ) {
705 if (
char.IsUpper( term[ c] ) ) {