25 using System.Collections;
26 using System.Collections.Generic;
28 namespace Lucene.Net.Analysis.Nl
// Working buffer for the stemmer; Stem passes it to getRIndex, and the step methods all take a StringBuilder.
44 private StringBuilder sb =
new StringBuilder();
// NOTE(review): no read or write of _removedE is visible in this excerpt; presumably it records
// whether an earlier step stripped a trailing 'e' - confirm against the step methods.
45 private bool _removedE;
// Optional term-to-stem lookup consulted by Stem; may be null (Stem null-checks it before use).
46 private IDictionary<string, string> _stemDict;
/// <summary>
/// Stems a single term. The term is lower-cased, tested with isStemmable, looked up in the
/// optional stem dictionary, and the region indexes _R1 and _R2 are computed with getRIndex.
/// Only part of the method is visible in this excerpt.
/// </summary>
/// <param name="term">The term to stem.</param>
/// <returns>The dictionary entry when one exists for the term; other return paths are not visible here.</returns>
58 public String Stem(String term)
// NOTE(review): ToLower() without a culture argument uses the current culture - confirm that is intended.
60 term = term.ToLower();
// Terms rejected by isStemmable take a separate path (the guarded statement is not visible in this excerpt).
61 if (!isStemmable(term))
// Dictionary override: a non-null entry for the term is returned directly.
// NOTE(review): ContainsKey followed by two indexer reads is three lookups; TryGetValue would need one.
63 if (_stemDict != null && _stemDict.ContainsKey(term))
64 if (_stemDict[term] is String)
65 return (String)_stemDict[term];
// _R1 is the index getRIndex yields when scanning from position 0, raised to at least 3.
75 _R1 = getRIndex(sb, 0);
76 _R1 = Math.Max(3, _R1);
// _R2 is produced by the same scan, started at _R1.
79 _R2 = getRIndex(sb, _R1);
/// <summary>
/// Tries the suffixes "ene" and "en", in that order, against the buffer and removes a matching
/// suffix when the accompanying conditions hold.
/// </summary>
/// <param name="sb">The buffer to inspect and modify.</param>
/// <returns>A bool; the returned values are not visible in this excerpt.</returns>
87 private bool enEnding(StringBuilder sb)
// Longer suffix first, so "ene" is tried before "en".
89 String[] enend =
new String[] {
"ene",
"en" };
90 for (
int i = 0; i < enend.Length; i++)
92 String end = enend[i];
// NOTE(review): the buffer is converted to a string on every iteration.
93 String s = sb.ToString();
// Position where the candidate suffix would start.
94 int index = s.Length - end.Length;
// The character just before the suffix (index - 1) is validated by isValidEnEnding.
// A further condition between these two lines is not visible in this excerpt.
95 if (s.EndsWith(end) &&
97 isValidEnEnding(sb, index - 1)
// Strip the suffix from the buffer.
100 sb.Remove(index, end.Length);
/// <summary>
/// Step 1: examines the buffer for the suffixes "heden", "se" and "s" relative to the _R1 region.
/// </summary>
/// <param name="sb">The buffer being stemmed.</param>
109 private void step1(StringBuilder sb)
// Guard: _R1 lies at or beyond the end of the buffer. The guarded statement is not visible in this excerpt.
111 if (_R1 >= sb.Length)
114 String s = sb.ToString();
// Number of characters from _R1 to the end of the buffer.
115 int LengthR1 = sb.Length - _R1;
// "heden": the text from _R1 onward is rewritten with "heden" replaced by "heid".
// NOTE(review): EndsWith tests the whole string, but Replace runs on the R1 substring and substitutes
// every occurrence found there; if the suffix starts before _R1 nothing is replaced. Confirm intended.
118 if (s.EndsWith(
"heden"))
120 var toReplace = sb.ToString(_R1, LengthR1).Replace(
"heden",
"heid");
121 sb.Remove(_R1, LengthR1);
122 sb.Insert(_R1, toReplace);
// "se": the suffix must start at or after _R1, and the character before it must pass isValidSEnding.
// index is assigned inside the condition; its declaration is not visible in this excerpt.
129 if (s.EndsWith(
"se") &&
130 (index = s.Length - 2) >= _R1 &&
131 isValidSEnding(sb, index - 1)
// "s": same test as above for the one-character suffix.
137 if (s.EndsWith(
"s") &&
138 (index = s.Length - 1) >= _R1 &&
139 isValidSEnding(sb, index - 1))
/// <summary>
/// Step 2: inspects the last character of the buffer and the character before it.
/// Most of the body is not visible in this excerpt.
/// </summary>
/// <param name="sb">The buffer being stemmed.</param>
151 private void step2(StringBuilder sb)
// Guard: _R1 lies at or beyond the end of the buffer. The guarded statement is not visible in this excerpt.
154 if (_R1 >= sb.Length)
156 String s = sb.ToString();
// Index of the last character.
157 int index = s.Length - 1;
// Tail of a condition whose start is not visible: the character before the last one must not be a vowel.
160 !isVowel(sb[index - 1]))
/// <summary>
/// Step 3a: tests for the suffix "heid" inside the _R2 region, not preceded by the letter c.
/// The action taken on a match is not visible in this excerpt.
/// </summary>
/// <param name="sb">The buffer being stemmed.</param>
173 private void step3a(StringBuilder sb)
// Guard: _R2 lies at or beyond the end of the buffer. The guarded statement is not visible in this excerpt.
175 if (_R2 >= sb.Length)
177 String s = sb.ToString();
// Position where a four-character suffix would start.
178 int index = s.Length - 4;
// NOTE(review): sb[index - 1] is only reached when index >= _R2, so it is in range as long as
// _R2 is at least 1 - presumably guaranteed by how _R2 is computed; confirm.
179 if (s.EndsWith(
"heid") && index >= _R2 && sb[index - 1] !=
'c')
/// <summary>
/// Step 3b: tests the buffer for the suffixes "end", "ing", "ig", "lijk", "baar" and "bar",
/// each required to start at or after _R2. The actions taken on a match are not visible in this excerpt.
/// </summary>
/// <param name="sb">The buffer being stemmed.</param>
200 private void step3b(StringBuilder sb)
// Guard: _R2 lies at or beyond the end of the buffer. The guarded statement is not visible in this excerpt.
202 if (_R2 >= sb.Length)
204 String s = sb.ToString();
// "end" / "ing": index is set to the start of the three-character suffix (its declaration is not visible here).
207 if ((s.EndsWith(
"end") || s.EndsWith(
"ing")) &&
208 (index = s.Length - 3) >= _R2)
// Is the suffix itself preceded by "ig"?
211 if (sb[index - 2] ==
'i' &&
212 sb[index - 1] ==
'g')
// NOTE(review): this uses the non-short-circuit '&', so sb[index - 3] is read even when
// index - 2 is below _R2; '&&' with the range test first would avoid a possible out-of-range read. Confirm.
214 if (sb[index - 3] !=
'e' & index - 2 >= _R2)
// "ig" inside R2 ...
226 if (s.EndsWith(
"ig") &&
227 (index = s.Length - 2) >= _R2
// ... and not preceded by 'e'.
230 if (sb[index - 1] !=
'e')
// "lijk" inside R2.
234 if (s.EndsWith(
"lijk") &&
235 (index = s.Length - 4) >= _R2
// "baar" inside R2.
242 if (s.EndsWith(
"baar") &&
243 (index = s.Length - 4) >= _R2
// "bar" inside R2.
249 if (s.EndsWith(
"bar") &&
250 (index = s.Length - 3) >= _R2
/// <summary>
/// Step 4: looks at the last four characters of the buffer and may delete the second-to-last character.
/// The conditions between the two visible statements are not visible in this excerpt.
/// </summary>
/// <param name="sb">The buffer being stemmed.</param>
265 private void step4(StringBuilder sb)
// Last four characters of the buffer.
// NOTE(review): this throws if the buffer holds fewer than four characters, unless a guard
// that is not visible in this excerpt rules that out - confirm.
269 String end = sb.ToString(sb.Length - 4, 4);
// Deletes the character at position Length - 2 (the second-to-last one).
281 sb.Remove(sb.Length - 2, 1);
/// <summary>
/// Checks every character of the term with char.IsLetter and rejects the term at the first non-letter.
/// </summary>
/// <param name="term">The term to check.</param>
/// <returns>false when any character is not a letter; the other return value is not visible in this excerpt (presumably true).</returns>
290 private bool isStemmable(String term)
292 for (
int c = 0; c < term.Length; c++)
// First non-letter ends the scan.
294 if (!
char.IsLetter(term[c]))
return false;
/// <summary>
/// Walks every character position of the buffer. The per-character action is not visible in this excerpt.
/// </summary>
/// <param name="buffer">The buffer to process.</param>
302 private void substitute(StringBuilder buffer)
// Visits each index from 0 to buffer.Length - 1.
304 for (
int i = 0; i < buffer.Length; i++)
/// <summary>
/// Decides whether an s-type suffix may follow the character at the given index.
/// The visible test singles out vowels and the letter j; the result returned for each case is not visible in this excerpt.
/// </summary>
/// <param name="sb">The buffer being stemmed.</param>
/// <param name="index">Position of the character to examine; step1 passes the index just before the suffix.</param>
346 private bool isValidSEnding(StringBuilder sb,
int index)
// c is assigned on a line not visible here - presumably sb[index]; confirm.
349 if (isVowel(c) || c ==
'j')
/// <summary>
/// Decides whether an en-type suffix may follow the character at the given index.
/// Only one of its tests is visible in this excerpt.
/// </summary>
/// <param name="sb">The buffer being stemmed.</param>
/// <param name="index">Position of the character to examine; enEnding passes the index just before the suffix.</param>
358 private bool isValidEnEnding(StringBuilder sb,
int index)
// Matches the letters g, e, m at positions index - 2, index - 1 and (presumably, via c) index.
// c is assigned on a line not visible here; the result for this case is not visible either.
// NOTE(review): sb[index - 2] needs index to be at least 2 - confirm an earlier check covers that.
366 if (c ==
'm' && sb[index - 2] ==
'g' && sb[index - 1] ==
'e')
/// <summary>
/// Convenience overload: applies unDouble to the whole buffer by passing its current length as the end index.
/// </summary>
/// <param name="sb">The buffer to modify.</param>
371 private void unDouble(StringBuilder sb)
373 unDouble(sb, sb.Length);
/// <summary>
/// If the first endIndex characters of the buffer end in one of the doubled consonants
/// kk, tt, dd, nn, mm or ff, removes the second letter of that pair.
/// </summary>
/// <param name="sb">The buffer to modify.</param>
/// <param name="endIndex">Length of the prefix to examine; the pair, if any, ends just before this position.</param>
376 private void unDouble(StringBuilder sb,
int endIndex)
// Only the prefix up to endIndex is considered.
378 String s = sb.ToString(0, endIndex);
379 if (s.EndsWith(
"kk") || s.EndsWith(
"tt") || s.EndsWith(
"dd") || s.EndsWith(
"nn") || s.EndsWith(
"mm") || s.EndsWith(
"ff"))
// Drop the last character of the examined prefix.
381 sb.Remove(endIndex - 1, 1);
/// <summary>
/// Scans the buffer for a non-vowel that directly follows a vowel. Stem uses the result for _R1 and _R2.
/// The initialisation of the scan and the returned values are not visible in this excerpt.
/// </summary>
/// <param name="sb">The buffer to scan.</param>
/// <param name="start">Starting position supplied by the caller (0 for _R1, _R1 for _R2).</param>
385 private int getRIndex(StringBuilder sb,
int start)
// i is declared on a line not visible here.
390 for (; i < sb.Length; i++)
// NOTE(review): sb[i - 1] needs i to be at least 1, and Stem calls this with start 0 - confirm
// the initialisation of i accounts for that.
393 if (!isVowel(sb[i]) && isVowel(sb[i - 1]))
/// <summary>
/// Examines the interior characters of the buffer, and finally a trailing y, in relation to neighbouring vowels.
/// The changes made are not visible in this excerpt; presumably matching i and y letters are upper-cased,
/// since reStoreYandI turns upper-case I and Y back to lower case - confirm.
/// </summary>
/// <param name="sb">The buffer being stemmed.</param>
401 private void storeYandI(StringBuilder sb)
// Index of the last character.
406 int last = sb.Length - 1;
// Interior positions only: the first and last characters are excluded from this loop.
408 for (
int i = 1; i < last; i++)
// Both visible tests look at whether the preceding character is a vowel.
414 if (isVowel(sb[i - 1]) &&
422 if (isVowel(sb[i - 1]))
// Trailing y directly after a vowel.
428 if (last > 0 && sb[last] ==
'y' && isVowel(sb[last - 1]))
/// <summary>
/// Rewrites the buffer content with every upper-case I and Y replaced by its lower-case form.
/// </summary>
/// <param name="sb">The buffer being stemmed.</param>
432 private void reStoreYandI(StringBuilder sb)
// Snapshot of the current content.
434 String tmp = sb.ToString();
// NOTE(review): the converted text is inserted at position 0; the buffer is presumably emptied
// on a line not visible in this excerpt, otherwise the content would be duplicated - confirm.
436 sb.Insert(0, tmp.Replace(
"I",
"i").Replace(
"Y",
"y"));
/// <summary>
/// Character predicate used throughout the stemmer to classify a letter as a vowel.
/// The body, and therefore the exact set of characters accepted, is not visible in this excerpt.
/// </summary>
/// <param name="c">The character to classify.</param>
439 private bool isVowel(
char c)
/// <summary>
/// Supplies a term-to-stem dictionary. The body is not visible in this excerpt; presumably it is stored
/// in _stemDict, which Stem consults before stemming - confirm.
/// </summary>
/// <param name="dict">Dictionary keyed by term with the stem as value.</param>
457 protected internal void SetStemDictionary(IDictionary<string, string> dict)