25 using System.Collections;
27 namespace Lucene.Net.Analysis.De
// Reusable working buffer: Stem() empties it and passes it to RemoveParticleDenotion()
// (any further users of it are outside this excerpt).
39 private StringBuilder sb =
new StringBuilder();
// Added to buffer.Length in Strip()'s minimum-length checks. Presumably the number of
// characters folded away by Substitute() - its updates are not visible here, TODO confirm.
44 protected int substCount = 0;
// Stems a single term. Steps visible in this excerpt: lower-case the term, test it with
// IsStemmable, empty the shared buffer sb, and run RemoveParticleDenotion on sb.
// The remaining statements of this method (including its return) are not shown here.
51 internal String Stem( String term )
// NOTE(review): the parameterless ToLower() uses the current thread culture, so the
// result can vary with the host locale (e.g. Turkish dotless i) - consider passing an
// explicit culture or using ToLowerInvariant().
54 term = term.ToLower();
// Gate on IsStemmable; what is done with a non-stemmable term is outside this excerpt.
55 if ( !IsStemmable( term ) )
// Empty the reused buffer before working on this term.
58 sb.Remove(0, sb.Length);
65 RemoveParticleDenotion( sb );
// Scans term one character at a time and returns false at the first character for which
// Char.IsLetter is false. The value returned for an all-letter term is outside this excerpt.
// Note that an empty term never enters the loop, so it never hits the 'return false' below.
74 private bool IsStemmable( String term )
76 for (
int c = 0; c < term.Length; c++ )
78 if ( !Char.IsLetter(term[c]))
return false;
// Removes suffixes from the end of buffer. Each pass of the loop takes the first matching
// branch: "nd", "em", "er" (two characters) or a single trailing 'e', 's', 'n' or 't'.
// The declaration and updates of doMore are not visible in this excerpt.
92 private void Strip( StringBuilder buffer )
// The loop only runs while more than 3 characters remain, so the Length - 2 and
// Length - 1 accesses below are always in range.
95 while ( doMore && buffer.Length > 3 )
// "nd" is only removed when buffer.Length + substCount exceeds 5.
// NOTE(review): buffer.ToString() copies the whole buffer just to inspect two characters;
// indexing buffer[...] directly, as the single-character branches do, would avoid that.
97 if ( ( buffer.Length + substCount > 5 ) &&
98 buffer.ToString().Substring(buffer.Length - 2, 2).Equals(
"nd" ) )
100 buffer.Remove( buffer.Length - 2, 2 );
// "em" and "er" use a lower threshold: buffer.Length + substCount must exceed 4.
102 else if ( ( buffer.Length + substCount > 4 ) &&
103 buffer.ToString().Substring( buffer.Length - 2, 2).Equals(
"em" ) )
105 buffer.Remove( buffer.Length - 2, 2 );
107 else if ( ( buffer.Length + substCount > 4 ) &&
108 buffer.ToString().Substring( buffer.Length - 2, 2).Equals(
"er" ) )
110 buffer.Remove( buffer.Length - 2, 2 );
// Single-character suffixes have no extra length condition beyond the loop guard.
112 else if ( buffer[buffer.Length - 1] ==
'e' )
114 buffer.Remove(buffer.Length - 1, 1);
116 else if ( buffer[buffer.Length - 1] ==
's' )
118 buffer.Remove(buffer.Length - 1, 1);
120 else if ( buffer[buffer.Length - 1] ==
'n' )
122 buffer.Remove(buffer.Length - 1, 1);
125 else if ( buffer[buffer.Length - 1] ==
't')
127 buffer.Remove(buffer.Length - 1, 1);
// Post-processing of the stripped buffer. Two adjustments are visible in this excerpt:
// a buffer ending in "erin*" loses its final character, and a final 'z' becomes 'x'.
140 private void Optimize( StringBuilder buffer )
// Length > 5 keeps Substring(Length - 5, 5) in range. The '*' in the literal is the marker
// character that Resubstitute() also tests for.
143 if ( buffer.Length > 5 && buffer.ToString().Substring(buffer.Length - 5, 5).Equals(
"erin*" ))
145 buffer.Remove(buffer.Length - 1, 1);
// NOTE(review): no length check is visible before this access; an empty buffer would make
// buffer[buffer.Length - 1] throw - confirm that callers never pass an empty buffer.
149 if ( buffer[buffer.Length - 1] == (
'z') )
151 buffer[buffer.Length - 1] =
'x';
// Looks for the four-character sequence "gege" in buffer. Buffers of 4 characters or
// fewer are left alone. What is done when a match is found is outside this excerpt.
159 private void RemoveParticleDenotion( StringBuilder buffer )
161 if ( buffer.Length > 4 )
// c stops at Length - 4 so that Substring(c, 4) always stays inside the buffer.
163 for (
int c = 0; c < buffer.Length - 3; c++ )
// NOTE(review): buffer.ToString() copies the whole buffer on every iteration; comparing
// buffer[c] .. buffer[c + 3] directly would avoid the repeated copies.
165 if ( buffer.ToString().Substring( c, 4 ).Equals(
"gege" ) )
// Walks buffer once and rewrites special characters and letter groups in place.
// protected virtual, so a subclass can replace this step.
// Many statements inside the branches (e.g. whatever is written to buffer[c]) are not
// visible in this excerpt; only the tests and the Insert/Remove calls are.
184 protected virtual void Substitute( StringBuilder buffer )
// buffer.Length is re-read on every iteration, so the bound follows the Insert/Remove
// calls made inside the loop.
187 for (
int c = 0; c < buffer.Length; c++ )
// A character equal to its predecessor (doubled letter). The action is not visible here;
// presumably it is replaced by the '*' marker that Resubstitute() expands - confirm.
190 if (c > 0 && buffer[c] == buffer[c - 1])
// Umlauts: the replacement written for each is outside this excerpt.
195 else if (buffer[c] ==
'ä')
199 else if (buffer[c] ==
'ö')
203 else if (buffer[c] ==
'ü')
// Sharp s: an 's' is inserted right after position c, growing the buffer by one.
208 else if (buffer[c] ==
'ß')
211 buffer.Insert(c + 1,
's');
// The group checks below read buffer[c + 1], so at least one following character is needed.
216 if ( c < buffer.Length - 1 )
// "sch": needs two following characters, hence the extra c < Length - 2 test; the 'c'
// and 'h' are removed. Presumably buffer[c] is set to the '$' marker - not visible, confirm.
219 if ( ( c < buffer.Length - 2 ) && buffer[c] ==
's' &&
220 buffer[c + 1] ==
'c' && buffer[c + 2] ==
'h' )
223 buffer.Remove(c + 1, 2);
// Two-letter groups: in each case the second letter is removed. The marker presumably
// stored at buffer[c] is not visible here; Resubstitute() re-inserts 'h', 'i', 'e', 'g'
// and 't' after the markers it recognises.
226 else if ( buffer[c] ==
'c' && buffer[c + 1] ==
'h' )
229 buffer.Remove(c + 1, 1);
232 else if ( buffer[c] ==
'e' && buffer[c + 1] ==
'i' )
235 buffer.Remove(c + 1, 1);
238 else if ( buffer[c] ==
'i' && buffer[c + 1] ==
'e' )
241 buffer.Remove(c + 1, 1);
244 else if ( buffer[c] ==
'i' && buffer[c + 1] ==
'g' )
247 buffer.Remove(c + 1, 1);
250 else if ( buffer[c] ==
's' && buffer[c + 1] ==
't' )
253 buffer.Remove(c + 1, 1);
// Walks buffer and expands marker characters: for each of '$', '§', '%', '&', '#' and '!'
// the letters dropped from a group are inserted again after position c.
// Whatever is written to buffer[c] itself in each branch is outside this excerpt.
266 private void Resubstitute( StringBuilder buffer )
// buffer.Length is re-read on every iteration, so characters inserted below are visited too.
268 for (
int c = 0; c < buffer.Length; c++ )
// '*': reads the preceding character into x; how x is used is not visible here
// (presumably copied into position c to restore a doubled letter - confirm).
// NOTE(review): buffer[c - 1] is out of range when c is 0; this relies on '*' never being
// the first character - confirm against Substitute().
270 if ( buffer[c] ==
'*' )
272 char x = buffer[c - 1];
// '$': inserts "ch" after position c.
275 else if ( buffer[c] ==
'$' )
278 buffer.Insert( c + 1,
new char[]{
'c',
'h'}, 0, 2);
// '§': inserts 'h' after position c.
280 else if ( buffer[c] ==
'§' )
283 buffer.Insert( c + 1,
'h' );
// '%': inserts 'i' after position c.
285 else if ( buffer[c] ==
'%' )
288 buffer.Insert( c + 1,
'i' );
// '&': inserts 'e' after position c.
290 else if ( buffer[c] ==
'&' )
293 buffer.Insert( c + 1,
'e' );
// '#': inserts 'g' after position c.
295 else if ( buffer[c] ==
'#' )
298 buffer.Insert( c + 1,
'g' );
// '!': inserts 't' after position c.
300 else if ( buffer[c] ==
'!' )
303 buffer.Insert( c + 1,
't' );