20 using System.Collections;
22 using Lucene.Net.Analysis;
23 using Lucene.Net.Analysis.Tokenattributes;
24 using Lucene.Net.Util;
27 namespace Lucene.Net.Analysis.AR
// Arabic letters used to build the prefix and suffix tables in this file,
// each declared as a compile-time char constant via its Unicode escape.
// U+0627 ARABIC LETTER ALEF
45 public const char ALEF =
'\u0627';
// U+0628 ARABIC LETTER BEH
46 public const char BEH =
'\u0628';
// U+0629 ARABIC LETTER TEH MARBUTA
47 public const char TEH_MARBUTA =
'\u0629';
// U+062A ARABIC LETTER TEH
48 public const char TEH =
'\u062A';
// U+0641 ARABIC LETTER FEH
49 public const char FEH =
'\u0641';
// U+0643 ARABIC LETTER KAF
50 public const char KAF =
'\u0643';
// U+0644 ARABIC LETTER LAM
51 public const char LAM =
'\u0644';
// U+0646 ARABIC LETTER NOON
52 public const char NOON =
'\u0646';
// U+0647 ARABIC LETTER HEH
53 public const char HEH =
'\u0647';
// U+0648 ARABIC LETTER WAW
54 public const char WAW =
'\u0648';
// U+064A ARABIC LETTER YEH
55 public const char YEH =
'\u064A';
// Each entry starts from "" so that the char constants are concatenated as a
// string (char + char alone would be integer addition) before ToCharArray().
/// <summary>
/// Prefix character sequences that StemPrefix tests in array order; the first
/// entry accepted by StartsWith is the one that gets removed.
/// </summary>
57 public static readonly
char[][] prefixes = {
58 (
"" + ALEF + LAM).ToCharArray(),
59 (
"" + WAW + ALEF + LAM).ToCharArray(),
60 (
"" + BEH + ALEF + LAM).ToCharArray(),
61 (
"" + KAF + ALEF + LAM).ToCharArray(),
62 (
"" + FEH + ALEF + LAM).ToCharArray(),
63 (
"" + LAM + LAM).ToCharArray(),
// Single-character prefix: StartsWith applies a separate length guard
// (prefix.Length == 1 && len < 4) to entries of length one.
64 (
"" + WAW).ToCharArray(),
// Each entry starts from "" so that the char constants are concatenated as a
// string (char + char alone would be integer addition) before ToCharArray().
/// <summary>
/// Suffix character sequences that StemSuffix tests in array order. In the
/// visible code every entry accepted by EndsWith is removed and the scan
/// continues with the shortened length.
/// </summary>
67 public static readonly
char[][] suffixes = {
// Two-character suffixes come first, followed by the single-character ones.
68 (
"" + HEH + ALEF).ToCharArray(),
69 (
"" + ALEF + NOON).ToCharArray(),
70 (
"" + ALEF + TEH).ToCharArray(),
71 (
"" + WAW + NOON).ToCharArray(),
72 (
"" + YEH + NOON).ToCharArray(),
73 (
"" + YEH + HEH).ToCharArray(),
74 (
"" + YEH + TEH_MARBUTA).ToCharArray(),
75 (
"" + HEH).ToCharArray(),
76 (
"" + TEH_MARBUTA).ToCharArray(),
77 (
"" + YEH).ToCharArray(),
/// <summary>
/// Stems the buffer in two passes: StemPrefix first, then StemSuffix, feeding
/// the length produced by the first pass into the second.
/// </summary>
/// <param name="s">Character buffer handed to both passes (they edit it in place via DeleteN).</param>
/// <param name="len">Number of characters of <c>s</c> to consider.</param>
/// <returns>
/// NOTE(review): presumably the length after both passes; the return statement
/// is not visible in this excerpt - confirm.
/// </returns>
88 public int Stem(
char[] s,
int len)
// Prefix pass runs on the original length.
90 len = StemPrefix(s, len);
// Suffix pass runs on whatever length the prefix pass produced.
91 len = StemSuffix(s, len);
/// <summary>
/// Scans the prefixes table in order and, for the first entry that StartsWith
/// accepts, deletes that many characters from the front of the buffer.
/// </summary>
/// <param name="s">Character buffer, edited in place when a prefix matches.</param>
/// <param name="len">Number of characters of <c>s</c> to consider.</param>
/// <returns>
/// The length returned by DeleteN when a prefix matches.
/// NOTE(review): the no-match return is not visible in this excerpt - confirm.
/// </returns>
102 public int StemPrefix(
char[] s,
int len)
104 for (
int i = 0; i < prefixes.Length; i++)
105 if (StartsWith(s, len, prefixes[i]))
// Returning here means at most one prefix is removed per call.
106 return DeleteN(s, 0, len, prefixes[i].Length);
/// <summary>
/// Scans the suffixes table in order and deletes each entry that EndsWith
/// accepts from the end of the buffer.
/// </summary>
/// <param name="s">Character buffer, edited in place when a suffix matches.</param>
/// <param name="len">Number of characters of <c>s</c> to consider.</param>
/// <returns>
/// NOTE(review): presumably the length after all removals; the return
/// statement is not visible in this excerpt - confirm.
/// </returns>
116 public int StemSuffix(
char[] s,
int len)
118 for (
int i = 0; i < suffixes.Length; i++)
119 if (EndsWith(s, len, suffixes[i]))
// len is reassigned rather than returned, so later table entries are tested
// against the already-shortened length (no early exit is visible here).
120 len = DeleteN(s, len - suffixes[i].Length, len, suffixes[i].Length);
/// <summary>
/// Compares the leading characters of <c>s</c> with <c>prefix</c>, after two
/// minimum-length guards.
/// </summary>
/// <param name="s">Character buffer to inspect.</param>
/// <param name="len">Number of characters of <c>s</c> to consider.</param>
/// <param name="prefix">Candidate prefix characters.</param>
/// <returns>
/// NOTE(review): the return statements are not visible in this excerpt;
/// presumably false when a guard trips or a character differs, true
/// otherwise - confirm.
/// </returns>
131 bool StartsWith(
char[] s,
int len,
char[] prefix)
// Guard for single-character prefixes: applies when len is below 4.
133 if (prefix.Length == 1 && len < 4)
// Guard for all other cases: applies when len is below prefix.Length + 2.
137 else if (len < prefix.Length + 2)
// Position-by-position comparison against the start of the buffer.
143 for (
int i = 0; i < prefix.Length; i++)
144 if (s[i] != prefix[i])
/// <summary>
/// Compares the last <c>suffix.Length</c> characters of the first <c>len</c>
/// characters of <c>s</c> with <c>suffix</c>, after a minimum-length guard.
/// </summary>
/// <param name="s">Character buffer to inspect.</param>
/// <param name="len">Number of characters of <c>s</c> to consider.</param>
/// <param name="suffix">Candidate suffix characters.</param>
/// <returns>
/// NOTE(review): the return statements are not visible in this excerpt;
/// presumably false when the guard trips or a character differs, true
/// otherwise - confirm.
/// </returns>
158 bool EndsWith(
char[] s,
int len,
char[] suffix)
// Guard: applies when len is below suffix.Length + 2.
160 if (len < suffix.Length + 2)
166 for (
int i = 0; i < suffix.Length; i++)
// len - suffix.Length is the index where the candidate suffix would begin.
167 if (s[len - suffix.Length + i] != suffix[i])
/// <summary>
/// Calls Delete <c>nChars</c> times at the same position, carrying the
/// length returned by each call into the next.
/// </summary>
/// <param name="s">Character buffer, edited in place by Delete.</param>
/// <param name="pos">Index passed unchanged to every Delete call.</param>
/// <param name="len">Number of characters of <c>s</c> to consider.</param>
/// <param name="nChars">Number of Delete calls to make.</param>
/// <returns>
/// NOTE(review): presumably the length after the last Delete call; the
/// return statement is not visible in this excerpt - confirm.
/// </returns>
184 protected int DeleteN(
char[] s,
int pos,
int len,
int nChars)
186 for (
int i = 0; i < nChars; i++)
// pos stays fixed: each Delete call is made at the same index.
187 len = Delete(s, pos, len);
/// <summary>
/// Shifts the characters that follow <c>pos</c> one slot towards the start
/// of the buffer, overwriting the character at <c>pos</c>.
/// </summary>
/// <param name="s">Character buffer, edited in place.</param>
/// <param name="pos">Index of the character to overwrite.</param>
/// <param name="len">Number of characters of <c>s</c> to consider.</param>
/// <returns>
/// NOTE(review): the return statement is not visible in this excerpt;
/// presumably the reduced length - confirm.
/// </returns>
199 protected int Delete(
char[] s,
int pos,
int len)
// Source and destination are the same array; Array.Copy is documented to
// handle overlapping ranges as if the source were copied to a temporary first.
// NOTE(review): with pos >= len the count (len - pos - 1) is negative and
// Array.Copy would throw; any guard for that is not visible here - confirm.
202 Array.Copy(s, pos + 1, s, pos, len - pos - 1);