Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
TRStringDistance.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 
19 namespace SpellChecker.Net.Search.Spell
20 {
21 
22  /// <summary> Edit distance class</summary>
23  public class TRStringDistance
24  {
25 
26  internal char[] sa;
27  internal int n;
28  internal int[][][] cache = new int[30][][];
29 
30 
31  /// <summary> Optimized to run a bit faster than the static getDistance().
32  /// In one benchmark times were 5.3sec using ctr vs 8.5sec w/ static method, thus 37% faster.
33  /// </summary>
34  public TRStringDistance(System.String target)
35  {
36  sa = target.ToCharArray();
37  n = sa.Length;
38  }
39 
40 
41  //***************************
42  // Compute Levenshtein distance
43  //***************************
44  public int GetDistance(System.String other)
45  {
46  int[][] d; // matrix
47 
48  // Step 1
49  char[] ta = other.ToCharArray();
50  int m = ta.Length;
51  if (n == 0)
52  {
53  return m;
54  }
55  if (m == 0)
56  {
57  return n;
58  }
59 
60  if (m >= cache.Length)
61  {
62  d = Form(n, m);
63  }
64  else if (cache[m] != null)
65  {
66  d = cache[m];
67  }
68  else
69  {
70  d = cache[m] = Form(n, m);
71 
72  // Step 3
73  }
74  for (int i = 1; i <= n; i++)
75  {
76  char s_i = sa[i - 1];
77 
78  // Step 4
79 
80  for (int j = 1; j <= m; j++)
81  {
82  char t_j = ta[j - 1];
83 
84  // Step 5
85 
86  int cost = s_i == t_j ? 0 : 1;
87  d[i][j] = Min3(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost);
88  }
89  }
90 
91  // Step 7
92  return d[n][m];
93  }
94 
95 
96  /// <summary> </summary>
97  private static int[][] Form(int n, int m)
98  {
99  int[][] d = new int[n + 1][];
100  for (int i = 0; i < n + 1; i++)
101  {
102  d[i] = new int[m + 1];
103  }
104  // Step 2
105 
106  for (int i = 0; i <= n; i++)
107  {
108  d[i][0] = i;
109  }
110  for (int j = 0; j <= m; j++)
111  {
112  d[0][j] = j;
113  }
114  return d;
115  }
116 
117 
118  //**************************
119  // Get minimum of three values
120  //**************************
121  private static int Min3(int a, int b, int c)
122  {
123  int mi = a;
124  if (b < mi)
125  {
126  mi = b;
127  }
128  if (c < mi)
129  {
130  mi = c;
131  }
132  return mi;
133  }
134  }
135 }