Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
JaroWinklerDistance.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 
19 using System.Linq;
20 
21 namespace SpellChecker.Net.Search.Spell
22 {
23 
24  using System;
25 
27  {
28  private float threshold = 0.7f;
29 
30  private static int[] Matches(String s1, String s2)
31  {
32  String Max, Min;
33 
34  if (s1.Length > s2.Length)
35  {
36  Max = s1;
37  Min = s2;
38  }
39  else
40  {
41  Max = s2;
42  Min = s1;
43  }
44 
45  var range = Math.Max(Max.Length / 2 - 1, 0);
46  var matchIndexes = new int[Min.Length];
47 
48  for (var i = 0; i < matchIndexes.Length; i++)
49  matchIndexes[i] = -1;
50 
51  var matchFlags = new bool[Max.Length];
52  var matches = 0;
53 
54  for (var mi = 0; mi < Min.Length; mi++)
55  {
56  var c1 = Min[mi];
57  for (int xi = Math.Max(mi - range, 0),
58  xn = Math.Min(mi + range + 1, Max.Length); xi < xn; xi++)
59  {
60  if (matchFlags[xi] || c1 != Max[xi]) continue;
61 
62  matchIndexes[mi] = xi;
63  matchFlags[xi] = true;
64  matches++;
65  break;
66  }
67  }
68 
69  var ms1 = new char[matches];
70  var ms2 = new char[matches];
71 
72  for (int i = 0, si = 0; i < Min.Length; i++)
73  {
74  if (matchIndexes[i] != -1)
75  {
76  ms1[si] = Min[i];
77  si++;
78  }
79  }
80 
81  for (int i = 0, si = 0; i < Max.Length; i++)
82  {
83  if (matchFlags[i])
84  {
85  ms2[si] = Max[i];
86  si++;
87  }
88  }
89 
90  var transpositions = ms1.Where((t, mi) => t != ms2[mi]).Count();
91 
92  var prefix = 0;
93  for (var mi = 0; mi < Min.Length; mi++)
94  {
95  if (s1[mi] == s2[mi])
96  {
97  prefix++;
98  }
99  else
100  {
101  break;
102  }
103  }
104 
105  return new int[] { matches, transpositions / 2, prefix, Max.Length };
106  }
107 
108  public float GetDistance(String s1, String s2)
109  {
110  var mtp = Matches(s1, s2);
111  var m = (float)mtp[0];
112 
113  if (m == 0)
114  return 0f;
115 
116  float j = ((m / s1.Length + m / s2.Length + (m - mtp[1]) / m)) / 3;
117  float jw = j < Threshold ? j : j + Math.Min(0.1f, 1f / mtp[3]) * mtp[2] * (1 - j);
118  return jw;
119  }
120 
121  /// <summary>
122  /// Gets or sets the current value of the threshold used for adding the Winkler bonus.
123  /// Set to a negative value to get the Jaro distance. The default value is 0.7.
124  /// </summary>
125  public float Threshold
126  {
127  get { return threshold; }
128  set { this.threshold = value; }
129  }
130  }
131 }