Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
CharArraySet.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections;
20 using System.Linq;
21 using System.Collections.Generic;
22 
23 namespace Lucene.Net.Analysis
24 {
25  /// <summary> A simple class that stores Strings as char[]'s in a
26  /// hash table. Note that this is not a general purpose
27  /// class. For example, it cannot remove items from the
28  /// set, nor does it resize its hash table to be smaller,
29  /// etc. It is designed to be quick to test if a char[]
30  /// is in the set without the necessity of converting it
31  /// to a String first.
32  /// <p/>
33  /// <em>Please note:</em> This class implements <see cref="System.Collections.Generic.ISet{T}"/> but
34  /// does not behave like it should in all cases. The generic type is
35  /// <see cref="System.Collections.Generic.ICollection{T}"/>, because you can add any object to it,
36  /// that has a string representation. The add methods will use
37  /// <see cref="object.ToString()"/> and store the result using a <see cref="char"/>
38  /// buffer. The same behaviour have the <see cref="Contains(object)"/> methods.
39  /// The <see cref="GetEnumerator"/> method returns an <see cref="string"/> IEnumerable.
40  /// For type safety also {@link #stringIterator()} is provided.
41  /// </summary>
42  // TODO: java uses wildcards, .net doesn't have this, easiest way is to
43  // make the entire class generic. Ultimately, though, since this
44  // works with strings, I can't think of a reason not to just declare
45  // this as an ISet<string>.
46  public class CharArraySet : ISet<string>
47  {
48  bool _ReadOnly = false;
49  const int INIT_SIZE = 8;
50  char[][] _Entries;
51  int _Count;
52  bool _IgnoreCase;
53  public static CharArraySet EMPTY_SET = UnmodifiableSet(new CharArraySet(0, false));
54 
55  private void Init(int startSize, bool ignoreCase)
56  {
57  this._IgnoreCase = ignoreCase;
58  int size = INIT_SIZE;
59  while (startSize + (startSize >> 2) > size)
60  size <<= 1;
61  _Entries = new char[size][];
62  }
63 
64  /// <summary>Create set with enough capacity to hold startSize
65  /// terms
66  /// </summary>
67  public CharArraySet(int startSize, bool ignoreCase)
68  {
69  Init(startSize, ignoreCase);
70  }
71 
72  public CharArraySet(IEnumerable<string> c, bool ignoreCase)
73  {
74  Init(c.Count(), ignoreCase);
75  AddItems(c);
76  }
77 
78  /// <summary>Create set from a Collection of char[] or String </summary>
79  public CharArraySet(IEnumerable<object> c, bool ignoreCase)
80  {
81  Init(c.Count(), ignoreCase);
82  AddItems(c);
83  }
84 
85  private void AddItems<T>(IEnumerable<T> items)
86  {
87  foreach(var item in items)
88  {
89  Add(item.ToString());
90  }
91  }
92 
93  /// <summary>Create set from entries </summary>
94  private CharArraySet(char[][] entries, bool ignoreCase, int count)
95  {
96  this._Entries = entries;
97  this._IgnoreCase = ignoreCase;
98  this._Count = count;
99  }
100 
101  /// <summary>true if the <c>len</c> chars of <c>text</c> starting at <c>off</c>
102  /// are in the set
103  /// </summary>
104  public virtual bool Contains(char[] text, int off, int len)
105  {
106  return _Entries[GetSlot(text, off, len)] != null;
107  }
108 
109  public virtual bool Contains(string text)
110  {
111  return _Entries[GetSlot(text)] != null;
112  }
113 
114 
115  private int GetSlot(char[] text, int off, int len)
116  {
117  int code = GetHashCode(text, off, len);
118  int pos = code & (_Entries.Length - 1);
119  char[] text2 = _Entries[pos];
120  if (text2 != null && !Equals(text, off, len, text2))
121  {
122  int inc = ((code >> 8) + code) | 1;
123  do
124  {
125  code += inc;
126  pos = code & (_Entries.Length - 1);
127  text2 = _Entries[pos];
128  }
129  while (text2 != null && !Equals(text, off, len, text2));
130  }
131  return pos;
132  }
133 
134  /// <summary>Returns true if the String is in the set </summary>
135  private int GetSlot(string text)
136  {
137  int code = GetHashCode(text);
138  int pos = code & (_Entries.Length - 1);
139  char[] text2 = _Entries[pos];
140  if (text2 != null && !Equals(text, text2))
141  {
142  int inc = ((code >> 8) + code) | 1;
143  do
144  {
145  code += inc;
146  pos = code & (_Entries.Length - 1);
147  text2 = _Entries[pos];
148  }
149  while (text2 != null && !Equals(text, text2));
150  }
151  return pos;
152  }
153 
154  public bool Add(string text)
155  {
156  if (_ReadOnly) throw new NotSupportedException();
157  return Add(text.ToCharArray());
158  }
159 
160  /// <summary>Add this char[] directly to the set.
161  /// If ignoreCase is true for this Set, the text array will be directly modified.
162  /// The user should never modify this text array after calling this method.
163  /// </summary>
164  public bool Add(char[] text)
165  {
166  if (_ReadOnly) throw new NotSupportedException();
167 
168  if (_IgnoreCase)
169  for (int i = 0; i < text.Length; i++)
170  text[i] = Char.ToLower(text[i]);
171  int slot = GetSlot(text, 0, text.Length);
172  if (_Entries[slot] != null)
173  return false;
174  _Entries[slot] = text;
175  _Count++;
176 
177  if (_Count + (_Count >> 2) > _Entries.Length)
178  {
179  Rehash();
180  }
181 
182  return true;
183  }
184 
185  private bool Equals(char[] text1, int off, int len, char[] text2)
186  {
187  if (len != text2.Length)
188  return false;
189  if (_IgnoreCase)
190  {
191  for (int i = 0; i < len; i++)
192  {
193  if (char.ToLower(text1[off + i]) != text2[i])
194  return false;
195  }
196  }
197  else
198  {
199  for (int i = 0; i < len; i++)
200  {
201  if (text1[off + i] != text2[i])
202  return false;
203  }
204  }
205  return true;
206  }
207 
208  private bool Equals(string text1, char[] text2)
209  {
210  int len = text1.Length;
211  if (len != text2.Length)
212  return false;
213  if (_IgnoreCase)
214  {
215  for (int i = 0; i < len; i++)
216  {
217  if (char.ToLower(text1[i]) != text2[i])
218  return false;
219  }
220  }
221  else
222  {
223  for (int i = 0; i < len; i++)
224  {
225  if (text1[i] != text2[i])
226  return false;
227  }
228  }
229  return true;
230  }
231 
232  private void Rehash()
233  {
234  int newSize = 2 * _Entries.Length;
235  char[][] oldEntries = _Entries;
236  _Entries = new char[newSize][];
237 
238  for (int i = 0; i < oldEntries.Length; i++)
239  {
240  char[] text = oldEntries[i];
241  if (text != null)
242  {
243  // todo: could be faster... no need to compare strings on collision
244  _Entries[GetSlot(text, 0, text.Length)] = text;
245  }
246  }
247  }
248 
249  private int GetHashCode(char[] text, int offset, int len)
250  {
251  int code = 0;
252  int stop = offset + len;
253  if (_IgnoreCase)
254  {
255  for (int i = offset; i < stop; i++)
256  {
257  code = code * 31 + char.ToLower(text[i]);
258  }
259  }
260  else
261  {
262  for (int i = offset; i < stop; i++)
263  {
264  code = code * 31 + text[i];
265  }
266  }
267  return code;
268  }
269 
270  private int GetHashCode(string text)
271  {
272  int code = 0;
273  int len = text.Length;
274  if (_IgnoreCase)
275  {
276  for (int i = 0; i < len; i++)
277  {
278  code = code * 31 + char.ToLower(text[i]);
279  }
280  }
281  else
282  {
283  for (int i = 0; i < len; i++)
284  {
285  code = code * 31 + text[i];
286  }
287  }
288  return code;
289  }
290 
291  public int Count
292  {
293  get { return _Count; }
294  }
295 
296  public bool IsEmpty
297  {
298  get { return _Count == 0; }
299  }
300 
301  public bool Contains(object item)
302  {
303  var text = item as char[];
304  return text != null ? Contains(text, 0, text.Length) : Contains(item.ToString());
305  }
306 
307  public bool Add(object item)
308  {
309  return Add(item.ToString());
310  }
311 
312  void ICollection<string>.Add(string item)
313  {
314  this.Add(item);
315  }
316 
317  /// <summary>
318  /// Returns an unmodifiable <see cref="CharArraySet"/>. This allows to provide
319  /// unmodifiable views of internal sets for "read-only" use
320  /// </summary>
321  /// <param name="set">A Set for which the unmodifiable set it returns.</param>
322  /// <returns>A new unmodifiable <see cref="CharArraySet"/></returns>
323  /// <throws>ArgumentNullException of the given set is <c>null</c></throws>
324  public static CharArraySet UnmodifiableSet(CharArraySet set)
325  {
326  if(set == null)
327  throw new ArgumentNullException("Given set is null");
328  if (set == EMPTY_SET)
329  return EMPTY_SET;
330  if (set._ReadOnly)
331  return set;
332 
333  var newSet = new CharArraySet(set._Entries, set._IgnoreCase, set.Count) {IsReadOnly = true};
334  return newSet;
335  }
336 
337  /// <summary>
338  /// returns a copy of the given set as a <see cref="CharArraySet"/>. If the given set
339  /// is a <see cref="CharArraySet"/> the ignoreCase property will be preserved.
340  /// </summary>
341  /// <param name="set">A set to copy</param>
342  /// <returns>a copy of the given set as a <see cref="CharArraySet"/>. If the given set
343  /// is a <see cref="CharArraySet"/> the ignoreCase property will be preserved.</returns>
344  public static CharArraySet Copy<T>(ISet<T> set)
345  {
346  if (set == null)
347  throw new ArgumentNullException("set", "Given set is null!");
348  if (set == EMPTY_SET)
349  return EMPTY_SET;
350  bool ignoreCase = set is CharArraySet && ((CharArraySet)set)._IgnoreCase;
351  var arrSet = new CharArraySet(set.Count, ignoreCase);
352  arrSet.AddItems(set);
353  return arrSet;
354  }
355 
356  public void Clear()
357  {
358  throw new NotSupportedException("Remove not supported!");
359  }
360 
361  public bool IsReadOnly
362  {
363  get { return _ReadOnly; }
364  private set { _ReadOnly = value; }
365  }
366 
367  /// <summary>Adds all of the elements in the specified collection to this collection </summary>
368  public void UnionWith(IEnumerable<string> other)
369  {
370  if (_ReadOnly) throw new NotSupportedException();
371 
372  foreach (string s in other)
373  {
374  Add(s.ToCharArray());
375  }
376  }
377 
378  /// <summary>Wrapper that calls UnionWith</summary>
379  public void AddAll(IEnumerable<string> coll)
380  {
381  UnionWith(coll);
382  }
383 
384  #region Unneeded methods
385  public void RemoveAll(ICollection<string> c)
386  {
387  throw new NotSupportedException();
388  }
389 
390  public void RetainAll(ICollection<string> c)
391  {
392  throw new NotSupportedException();
393  }
394 
395  void ICollection<string>.CopyTo(string[] array, int arrayIndex)
396  {
397  throw new NotSupportedException();
398  }
399 
400  void ISet<string>.IntersectWith(IEnumerable<string> other)
401  {
402  throw new NotSupportedException();
403  }
404 
405  void ISet<string>.ExceptWith(IEnumerable<string> other)
406  {
407  throw new NotSupportedException();
408  }
409 
410  void ISet<string>.SymmetricExceptWith(IEnumerable<string> other)
411  {
412  throw new NotSupportedException();
413  }
414 
415  bool ISet<string>.IsSubsetOf(IEnumerable<string> other)
416  {
417  throw new NotSupportedException();
418  }
419 
420  bool ISet<string>.IsSupersetOf(IEnumerable<string> other)
421  {
422  throw new NotSupportedException();
423  }
424 
425  bool ISet<string>.IsProperSupersetOf(IEnumerable<string> other)
426  {
427  throw new NotSupportedException();
428  }
429 
430  bool ISet<string>.IsProperSubsetOf(IEnumerable<string> other)
431  {
432  throw new NotSupportedException();
433  }
434 
435  bool ISet<string>.Overlaps(IEnumerable<string> other)
436  {
437  throw new NotSupportedException();
438  }
439 
440  bool ISet<string>.SetEquals(IEnumerable<string> other)
441  {
442  throw new NotSupportedException();
443  }
444 
445  bool ICollection<string>.Remove(string item)
446  {
447  throw new NotSupportedException();
448  }
449  #endregion
450 
451  /// <summary>
452  /// The IEnumerator&lt;String&gt; for this set. Strings are constructed on the fly,
453  /// so use <c>nextCharArray</c> for more efficient access
454  /// </summary>
455  public class CharArraySetEnumerator : IEnumerator<string>
456  {
457  readonly CharArraySet _Creator;
458  int pos = -1;
459  char[] cur;
460 
461  protected internal CharArraySetEnumerator(CharArraySet creator)
462  {
463  _Creator = creator;
464  }
465 
466  public bool MoveNext()
467  {
468  cur = null;
469  pos++;
470  while (pos < _Creator._Entries.Length && (cur = _Creator._Entries[pos]) == null)
471  pos++;
472  return cur != null;
473  }
474 
475  /// <summary>do not modify the returned char[] </summary>
476  public char[] NextCharArray()
477  {
478  return cur;
479  }
480 
481  public string Current
482  {
483  get { return new string(NextCharArray()); }
484  }
485 
486  public void Dispose()
487  {
488  }
489 
490  object IEnumerator.Current
491  {
492  get { return new string(NextCharArray()); }
493  }
494 
495  public void Reset()
496  {
497  throw new NotImplementedException();
498  }
499  }
500 
501  public IEnumerator<string> StringEnumerator()
502  {
503  return new CharArraySetEnumerator(this);
504  }
505 
506  public IEnumerator<string> GetEnumerator()
507  {
508  return new CharArraySetEnumerator(this);
509  }
510 
511  IEnumerator IEnumerable.GetEnumerator()
512  {
513  return GetEnumerator();
514  }
515  }
516 
517 }