Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
Analyzer.cs
Go to the documentation of this file.
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 
18 using System;
19 using Lucene.Net.Documents;
20 using Lucene.Net.Store;
21 using Lucene.Net.Util;
22 
namespace Lucene.Net.Analysis
{
    /// <summary>An Analyzer builds TokenStreams, which analyze text. It thus represents a
    /// policy for extracting index terms from text.
    /// <para>
    /// Typical implementations first build a Tokenizer, which breaks the stream of
    /// characters from the Reader into raw Tokens. One or more TokenFilters may
    /// then be applied to the output of the Tokenizer.
    /// </para>
    /// </summary>
    public abstract class Analyzer : IDisposable
    {
        /// <summary>Creates a TokenStream which tokenizes all the text in the provided
        /// Reader. Must be able to handle null field name for
        /// backward compatibility.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; may be null.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>A TokenStream over the text supplied by <paramref name="reader"/>.</returns>
        public abstract TokenStream TokenStream(String fieldName, System.IO.TextReader reader);

        /// <summary>Creates a TokenStream that is allowed to be re-used
        /// from the previous time that the same thread called
        /// this method. Callers that do not need to use more
        /// than one TokenStream at the same time from this
        /// analyzer should use this method for better
        /// performance.
        /// </summary>
        /// <remarks>
        /// This base implementation performs no re-use: it simply forwards to
        /// <see cref="TokenStream(String,System.IO.TextReader)" />.
        /// </remarks>
        /// <param name="fieldName">Name of the field being analyzed.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The stream returned by <see cref="TokenStream(String,System.IO.TextReader)" />.</returns>
        public virtual TokenStream ReusableTokenStream(String fieldName, System.IO.TextReader reader)
        {
            return TokenStream(fieldName, reader);
        }

        // Backing store for PreviousTokenStream. Set to null by Dispose(bool), which is
        // how the property detects that the analyzer has been closed.
        // NOTE(review): presumably per-thread storage, going by the type name and the
        // PreviousTokenStream documentation - confirm against CloseableThreadLocal.
        private CloseableThreadLocal<Object> tokenStreams = new CloseableThreadLocal<Object>();

        // True once Dispose(bool) has run to completion; makes repeated disposal a no-op.
        private bool isDisposed;

        /// <summary>Used by Analyzers that implement reusableTokenStream
        /// to retrieve previously saved TokenStreams for re-use
        /// by the same thread.
        /// </summary>
        /// <exception cref="AlreadyClosedException">
        /// Thrown by both accessors once the analyzer has been disposed.
        /// </exception>
        protected internal virtual object PreviousTokenStream
        {
            get
            {
                if (tokenStreams == null)
                {
                    throw new AlreadyClosedException("this Analyzer is closed");
                }
                return tokenStreams.Get();
            }
            set
            {
                if (tokenStreams == null)
                {
                    throw new AlreadyClosedException("this Analyzer is closed");
                }
                tokenStreams.Set(value);
            }
        }

        /// <summary>
        /// Result of the last <see cref="SetOverridesTokenStreamMethod{TClass}" /> call:
        /// true when the runtime type's TokenStream method is declared by a type other
        /// than the one passed as the type argument.
        /// </summary>
        [Obsolete()]
        protected internal bool overridesTokenStreamMethod = false;

        /// <summary>
        /// Records in <see cref="overridesTokenStreamMethod" /> whether the runtime type of
        /// this instance declares TokenStream(String, TextReader) somewhere other than
        /// <typeparamref name="TClass"/>.
        /// </summary>
        /// <remarks>
        /// Deprecated: this is only present to preserve back-compat of classes that
        /// subclass a core analyzer and override tokenStream but not reusableTokenStream.
        /// <para>
        /// Java uses Class&lt;? extends Analyzer&gt; to constrain <typeparamref name="TClass"/> to
        /// only Types that inherit from Analyzer. C# does not have a generic type class,
        /// i.e. Type&lt;T&gt;. The method signature stays the same, and an exception may
        /// still be thrown, if the method doesn't exist.
        /// </para>
        /// </remarks>
        /// <typeparam name="TClass">The analyzer type expected to declare TokenStream.</typeparam>
        [Obsolete("This is only present to preserve back-compat of classes that subclass a core analyzer and override tokenStream but not reusableTokenStream ")]
        protected internal virtual void SetOverridesTokenStreamMethod<TClass>()
            where TClass : Analyzer
        {
            try
            {
                System.Reflection.MethodInfo m = this.GetType().GetMethod("TokenStream", new[] { typeof(string), typeof(System.IO.TextReader) });

                // GetMethod returns null when no matching public method is found; treat
                // that as "not overridden" instead of failing with a NullReferenceException.
                overridesTokenStreamMethod = m != null && m.DeclaringType != typeof(TClass);
            }
            catch (MethodAccessException)
            {
                // Not expected: TClass is constrained to Analyzer, which declares a public
                // TokenStream(String, TextReader). Handler kept for back-compat.
                overridesTokenStreamMethod = false;
            }
        }

        /// <summary> Invoked before indexing a Fieldable instance if
        /// terms have already been added to that field. This allows custom
        /// analyzers to place an automatic position increment gap between
        /// Fieldable instances using the same field name. The default
        /// position increment gap is 0. With a 0 position increment gap and
        /// the typical default token position increment of 1, all terms in a field,
        /// including across Fieldable instances, are in successive positions, allowing
        /// exact PhraseQuery matches, for instance, across Fieldable instance boundaries.
        /// </summary>
        /// <param name="fieldName">Fieldable name being indexed.
        /// </param>
        /// <returns> position increment gap, added to the next token emitted from <see cref="TokenStream(String,System.IO.TextReader)" />
        /// </returns>
        public virtual int GetPositionIncrementGap(String fieldName)
        {
            return 0;
        }

        /// <summary> Just like <see cref="GetPositionIncrementGap" />, except for
        /// Token offsets instead. By default this returns 1 for
        /// tokenized fields, as if the fields were joined
        /// with an extra space character, and 0 for un-tokenized
        /// fields. This method is only called if the field
        /// produced at least one token for indexing.
        /// </summary>
        /// <param name="field">the field just indexed
        /// </param>
        /// <returns> offset gap, added to the next token emitted from <see cref="TokenStream(String,System.IO.TextReader)" />
        /// </returns>
        public virtual int GetOffsetGap(IFieldable field)
        {
            return field.IsTokenized ? 1 : 0;
        }

        /// <summary>Frees persistent resources used by this Analyzer.
        /// Equivalent to calling <see cref="Dispose()" />.
        /// </summary>
        public void Close()
        {
            Dispose();
        }

        /// <summary>
        /// Releases the resources held by this Analyzer by calling
        /// <see cref="Dispose(bool)" /> with <c>true</c>.
        /// </summary>
        public virtual void Dispose()
        {
            Dispose(true);

            // Standard dispose pattern for an unsealed type: a derived class that adds a
            // finalizer does not need it to run once Dispose has completed.
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Performs the actual clean-up. Does nothing if the analyzer has already been
        /// disposed.
        /// </summary>
        /// <param name="disposing">
        /// When true, the saved token streams are closed and the backing store is cleared,
        /// after which <see cref="PreviousTokenStream" /> throws
        /// <see cref="AlreadyClosedException" />.
        /// </param>
        protected virtual void Dispose(bool disposing)
        {
            if (isDisposed) return;

            if (disposing)
            {
                if (tokenStreams != null)
                {
                    tokenStreams.Close();
                    tokenStreams = null;
                }
            }
            isDisposed = true;
        }
    }
}