Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
ThaiAnalyzer.cs
Go to the documentation of this file.
1 /*
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements. See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership. The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License. You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied. See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20 */
21 
22 using System;
23 using System.Collections.Generic;
24 using System.IO;
25 using System.Linq;
26 using System.Text;
27 using Lucene.Net.Analysis.Standard;
28 using Version=Lucene.Net.Util.Version;
29 
30 namespace Lucene.Net.Analysis.Th
31 {
/// <summary>
/// <see cref="Analyzer"/> for the Thai language.
/// <para>
/// The analysis chain is <see cref="StandardTokenizer"/> ->
/// <see cref="StandardFilter"/> -> <see cref="ThaiWordFilter"/> ->
/// <see cref="StopFilter"/> (English stop words). Thai word segmentation
/// itself is delegated to <see cref="ThaiWordFilter"/>.
/// </para>
/// <para>
/// <b>NOTE</b>: the <see cref="Version"/> passed to the constructor is
/// forwarded to <see cref="StandardTokenizer"/> and is used to pick the
/// default position-increment setting of <see cref="StopFilter"/>.
/// </para>
/// </summary>
public class ThaiAnalyzer : Analyzer
{
    private readonly Version matchVersion;

    /// <summary>
    /// Creates a Thai analyzer bound to the given compatibility version.
    /// </summary>
    /// <param name="matchVersion">Version handed to the tokenizer and stop filter.</param>
    public ThaiAnalyzer(Version matchVersion)
    {
        SetOverridesTokenStreamMethod<ThaiAnalyzer>();
        this.matchVersion = matchVersion;
    }

    /// <summary>
    /// Builds a brand-new analysis chain over <paramref name="reader"/>.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed (not used by this analyzer).</param>
    /// <param name="reader">Source of the text to tokenize.</param>
    /// <returns>The outermost stream of the chain (the stop filter).</returns>
    public override TokenStream TokenStream(string fieldName, TextReader reader)
    {
        // Same chain as ReusableTokenStream, including the stop filter, so both
        // entry points produce identical tokens for identical input.
        return CreateFilterChain(new StandardTokenizer(matchVersion, reader));
    }

    /// <summary>
    /// Holder for the tokenizer and the outermost filter that are cached
    /// between calls to <see cref="ReusableTokenStream"/>.
    /// </summary>
    private sealed class SavedStreams
    {
        // Head of the chain; re-pointed at a new reader on reuse.
        internal Tokenizer source;

        // Tail of the chain; this is what callers consume.
        internal TokenStream result;
    }

    /// <summary>
    /// Returns an analysis chain over <paramref name="reader"/>, reusing the
    /// chain stored in <c>PreviousTokenStream</c> when one exists.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed (not used by this analyzer).</param>
    /// <param name="reader">Source of the text to tokenize.</param>
    /// <returns>The outermost stream of the (possibly reused) chain.</returns>
    public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
    {
        if (overridesTokenStreamMethod)
        {
            // LUCENE-1678: force fallback to TokenStream() if we
            // have been subclassed and that subclass overrides
            // TokenStream but not ReusableTokenStream
            return TokenStream(fieldName, reader);
        }

        SavedStreams streams = (SavedStreams)PreviousTokenStream;
        if (streams == null)
        {
            // First use: build the chain once and remember it.
            streams = new SavedStreams();
            streams.source = new StandardTokenizer(matchVersion, reader);
            streams.result = CreateFilterChain(streams.source);
            PreviousTokenStream = streams;
        }
        else
        {
            streams.source.Reset(reader);
            streams.result.Reset(); // reset the ThaiWordFilter's state
        }

        return streams.result;
    }

    // Wraps the tokenizer in the StandardFilter -> ThaiWordFilter -> StopFilter
    // chain; shared by both public entry points so they cannot diverge.
    private TokenStream CreateFilterChain(Tokenizer source)
    {
        TokenStream chain = new StandardFilter(source);
        chain = new ThaiWordFilter(chain);
        return new StopFilter(
            StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
            chain,
            StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    }
}
94 }