Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
CJKAnalyzer.cs
Go to the documentation of this file.
1 /*
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements. See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership. The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License. You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied. See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20 */
21 
22 using System;
23 using System.Collections.Generic;
24 using System.IO;
25 using System.Collections;
26 using Lucene.Net.Analysis;
27 using Version = Lucene.Net.Util.Version;
28 
namespace Lucene.Net.Analysis.CJK
{
    /// <summary>
    /// An <see cref="Analyzer"/> that tokenizes text with <see cref="CJKTokenizer"/> and
    /// removes stop words from the resulting stream with <see cref="StopFilter"/>.
    /// </summary>
    public class CJKAnalyzer : Analyzer
    {
        //~ Static fields/initializers ---------------------------------------------

        /// <summary>
        /// Stop words from which the default stop set is built.
        /// Kept as a mutable public field for backward compatibility; callers should
        /// use <see cref="GetDefaultStopSet"/> instead.
        /// </summary>
        // TODO make this final in 3.1 -
        // this might be revised and merged with StopFilter stop words too
        [Obsolete("use GetDefaultStopSet() instead")]
        public static String[] STOP_WORDS =
        {
            "a", "and", "are", "as", "at", "be",
            "but", "by", "for", "if", "in",
            "into", "is", "it", "no", "not",
            "of", "on", "or", "s", "such", "t",
            "that", "the", "their", "then",
            "there", "these", "they", "this",
            "to", "was", "will", "with", "",
            "www"
        };

        /// <summary>
        /// Returns the unmodifiable default stop word set used by
        /// <see cref="CJKAnalyzer(Version)"/>.
        /// </summary>
        /// <returns>The shared default stop set built from <see cref="STOP_WORDS"/>.</returns>
        public static ISet<string> GetDefaultStopSet()
        {
            return DefaultSetHolder.DEFAULT_STOP_SET;
        }

        /// <summary>
        /// Holds the default stop set in a nested class so that it is created by this
        /// holder's static initializer rather than by <see cref="CJKAnalyzer"/>'s own.
        /// </summary>
        private static class DefaultSetHolder
        {
            // The explicit IEnumerable<string> cast selects the CharArraySet constructor
            // overload; "false" is passed as its second argument exactly as before.
            internal static readonly ISet<string> DEFAULT_STOP_SET =
                CharArraySet.UnmodifiableSet(new CharArraySet((IEnumerable<string>)STOP_WORDS, false));
        }

        //~ Instance fields --------------------------------------------------------

        /// <summary>Stop words removed from the token stream; assigned once in a constructor.</summary>
        private readonly ISet<string> stopTable;

        /// <summary>Version passed to <see cref="StopFilter.GetEnablePositionIncrementsVersionDefault"/>.</summary>
        private readonly Version matchVersion;

        //~ Constructors -----------------------------------------------------------

        /// <summary>
        /// Builds an analyzer which removes the words in the default stop set
        /// (see <see cref="GetDefaultStopSet"/>).
        /// </summary>
        /// <param name="matchVersion">Lucene compatibility version.</param>
        public CJKAnalyzer(Version matchVersion)
            : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
        {
        }

        /// <summary>
        /// Builds an analyzer which removes the words in the given set.
        /// The set is copied and wrapped as unmodifiable, so later changes made by the
        /// caller to <paramref name="stopWords"/> do not affect this analyzer.
        /// </summary>
        /// <param name="matchVersion">Lucene compatibility version.</param>
        /// <param name="stopWords">Set of stop words.</param>
        public CJKAnalyzer(Version matchVersion, ISet<string> stopWords)
        {
            stopTable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopWords));
            this.matchVersion = matchVersion;
        }

        /// <summary>
        /// Builds an analyzer which removes the words in the provided array.
        /// </summary>
        /// <param name="matchVersion">Lucene compatibility version.</param>
        /// <param name="stopWords">Stop word array, converted with <see cref="StopFilter.MakeStopSet(string[])"/>.</param>
        public CJKAnalyzer(Version matchVersion, params string[] stopWords)
        {
            stopTable = StopFilter.MakeStopSet(stopWords);
            this.matchVersion = matchVersion;
        }

        //~ Methods ----------------------------------------------------------------

        /// <summary>
        /// Creates a new <see cref="TokenStream"/> which tokenizes all the text in the
        /// provided <see cref="TextReader"/>.
        /// </summary>
        /// <param name="fieldName">Lucene field name (not used by this analyzer).</param>
        /// <param name="reader">Input reader.</param>
        /// <returns>
        /// A <see cref="TokenStream"/> built from <see cref="CJKTokenizer"/>, filtered with
        /// <see cref="StopFilter"/>.
        /// </returns>
        public override sealed TokenStream TokenStream(String fieldName, TextReader reader)
        {
            // Same filter chain as ReusableTokenStream, but built fresh on every call.
            return new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                  new CJKTokenizer(reader), stopTable);
        }

        /// <summary>
        /// Tokenizer / filter pair stored in <c>PreviousTokenStream</c> so that
        /// <see cref="ReusableTokenStream"/> can reuse it on later calls.
        /// </summary>
        private sealed class SavedStreams
        {
            internal Tokenizer source;
            internal TokenStream result;
        }

        /// <summary>
        /// Returns a (possibly reused) <see cref="TokenStream"/> which tokenizes all the text
        /// in the provided <see cref="TextReader"/>.
        /// </summary>
        /// <param name="fieldName">Lucene field name (not used by this analyzer).</param>
        /// <param name="reader">Input reader.</param>
        /// <returns>
        /// A <see cref="TokenStream"/> built from <see cref="CJKTokenizer"/>, filtered with
        /// <see cref="StopFilter"/>.
        /// </returns>
        public override sealed TokenStream ReusableTokenStream(String fieldName, TextReader reader)
        {
            /* tokenStream() is final, no back compat issue */
            SavedStreams streams = (SavedStreams) PreviousTokenStream;
            if (streams == null)
            {
                // Nothing saved yet: build the tokenizer/filter chain and store it for reuse.
                streams = new SavedStreams();
                streams.source = new CJKTokenizer(reader);
                streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                                streams.source, stopTable);
                PreviousTokenStream = streams;
            }
            else
            {
                // Reuse the saved chain; only the tokenizer needs to be pointed at the new reader.
                streams.source.Reset(reader);
            }
            return streams.result;
        }
    }
}