Lucene.Net  3.0.3
Lucene.Net is a .NET port of the Java Lucene Indexing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties
GreekAnalyzer.cs
Go to the documentation of this file.
1 /*
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements. See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership. The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License. You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied. See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20 */
21 
22 using System;
23 using System.Collections.Generic;
24 using System.IO;
25 using System.Linq;
26 using Lucene.Net.Analysis.Standard;
27 using Version = Lucene.Net.Util.Version;
28 
29 namespace Lucene.Net.Analysis.El
30 {
/// <summary>
/// <see cref="Analyzer"/> for the Greek language.
/// <para>
/// Supports an external list of stopwords (words that will not be indexed at all).
/// A default set of stopwords is used unless an alternative list is specified.
/// </para>
/// <para>
/// <b>NOTE</b>: This class uses the same <see cref="Version"/>
/// dependent settings as <see cref="StandardAnalyzer"/>.
/// </para>
/// </summary>
public sealed class GreekAnalyzer : Analyzer
{
    /// <summary>
    /// List of typical Greek stopwords.
    /// </summary>
    /// <remarks>
    /// NOTE(review): entries are lower-case, unaccented and spell a word-final sigma as "σ";
    /// presumably this matches the normalization done by GreekLowerCaseFilter - confirm
    /// before editing the list.
    /// </remarks>
    private static readonly string[] GREEK_STOP_WORDS =
    {
        "ο", "η", "το", "οι", "τα", "του", "τησ", "των", "τον",
        "την", "και",
        "κι", "κ", "ειμαι", "εισαι", "ειναι", "ειμαστε", "ειστε",
        "στο", "στον",
        "στη", "στην", "μα", "αλλα", "απο", "για", "προσ", "με",
        "σε", "ωσ",
        "παρα", "αντι", "κατα", "μετα", "θα", "να", "δε", "δεν",
        "μη", "μην",
        "επι", "ενω", "εαν", "αν", "τοτε", "που", "πωσ", "ποιοσ",
        "ποια", "ποιο",
        "ποιοι", "ποιεσ", "ποιων", "ποιουσ", "αυτοσ", "αυτη",
        "αυτο", "αυτοι",
        "αυτων", "αυτουσ", "αυτεσ", "αυτα", "εκεινοσ", "εκεινη",
        "εκεινο",
        "εκεινοι", "εκεινεσ", "εκεινα", "εκεινων", "εκεινουσ",
        "οπωσ", "ομωσ",
        "ισωσ", "οσο", "οτι"
    };

    /// <summary>
    /// Returns the set of default Greek stopwords.
    /// </summary>
    /// <returns>The shared default stopword set held by <c>DefaultSetHolder</c>.</returns>
    public static ISet<string> GetDefaultStopSet()
    {
        return DefaultSetHolder.DEFAULT_SET;
    }

    /// <summary>
    /// Holder for the default stopword set, kept in a nested class so the set is only
    /// built when the holder is first used.
    /// </summary>
    private static class DefaultSetHolder
    {
        // The cast to IEnumerable<string> selects the string-collection constructor overload.
        // The second argument is 'false'; presumably the ignore-case flag - confirm against CharArraySet.
        internal static readonly ISet<string> DEFAULT_SET =
            CharArraySet.UnmodifiableSet(new CharArraySet((IEnumerable<string>)GREEK_STOP_WORDS, false));
    }

    /// <summary>
    /// Contains the stopwords used with the <see cref="StopFilter"/>.
    /// </summary>
    private readonly ISet<string> stopSet;

    /// <summary>
    /// Lucene compatibility version passed to the tokenizer and used to pick the
    /// <see cref="StopFilter"/> position-increment default.
    /// </summary>
    private readonly Version matchVersion;

    /// <summary>
    /// Builds an analyzer with the default Greek stopword set.
    /// </summary>
    /// <param name="matchVersion">Lucene compatibility version.</param>
    public GreekAnalyzer(Version matchVersion)
        : this(matchVersion, DefaultSetHolder.DEFAULT_SET)
    {
    }

    /// <summary>
    /// Builds an analyzer with the given stop words.
    /// </summary>
    /// <param name="matchVersion">Lucene compatibility version.</param>
    /// <param name="stopwords">A stopword set; it is copied and wrapped as unmodifiable.</param>
    public GreekAnalyzer(Version matchVersion, ISet<string> stopwords)
    {
        stopSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords));
        this.matchVersion = matchVersion;
    }

    /// <summary>
    /// Builds an analyzer with the given stop words.
    /// </summary>
    /// <param name="matchVersion">Lucene compatibility version.</param>
    /// <param name="stopwords">Array of stopwords to use.</param>
    /// <remarks>
    /// Deprecated: use <see cref="GreekAnalyzer(Version, ISet{string})"/> instead.
    /// </remarks>
    public GreekAnalyzer(Version matchVersion, params string[] stopwords)
        : this(matchVersion, StopFilter.MakeStopSet(stopwords))
    {
    }

    /// <summary>
    /// Builds an analyzer using the keys of the given dictionary as stop words.
    /// </summary>
    /// <param name="matchVersion">Lucene compatibility version.</param>
    /// <param name="stopwords">Dictionary whose keys are the stopwords; the values are ignored.</param>
    /// <remarks>
    /// Deprecated: use <see cref="GreekAnalyzer(Version, ISet{string})"/> instead.
    /// </remarks>
    public GreekAnalyzer(Version matchVersion, IDictionary<string, string> stopwords)
        : this(matchVersion, stopwords.Keys.ToArray())
    {
    }

    /// <summary>
    /// Creates a <see cref="TokenStream"/> which tokenizes all the text in the provided
    /// <see cref="TextReader"/>.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed (not used by this analyzer).</param>
    /// <param name="reader">Source of the text to tokenize.</param>
    /// <returns>
    /// A <see cref="TokenStream"/> built from a <see cref="StandardTokenizer"/> filtered with
    /// <c>GreekLowerCaseFilter</c> and <see cref="StopFilter"/>.
    /// </returns>
    public override TokenStream TokenStream(string fieldName, TextReader reader)
    {
        TokenStream result = new StandardTokenizer(matchVersion, reader);
        result = new GreekLowerCaseFilter(result);
        // Same filter chain as ReusableTokenStream: the stop filter runs after lower-casing.
        result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                result, stopSet);
        return result;
    }

    /// <summary>
    /// Tokenizer and final filter chain cached between calls to
    /// <see cref="ReusableTokenStream"/>.
    /// </summary>
    private sealed class SavedStreams
    {
        // Head of the chain; reset with a new reader on reuse.
        internal Tokenizer source;

        // Tail of the chain; handed back to the caller.
        internal TokenStream result;
    }

    /// <summary>
    /// Returns a (possibly reused) <see cref="TokenStream"/> which tokenizes all the text
    /// in the provided <see cref="TextReader"/>.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed (not used by this analyzer).</param>
    /// <param name="reader">Source of the text to tokenize.</param>
    /// <returns>
    /// A <see cref="TokenStream"/> built from a <see cref="StandardTokenizer"/> filtered with
    /// <c>GreekLowerCaseFilter</c> and <see cref="StopFilter"/>.
    /// </returns>
    public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
    {
        SavedStreams streams = (SavedStreams)PreviousTokenStream;
        if (streams == null)
        {
            // First use: build the chain once and store it in PreviousTokenStream.
            streams = new SavedStreams();
            streams.source = new StandardTokenizer(matchVersion, reader);
            streams.result = new GreekLowerCaseFilter(streams.source);
            streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                            streams.result, stopSet);
            PreviousTokenStream = streams;
        }
        else
        {
            // Reuse: only the tokenizer at the head of the chain is pointed at the new reader.
            streams.source.Reset(reader);
        }

        return streams.result;
    }
}
174 }