Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
QueryTermVector.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using System.Collections.Generic;
20 using System.Linq;
21 using Lucene.Net.Analysis.Tokenattributes;
22 using Lucene.Net.Index;
23 using Lucene.Net.Support;
24 using Analyzer = Lucene.Net.Analysis.Analyzer;
25 using TokenStream = Lucene.Net.Analysis.TokenStream;
26 
27 namespace Lucene.Net.Search
28 {
29 
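/// <summary>
/// A term vector built from the terms of a query: the distinct terms, in sorted
/// order, together with the number of times each term occurred in the query.
/// </summary>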
35  {
/// <summary>Distinct query terms, in sorted order; empty until terms are processed.</summary>
private string[] terms = new string[0];

/// <summary>Occurrence counts; <c>termFreqs[i]</c> is the count for <c>terms[i]</c>.</summary>
private int[] termFreqs = new int[0];
38 
/// <summary>
/// The name of the field this vector belongs to. This implementation always
/// returns <c>null</c>.
/// </summary>
public virtual string Field
{
    get
    {
        return null;
    }
}
43 
/// <summary>
/// Builds the vector from an explicit list of query terms.
/// </summary>
/// <param name="queryTerms">The original list of terms from the query; may contain
/// duplicates. When <c>null</c>, the vector stays empty.</param>
public QueryTermVector(string[] queryTerms)
{
    this.ProcessTerms(queryTerms);
}
52 
/// <summary>
/// Builds the vector by running <paramref name="queryString"/> through
/// <paramref name="analyzer"/> and collecting every term the token stream emits.
/// </summary>
/// <param name="queryString">The raw query text to tokenize.</param>
/// <param name="analyzer">The analyzer that produces the token stream. When it is
/// <c>null</c>, or when it yields a <c>null</c> stream, the vector stays empty.</param>
public QueryTermVector(string queryString, Analyzer analyzer)
{
    if (analyzer == null)
    {
        return;
    }

    TokenStream tokens = analyzer.TokenStream("", new System.IO.StringReader(queryString));
    if (tokens == null)
    {
        return;
    }

    List<string> collected = new List<string>();
    try
    {
        tokens.Reset();
        ITermAttribute termAttribute = tokens.AddAttribute<ITermAttribute>();

        // Drain the stream, recording the term text of every token.
        while (tokens.IncrementToken())
        {
            collected.Add(termAttribute.Term);
        }

        ProcessTerms(collected.ToArray());
    }
    catch (System.IO.IOException)
    {
        // Deliberately ignored: if tokenizing fails with an I/O error,
        // no terms are recorded and construction still succeeds.
    }
}
82 
/// <summary>
/// Collapses <paramref name="queryTerms"/> into the sorted list of distinct terms
/// and a parallel list of occurrence counts, then stores them in the
/// <c>terms</c> and <c>termFreqs</c> fields.
/// </summary>
/// <param name="queryTerms">Terms to process; may contain duplicates. A <c>null</c>
/// array leaves the current fields untouched. The array itself is not modified.</param>
private void ProcessTerms(string[] queryTerms)
{
    if (queryTerms == null)
    {
        return;
    }

    // Sort a private copy so the caller's array keeps its original order.
    string[] sortedTerms = (string[])queryTerms.Clone();
    System.Array.Sort(sortedTerms);

    // Maps each distinct term to its slot in distinctTerms / frequencies.
    IDictionary<string, int> slotByTerm = new HashMap<string, int>(sortedTerms.Length);
    List<string> distinctTerms = new List<string>(sortedTerms.Length);
    List<int> frequencies = new List<int>(sortedTerms.Length);

    foreach (string term in sortedTerms)
    {
        if (!slotByTerm.ContainsKey(term))
        {
            // First occurrence: the new slot is the next free list index.
            slotByTerm[term] = distinctTerms.Count;
            distinctTerms.Add(term);
            frequencies.Add(1);
        }
        else
        {
            // Read the slot only once the key is known to be present, so this
            // does not depend on how the map's indexer treats missing keys.
            int slot = slotByTerm[term];
            frequencies[slot] = frequencies[slot] + 1;
        }
    }

    terms = distinctTerms.ToArray();
    termFreqs = frequencies.ToArray();
}
119 
/// <summary>
/// Renders the vector as <c>{term/freq, term/freq, ...}</c>, one entry per
/// stored term, in stored order.
/// </summary>
/// <returns>The formatted string; <c>{}</c> when the vector holds no terms.</returns>
public override string ToString()
{
    System.Text.StringBuilder text = new System.Text.StringBuilder();
    text.Append('{');

    // Empty before the first entry, ", " before every later one.
    string separator = "";
    for (int index = 0; index < terms.Length; index++)
    {
        text.Append(separator);
        text.Append(terms[index]);
        text.Append('/');
        text.Append(termFreqs[index]);
        separator = ", ";
    }

    text.Append('}');
    return text.ToString();
}
133 
134 
/// <summary>
/// The number of distinct terms held by this vector.
/// </summary>
public virtual int Size
{
    get
    {
        int distinctCount = terms.Length;
        return distinctCount;
    }
}
139 
/// <summary>
/// Gets the distinct terms of this vector.
/// </summary>
/// <returns>The internal terms array itself (not a copy).</returns>
public virtual string[] GetTerms()
{
    string[] storedTerms = terms;
    return storedTerms;
}
144 
/// <summary>
/// Gets the occurrence count of each term, parallel to <see cref="GetTerms"/>.
/// </summary>
/// <returns>The internal frequency array itself (not a copy).</returns>
public virtual int[] GetTermFrequencies()
{
    int[] storedFrequencies = termFreqs;
    return storedFrequencies;
}
149 
/// <summary>
/// Finds the position of <paramref name="term"/> in the stored terms array
/// using a binary search.
/// </summary>
/// <param name="term">The term to look up.</param>
/// <returns>The index of the term in the stored terms array, or <c>-1</c> when
/// the search does not find it.</returns>
public virtual int IndexOf(string term)
{
    int position = System.Array.BinarySearch(terms, term);
    if (position < 0)
    {
        // BinarySearch reports a miss as a negative number; normalize to -1.
        return -1;
    }

    return position;
}
155 
/// <summary>
/// Looks up several terms at once via <see cref="IndexOf"/>.
/// </summary>
/// <param name="terms">Array containing the terms to look for. Note that this
/// parameter hides the instance field of the same name inside this method.</param>
/// <param name="start">Index in <paramref name="terms"/> of the first term to look up.</param>
/// <param name="len">Number of consecutive terms to look up, beginning at
/// <paramref name="start"/>.</param>
/// <returns>An array of length <paramref name="len"/>; slot <c>i</c> holds the
/// result of <see cref="IndexOf"/> for <c>terms[start + i]</c>.</returns>
public virtual int[] IndexesOf(string[] terms, int start, int len)
{
    int[] res = new int[len];

    for (int i = 0; i < len; i++)
    {
        // Offset by start so the requested window is searched; previously the
        // start argument was ignored and the lookup always began at index 0.
        res[i] = IndexOf(terms[start + i]);
    }

    return res;
}
166  }
167 }