Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
FreqProxTermsWriterPerField.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 using System;
19 using Lucene.Net.Analysis.Tokenattributes;
20 using Lucene.Net.Documents;
21 
22 namespace Lucene.Net.Index
23 {
24 
25  // TODO: break into separate freq and prox writers as
26  // codecs; make separate container (tii/tis/skip/*) that can
27  // be configured as any number of files 1..N
/// <summary>
/// Per-field terms-hash consumer that writes postings through the owning
/// <see cref="TermsHashPerField"/>: doc/freq codes go to stream 0 and, unless
/// term freqs and positions are omitted for the field, position/payload
/// codes go to stream 1.
/// </summary>
sealed class FreqProxTermsWriterPerField : TermsHashConsumerPerField, System.IComparable<FreqProxTermsWriterPerField>
{
    internal FreqProxTermsWriterPerThread perThread;
    internal TermsHashPerField termsHashPerField;
    internal FieldInfo fieldInfo;
    internal DocumentsWriter.DocState docState;
    internal FieldInvertState fieldState;

    /// <summary>Snapshot of <c>fieldInfo.omitTermFreqAndPositions</c>, refreshed by <see cref="Reset"/>.</summary>
    internal bool omitTermFreqAndPositions;

    /// <summary>Payload attribute of the field being inverted, or null when the attribute source has none.</summary>
    internal IPayloadAttribute payloadAttribute;

    /// <summary>Set to true once <see cref="WriteProx"/> has written a non-empty payload.</summary>
    internal bool hasPayloads;

    /// <summary>
    /// Wires this consumer to its terms hash, owning thread and field, and
    /// caches the doc/field state exposed by <paramref name="termsHashPerField"/>.
    /// </summary>
    /// <param name="termsHashPerField">Terms hash that receives all writes and supplies doc/field state.</param>
    /// <param name="perThread">Per-thread writer this field belongs to.</param>
    /// <param name="fieldInfo">Metadata of the field being indexed.</param>
    // NOTE(review): the declaration line was missing from the listing this class
    // was recovered from; modifier and parameter order are reconstructed from the
    // names the body assigns and the field types - confirm against the callers.
    public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo)
    {
        this.termsHashPerField = termsHashPerField;
        this.perThread = perThread;
        this.fieldInfo = fieldInfo;
        docState = termsHashPerField.docState;
        fieldState = termsHashPerField.fieldState;
        omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
    }

    /// <summary>
    /// Number of byte streams this consumer writes per term.
    /// </summary>
    /// <returns>1 when the field omits term freqs and positions, otherwise 2.</returns>
    internal override int GetStreamCount()
    {
        // Reads the live FieldInfo flag rather than the cached snapshot,
        // exactly as the original implementation did.
        if (fieldInfo.omitTermFreqAndPositions)
        {
            return 1;
        }

        return 2;
    }

    /// <summary>Nothing to do when the field finishes.</summary>
    internal override void Finish()
    {
    }

    /// <summary>Nothing to do when a too-long term is skipped.</summary>
    internal override void SkippingLongTerm()
    {
    }

    /// <summary>
    /// Orders instances by field name using ordinal string comparison.
    /// </summary>
    /// <param name="other">Instance to compare with; may be null.</param>
    /// <returns>
    /// A negative, zero or positive value as this field name sorts before,
    /// equal to or after the other one; a positive value when
    /// <paramref name="other"/> is null.
    /// </returns>
    public int CompareTo(FreqProxTermsWriterPerField other)
    {
        // IComparable<T> contract: any instance compares greater than null.
        if (other == null)
        {
            return 1;
        }

        return string.CompareOrdinal(fieldInfo.name, other.fieldInfo.name);
    }

    /// <summary>
    /// Re-reads the omit flag from <c>fieldInfo</c> and drops the cached payload attribute.
    /// </summary>
    internal void Reset()
    {
        // Record, up front, whether our in-RAM format will be
        // with or without term freqs:
        omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
        payloadAttribute = null;
    }

    /// <summary>
    /// Reports whether any of the first <paramref name="count"/> fields is indexed.
    /// </summary>
    /// <param name="fields">Field instances for the current document.</param>
    /// <param name="count">Number of leading entries of <paramref name="fields"/> to inspect.</param>
    /// <returns>true as soon as one indexed field is found; false otherwise.</returns>
    internal override bool Start(IFieldable[] fields, int count)
    {
        for (int i = 0; i < count; i++)
        {
            if (fields[i].IsIndexed)
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Caches the payload attribute of the current attribute source, or null if it has none.
    /// </summary>
    /// <param name="f">Field about to be processed (not read by this method).</param>
    internal override void Start(IFieldable f)
    {
        if (fieldState.attributeSource.HasAttribute<IPayloadAttribute>())
        {
            payloadAttribute = fieldState.attributeSource.GetAttribute<IPayloadAttribute>();
        }
        else
        {
            payloadAttribute = null;
        }
    }

    /// <summary>
    /// Writes one position entry to stream 1 and records the current position on the posting.
    /// </summary>
    /// <param name="p">Posting whose <c>lastPosition</c> is updated.</param>
    /// <param name="proxCode">Position code to write (absolute or delta, as chosen by the caller).</param>
    internal void WriteProx(FreqProxTermsWriter.PostingList p, int proxCode)
    {
        Payload payload = payloadAttribute == null ? null : payloadAttribute.Payload;

        if (payload != null && payload.internalLength > 0)
        {
            // Low bit set: a payload length and the payload bytes follow the code.
            termsHashPerField.WriteVInt(1, (proxCode << 1) | 1);
            termsHashPerField.WriteVInt(1, payload.internalLength);
            termsHashPerField.WriteBytes(1, payload.data, payload.internalOffset, payload.internalLength);
            hasPayloads = true;
        }
        else
        {
            // Low bit clear: no payload for this position.
            termsHashPerField.WriteVInt(1, proxCode << 1);
        }

        p.lastPosition = fieldState.position;
    }

    /// <summary>
    /// Initialises the posting for a term seen for the first time since the last flush.
    /// </summary>
    /// <param name="p0">Posting to initialise; must be a <c>FreqProxTermsWriter.PostingList</c>.</param>
    internal override void NewTerm(RawPostingList p0)
    {
        // First time we're seeing this term since the last
        // flush
        System.Diagnostics.Debug.Assert(docState.TestPoint("FreqProxTermsWriterPerField.newTerm start"));
        FreqProxTermsWriter.PostingList p = (FreqProxTermsWriter.PostingList) p0;
        p.lastDocID = docState.docID;

        if (omitTermFreqAndPositions)
        {
            // No freq is stored, so the doc code is the plain doc ID.
            p.lastDocCode = docState.docID;
        }
        else
        {
            // Shift left to leave the low bit free; AddTerm sets it when docFreq is 1.
            p.lastDocCode = docState.docID << 1;
            p.docFreq = 1;
            WriteProx(p, fieldState.position);
        }
    }

    /// <summary>
    /// Records another occurrence of a term that already has a posting.
    /// </summary>
    /// <param name="p0">Existing posting; must be a <c>FreqProxTermsWriter.PostingList</c>.</param>
    internal override void AddTerm(RawPostingList p0)
    {
        System.Diagnostics.Debug.Assert(docState.TestPoint("FreqProxTermsWriterPerField.addTerm start"));

        FreqProxTermsWriter.PostingList p = (FreqProxTermsWriter.PostingList) p0;

        System.Diagnostics.Debug.Assert(omitTermFreqAndPositions || p.docFreq > 0);

        if (omitTermFreqAndPositions)
        {
            if (docState.docID != p.lastDocID)
            {
                System.Diagnostics.Debug.Assert(docState.docID > p.lastDocID);

                // Flush the pending code of the previous doc, then start
                // a new pending code holding the doc ID delta.
                termsHashPerField.WriteVInt(0, p.lastDocCode);
                p.lastDocCode = docState.docID - p.lastDocID;
                p.lastDocID = docState.docID;
            }
        }
        else if (docState.docID != p.lastDocID)
        {
            System.Diagnostics.Debug.Assert(docState.docID > p.lastDocID);
            // Term not yet seen in the current doc but previously
            // seen in other doc(s) since the last flush

            // Now that we know doc freq for previous doc,
            // write it & lastDocCode
            if (1 == p.docFreq)
            {
                // Low bit set means "freq is 1"; no separate freq value is written.
                termsHashPerField.WriteVInt(0, p.lastDocCode | 1);
            }
            else
            {
                termsHashPerField.WriteVInt(0, p.lastDocCode);
                termsHashPerField.WriteVInt(0, p.docFreq);
            }

            p.docFreq = 1;
            p.lastDocCode = (docState.docID - p.lastDocID) << 1;
            p.lastDocID = docState.docID;
            WriteProx(p, fieldState.position);
        }
        else
        {
            // Same doc as the previous occurrence: bump the freq and
            // write the position as a delta from the last one.
            p.docFreq++;
            WriteProx(p, fieldState.position - p.lastPosition);
        }
    }

    /// <summary>Nothing to release on abort.</summary>
    public void Abort()
    {
    }
}
196 }