Lucene.Net 3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
TermsHash.cs
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using System.Collections.Generic;
using ArrayUtil = Lucene.Net.Util.ArrayUtil;

namespace Lucene.Net.Index
{
    /// <summary>This class implements <see cref="InvertedDocConsumer" />, which
    /// is passed each token produced by the analyzer on each field. It stores
    /// these tokens in a hash table, and allocates separate byte streams per
    /// token. Consumers of this class, e.g. <see cref="FreqProxTermsWriter" />
    /// and <see cref="TermVectorsTermsWriter" />, write their own byte streams
    /// under each term.
    /// </summary>
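    /// <remarks>Two of these can be chained via <c>nextTermsHash</c>: the
    /// primary hash drives its own consumer while forwarding the same tokens
    /// to the secondary hash, so for example <see cref="FreqProxTermsWriter" />
    /// and <see cref="TermVectorsTermsWriter" /> can both be fed from a single
    /// inversion of the document.</remarks>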
    sealed class TermsHash : InvertedDocConsumer
    {
        internal TermsHashConsumer consumer;
        internal TermsHash nextTermsHash;
        internal int bytesPerPosting;
        internal int postingsFreeChunk;
        internal DocumentsWriter docWriter;
        private RawPostingList[] postingsFreeList = new RawPostingList[1];
        private int postingsFreeCount;
        private int postingsAllocCount;
        internal bool trackAllocations;
        public TermsHash(DocumentsWriter docWriter, bool trackAllocations, TermsHashConsumer consumer, TermsHash nextTermsHash)
        {
            this.docWriter = docWriter;
            this.consumer = consumer;
            this.nextTermsHash = nextTermsHash;
            this.trackAllocations = trackAllocations;

            // Why + 4*POINTER_NUM_BYTE below?
            //   +1: Posting is referenced by postingsFreeList array
            //   +3: Posting is referenced by hash, which
            //       targets 25-50% fill factor; approximate this
            //       as 3X # pointers
            bytesPerPosting = consumer.BytesPerPosting() + 4 * DocumentsWriter.POINTER_NUM_BYTE;
            postingsFreeChunk = (int) (DocumentsWriter.BYTE_BLOCK_SIZE / bytesPerPosting);
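
            // Illustrative arithmetic with hypothetical sizes (the real values
            // come from the consumer and the DocumentsWriter constants): if the
            // consumer needs 24 bytes per posting and pointers are 4 bytes,
            // bytesPerPosting = 24 + 4*4 = 40; with a 32768-byte block,
            // postingsFreeChunk = 32768 / 40 = 819, so FreeRAM() gives memory
            // back in chunks worth roughly one byte block.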
        }

        internal override InvertedDocConsumerPerThread AddThread(DocInverterPerThread docInverterPerThread)
        {
            return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, null);
        }

        internal TermsHashPerThread AddThread(DocInverterPerThread docInverterPerThread, TermsHashPerThread primaryPerThread)
        {
            return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, primaryPerThread);
        }

        internal override void SetFieldInfos(FieldInfos fieldInfos)
        {
            this.fieldInfos = fieldInfos;
            consumer.SetFieldInfos(fieldInfos);
        }

        // NOTE: do not make this sync'd; it's not necessary (DW
        // ensures all other threads are idle), and it leads to
        // deadlock
        public override void Abort()
        {
            consumer.Abort();
            if (nextTermsHash != null)
                nextTermsHash.Abort();
        }

        internal void ShrinkFreePostings(IDictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>> threadsAndFields, SegmentWriteState state)
        {
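            // After a successful flush every posting has been recycled
            // (postingsFreeCount == postingsAllocCount), so the free list can
            // be trimmed back to a single slot and the allocator credited for
            // the dropped postings.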
            System.Diagnostics.Debug.Assert(postingsFreeCount == postingsAllocCount, System.Threading.Thread.CurrentThread.Name + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer);

            int newSize = 1;
            if (newSize != postingsFreeList.Length)
            {
                if (postingsFreeCount > newSize)
                {
                    if (trackAllocations)
                    {
                        docWriter.BytesAllocated(-(postingsFreeCount - newSize) * bytesPerPosting);
                    }
                    postingsFreeCount = newSize;
                    postingsAllocCount = newSize;
                }

                RawPostingList[] newArray = new RawPostingList[newSize];
                Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
                postingsFreeList = newArray;
            }
        }

        internal override void CloseDocStore(SegmentWriteState state)
        {
            lock (this)
            {
                consumer.CloseDocStore(state);
                if (nextTermsHash != null)
                    nextTermsHash.CloseDocStore(state);
            }
        }

        internal override void Flush(IDictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>> threadsAndFields, SegmentWriteState state)
        {
            lock (this)
            {
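                // Remap each (per-thread -> fields) entry onto the consumer's
                // own per-thread/per-field objects, and likewise for the
                // chained TermsHash, so that each layer of the chain flushes
                // its own state.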
                var childThreadsAndFields = new Dictionary<TermsHashConsumerPerThread, ICollection<TermsHashConsumerPerField>>();
                Dictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>> nextThreadsAndFields;

                if (nextTermsHash != null)
                {
                    nextThreadsAndFields = new Dictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>>();
                }
                else
                    nextThreadsAndFields = null;

                foreach (var entry in threadsAndFields)
                {
                    TermsHashPerThread perThread = (TermsHashPerThread) entry.Key;

                    ICollection<InvertedDocConsumerPerField> fields = entry.Value;

                    ICollection<TermsHashConsumerPerField> childFields = new HashSet<TermsHashConsumerPerField>();
                    ICollection<InvertedDocConsumerPerField> nextChildFields;

                    if (nextTermsHash != null)
                    {
                        nextChildFields = new HashSet<InvertedDocConsumerPerField>();
                    }
                    else
                        nextChildFields = null;

                    foreach (InvertedDocConsumerPerField field in fields)
                    {
                        TermsHashPerField perField = (TermsHashPerField) field;
                        childFields.Add(perField.consumer);
                        if (nextTermsHash != null)
                            nextChildFields.Add(perField.nextPerField);
                    }

                    childThreadsAndFields[perThread.consumer] = childFields;
                    if (nextTermsHash != null)
                        nextThreadsAndFields[perThread.nextPerThread] = nextChildFields;
                }

                consumer.Flush(childThreadsAndFields, state);

                ShrinkFreePostings(threadsAndFields, state);

                if (nextTermsHash != null)
                    nextTermsHash.Flush(nextThreadsAndFields, state);
            }
        }

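        /// <summary>Called when DocumentsWriter is low on memory: releases up
        /// to one chunk of recycled postings back to the allocator and reports
        /// the freed bytes. Returns true if this hash, or any hash further
        /// down the chain, freed anything.</summary>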
        public override bool FreeRAM()
        {
            if (!trackAllocations)
                return false;

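            // Free at most one chunk (postingsFreeChunk entries) per call, so
            // a single sweep does not dump the entire free list at once.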
            bool any;
            long bytesFreed = 0;
            lock (this)
            {
                int numToFree;
                if (postingsFreeCount >= postingsFreeChunk)
                    numToFree = postingsFreeChunk;
                else
                    numToFree = postingsFreeCount;
                any = numToFree > 0;
                if (any)
                {
                    for (int i = postingsFreeCount - numToFree; i < postingsFreeCount; i++)
                    {
                        postingsFreeList[i] = null;
                    }
                    postingsFreeCount -= numToFree;
                    postingsAllocCount -= numToFree;
                    bytesFreed = -numToFree * bytesPerPosting;
                    any = true;
                }
            }

            if (any)
            {
                docWriter.BytesAllocated(bytesFreed);
            }

            if (nextTermsHash != null)
                any |= nextTermsHash.FreeRAM();

            return any;
        }

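        /// <summary>Returns postings from a per-thread state to the shared
        /// free list so GetPostings can hand them out again.</summary>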
        public void RecyclePostings(RawPostingList[] postings, int numPostings)
        {
            lock (this)
            {
                System.Diagnostics.Debug.Assert(postings.Length >= numPostings);

                // Move all Postings from this ThreadState back to our
                // free list. We pre-allocated this array while we were
                // creating Postings to make sure it's large enough
                System.Diagnostics.Debug.Assert(postingsFreeCount + numPostings <= postingsFreeList.Length);
                Array.Copy(postings, 0, postingsFreeList, postingsFreeCount, numPostings);
                postingsFreeCount += numPostings;
            }
        }

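        /// <summary>Fills <c>postings</c> with recycled instances from the
        /// free list, allocating fresh ones from the consumer when the free
        /// list runs short.</summary>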
        public void GetPostings(RawPostingList[] postings)
        {
            lock (this)
            {
                System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermsHash.getPostings start"));

                System.Diagnostics.Debug.Assert(postingsFreeCount <= postingsFreeList.Length);
                System.Diagnostics.Debug.Assert(postingsFreeCount <= postingsAllocCount, "postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount);
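
                // Recycled postings come off the tail of the free list;
                // anything the free list cannot supply is allocated fresh
                // below.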
                int numToCopy;
                if (postingsFreeCount < postings.Length)
                    numToCopy = postingsFreeCount;
                else
                    numToCopy = postings.Length;
                int start = postingsFreeCount - numToCopy;
                System.Diagnostics.Debug.Assert(start >= 0);
                System.Diagnostics.Debug.Assert(start + numToCopy <= postingsFreeList.Length);
                System.Diagnostics.Debug.Assert(numToCopy <= postings.Length);
                Array.Copy(postingsFreeList, start, postings, 0, numToCopy);

                // Directly allocate the remainder if any
                if (numToCopy != postings.Length)
                {
                    int extra = postings.Length - numToCopy;
                    int newPostingsAllocCount = postingsAllocCount + extra;

                    consumer.CreatePostings(postings, numToCopy, extra);
                    System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermsHash.getPostings after create"));
                    postingsAllocCount += extra;

                    if (trackAllocations)
                        docWriter.BytesAllocated(extra * bytesPerPosting);

                    if (newPostingsAllocCount > postingsFreeList.Length)
                        // Pre-allocate the postingsFreeList so it's large
                        // enough to hold all postings we've given out
                        postingsFreeList = new RawPostingList[ArrayUtil.GetNextSize(newPostingsAllocCount)];
                }

                postingsFreeCount -= numToCopy;

                if (trackAllocations)
                    docWriter.BytesUsed(postings.Length * bytesPerPosting);
            }
        }
    }
}