Lucene.Net  3.0.3
Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Pages
HTMLParser.cs
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /* Generated By:JavaCC: Do not edit this line. HTMLParser.java */
19 
20 using System;
21 using Lucene.Net.Support;
22 
23 namespace Lucene.Net.Demo.Html
24 {
25 #pragma warning disable 162,164
26 
28  {
/// <summary>
/// Allocates the lookahead bookkeeping objects: a two-slot <c>JJCalls</c>
/// array and the <c>LookaheadSuccess</c> instance stored in <c>jj_ls</c>.
/// The array slots themselves are filled in by the constructors.
/// </summary>
private void InitBlock()
{
    jj_ls = new LookaheadSuccess();
    jj_2_rtns = new JJCalls[2];
}
// Number of summary characters collected before the summary is considered complete.
public static int SUMMARY_LENGTH = 200;

// Text collected by AddText/AddSpace: title while inTitle is set, summary otherwise.
internal System.Text.StringBuilder title = new System.Text.StringBuilder(SUMMARY_LENGTH);
internal System.Text.StringBuilder summary = new System.Text.StringBuilder(SUMMARY_LENGTH * 2);

// Name/content pairs stored by AddMetaTag, plus the pair currently being assembled.
internal System.Collections.Specialized.NameValueCollection metaTags = new System.Collections.Specialized.NameValueCollection();
internal string currentMetaTag;
internal string currentMetaContent;

// Running count of characters written to pipeOut.
internal int length;

// Completion flags; they are set under lock(this) followed by Monitor.PulseAll(this).
internal bool titleComplete;
internal bool summaryComplete;

// Flags describing the tag most recently seen by Tag().
internal bool inTitle;
internal bool inMetaTag;
internal bool inStyle;

// Whitespace bookkeeping used by AddSpace/AddText.
internal bool afterTag;
internal bool afterSpace;
internal string eol = Environment.NewLine;

// Reader/writer pair created lazily by GetReader() over a shared in-memory stream.
internal System.IO.StreamReader pipeIn;
internal System.IO.StreamWriter pipeOut;
private MyPipedInputStream pipeInStream;
private System.IO.StreamWriter pipeOutStream;
54 
/// <summary>
/// A <see cref="System.IO.MemoryStream"/> that keeps separate read and write
/// positions, so bytes can be appended while earlier bytes are being read.
/// Each operation seeks to its own position first and runs under
/// <c>lock (this)</c>.
/// </summary>
private class MyPipedInputStream : System.IO.MemoryStream
{
    // Absolute offsets of the next byte to write and the next byte to read.
    private long _writePtr = 0;
    private long _readPtr = 0;

    /// <summary>Returns this instance typed as a plain stream.</summary>
    public System.IO.Stream BaseStream
    {
        get { return this; }
    }

    /// <summary>
    /// Reads up to <paramref name="count"/> bytes starting at the read position
    /// and advances the read position by the number of bytes actually read.
    /// </summary>
    /// <returns>The number of bytes read (0 when nothing is available).</returns>
    public override int Read(byte[] buffer, int offset, int count)
    {
        lock (this)
        {
            base.Seek(_readPtr, System.IO.SeekOrigin.Begin);
            int bytesRead = base.Read(buffer, offset, count);
            _readPtr = _readPtr + bytesRead;
            return bytesRead;
        }
    }

    /// <summary>
    /// Writes <paramref name="count"/> bytes at the write position and advances
    /// the write position by <paramref name="count"/>.
    /// </summary>
    public override void Write(byte[] buffer, int offset, int count)
    {
        lock (this)
        {
            base.Seek(_writePtr, System.IO.SeekOrigin.Begin);
            base.Write(buffer, offset, count);
            _writePtr = _writePtr + count;
        }
    }

    /// <summary>
    /// Does nothing: the base implementation is not called, so closing a
    /// reader or writer wrapped around this stream leaves the buffer usable.
    /// </summary>
    public override void Close()
    {
    }

    /// <summary>Always reports <c>false</c>.</summary>
    public virtual bool Full()
    {
        return false;
    }
}
99 
/// <summary>
/// Creates a parser over the given file, which is opened for reading.
/// </summary>
/// <remarks>
/// Deprecated: open the file yourself and use the constructor that takes a
/// <see cref="System.IO.Stream"/> instead.
/// </remarks>
/// <param name="file">File whose <c>FullName</c> is opened with FileMode.Open / FileAccess.Read.</param>
public HTMLParser(System.IO.FileInfo file)
    : this(new System.IO.FileStream(file.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read))
{
}
105 
/// <summary>
/// Returns the collected title text, trimmed. Starts parsing via
/// <c>GetReader()</c> if no reader exists yet, then polls in 10 ms waits on
/// this object's monitor until <c>titleComplete</c> is set or
/// <c>pipeInStream.Full()</c> returns true.
/// </summary>
public virtual System.String GetTitle()
{
    if (pipeIn == null)
    {
        GetReader(); // spawn parsing thread
    }

    bool ready = false;
    while (!ready)
    {
        lock (this)
        {
            ready = titleComplete || pipeInStream.Full();
            if (!ready)
            {
                System.Threading.Monitor.Wait(this, TimeSpan.FromMilliseconds(10));
            }
        }
    }

    return title.ToString().Trim();
}
121 
/// <summary>
/// Returns the meta-tag collection. Starts parsing via <c>GetReader()</c> if
/// no reader exists yet, then polls in 10 ms waits on this object's monitor
/// until <c>titleComplete</c> is set or <c>pipeInStream.Full()</c> returns true.
/// </summary>
/// <returns>The live <c>metaTags</c> collection (not a copy).</returns>
public virtual System.Collections.Specialized.NameValueCollection GetMetaTags()
{
    if (pipeIn == null)
    {
        GetReader(); // spawn parsing thread
    }

    bool ready = false;
    while (!ready)
    {
        lock (this)
        {
            ready = titleComplete || pipeInStream.Full();
            if (!ready)
            {
                System.Threading.Monitor.Wait(this, TimeSpan.FromMilliseconds(10));
            }
        }
    }

    return metaTags;
}
137 
138 
/// <summary>
/// Returns a short summary of the document. Starts parsing via
/// <c>GetReader()</c> if no reader exists yet, then polls in 10 ms waits until
/// at least <c>SUMMARY_LENGTH</c> characters are collected or
/// <c>pipeInStream.Full()</c> returns true.
/// </summary>
/// <returns>
/// The title when the trimmed summary is empty or begins with the title;
/// otherwise the summary, truncated to <c>SUMMARY_LENGTH</c> characters before trimming.
/// </returns>
public virtual System.String GetSummary()
{
    if (pipeIn == null)
    {
        GetReader(); // spawn parsing thread
    }

    while (true)
    {
        lock (this)
        {
            if (summary.Length >= SUMMARY_LENGTH || pipeInStream.Full())
            {
                break;
            }
            System.Threading.Monitor.Wait(this, TimeSpan.FromMilliseconds(10));
        }
    }

    if (summary.Length > SUMMARY_LENGTH)
    {
        summary.Length = SUMMARY_LENGTH;
    }

    string sum = summary.ToString().Trim();
    string tit = GetTitle();

    // Ordinal comparison: the single-argument StartsWith is culture-sensitive,
    // so its result could vary with the current thread culture (and it treats
    // ignorable characters as matching). This is a plain prefix test.
    if (sum.Length == 0 || sum.StartsWith(tit, StringComparison.Ordinal))
    {
        return tit;
    }
    return sum;
}
162 
/// <summary>
/// Returns the reader over the parsed text. On the first call it creates the
/// shared in-memory stream, wraps it in a UTF-16BE reader and writer, and
/// starts a <c>ParserThread</c>; later calls return the same reader.
/// </summary>
public virtual System.IO.StreamReader GetReader()
{
    if (pipeIn != null)
    {
        return pipeIn;
    }

    pipeInStream = new MyPipedInputStream();
    System.IO.Stream shared = pipeInStream.BaseStream;
    pipeOutStream = new System.IO.StreamWriter(shared);
    pipeIn = new System.IO.StreamReader(shared, System.Text.Encoding.GetEncoding("UTF-16BE"));
    pipeOut = new System.IO.StreamWriter(pipeOutStream.BaseStream, System.Text.Encoding.GetEncoding("UTF-16BE"));

    ThreadClass worker = new ParserThread(this);
    worker.Start(); // start parsing

    return pipeIn;
}
178 
/// <summary>
/// Appends <paramref name="text"/> to the summary while it is shorter than
/// <c>SUMMARY_LENGTH</c>. When the append brings it to that length or beyond,
/// sets <c>summaryComplete</c> and pulses all threads waiting on this object.
/// </summary>
internal virtual void AddToSummary(System.String text)
{
    if (summary.Length >= SUMMARY_LENGTH)
    {
        return; // already long enough; ignore further text
    }

    summary.Append(text);
    if (summary.Length < SUMMARY_LENGTH)
    {
        return;
    }

    lock (this)
    {
        summaryComplete = true;
        System.Threading.Monitor.PulseAll(this);
    }
}
194 
/// <summary>
/// Records a piece of document text. Nothing happens while <c>inStyle</c> is
/// set. Otherwise the text goes to the title (when <c>inTitle</c>) or to the
/// summary, is written to <c>pipeOut</c>, and <c>afterSpace</c> is cleared.
/// The first non-title text seen after a non-empty title marks the title as
/// complete and pulses waiting threads.
/// </summary>
internal virtual void AddText(System.String text)
{
    if (inStyle)
    {
        return;
    }

    if (!inTitle)
    {
        AddToSummary(text);

        bool titleJustFinished = !titleComplete && title.Length != 0;
        if (titleJustFinished)
        {
            lock (this)
            {
                titleComplete = true; // tell waiting threads
                System.Threading.Monitor.PulseAll(this);
            }
        }
    }
    else
    {
        title.Append(text);
    }

    length += text.Length;
    pipeOut.Write(text);

    afterSpace = false;
}
220 
/// <summary>
/// Stores the pending meta name/content pair in <c>metaTags</c> and clears
/// both pending values.
/// </summary>
internal virtual void AddMetaTag()
{
    metaTags[currentMetaTag] = currentMetaContent;
    currentMetaContent = null;
    currentMetaTag = null;
}
228 
/// <summary>
/// Emits a single separator unless one was just emitted. A space is added to
/// the title or summary; the piped output receives <c>eol</c> when the
/// previous item was a tag and a space otherwise.
/// </summary>
internal virtual void AddSpace()
{
    if (afterSpace)
    {
        return; // collapse runs of whitespace
    }

    if (inTitle)
    {
        title.Append(" ");
    }
    else
    {
        AddToSummary(" ");
    }

    System.String separator;
    if (afterTag)
    {
        separator = eol;
    }
    else
    {
        separator = " ";
    }

    length += separator.Length;
    pipeOut.Write(separator);
    afterSpace = true;
}
244 
/// <summary>
/// Top-level production. Repeatedly consumes tags, declarations, comments,
/// script blocks, words, entities, punctuation and spaces for as long as the
/// next token can start one of them, then requires token kind 0.
/// Tag-like items set <c>afterTag</c>; text-like items clear it.
/// </summary>
public void HTMLDocument()
{
    for (;;)
    {
        // Loop guard: continue only while the lookahead can start a document item.
        int lookahead = (jj_ntk == -1) ? Jj_ntk() : jj_ntk;
        switch (lookahead)
        {
            case HTMLParserConstants_Fields.ScriptStart:
            case HTMLParserConstants_Fields.TagName:
            case HTMLParserConstants_Fields.DeclName:
            case HTMLParserConstants_Fields.Comment1:
            case HTMLParserConstants_Fields.Comment2:
            case HTMLParserConstants_Fields.Word:
            case HTMLParserConstants_Fields.Entity:
            case HTMLParserConstants_Fields.Space:
            case HTMLParserConstants_Fields.Punct:
                break;

            default:
                jj_la1[0] = jj_gen;
                goto documentEnd;
        }

        // Dispatch on the same lookahead.
        lookahead = (jj_ntk == -1) ? Jj_ntk() : jj_ntk;
        switch (lookahead)
        {
            case HTMLParserConstants_Fields.TagName:
                Tag();
                afterTag = true;
                break;

            case HTMLParserConstants_Fields.DeclName:
                Decl();
                afterTag = true;
                break;

            case HTMLParserConstants_Fields.Comment1:
            case HTMLParserConstants_Fields.Comment2:
                CommentTag();
                afterTag = true;
                break;

            case HTMLParserConstants_Fields.ScriptStart:
                ScriptTag();
                afterTag = true;
                break;

            case HTMLParserConstants_Fields.Word:
            {
                Token word = Jj_consume_token(HTMLParserConstants_Fields.Word);
                AddText(word.image);
                afterTag = false;
                break;
            }

            case HTMLParserConstants_Fields.Entity:
            {
                Token entity = Jj_consume_token(HTMLParserConstants_Fields.Entity);
                AddText(Entities.Decode(entity.image));
                afterTag = false;
                break;
            }

            case HTMLParserConstants_Fields.Punct:
            {
                Token punct = Jj_consume_token(HTMLParserConstants_Fields.Punct);
                AddText(punct.image);
                afterTag = false;
                break;
            }

            case HTMLParserConstants_Fields.Space:
                Jj_consume_token(HTMLParserConstants_Fields.Space);
                AddSpace();
                afterTag = false;
                break;

            default:
                jj_la1[1] = jj_gen;
                Jj_consume_token(-1);
                throw new ParseException();
        }
    }

documentEnd:
    Jj_consume_token(0);
}
326 
/// <summary>
/// Parses one tag: the tag name, any number of <c>name[=value]</c> arguments,
/// and the closing <c>TagEnd</c> token.
/// </summary>
/// <remarks>
/// Side effects: emits a space for tags listed in <c>Tags.WS_ELEMS</c>; updates
/// <c>inTitle</c>, <c>inMetaTag</c> and <c>inStyle</c>; adds <c>[alt text]</c>
/// for an image's <c>alt</c> argument; and records meta name/http-equiv and
/// content values, storing the pair once both are known.
/// All name matching is ordinal and case-insensitive so it does not depend on
/// the current thread culture (culture-sensitive casing turns "I" into a
/// dotless "ı" under Turkish cultures, which broke matching of upper-case
/// TITLE/IMG tags).
/// </remarks>
public void Tag()
{
    Token nameToken = Jj_consume_token(HTMLParserConstants_Fields.TagName);

    // Invariant lower-casing keeps the Tags.WS_ELEMS lookup culture-independent.
    System.String tagName = nameToken.image.ToLowerInvariant();
    if (Tags.WS_ELEMS.Contains(tagName))
    {
        AddSpace();
    }

    // The token image includes the leading '<'.
    inTitle = System.String.Equals(tagName, "<title", StringComparison.OrdinalIgnoreCase);   // keep track if in <TITLE>
    inMetaTag = System.String.Equals(tagName, "<META", StringComparison.OrdinalIgnoreCase);  // keep track if in <META>
    inStyle = System.String.Equals(tagName, "<STYLE", StringComparison.OrdinalIgnoreCase);   // keep track if in <STYLE>
    bool inImg = System.String.Equals(tagName, "<img", StringComparison.OrdinalIgnoreCase);  // keep track if in <IMG>

    while (((jj_ntk == -1) ? Jj_ntk() : jj_ntk) == HTMLParserConstants_Fields.ArgName)
    {
        Token argName = Jj_consume_token(HTMLParserConstants_Fields.ArgName);

        // Argument without "=": nothing more to read for it.
        if (((jj_ntk == -1) ? Jj_ntk() : jj_ntk) != HTMLParserConstants_Fields.ArgEquals)
        {
            jj_la1[4] = jj_gen;
            continue;
        }
        Jj_consume_token(HTMLParserConstants_Fields.ArgEquals);

        // "=" not followed by a value: tolerate it and move on.
        int valueKind = (jj_ntk == -1) ? Jj_ntk() : jj_ntk;
        if (valueKind != HTMLParserConstants_Fields.ArgValue
            && valueKind != HTMLParserConstants_Fields.ArgQuote1
            && valueKind != HTMLParserConstants_Fields.ArgQuote2)
        {
            jj_la1[3] = jj_gen;
            continue;
        }

        Token argValue = ArgValue();
        if (argValue == null)
        {
            continue; // empty quoted value: nothing to record
        }

        if (inImg && System.String.Equals(argName.image, "alt", StringComparison.OrdinalIgnoreCase))
        {
            AddText("[" + argValue.image + "]");
        }

        if (!inMetaTag)
        {
            continue;
        }

        bool isMetaName =
            System.String.Equals(argName.image, "name", StringComparison.OrdinalIgnoreCase)
            || System.String.Equals(argName.image, "HTTP-EQUIV", StringComparison.OrdinalIgnoreCase);
        if (isMetaName)
        {
            currentMetaTag = argValue.image.ToLowerInvariant();
            if (currentMetaTag != null && currentMetaContent != null)
            {
                AddMetaTag();
            }
        }

        if (System.String.Equals(argName.image, "content", StringComparison.OrdinalIgnoreCase))
        {
            currentMetaContent = argValue.image.ToLowerInvariant();
            if (currentMetaTag != null && currentMetaContent != null)
            {
                AddMetaTag();
            }
        }
    }
    jj_la1[2] = jj_gen; // lookahead was not an argument name: argument list is over

    Jj_consume_token(HTMLParserConstants_Fields.TagEnd);
}
410 
/// <summary>
/// Parses an argument value: a bare value, a quoted value, or an empty pair
/// of quotes (either quote style).
/// </summary>
/// <returns>
/// The token holding the value text, or <c>null</c> when the value is an
/// empty pair of quotes.
/// </returns>
/// <exception cref="ParseException">
/// Thrown when the next token cannot start an argument value, or by
/// <c>Jj_consume_token</c> when an expected token is missing.
/// </exception>
public Token ArgValue()
{
    // Bare (unquoted) value.
    if (((jj_ntk == -1) ? Jj_ntk() : jj_ntk) == HTMLParserConstants_Fields.ArgValue)
    {
        return Jj_consume_token(HTMLParserConstants_Fields.ArgValue);
    }
    jj_la1[5] = jj_gen;

    // Two-token lookahead: first-style open quote immediately followed by its close quote.
    if (Jj_2_1(2))
    {
        Jj_consume_token(HTMLParserConstants_Fields.ArgQuote1);
        Jj_consume_token(HTMLParserConstants_Fields.CloseQuote1);
        return null;
    }

    // First-style quoted text.
    if (((jj_ntk == -1) ? Jj_ntk() : jj_ntk) == HTMLParserConstants_Fields.ArgQuote1)
    {
        Jj_consume_token(HTMLParserConstants_Fields.ArgQuote1);
        Token quoted1 = Jj_consume_token(HTMLParserConstants_Fields.Quote1Text);
        Jj_consume_token(HTMLParserConstants_Fields.CloseQuote1);
        return quoted1;
    }
    jj_la1[6] = jj_gen;

    // Two-token lookahead: second-style open quote immediately followed by its close quote.
    if (Jj_2_2(2))
    {
        Jj_consume_token(HTMLParserConstants_Fields.ArgQuote2);
        Jj_consume_token(HTMLParserConstants_Fields.CloseQuote2);
        return null;
    }

    // Second-style quoted text.
    if (((jj_ntk == -1) ? Jj_ntk() : jj_ntk) == HTMLParserConstants_Fields.ArgQuote2)
    {
        Jj_consume_token(HTMLParserConstants_Fields.ArgQuote2);
        Token quoted2 = Jj_consume_token(HTMLParserConstants_Fields.Quote2Text);
        Jj_consume_token(HTMLParserConstants_Fields.CloseQuote2);
        return quoted2;
    }

    // Nothing matched: record the failed choice and raise the parse error.
    jj_la1[7] = jj_gen;
    Jj_consume_token(-1);
    throw new ParseException();
}
493 
/// <summary>
/// Parses a declaration: the <c>DeclName</c> token, then any mix of argument
/// names, "=" tokens and argument values, then the closing <c>TagEnd</c>.
/// </summary>
/// <returns>The <c>DeclName</c> token that opened the declaration.</returns>
public Token Decl()
{
    Token declName = Jj_consume_token(HTMLParserConstants_Fields.DeclName);

    while (true)
    {
        switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
        {
            case HTMLParserConstants_Fields.ArgName:
                Jj_consume_token(HTMLParserConstants_Fields.ArgName);
                break;

            case HTMLParserConstants_Fields.ArgValue:
            case HTMLParserConstants_Fields.ArgQuote1:
            case HTMLParserConstants_Fields.ArgQuote2:
                ArgValue();
                break;

            case HTMLParserConstants_Fields.ArgEquals:
                Jj_consume_token(HTMLParserConstants_Fields.ArgEquals);
                break;

            default:
                // Lookahead cannot continue the declaration body: it must end here.
                jj_la1[8] = jj_gen;
                Jj_consume_token(HTMLParserConstants_Fields.TagEnd);
                return declName;
        }
    }
}
550 
/// <summary>
/// Parses a comment in either of the two comment forms: the opening token,
/// any number of matching comment-text tokens, and the matching end token.
/// </summary>
/// <exception cref="ParseException">
/// Thrown when the next token opens neither comment form.
/// </exception>
public void CommentTag()
{
    int opener = (jj_ntk == -1) ? Jj_ntk() : jj_ntk;

    if (opener == HTMLParserConstants_Fields.Comment1)
    {
        Jj_consume_token(HTMLParserConstants_Fields.Comment1);
        while (((jj_ntk == -1) ? Jj_ntk() : jj_ntk) == HTMLParserConstants_Fields.CommentText1)
        {
            Jj_consume_token(HTMLParserConstants_Fields.CommentText1);
        }
        jj_la1[10] = jj_gen;
        Jj_consume_token(HTMLParserConstants_Fields.CommentEnd1);
    }
    else if (opener == HTMLParserConstants_Fields.Comment2)
    {
        Jj_consume_token(HTMLParserConstants_Fields.Comment2);
        while (((jj_ntk == -1) ? Jj_ntk() : jj_ntk) == HTMLParserConstants_Fields.CommentText2)
        {
            Jj_consume_token(HTMLParserConstants_Fields.CommentText2);
        }
        jj_la1[11] = jj_gen;
        Jj_consume_token(HTMLParserConstants_Fields.CommentEnd2);
    }
    else
    {
        jj_la1[12] = jj_gen;
        Jj_consume_token(-1);
        throw new ParseException();
    }
}
611 
/// <summary>
/// Parses a script block: <c>ScriptStart</c>, any number of <c>ScriptText</c>
/// tokens, then <c>ScriptEnd</c>. The script text is consumed and discarded.
/// </summary>
public void ScriptTag()
{
    Jj_consume_token(HTMLParserConstants_Fields.ScriptStart);

    while (((jj_ntk == -1) ? Jj_ntk() : jj_ntk) == HTMLParserConstants_Fields.ScriptText)
    {
        Jj_consume_token(HTMLParserConstants_Fields.ScriptText);
    }
    jj_la1[13] = jj_gen;

    Jj_consume_token(HTMLParserConstants_Fields.ScriptEnd);
}
636 
/// <summary>
/// Runs lookahead check 1 (<c>Jj_3_1</c>) with a budget of
/// <paramref name="xla"/> tokens, starting at the current token.
/// </summary>
/// <returns>
/// <c>true</c> when the scan reports no mismatch or ends by throwing
/// <c>LookaheadSuccess</c>; <c>false</c> otherwise.
/// </returns>
private bool Jj_2_1(int xla)
{
    jj_la = xla;
    jj_scanpos = token;
    jj_lastpos = token;
    try
    {
        bool mismatch = Jj_3_1();
        return !mismatch;
    }
    catch (LookaheadSuccess)
    {
        return true;
    }
    finally
    {
        // Always record the attempt in slot 0, whatever the outcome.
        Jj_save(0, xla);
    }
}
653 
/// <summary>
/// Runs lookahead check 2 (<c>Jj_3_2</c>) with a budget of
/// <paramref name="xla"/> tokens, starting at the current token.
/// </summary>
/// <returns>
/// <c>true</c> when the scan reports no mismatch or ends by throwing
/// <c>LookaheadSuccess</c>; <c>false</c> otherwise.
/// </returns>
private bool Jj_2_2(int xla)
{
    jj_la = xla;
    jj_scanpos = token;
    jj_lastpos = token;
    try
    {
        bool mismatch = Jj_3_2();
        return !mismatch;
    }
    catch (LookaheadSuccess)
    {
        return true;
    }
    finally
    {
        // Always record the attempt in slot 1, whatever the outcome.
        Jj_save(1, xla);
    }
}
670 
/// <summary>
/// Scans for <c>ArgQuote1</c> followed by <c>CloseQuote1</c>.
/// </summary>
/// <returns><c>true</c> as soon as one of the two scans reports a mismatch; <c>false</c> when both match.</returns>
private bool Jj_3_1()
{
    // Short-circuit: the second scan runs only if the first one matched.
    return Jj_scan_token(HTMLParserConstants_Fields.ArgQuote1)
        || Jj_scan_token(HTMLParserConstants_Fields.CloseQuote1);
}
679 
/// <summary>
/// Scans for <c>ArgQuote2</c> followed by <c>CloseQuote2</c>.
/// </summary>
/// <returns><c>true</c> as soon as one of the two scans reports a mismatch; <c>false</c> when both match.</returns>
private bool Jj_3_2()
{
    // Short-circuit: the second scan runs only if the first one matched.
    return Jj_scan_token(HTMLParserConstants_Fields.ArgQuote2)
        || Jj_scan_token(HTMLParserConstants_Fields.CloseQuote2);
}
688 
// Token manager that supplies tokens via GetNextToken().
// NOTE(review): this field is assigned and read throughout the class but its
// declaration was missing from this listing; it is restored here. The type
// follows from `token_source = new HTMLParserTokenManager(...)`; public
// visibility is assumed to match `token` — confirm against the original file.
public HTMLParserTokenManager token_source;

// Character stream handed to the token manager by the stream-based constructors.
internal SimpleCharStream jj_input_stream;

// Most recently consumed token, and the lookahead token cached by Jj_ntk().
public Token token, jj_nt;

// Kind of the next token, or -1 when it has not been looked up yet.
private int jj_ntk;

// Scan cursor and furthest-scanned token used during lookahead.
private Token jj_scanpos, jj_lastpos;

// Remaining lookahead budget; decremented by Jj_scan_token.
private int jj_la;

// When true, GetToken() indexes from the scan position instead of the current token.
public bool lookingAhead = false;

// Not referenced anywhere in the visible code.
private bool jj_semLA;

// Generation counter, incremented each time a token is consumed.
private int jj_gen;

// Per choice point: the generation at which that choice last failed to match.
private int[] jj_la1 = new int[14];

// Per choice point: bit mask of expected token kinds; filled in by Jj_la1_0().
private static int[] jj_la1_0;
/// <summary>
/// Builds the expected-token table: one 32-bit mask per choice point, where
/// bit <c>k</c> set means token kind <c>k</c> is acceptable at that point.
/// </summary>
private static void Jj_la1_0()
{
    jj_la1_0 = new int[]
    {
        0x2c7e, 0x2c7e,
        0x10000, 0x380000, 0x20000,
        0x80000, 0x100000, 0x200000,
        0x3b0000, 0x3b0000,
        0x8000000, 0x20000000, 0x30,
        0x4000
    };
}
// One chain of saved lookahead attempts per lookahead routine; allocated in InitBlock().
private JJCalls[] jj_2_rtns;

// True only while Jj_rescan_token() is replaying lookahead to collect expected tokens.
private bool jj_rescan;

// Counts consumed tokens; Jj_consume_token prunes stale lookahead records when it passes 100.
private int jj_gc;
707 
/// <summary>
/// Creates a parser over <paramref name="stream"/>, passing <c>null</c> as
/// the encoding name to the two-argument constructor.
/// </summary>
public HTMLParser(System.IO.Stream stream)
    : this(stream, null)
{
}
/// <summary>
/// Creates a parser over <paramref name="stream"/>, building the character
/// stream with the given encoding name and resetting all parser state.
/// </summary>
/// <param name="stream">Source of the HTML bytes.</param>
/// <param name="encoding">Encoding name forwarded to <c>SimpleCharStream</c>; may be null.</param>
/// <exception cref="System.Exception">
/// Wraps any <see cref="System.IO.IOException"/> raised while creating the character stream.
/// </exception>
public HTMLParser(System.IO.Stream stream, System.String encoding)
{
    InitBlock();

    try
    {
        jj_input_stream = new SimpleCharStream(stream, encoding, 1, 1);
    }
    catch (System.IO.IOException ioError)
    {
        throw new System.Exception(ioError.Message, ioError);
    }

    token_source = new HTMLParserTokenManager(jj_input_stream);
    token = new Token();
    jj_gen = 0;
    jj_ntk = -1;

    for (int slot = 0; slot < jj_2_rtns.Length; slot++)
    {
        jj_2_rtns[slot] = new JJCalls();
    }
    for (int choice = 0; choice < 14; choice++)
    {
        jj_la1[choice] = -1;
    }
}
731 
/// <summary>
/// Re-targets the parser at <paramref name="stream"/>, passing <c>null</c> as
/// the encoding name to the two-argument overload.
/// </summary>
public virtual void ReInit(System.IO.Stream stream)
{
    System.String noEncoding = null;
    ReInit(stream, noEncoding);
}
/// <summary>
/// Re-targets the existing character stream and token manager at
/// <paramref name="stream"/> and resets all parser state.
/// </summary>
/// <param name="stream">New source of HTML bytes.</param>
/// <param name="encoding">Encoding name forwarded to the character stream; may be null.</param>
/// <exception cref="System.Exception">
/// Wraps any <see cref="System.IO.IOException"/> raised while re-initialising the character stream.
/// </exception>
public virtual void ReInit(System.IO.Stream stream, System.String encoding)
{
    try
    {
        jj_input_stream.ReInit(stream, encoding, 1, 1);
    }
    catch (System.IO.IOException ioError)
    {
        throw new System.Exception(ioError.Message, ioError);
    }

    token_source.ReInit(jj_input_stream);
    token = new Token();
    jj_gen = 0;
    jj_ntk = -1;

    for (int slot = 0; slot < jj_2_rtns.Length; slot++)
    {
        jj_2_rtns[slot] = new JJCalls();
    }
    for (int choice = 0; choice < 14; choice++)
    {
        jj_la1[choice] = -1;
    }
}
755 
/// <summary>
/// Creates a parser that reads characters from <paramref name="stream"/> and
/// resets all parser state.
/// </summary>
public HTMLParser(System.IO.StreamReader stream)
{
    InitBlock();

    jj_input_stream = new SimpleCharStream(stream, 1, 1);
    token_source = new HTMLParserTokenManager(jj_input_stream);
    token = new Token();
    jj_gen = 0;
    jj_ntk = -1;

    for (int slot = 0; slot < jj_2_rtns.Length; slot++)
    {
        jj_2_rtns[slot] = new JJCalls();
    }
    for (int choice = 0; choice < 14; choice++)
    {
        jj_la1[choice] = -1;
    }
}
769 
/// <summary>
/// Re-targets the existing character stream and token manager at
/// <paramref name="stream"/> and resets all parser state.
/// </summary>
public virtual void ReInit(System.IO.StreamReader stream)
{
    jj_input_stream.ReInit(stream, 1, 1);
    token_source.ReInit(jj_input_stream);
    token = new Token();
    jj_gen = 0;
    jj_ntk = -1;

    for (int slot = 0; slot < jj_2_rtns.Length; slot++)
    {
        jj_2_rtns[slot] = new JJCalls();
    }
    for (int choice = 0; choice < 14; choice++)
    {
        jj_la1[choice] = -1;
    }
}
782 
/// <summary>
/// Creates a parser that pulls its tokens from an existing token manager and
/// resets all parser state. <c>jj_input_stream</c> is not assigned here.
/// </summary>
/// <remarks>
/// NOTE(review): the signature line was missing from this listing and has
/// been restored from the body (it uses <c>tm</c> and calls
/// <c>InitBlock()</c>) and from the matching
/// <c>ReInit(HTMLParserTokenManager tm)</c> overload — confirm against the
/// original file.
/// </remarks>
/// <param name="tm">Token manager to read tokens from.</param>
public HTMLParser(HTMLParserTokenManager tm)
{
    InitBlock();
    token_source = tm;
    token = new Token();
    jj_ntk = -1;
    jj_gen = 0;
    for (int i = 0; i < 14; i++)
    {
        jj_la1[i] = -1;
    }
    for (int i = 0; i < jj_2_rtns.Length; i++)
    {
        jj_2_rtns[i] = new JJCalls();
    }
}
795 
/// <summary>
/// Switches the parser to the token manager <paramref name="tm"/> and resets
/// all parser state.
/// </summary>
public virtual void ReInit(HTMLParserTokenManager tm)
{
    token_source = tm;
    token = new Token();
    jj_gen = 0;
    jj_ntk = -1;

    for (int slot = 0; slot < jj_2_rtns.Length; slot++)
    {
        jj_2_rtns[slot] = new JJCalls();
    }
    for (int choice = 0; choice < 14; choice++)
    {
        jj_la1[choice] = -1;
    }
}
807 
/// <summary>
/// Advances to the next token and requires it to be of the given kind.
/// </summary>
/// <param name="kind">Expected token kind.</param>
/// <returns>The token just consumed.</returns>
/// <exception cref="ParseException">
/// Thrown (via <c>GenerateParseException</c>) when the next token has a
/// different kind; the current token is restored first.
/// </exception>
private Token Jj_consume_token(int kind)
{
    Token previous = token;

    if (previous.next == null)
    {
        // Nothing buffered yet: fetch from the token manager and link it in.
        Token fetched = token_source.GetNextToken();
        previous.next = fetched;
        token = fetched;
    }
    else
    {
        token = previous.next;
    }
    jj_ntk = -1;

    if (token.kind != kind)
    {
        token = previous;
        jj_kind = kind;
        throw GenerateParseException();
    }

    jj_gen++;
    if (++jj_gc > 100)
    {
        // Periodically drop token references held by stale lookahead records.
        jj_gc = 0;
        for (int slot = 0; slot < jj_2_rtns.Length; slot++)
        {
            for (JJCalls call = jj_2_rtns[slot]; call != null; call = call.next)
            {
                if (call.gen < jj_gen)
                {
                    call.first = null;
                }
            }
        }
    }
    return token;
}
839 
/// <summary>
/// Exception type thrown by <c>Jj_scan_token</c> to end a lookahead scan once
/// its token budget is used up, and caught by the lookahead entry points.
/// It carries no data of its own.
/// </summary>
[System.Serializable]
private sealed class LookaheadSuccess : ApplicationException
{
}
844 
// Single reusable instance thrown to end a lookahead scan; created in InitBlock().
private LookaheadSuccess jj_ls;

/// <summary>
/// Advances the lookahead scan by one token and compares it with
/// <paramref name="kind"/>, fetching from the token manager when the scan
/// runs past the buffered tokens.
/// </summary>
/// <returns><c>true</c> on a mismatch, <c>false</c> when the token matches.</returns>
/// <exception cref="LookaheadSuccess">
/// Thrown when the token matches and the lookahead budget has just reached zero.
/// </exception>
private bool Jj_scan_token(int kind)
{
    if (jj_scanpos != jj_lastpos)
    {
        // Still inside tokens that were already scanned in this lookahead.
        jj_scanpos = jj_scanpos.next;
    }
    else
    {
        jj_la--;
        Token following = jj_scanpos.next;
        if (following == null)
        {
            following = token_source.GetNextToken();
            jj_scanpos.next = following;
        }
        jj_scanpos = following;
        jj_lastpos = following;
    }

    if (jj_rescan)
    {
        // Error reporting: record how far from the current token this kind was expected.
        int distance = 0;
        Token cursor = token;
        for (; cursor != null && cursor != jj_scanpos; cursor = cursor.next)
        {
            distance++;
        }
        if (cursor != null)
        {
            Jj_add_error_token(kind, distance);
        }
    }

    if (jj_scanpos.kind != kind)
    {
        return true;
    }
    if (jj_la == 0 && jj_scanpos == jj_lastpos)
    {
        throw jj_ls;
    }
    return false;
}
880 
/// <summary>
/// Advances to the next token unconditionally, fetching it from the token
/// manager when none is buffered, and returns it.
/// </summary>
public Token GetNextToken()
{
    Token following = token.next;
    if (following == null)
    {
        following = token_source.GetNextToken();
        token.next = following;
    }
    token = following;

    jj_gen++;
    jj_ntk = -1;
    return token;
}
891 
/// <summary>
/// Returns the token <paramref name="index"/> positions ahead without
/// consuming anything, fetching tokens from the token manager as needed.
/// Counting starts at the scan position while <c>lookingAhead</c> is set and
/// at the current token otherwise; index 0 (or less) returns that start token.
/// </summary>
public Token GetToken(int index)
{
    Token cursor;
    if (lookingAhead)
    {
        cursor = jj_scanpos;
    }
    else
    {
        cursor = token;
    }

    for (int remaining = index; remaining > 0; remaining--)
    {
        if (cursor.next == null)
        {
            cursor.next = token_source.GetNextToken();
        }
        cursor = cursor.next;
    }
    return cursor;
}
904 
/// <summary>
/// Looks up the kind of the token after the current one, fetching it from
/// the token manager if it is not buffered, and caches it in <c>jj_ntk</c>.
/// <c>jj_nt</c> is set to the buffered token, or left null when a fetch was needed.
/// </summary>
/// <returns>The kind of the next token.</returns>
private int Jj_ntk()
{
    jj_nt = token.next;
    if (jj_nt != null)
    {
        jj_ntk = jj_nt.kind;
    }
    else
    {
        Token fetched = token_source.GetNextToken();
        token.next = fetched;
        jj_ntk = fetched.kind;
    }
    return jj_ntk;
}
912 
// Expected-token sequences (each an int[]) collected while building a ParseException.
private System.Collections.ArrayList jj_expentries =
    System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));

// Sequence currently being assembled before it is added to jj_expentries.
private int[] jj_expentry;

// Kind that Jj_consume_token expected when it failed, or -1 when there is none.
private int jj_kind = -1;

// Token kinds seen along the lookahead path being replayed (at most 100 are tracked).
private int[] jj_lasttokens = new int[100];

// Number of valid entries in jj_lasttokens.
private int jj_endpos;
918 
/// <summary>
/// Records that token <paramref name="kind"/> was expected at lookahead
/// position <paramref name="pos"/>. A position that extends the current path
/// by one appends to it; any other position first stores the current path in
/// <c>jj_expentries</c> (if not already there) and then restarts the path at
/// <paramref name="pos"/>. Positions of 100 or more are ignored.
/// </summary>
private void Jj_add_error_token(int kind, int pos)
{
    if (pos >= 100)
    {
        return;
    }

    if (pos == jj_endpos + 1)
    {
        jj_lasttokens[jj_endpos] = kind;
        jj_endpos++;
        return;
    }

    if (jj_endpos == 0)
    {
        return;
    }

    // Snapshot the path collected so far.
    jj_expentry = new int[jj_endpos];
    Array.Copy(jj_lasttokens, jj_expentry, jj_endpos);

    // Look for an identical sequence that was recorded earlier.
    bool alreadyRecorded = false;
    foreach (int[] known in jj_expentries)
    {
        if (known.Length != jj_expentry.Length)
        {
            continue;
        }

        bool same = true;
        for (int i = 0; i < jj_expentry.Length; i++)
        {
            if (known[i] != jj_expentry[i])
            {
                same = false;
                break;
            }
        }
        if (same)
        {
            alreadyRecorded = true;
            break;
        }
    }

    if (!alreadyRecorded)
    {
        jj_expentries.Add(jj_expentry);
    }

    if (pos != 0)
    {
        jj_endpos = pos;
        jj_lasttokens[pos - 1] = kind;
    }
}
959 
/// <summary>
/// Builds a <c>ParseException</c> for the current position. The expected
/// token sequences come from the kind that the last failed consume wanted,
/// from every choice point that failed at the current generation, and from
/// replaying the saved lookahead attempts.
/// </summary>
/// <returns>The exception to throw; this method does not throw it itself.</returns>
public virtual ParseException GenerateParseException()
{
    jj_expentries.Clear();

    // One flag per token kind; a new bool[] starts out all false.
    bool[] expected = new bool[31];

    if (jj_kind >= 0)
    {
        expected[jj_kind] = true;
        jj_kind = -1;
    }

    for (int choice = 0; choice < 14; choice++)
    {
        if (jj_la1[choice] != jj_gen)
        {
            continue; // this choice point did not fail at the current position
        }

        int mask = jj_la1_0[choice];
        for (int bit = 0; bit < 32; bit++)
        {
            if ((mask & (1 << bit)) != 0)
            {
                expected[bit] = true;
            }
        }
    }

    for (int kind = 0; kind < 31; kind++)
    {
        if (!expected[kind])
        {
            continue;
        }
        jj_expentry = new int[1];
        jj_expentry[0] = kind;
        jj_expentries.Add(jj_expentry);
    }

    jj_endpos = 0;
    Jj_rescan_token();
    Jj_add_error_token(0, 0);

    int[][] sequences = new int[jj_expentries.Count][];
    for (int i = 0; i < jj_expentries.Count; i++)
    {
        sequences[i] = (int[]) jj_expentries[i];
    }
    return new ParseException(token, sequences, HTMLParserConstants_Fields.tokenImage);
}
1005 
/// <summary>Does nothing; the body is empty.</summary>
public void Enable_tracing()
{
    // no-op
}
1009 
/// <summary>Does nothing; the body is empty.</summary>
public void Disable_tracing()
{
    // no-op
}
1013 
/// <summary>
/// Replays every saved lookahead attempt that is still newer than the
/// current generation, with <c>jj_rescan</c> set so that
/// <c>Jj_scan_token</c> records the tokens it expects. A
/// <c>LookaheadSuccess</c> raised during a replay ends the work for that slot.
/// </summary>
private void Jj_rescan_token()
{
    jj_rescan = true;

    for (int slot = 0; slot < 2; slot++)
    {
        try
        {
            JJCalls call = jj_2_rtns[slot];
            do
            {
                if (call.gen > jj_gen)
                {
                    jj_la = call.arg;
                    jj_scanpos = call.first;
                    jj_lastpos = call.first;

                    if (slot == 0)
                    {
                        Jj_3_1();
                    }
                    else if (slot == 1)
                    {
                        Jj_3_2();
                    }
                }
                call = call.next;
            }
            while (call != null);
        }
        catch (LookaheadSuccess)
        {
            // The replay used up its lookahead budget; move on to the next slot.
        }
    }

    jj_rescan = false;
}
1045 
/// <summary>
/// Stores the outcome of a lookahead attempt in the chain for
/// <paramref name="index"/>. The first record whose generation is not newer
/// than the current one is reused; a new record is appended when every
/// existing one is still newer.
/// </summary>
/// <param name="index">Slot in <c>jj_2_rtns</c>.</param>
/// <param name="xla">Lookahead budget the attempt was started with.</param>
private void Jj_save(int index, int xla)
{
    JJCalls record = jj_2_rtns[index];
    while (record.gen > jj_gen)
    {
        JJCalls following = record.next;
        if (following == null)
        {
            following = new JJCalls();
            record.next = following;
            record = following;
            break;
        }
        record = following;
    }

    record.gen = jj_gen + xla - jj_la;
    record.first = token;
    record.arg = xla;
}
1059 
/// <summary>
/// One saved lookahead attempt; records are chained through <c>next</c>.
/// </summary>
internal sealed class JJCalls
{
    // Generation value stored by Jj_save (jj_gen + xla - jj_la at save time).
    internal int gen;

    // Lookahead budget the attempt was started with.
    internal int arg;

    // Token that was current when the attempt was saved; cleared once stale.
    internal Token first;

    // Next record in the chain, or null at the end.
    internal JJCalls next;
}
1067 
1068  // void handleException(Exception e) {
1069  // System.out.println(e.toString()); // print the error message
1070  // System.out.println("Skipping...");
1071  // Token t;
1072  // do {
1073  // t = getNextToken();
1074  // } while (t.kind != TagEnd);
1075  // }
/// <summary>
/// Type initializer: fills the shared expected-token table by calling
/// <c>Jj_la1_0()</c>.
/// </summary>
static HTMLParser()
{
    Jj_la1_0();
}
1082  }
1083 }