1 
2  /***************************************************************************/
3 
4 /*
5  * Portions Copyright (c) 1999 GMRS Software GmbH
6  * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de
7  * All rights reserved.
8  *
9  * Author: Arno Unkrig <arno@unkrig.de>
10  */
11 
12 /* This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 2 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  * GNU General Public License in the file COPYING for more details.
21  */
22 
23  /***************************************************************************/
24 
25 /*
26  * Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de>
27  * Dates and reasons of modifications:
28  * Sun Apr  7 11:54:06 CEST 2002: Make some closing tags optional
29  * Mon Jul 22 13:42:13 CEST 2002: Don't insert Paragraphs to other block elements
30  * Mon Aug 12 17:14:57 CEST 2002: Make even more closing tags optional
31  */
32 
33  /***************************************************************************/
34 
35 
/*
 * Parser configuration (bison++ directives).
 *
 * The generated class is called "HTMLParser".
 * NOTE(review): PURE and DEBUG are bison++ options -- presumably "reentrant
 * parser" and "compile in yydebug tracing"; confirm against the bison++
 * documentation.
 */
%name HTMLParser
%define PURE
%define DEBUG 1

%{

/* ------------------------------------------------------------------------- */

#ident "$Id: HTMLParser.y,v 1.14 1999/10/26 10:56:55 arno Exp $"

#include "html.h"
#include "HTMLParser.h"

// MIPS machines don't have "alloca()", so disable stack realloc'ing.
// The macro turns a parser stack overflow into a plain yyerror() call; the
// trailing "(void)" swallows the argument list that follows "yyoverflow".
#ifdef mips
#define yyoverflow yyerror("parser stack overflow"), (void)
#endif

/* ------------------------------------------------------------------------- */

%}

/* ------------------------------------------------------------------------- */

/*
 * Class layout of the generated parser.
 *
 * LEX_BODY / ERROR_BODY "= 0": NOTE(review): presumably declares the scanner
 * and error-reporting methods pure virtual, to be supplied by a subclass --
 * confirm against the bison++ documentation.
 *
 * MEMBERS adds:
 *   - a virtual destructor (defined out of line at the end of this file),
 *   - process(), called by the "document" rule with the finished Document,
 *   - read_cdata(), called by the SCRIPT/STYLE actions to fetch raw text up
 *     to the given terminal string,
 *   - list_nesting, the current depth of open list elements, initialized to
 *     zero by CONSTRUCTOR_INIT.
 */
%define LEX_BODY = 0
%define ERROR_BODY = 0
%define MEMBERS\
  virtual ~HTMLParser(); \
  virtual void process(const Document &) = 0;\
  virtual bool read_cdata(const char *terminal, string *) = 0;\
  int list_nesting;
%define CONSTRUCTOR_INIT : list_nesting(0)
68 
/*
 * Semantic value type. All members except "inT" are raw pointers to objects
 * allocated with "new"; the grammar actions either hand them to a reset()/
 * auto_ptr owner or delete them explicitly.
 */
%union {
  /* The document under construction. */
  Document                   *document;
  /* A single element and a sequence of elements. */
  Element                    *element;
  list<auto_ptr<Element> >   *element_list;
  /* Character data; "strinG" is the raw text delivered with a PCDATA token. */
  PCData                     *pcdata;
  string                     *strinG;
  /* Attribute list delivered with every start-tag token. */
  list<TagAttribute>         *tag_attributes;
  /* Plain integer, used for the heading level of a closing </Hn> tag. */
  int                        inT;
  /* Table parts. */
  list<auto_ptr<TableRow> >  *table_rows;
  list<auto_ptr<TableCell> > *table_cells;
  /* List parts. */
  ListItem                   *list_item;
  list<auto_ptr<ListItem> >  *list_items;
  Caption                    *caption;
  Heading                    *heading;
  /* <SELECT> parts. */
  list<auto_ptr<Option> >    *option_list;
  Option                     *option;
  /* Definition list parts. */
  DefinitionList             *definition_list;
  list<auto_ptr<DefinitionListItem> > *definition_list_item_list;
  TermName                   *term_name;
  TermDefinition             *term_definition;
  Preformatted               *preformatted;
  Address                    *address;
  /* One attribute list per <AREA> inside a <MAP>. */
  list<auto_ptr<list<TagAttribute> > > *tag_attributes_list;
}
93 
/*
 * Semantic value types of the nonterminals. The name in angle brackets is
 * the %union member that carries the value. Nonterminals not listed here
 * ("document" and the "opt_END_..." / "opt_error" helpers) carry no value.
 */
%type  <document>                 document_
%type  <pcdata>                   pcdata
%type  <pcdata>                   opt_pcdata
%type  <element_list>             body_content
%type  <heading>                  heading
%type  <heading>                  HX
%type  <inT>                      END_HX
%type  <element>                  block
%type  <element>                  block_except_p
%type  <element>                  text
%type  <element_list>             texts
%type  <element_list>             opt_texts
%type  <element>                  font
%type  <element>                  phrase
%type  <element>                  special
%type  <element>                  form
%type  <table_rows>               table_rows
%type  <table_cells>              table_cells
%type  <caption>                  caption
%type  <caption>                  opt_caption
%type  <element_list>             applet_content
%type  <definition_list>          definition_list
%type  <definition_list_item_list>definition_list_content
%type  <term_name>                term_name
%type  <term_definition>          term_definition
%type  <option_list>              select_content
%type  <option>                   option
%type  <element>                  list
%type  <list_items>               list_content
%type  <list_item>                list_item
%type  <preformatted>             preformatted
%type  <element_list>             opt_flow
%type  <element_list>             flow
%type  <element>                  flow_
%type  <element_list>             paragraph_content
%type  <address>                  address
%type  <tag_attributes_list>      map_content

/* Optional start tags that are accepted and whose attributes are discarded. */
%type  <tag_attributes> opt_LI
%type  <tag_attributes> opt_P
134 
/*
 * Tokens without a tag: DOCTYPE and SCAN_ERROR carry no value, PCDATA
 * carries the text as a heap-allocated string.
 */
%token                  DOCTYPE
%token <strinG>         PCDATA
%token                  SCAN_ERROR


/*
 * Start-tag tokens. Each one carries the tag's attribute list; the actions
 * must either store that list or delete it.
 */
%token <tag_attributes> A
%token <tag_attributes> ADDRESS
%token <tag_attributes> APPLET
%token <tag_attributes> AREA
%token <tag_attributes> B
%token <tag_attributes> BASE
%token <tag_attributes> BASEFONT
%token <tag_attributes> BIG
%token <tag_attributes> BLOCKQUOTE
%token <tag_attributes> BODY
%token <tag_attributes> BR
%token <tag_attributes> CAPTION
%token <tag_attributes> CENTER
%token <tag_attributes> CITE
%token <tag_attributes> CODE
%token <tag_attributes> DD
%token <tag_attributes> DFN
%token <tag_attributes> DIR
%token <tag_attributes> DIV
%token <tag_attributes> DL
%token <tag_attributes> DT
%token <tag_attributes> EM
%token <tag_attributes> FONT
%token <tag_attributes> FORM
%token <tag_attributes> H1
%token <tag_attributes> H2
%token <tag_attributes> H3
%token <tag_attributes> H4
%token <tag_attributes> H5
%token <tag_attributes> H6
%token <tag_attributes> HEAD
%token <tag_attributes> HR
%token <tag_attributes> HTML
%token <tag_attributes> I
%token <tag_attributes> IMG
%token <tag_attributes> INPUT
%token <tag_attributes> ISINDEX
%token <tag_attributes> KBD
%token <tag_attributes> LI
%token <tag_attributes> LINK
%token <tag_attributes> MAP
%token <tag_attributes> MENU
%token <tag_attributes> META
%token <tag_attributes> NOBR
%token <tag_attributes> OL
%token <tag_attributes> OPTION
%token <tag_attributes> P
%token <tag_attributes> PARAM
%token <tag_attributes> PRE
%token <tag_attributes> SAMP
%token <tag_attributes> SCRIPT
%token <tag_attributes> SELECT
%token <tag_attributes> SMALL
%token <tag_attributes> STRIKE
%token <tag_attributes> STRONG
%token <tag_attributes> STYLE
%token <tag_attributes> SUB
%token <tag_attributes> SUP
%token <tag_attributes> TABLE
%token <tag_attributes> TD
%token <tag_attributes> TEXTAREA
%token <tag_attributes> TH
%token <tag_attributes> TITLE
%token <tag_attributes> TR
%token <tag_attributes> TT
%token <tag_attributes> U
%token <tag_attributes> UL
%token <tag_attributes> VAR
208 
/*
 * End-tag tokens. They carry no semantic value. Start tags without a
 * matching entry here (e.g. BR, HR, IMG, INPUT, META) have no end-tag token
 * in this grammar.
 */
%token                  END_A
%token                  END_ADDRESS
%token                  END_APPLET
%token                  END_B
%token                  END_BIG
%token                  END_BLOCKQUOTE
%token                  END_BODY
%token                  END_CAPTION
%token                  END_CENTER
%token                  END_CITE
%token                  END_CODE
%token                  END_DD
%token                  END_DFN
%token                  END_DIR
%token                  END_DIV
%token                  END_DL
%token                  END_DT
%token                  END_EM
%token                  END_FONT
%token                  END_FORM
%token                  END_H1
%token                  END_H2
%token                  END_H3
%token                  END_H4
%token                  END_H5
%token                  END_H6
%token                  END_HEAD
%token                  END_HTML
%token                  END_I
%token                  END_KBD
%token                  END_LI
%token                  END_MAP
%token                  END_MENU
%token                  END_NOBR
%token                  END_OL
%token                  END_OPTION
%token                  END_P
%token                  END_PRE
%token                  END_SAMP
%token                  END_SCRIPT
%token                  END_SELECT
%token                  END_SMALL
%token                  END_STRIKE
%token                  END_STRONG
%token                  END_STYLE
%token                  END_SUB
%token                  END_SUP
%token                  END_TABLE
%token                  END_TD
%token                  END_TEXTAREA
%token                  END_TH
%token                  END_TITLE
%token                  END_TR
%token                  END_TT
%token                  END_U
%token                  END_UL
%token                  END_VAR
266 
267 /* ------------------------------------------------------------------------- */
268 
269 %start document
270 
271 %% /* { */
272 
/*
 * Start symbol: once the whole input has been collected into a Document,
 * pass it to process() and release it.
 */
document:
  document_ {
    Document *parsed = $1;
    process(*parsed);
    delete parsed;
  }
  ;
279 
280 /*
281  * Well... actually, an HTML document should look like
282  *
283  * <!DOCTYPE ...>
284  * <HTML>
285  *   <HEAD>
286  *   ...
287  *   </HEAD>
288  *   <BODY>
289  *   ...
290  *   </BODY>
291  * </HTML>
292  *
293  * but...
294  *
295  * (A) All seven tags are optional
 * (B) The contents of the HEAD and the BODY section can be distinguished
297  * (C) Most people out there do not know which element to put before, into,
298  *     or after which section...
299  *
300  * so... let's just forget about the structure of an HTML document, discard
301  * the seven tags, and process the remainder as a series of sections.
302  */
303 
/*
 * document_: the Document built so far.
 *
 * Starts as an empty Document with an empty body content list and is
 * extended, left-recursively, by one item per reduction:
 *   - structural tags (DOCTYPE, HTML, HEAD, BODY and their end tags) are
 *     accepted anywhere; only the <HTML> attributes are kept,
 *   - head items (TITLE, ISINDEX, BASE, META, LINK, SCRIPT, STYLE) are
 *     stored in "head",
 *   - everything else (texts, headings, blocks, addresses) is appended to
 *     "body.content"; loose texts are wrapped in a Paragraph.
 *
 * Every action sets $$ explicitly before using it.
 */
document_:
  /* empty */ {
    $$ = new Document;
    $$->body.content.reset(new list<auto_ptr<Element> >);
  }
  | document_ error {
    /* Error recovery: skip the offending input, keep what we have. */
    $$ = $1;
  }
  | document_ DOCTYPE {
    $$ = $1;
  }
  | document_ HTML {
    /*
     * $$ must be assigned before it is dereferenced. The previous code
     * dereferenced $$ first and only worked because the parser skeleton
     * happens to pre-load $$ with $1.
     */
    ($$ = $1)->attributes.reset($2);
  }
  | document_ END_HTML {
    $$ = $1;
  }
  | document_ HEAD {
    delete $2; // Ignore <HEAD> attributes
    $$ = $1;
  }
  | document_ END_HEAD {
    $$ = $1;
  }
  | document_ TITLE opt_pcdata opt_END_TITLE {
    delete $2; // Ignore <TITLE> attributes
    ($$ = $1)->head.title.reset($3);
  }
  | document_ ISINDEX {
    ($$ = $1)->head.isindex_attributes.reset($2);
  }
  | document_ BASE {
    ($$ = $1)->head.base_attributes.reset($2);
  }
  | document_ META {
    ($$ = $1)->head.meta_attributes.reset($2);
  }
  | document_ LINK {
    ($$ = $1)->head.link_attributes.reset($2);
  }
  | document_ SCRIPT {
    /* The script body is not tokenized; read it raw up to "</SCRIPT>". */
    auto_ptr<Script> s(new Script);
    s->attributes.reset($2);
    if (!read_cdata("</SCRIPT>", &s->text)) {
      yyerror("CDATA terminal not found");
    }
    ($$ = $1)->head.scripts.push_back(s);
  }
  | document_ STYLE {
    /* The style sheet is not tokenized; read it raw up to "</STYLE>". */
    auto_ptr<Style> s(new Style);
    s->attributes.reset($2);
    if (!read_cdata("</STYLE>", &s->text)) {
      yyerror("CDATA terminal not found");
    }
    ($$ = $1)->head.styles.push_back(s);
  }
  | document_ BODY {
    delete $2; // Ignore <BODY> attributes
    $$ = $1;
  }
  | document_ END_BODY {
    $$ = $1;
  }
  | document_ texts {
    /* Loose text at document level becomes an attribute-less paragraph. */
    Paragraph *p = new Paragraph;
    p->texts.reset($2);
    ($$ = $1)->body.content->push_back(auto_ptr<Element>(p));
  }
  | document_ heading {
    ($$ = $1)->body.content->push_back(auto_ptr<Element>($2));
  }
  | document_ block {
    ($$ = $1)->body.content->push_back(auto_ptr<Element>($2));
  }
  | document_ address {
    ($$ = $1)->body.content->push_back(auto_ptr<Element>($2));
  }
  ;
383 
/* pcdata: wrap the text of one PCDATA token in a new PCData node. */
pcdata:
  PCDATA {
    PCData *node = new PCData;
    node->text = *$1;
    delete $1;               /* the token's string has been copied */
    $$ = node;
  }
  ;
391 
/*
 * body_content: a (possibly empty) list of body-level elements, used inside
 * DIV, CENTER, BLOCKQUOTE, FORM and table cells.
 *
 * Loose texts are wrapped in an attribute-less Paragraph. SCRIPT and STYLE
 * elements are consumed but not stored: there is no document head available
 * here to attach them to, so the parsed object is destroyed when the local
 * auto_ptr goes out of scope.
 */
body_content:
  /* empty */ {
    $$ = new list<auto_ptr<Element> >;
  }
  | body_content error {
    $$ = $1;
  }
  | body_content SCRIPT {
    auto_ptr<Script> s(new Script);
    s->attributes.reset($2);
    if (!read_cdata("</SCRIPT>", &s->text)) {
      yyerror("CDATA terminal not found");
    }
    /*
     * Pass the list through explicitly. Previously $$ was never assigned in
     * this action and the result relied on the parser skeleton pre-loading
     * $$ with $1.
     */
    $$ = $1;
  }
  | body_content STYLE {
    auto_ptr<Style> s(new Style);
    s->attributes.reset($2);
    if (!read_cdata("</STYLE>", &s->text)) {
      yyerror("CDATA terminal not found");
    }
    /* See the SCRIPT alternative: pass the list through explicitly. */
    $$ = $1;
  }
  | body_content texts {
    Paragraph *p = new Paragraph;
    p->texts = auto_ptr<list<auto_ptr<Element> > >($2);
    ($$ = $1)->push_back(auto_ptr<Element>(p));
  }
  | body_content heading {
    ($$ = $1)->push_back(auto_ptr<Element>($2));
  }
  | body_content block {
    ($$ = $1)->push_back(auto_ptr<Element>($2));
  }
  | body_content address {
    ($$ = $1)->push_back(auto_ptr<Element>($2));
  }
  ;
430 
/*
 * heading: <Hn> ... </Hm>. A mismatch between n and m is reported through
 * yyerror(), but the heading is still built with the opening level.
 */
heading:
  HX paragraph_content END_HX {
            /* EXTENSION: Allow paragraph content in heading, not only texts */
    Heading *h = $1;
    if (h->level != $3) {
      yyerror ("Levels of opening and closing headings don't match");
    }
    h->content.reset($2);
    $$ = h;
  }
  ;
441 
/* block: any block-level element, i.e. a paragraph or a "block_except_p". */
block:
  block_except_p {
    $$ = $1;
  }
  | P paragraph_content opt_END_P {
    Paragraph *para = new Paragraph;
    para->texts.reset($2);
    para->attributes.reset($1);
    $$ = para;
  }
  ;
453 
/*
 * paragraph_content: the elements inside a paragraph or heading.
 * EXTENSION: Allow blocks (except "<P>") in paragraphs.
 */
paragraph_content:
  /* empty */ {
    $$ = new list<auto_ptr<Element> >;
  }
  | paragraph_content error {
    $$ = $1;
  }
  | paragraph_content texts {
    /* Move the elements of "texts" to the end of the list, then drop the
       emptied container. */
    list<auto_ptr<Element> > *content = $1;
    content->splice(content->end(), *$2);
    delete $2;
    $$ = content;
  }
  | paragraph_content block_except_p {
    $$ = $1;
    $$->push_back(auto_ptr<Element>($2));
  }
  ;
470 
/*
 * block_except_p: every block-level element other than a paragraph.
 * Start-tag attribute lists are stored in the new element, except for CENTER
 * and BLOCKQUOTE, where they are deleted.
 */
block_except_p:
  list {
    $$ = $1;
  }
  | preformatted {
    $$ = $1;
  }
  | definition_list {
    $$ = $1;
  }
  | DIV body_content opt_END_DIV {
    Division *division = new Division;
    division->body_content.reset($2);
    division->attributes.reset($1);
    $$ = division;
  }
  | CENTER body_content opt_END_CENTER {
    Center *center = new Center;
    delete $1;       // CENTER has no attributes.
    center->body_content.reset($2);
    $$ = center;
  }
  | BLOCKQUOTE body_content opt_END_BLOCKQUOTE {
    delete $1; // BLOCKQUOTE has no attributes!
    BlockQuote *quote = new BlockQuote;
    quote->content.reset($2);
    $$ = quote;
  }
  | FORM body_content opt_END_FORM {
    Form *form = new Form;
    form->content.reset($2);
    form->attributes.reset($1);
    $$ = form;
  }
  | HR {
    HorizontalRule *rule = new HorizontalRule;
    rule->attributes.reset($1);
    $$ = rule;
  }
  | TABLE opt_caption table_rows END_TABLE {
    Table *table = new Table;
    table->rows.reset($3);
    table->caption.reset($2);
    table->attributes.reset($1);
    $$ = table;
  }
  ;
518 
/*
 * list: <OL>, <UL>, <DIR> and <MENU>.
 *
 * The mid-rule action bumps list_nesting as soon as the start tag has been
 * seen, so nested lists parsed inside "list_content" see a higher depth.
 * The final action undoes the bump and records the resulting depth; the
 * outermost list therefore gets nesting 0. Because of the mid-rule action,
 * "list_content" is $3. Only </UL> is optional.
 */
list:
  OL { ++list_nesting; } list_content END_OL {
    OrderedList *ordered = new OrderedList;
    ordered->items.reset($3);
    ordered->attributes.reset($1);
    ordered->nesting = --list_nesting;
    $$ = ordered;
  }
  | UL { ++list_nesting; } list_content opt_END_UL {
    UnorderedList *unordered = new UnorderedList;
    unordered->items.reset($3);
    unordered->attributes.reset($1);
    unordered->nesting = --list_nesting;
    $$ = unordered;
  }
  | DIR { ++list_nesting; } list_content END_DIR {
    Dir *dir = new Dir;
    dir->items.reset($3);
    dir->attributes.reset($1);
    dir->nesting = --list_nesting;
    $$ = dir;
  }
  | MENU { ++list_nesting; } list_content END_MENU {
    Menu *menu = new Menu;
    menu->items.reset($3);
    menu->attributes.reset($1);
    menu->nesting = --list_nesting;
    $$ = menu;
  }
  ;
549 
/*
 * list_content: the items of a list. The value is a null pointer until the
 * first item arrives; the container is created on demand.
 */
list_content:
  /* empty */ {
    $$ = 0;
  }
  | list_content error {
    $$ = $1;
  }
  | list_content list_item {
    if ($1) {
      $$ = $1;
    } else {
      $$ = new list<auto_ptr<ListItem> >;
    }
    $$->push_back(auto_ptr<ListItem>($2));
  }
  ;
562 
/*
 * list_item: a regular <LI> item, or -- as extensions -- a bare block or
 * bare texts appearing directly inside a list.
 */
list_item:
  LI opt_flow opt_END_LI {
    ListNormalItem *item = new ListNormalItem;
    item->flow.reset($2);
    item->attributes.reset($1);
    $$ = item;
  }
  | block {   /* EXTENSION: Handle a "block" in a list as an indented block. */
    ListBlockItem *item = new ListBlockItem;
    item->block.reset($1);
    $$ = item;
  }
  | texts {              /* EXTENSION: Treat "texts" in a list as an "<LI>". */
    ListNormalItem *item = new ListNormalItem;
    item->flow.reset($1);
    $$ = item;
  }
  ;
581 
/*
 * definition_list: <DL> ... </DL>.
 *
 * First alternative: a <DL> that directly wraps another <DL>. The outer
 * list's attributes and preamble are thrown away and the inner list is
 * returned as the result.
 * Second alternative: a regular definition list with an optional preamble.
 */
definition_list:
                                           /* EXTENSION: Allow nested <DL>s. */
                                             /* EXTENSION: "</DL>" optional. */
  DL opt_flow opt_error definition_list opt_END_DL {
    $$ = $4;
    delete $2; /* Kludge */
    delete $1;
  }
                                 /* EXTENSION: Accept a "preamble" in the DL */
  | DL opt_flow opt_error definition_list_content END_DL {
    DefinitionList *deflist = new DefinitionList;
    deflist->items.reset($4);
    deflist->preamble.reset($2);
    deflist->attributes.reset($1);
    $$ = deflist;
  }
  ;
599 
/*
 * definition_list_content: the <DT>/<DD> items of a definition list. The
 * value is a null pointer until the first item arrives; the container is
 * created on demand.
 *
 * The former alternative "definition_list_content: definition_list_content"
 * was removed. A nonterminal that derives itself is a cyclic rule: it makes
 * the grammar ambiguous and only produces parser conflicts. Every token that
 * can follow it (END_DL, DT, DD) is shifted instead, so the reduction was
 * never taken and the accepted language is unchanged.
 */
definition_list_content:
  /* empty */ {
    $$ = 0;
  }
  | definition_list_content term_name {
    $$ = $1 ? $1 : new list<auto_ptr<DefinitionListItem> >;
    $$->push_back(auto_ptr<DefinitionListItem>($2));
  }
  | definition_list_content term_definition {
    $$ = $1 ? $1 : new list<auto_ptr<DefinitionListItem> >;
    $$->push_back(auto_ptr<DefinitionListItem>($2));
  }
  ;
616 
/*
 * term_name: a <DT> item. The start tag's attributes are discarded, as are
 * those of a <P> directly following "</DT>".
 */
term_name:
  DT opt_flow opt_error {      /* EXTENSION: Allow "flow" instead of "texts" */
    delete $1;
    TermName *term = new TermName;
    term->flow.reset($2);
    $$ = term;
  }
  | DT opt_flow END_DT opt_P opt_error {/* EXTENSION: Ignore <P> after </DT> */
    delete $1;
    delete $4;
    TermName *term = new TermName;
    term->flow.reset($2);
    $$ = term;
  }
  ;
630 
/*
 * term_definition: a <DD> item. The start tag's attributes are discarded,
 * as are those of a <P> directly following "</DD>".
 */
term_definition:
  DD opt_flow opt_error {
    delete $1;
    TermDefinition *definition = new TermDefinition;
    definition->flow.reset($2);
    $$ = definition;
  }
  | DD opt_flow END_DD opt_P opt_error {/* EXTENSION: Ignore <P> after </DD> */
    delete $1;
    delete $4;
    TermDefinition *definition = new TermDefinition;
    definition->flow.reset($2);
    $$ = definition;
  }
  ;
644 
/* flow: a non-empty sequence of "flow_" elements, tolerating errors. */
flow:
  flow_ {
    list<auto_ptr<Element> > *elements = new list<auto_ptr<Element> >;
    elements->push_back(auto_ptr<Element>($1));
    $$ = elements;
  }
  | flow error {
    $$ = $1;
  }
  | flow flow_ {
    $$ = $1;
    $$->push_back(auto_ptr<Element>($2));
  }
  ;
657 
/*
 * flow_: one element of a "flow" -- inline text, a heading, or a block. The
 * value of the matched alternative is passed through unchanged.
 */
flow_:
  text {
    $$ = $1;
  }
  | heading {          /* EXTENSION: Allow headings in "flow", i.e. in lists */
    $$ = $1;
  }
  | block {
    $$ = $1;
  }
  ;
669 
/* preformatted: <PRE> texts, with an optional closing tag. */
preformatted:
  PRE opt_texts opt_END_PRE {
    Preformatted *pre = new Preformatted;
    pre->texts.reset($2);
    pre->attributes.reset($1);
    $$ = pre;
  }
  ;
677 
/* caption: a table's <CAPTION> ... </CAPTION>; the end tag is mandatory. */
caption:
  CAPTION opt_texts END_CAPTION {
    Caption *cap = new Caption;
    cap->texts.reset($2);
    cap->attributes.reset($1);
    $$ = cap;
  }
  ;
685 
/* table_rows: the (possibly empty) list of <TR> rows of a table. */
table_rows:
  /* empty */ {
    $$ = new list<auto_ptr<TableRow> >;
  }
  | table_rows error {
    $$ = $1;
  }
  | table_rows TR table_cells opt_END_TR {
    TableRow *row = new TableRow;
    row->cells.reset($3);
    row->attributes.reset($2);
    $$ = $1;
    $$->push_back(auto_ptr<TableRow>(row));
  }
  ;
700 
/*
 * table_cells: the (possibly empty) list of <TD>/<TH> cells of a table row.
 * A stray <INPUT> between cells is accepted and dropped.
 */
table_cells:
  /* empty */ {
    $$ = new list<auto_ptr<TableCell> >;
  }
  | table_cells error {
    $$ = $1;
  }
  | table_cells TD body_content opt_END_TD {
    TableCell *cell = new TableCell;
    cell->content.reset($3);
    cell->attributes.reset($2);
    $$ = $1;
    $$->push_back(auto_ptr<TableCell>(cell));
  }
  | table_cells TH body_content opt_END_TH opt_END_TD {
                            /* EXTENSION: Allow "</TD>" in place of "</TH>". */
    TableHeadingCell *cell = new TableHeadingCell;
    cell->content.reset($3);
    cell->attributes.reset($2);
    $$ = $1;
    $$->push_back(auto_ptr<TableCell>(cell));
  }
  | table_cells INPUT {    /* EXTENSION: Ignore <INPUT> between table cells. */
    $$ = $1;
    delete $2;
  }
  ;
726 
/* address: <ADDRESS> texts </ADDRESS>; the tag's attributes are discarded. */
address:
  ADDRESS opt_texts END_ADDRESS { /* Should be "address_content"... */
    delete $1;
    Address *addr = new Address;
    addr->content.reset($2);
    $$ = addr;
  }
  ;
734 
735 /* ------------------------------------------------------------------------- */
736 
/* texts: a non-empty sequence of inline "text" elements. */
texts:
  text {
    list<auto_ptr<Element> > *elements = new list<auto_ptr<Element> >;
    elements->push_back(auto_ptr<Element>($1));
    $$ = elements;
  }
  | texts text {
    $$ = $1;
    $$->push_back(auto_ptr<Element>($2));
  }
  ;
746 
/*
 * text: one inline element. Each alternative may be followed by an error
 * token, which is absorbed by "opt_error".
 */
text:
  pcdata opt_error {
    $$ = $1;
  }
  | font opt_error {
    $$ = $1;
  }
  | phrase opt_error {
    $$ = $1;
  }
  | special opt_error {
    $$ = $1;
  }
  | form opt_error {
    $$ = $1;
  }
  | NOBR opt_texts END_NOBR opt_error { /* EXTENSION: NS 1.1 / IE 2.0 */
    NoBreak *nobr = new NoBreak;
    delete $1;                     /* attributes of <NOBR> are not kept */
    nobr->content.reset($2);
    $$ = nobr;
  }
  ;
760 
/*
 * font: font-style elements. The start tag's attributes are discarded; the
 * start tag's token code is passed to the Font constructor together with
 * the enclosed texts (which may be a null pointer when there are none). All
 * end tags are optional.
 */
font:
  TT       opt_texts opt_END_TT     { delete $1; $$ = new Font(TT,     $2); }
  | I      opt_texts opt_END_I      { delete $1; $$ = new Font(I,      $2); }
  | B      opt_texts opt_END_B      { delete $1; $$ = new Font(B,      $2); }
  | U      opt_texts opt_END_U      { delete $1; $$ = new Font(U,      $2); }
  | STRIKE opt_texts opt_END_STRIKE { delete $1; $$ = new Font(STRIKE, $2); }
  | BIG    opt_texts opt_END_BIG    { delete $1; $$ = new Font(BIG,    $2); }
  | SMALL  opt_texts opt_END_SMALL  { delete $1; $$ = new Font(SMALL,  $2); }
  | SUB    opt_texts opt_END_SUB    { delete $1; $$ = new Font(SUB,    $2); }
  | SUP    opt_texts opt_END_SUP    { delete $1; $$ = new Font(SUP,    $2); }
  ;
772 
/*
 * phrase: phrase-level elements. The start tag's attributes are discarded;
 * the start tag's token code is passed to the Phrase constructor together
 * with the enclosed texts (which may be a null pointer when there are none).
 * All end tags are optional.
 */
phrase:
  EM       opt_texts opt_END_EM     { delete $1; $$ = new Phrase(EM,     $2); }
  | STRONG opt_texts opt_END_STRONG { delete $1; $$ = new Phrase(STRONG, $2); }
  | DFN    opt_texts opt_END_DFN    { delete $1; $$ = new Phrase(DFN,    $2); }
  | CODE   opt_texts opt_END_CODE   { delete $1; $$ = new Phrase(CODE,   $2); }
  | SAMP   opt_texts opt_END_SAMP   { delete $1; $$ = new Phrase(SAMP,   $2); }
  | KBD    opt_texts opt_END_KBD    { delete $1; $$ = new Phrase(KBD,    $2); }
  | VAR    opt_texts opt_END_VAR    { delete $1; $$ = new Phrase(VAR,    $2); }
  | CITE   opt_texts opt_END_CITE   { delete $1; $$ = new Phrase(CITE,   $2); }
  ;
783 
/*
 * special: anchors, images, applets, <FONT>, <BASEFONT>, line breaks and
 * image maps. Each start tag's attribute list is stored in the new element.
 */
special:
                        /* EXTENSION: Allow "flow" in <A>, not only "texts". */
                                 /* EXTENSION: Allow useless <LI> in anchor. */
                                               /* EXTENSION: "</A>" optional.*/
  A opt_LI opt_flow opt_END_A {
    delete $2;                      /* attributes of the useless <LI> */
    Anchor *anchor = new Anchor;
    anchor->texts.reset($3);
    anchor->attributes.reset($1);
    $$ = anchor;
  }
  | IMG {
    Image *image = new Image;
    image->attributes.reset($1);
    $$ = image;
  }
  | APPLET applet_content END_APPLET {
    Applet *applet = new Applet;
    applet->content.reset($2);
    applet->attributes.reset($1);
    $$ = applet;
  }
                   /* EXTENSION: "flow" in <FONT> allowed, not only "texts". */
                                           /* EXTENSION: "</FONT>" optional. */
  | FONT opt_flow opt_END_FONT {
    Font2 *font = new Font2;
    font->elements.reset($2);
    font->attributes.reset($1);
    $$ = font;
  }
  | BASEFONT {
    BaseFont *basefont = new BaseFont;
    basefont->attributes.reset($1);
    $$ = basefont;
  }
  | BR {
    LineBreak *linebreak = new LineBreak;
    linebreak->attributes.reset($1);
    $$ = linebreak;
  }
  | MAP map_content END_MAP {
    Map *map = new Map;
    map->areas.reset($2);
    map->attributes.reset($1);
    $$ = map;
  }
  ;
831 
/*
 * applet_content: texts and <PARAM> elements inside an <APPLET>. The value
 * is a null pointer until the first element arrives; the container is
 * created on demand.
 */
applet_content:
  /* empty */ {
    $$ = 0;
  }
  | applet_content text {
    if ($1) {
      $$ = $1;
    } else {
      $$ = new list<auto_ptr<Element> >;
    }
    $$->push_back(auto_ptr<Element>($2));
  }
  | applet_content PARAM {
    if ($1) {
      $$ = $1;
    } else {
      $$ = new list<auto_ptr<Element> >;
    }
    Param *param = new Param;
    param->attributes.reset($2);
    $$->push_back(auto_ptr<Element>(param));
  }
  ;
847 
/*
 * map_content: the <AREA> tags inside a <MAP>, kept as one attribute list
 * per area. The value is a null pointer until the first area arrives.
 */
map_content:
  /* empty */ {
    $$ = 0;
  }
  | map_content error {
    $$ = $1;
  }
  | map_content AREA {
    if ($1) {
      $$ = $1;
    } else {
      $$ = new list<auto_ptr<list<TagAttribute> > >;
    }
    $$->push_back(auto_ptr<list<TagAttribute> >($2));
  }
  ;
860 
/*
 * form: inline form controls -- <INPUT>, <SELECT> and <TEXTAREA>. (The
 * <FORM> element itself is handled in "block_except_p".)
 */
form:
  INPUT {
    Input *input = new Input;
    input->attributes.reset($1);
    $$ = input;
  }
  | SELECT select_content END_SELECT {
    Select *select = new Select;
    select->content.reset($2);
    select->attributes.reset($1);
    $$ = select;
  }
  | TEXTAREA pcdata END_TEXTAREA {
    TextArea *textarea = new TextArea;
    textarea->pcdata.reset($2);
    textarea->attributes.reset($1);
    $$ = textarea;
  }
  ;
880 
/* select_content: a non-empty sequence of <OPTION> elements. */
select_content:
  option {
    list<auto_ptr<Option> > *options = new list<auto_ptr<Option> >;
    options->push_back(auto_ptr<Option>($1));
    $$ = options;
  }
  | select_content option {
    $$ = $1;
    $$->push_back(auto_ptr<Option>($2));
  }
  ;
890 
/* option: <OPTION> followed by its text; "</OPTION>" is optional. */
option:
  OPTION pcdata opt_END_OPTION {
    Option *opt = new Option;
    opt->pcdata.reset($2);
    opt->attributes.reset($1);
    $$ = opt;
  }
  ;
898 
899 /* ------------------------------------------------------------------------- */
900 
/*
 * HX: any heading start tag <H1>..<H6>. Creates the Heading, records its
 * level and stores the tag's attributes; the content is filled in by the
 * "heading" rule.
 */
HX:
  H1 {
    Heading *h = new Heading;
    h->level = 1;
    h->attributes.reset($1);
    $$ = h;
  }
  | H2 {
    Heading *h = new Heading;
    h->level = 2;
    h->attributes.reset($1);
    $$ = h;
  }
  | H3 {
    Heading *h = new Heading;
    h->level = 3;
    h->attributes.reset($1);
    $$ = h;
  }
  | H4 {
    Heading *h = new Heading;
    h->level = 4;
    h->attributes.reset($1);
    $$ = h;
  }
  | H5 {
    Heading *h = new Heading;
    h->level = 5;
    h->attributes.reset($1);
    $$ = h;
  }
  | H6 {
    Heading *h = new Heading;
    h->level = 6;
    h->attributes.reset($1);
    $$ = h;
  }
  ;
909 
/*
 * END_HX: any heading end tag </H1>..</H6>. The value is the heading level,
 * which the "heading" rule compares against the level of the start tag.
 */
END_HX:
  END_H1   { $$ = 1; }
  | END_H2 { $$ = 2; }
  | END_H3 { $$ = 3; }
  | END_H4 { $$ = 4; }
  | END_H5 { $$ = 5; }
  | END_H6 { $$ = 6; }
  ;
918 
919 /* ------------------------------------------------------------------------- */
920 
/*
 * Optional constructs that carry a value: a null pointer when absent,
 * otherwise the value of the construct itself.
 */
opt_pcdata:     /* empty */ { $$ = 0; } | pcdata  { $$ = $1; };
opt_caption:    /* empty */ { $$ = 0; } | caption { $$ = $1; };
opt_texts:      /* empty */ { $$ = 0; } | texts   { $$ = $1; };
opt_flow:       /* empty */ { $$ = 0; } | flow    { $$ = $1; };

/* Optional start tags: a null pointer when absent, else the attribute list. */
opt_LI:         /* empty */ { $$ = 0; } | LI      { $$ = $1; };
opt_P:          /* empty */ { $$ = 0; } | P       { $$ = $1; };
928 
/*
 * Optional end tags. These rules carry no value and have no action; they
 * only let the corresponding closing tag be present or missing.
 */
opt_END_A:      /* empty */ | END_A;
opt_END_B:      /* empty */ | END_B;
opt_END_BLOCKQUOTE: /* empty */ | END_BLOCKQUOTE;
opt_END_BIG:    /* empty */ | END_BIG;
opt_END_CENTER: /* empty */ | END_CENTER;
opt_END_CITE:   /* empty */ | END_CITE;
opt_END_CODE:   /* empty */ | END_CODE;
opt_END_DFN:    /* empty */ | END_DFN;
opt_END_DIV:    /* empty */ | END_DIV;
opt_END_DL:     /* empty */ | END_DL;
opt_END_EM:     /* empty */ | END_EM;
opt_END_FONT:   /* empty */ | END_FONT;
opt_END_FORM:   /* empty */ | END_FORM;
opt_END_I:      /* empty */ | END_I;
opt_END_KBD:    /* empty */ | END_KBD;
opt_END_LI:     /* empty */ | END_LI;
opt_END_OPTION: /* empty */ | END_OPTION;
opt_END_P:      /* empty */ | END_P;
opt_END_PRE:    /* empty */ | END_PRE;
opt_END_SAMP:   /* empty */ | END_SAMP;
opt_END_SMALL:  /* empty */ | END_SMALL;
opt_END_STRIKE: /* empty */ | END_STRIKE;
opt_END_STRONG: /* empty */ | END_STRONG;
opt_END_SUB:    /* empty */ | END_SUB;
opt_END_SUP:    /* empty */ | END_SUP;
opt_END_TD:     /* empty */ | END_TD;
opt_END_TH:     /* empty */ | END_TH;
opt_END_TITLE:  /* empty */ | END_TITLE;
opt_END_TR:     /* empty */ | END_TR;
opt_END_TT:     /* empty */ | END_TT;
opt_END_U:      /* empty */ | END_U;
opt_END_UL:     /* empty */ | END_UL;
opt_END_VAR:    /* empty */ | END_VAR;

/* Optional error token, used to absorb a syntax error after a construct. */
opt_error:      /* empty */ | error;
964 
965 %% /* } */
966 
/*
 * Some C++ compilers (e.g. EGCS 2.91.66) have problems if all virtual
 * methods of a class are inline or pure virtual, so we define the destructor,
 * which is the only virtual method that is not pure virtual, non-inline,
 * although it is empty.
 */

HTMLParser::~HTMLParser()
{
}
976 
977 /* ------------------------------------------------------------------------- */
978 
979