1 2 /***************************************************************************/ 3 4 /* 5 * Portions Copyright (c) 1999 GMRS Software GmbH 6 * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de 7 * All rights reserved. 8 * 9 * Author: Arno Unkrig <arno@unkrig.de> 10 */ 11 12 /* This program is free software; you can redistribute it and/or modify 13 * it under the terms of the GNU General Public License as published by 14 * the Free Software Foundation; either version 2 of the License, or 15 * (at your option) any later version. 16 * 17 * This program is distributed in the hope that it will be useful, 18 * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 * GNU General Public License in the file COPYING for more details. 21 */ 22 23 /***************************************************************************/ 24 25 /* 26 * Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de> 27 * Dates and reasons of modifications: 28 * Sun Apr 7 11:54:06 CEST 2002: Make some closing tags optional 29 * Mon Jul 22 13:42:13 CEST 2002: Don't insert Paragraphs to other block elements 30 * Mon Aug 12 17:14:57 CEST 2002: Make even more closing tags optional 31 */ 32 33 /***************************************************************************/ 34 35 36 %name HTMLParser 37 %define PURE 38 %define DEBUG 1 39 40 %{ 41 42 /* ------------------------------------------------------------------------- */ 43 44 #ident "$Id: HTMLParser.y,v 1.14 1999/10/26 10:56:55 arno Exp $" 45 46 #include "html.h" 47 #include "HTMLParser.h" 48 49 // MIPS machines don't have "alloca()", so disable stack realloc'ing. 
#ifdef mips
/*
 * Replace the generated parser's stack-growing hook: report the overflow
 * through yyerror() and cast the argument list that follows the macro name
 * to void.
 */
#define yyoverflow yyerror("parser stack overflow"), (void)
#endif

/* ------------------------------------------------------------------------- */

%}

/* ------------------------------------------------------------------------- */

/*
 * Parser class customisation: the lexer and the error reporter are pure
 * virtual, and the class gains a virtual destructor, two more pure virtual
 * hooks used by the actions, and a list-nesting counter that starts at 0.
 */
%define LEX_BODY = 0
%define ERROR_BODY = 0
%define MEMBERS\
  virtual ~HTMLParser(); \
  virtual void process(const Document &) = 0;\
  virtual bool read_cdata(const char *terminal, string *) = 0;\
  int list_nesting;
%define CONSTRUCTOR_INIT : list_nesting(0)

/* Semantic value: one member per kind of object the actions pass around. */
%union {
  Document                                *document;
  Element                                 *element;
  list<auto_ptr<Element> >                *element_list;
  PCData                                  *pcdata;
  string                                  *strinG;
  list<TagAttribute>                      *tag_attributes;
  int                                     inT;
  list<auto_ptr<TableRow> >               *table_rows;
  list<auto_ptr<TableCell> >              *table_cells;
  ListItem                                *list_item;
  list<auto_ptr<ListItem> >               *list_items;
  Caption                                 *caption;
  Heading                                 *heading;
  list<auto_ptr<Option> >                 *option_list;
  Option                                  *option;
  DefinitionList                          *definition_list;
  list<auto_ptr<DefinitionListItem> >     *definition_list_item_list;
  TermName                                *term_name;
  TermDefinition                          *term_definition;
  Preformatted                            *preformatted;
  Address                                 *address;
  list<auto_ptr<list<TagAttribute> > >    *tag_attributes_list;
}

/* Value types of the nonterminals (declaration order is unchanged). */
%type <document>                  document_
%type <pcdata>                    pcdata opt_pcdata
%type <element_list>              body_content
%type <heading>                   heading HX
%type <inT>                       END_HX
%type <element>                   block block_except_p text
%type <element_list>              texts opt_texts
%type <element>                   font phrase special form
%type <table_rows>                table_rows
%type <table_cells>               table_cells
%type <caption>                   caption opt_caption
%type <element_list>              applet_content
%type <definition_list>           definition_list
%type <definition_list_item_list> definition_list_content
%type <term_name>                 term_name
%type <term_definition>           term_definition
%type <option_list>               select_content
%type <option>                    option
%type <element>                   list
%type <list_items>                list_content
%type <list_item>                 list_item
%type <preformatted>              preformatted
%type <element_list>              opt_flow flow
%type <element>                   flow_
%type <element_list>              paragraph_content
%type <address>                   address
%type <tag_attributes_list>       map_content

%type <tag_attributes>            opt_LI opt_P

/* Tokens that are not tags. */
%token          DOCTYPE
%token <strinG> PCDATA
%token          SCAN_ERROR


/* Opening tags; each one carries its attribute list as semantic value. */
%token <tag_attributes> A ADDRESS APPLET AREA
%token <tag_attributes> B BASE BASEFONT BIG BLOCKQUOTE BODY BR
%token <tag_attributes> CAPTION CENTER CITE CODE
%token <tag_attributes> DD DFN DIR DIV DL DT
%token <tag_attributes> EM
%token <tag_attributes> FONT FORM
%token <tag_attributes> H1 H2 H3 H4 H5 H6 HEAD HR HTML
%token <tag_attributes> I IMG INPUT ISINDEX
%token <tag_attributes> KBD
%token <tag_attributes> LI LINK
%token <tag_attributes> MAP MENU META
%token <tag_attributes> NOBR
%token <tag_attributes> OL OPTION
%token <tag_attributes> P PARAM PRE
%token <tag_attributes> SAMP SCRIPT SELECT SMALL STRIKE STRONG STYLE SUB SUP
%token <tag_attributes> TABLE TD TEXTAREA TH TITLE TR TT
%token <tag_attributes> U UL
%token <tag_attributes> VAR

/* Closing tags; they carry no semantic value. */
%token END_A END_ADDRESS END_APPLET
%token END_B END_BIG END_BLOCKQUOTE END_BODY
%token END_CAPTION END_CENTER END_CITE END_CODE
%token END_DD END_DFN END_DIR END_DIV END_DL END_DT
%token END_EM
%token END_FONT END_FORM
%token END_H1 END_H2 END_H3 END_H4 END_H5 END_H6 END_HEAD END_HTML
%token END_I
%token END_KBD
%token END_LI
%token END_MAP END_MENU
%token END_NOBR
%token END_OL END_OPTION
%token END_P END_PRE
%token END_SAMP END_SCRIPT END_SELECT END_SMALL END_STRIKE END_STRONG
%token END_STYLE END_SUB END_SUP
%token END_TABLE END_TD END_TEXTAREA END_TH END_TITLE END_TR END_TT
%token END_U
%token END_UL
%token END_VAR

/* ------------------------------------------------------------------------- */

%start document

%% /* { */

/*
 * Start symbol: once the complete input has been reduced to a Document,
 * pass it to the pure virtual process() hook and destroy it afterwards.
 */
document:
  document_ {
    process(*$1);
    delete $1;
  }
  ;

/*
 * Well... actually, an HTML document should look like
 *
 * <!DOCTYPE ...>
 * <HTML>
 *   <HEAD>
 *     ...
 *   </HEAD>
 *   <BODY>
 *     ...
 *   </BODY>
 * </HTML>
 *
 * but...
 *
 * (A) All seven tags are optional
 * (B) The contents of the HEAD and the BODY section can be distinguished
 * (C) Most people out there do not know which element to put before, into,
 *     or after which section...
 *
 * so... let's just forget about the structure of an HTML document, discard
 * the seven tags, and process the remainder as a series of sections.
 */

/*
 * The document is built left-recursively: every alternative takes the
 * Document accumulated so far ($1), adds or discards one item, and yields
 * the same Document again.
 *
 * Every action assigns "$$" explicitly before using it; none of them relies
 * on the generated parser having pre-loaded "$$" with "$1".
 */
document_:
  /* empty */ {
    $$ = new Document;
    $$->body.content.reset(new list<auto_ptr<Element> >);
  }
  | document_ error {
    $$ = $1;
  }
  | document_ DOCTYPE {
    $$ = $1;
  }
  | document_ HTML {
    /* Assign first, then dereference (the original did it the other way). */
    ($$ = $1)->attributes.reset($2);
  }
  | document_ END_HTML {
    $$ = $1;
  }
  | document_ HEAD {
    delete $2; // Ignore <HEAD> attributes
    $$ = $1;
  }
  | document_ END_HEAD {
    $$ = $1;
  }
  | document_ TITLE opt_pcdata opt_END_TITLE {
    delete $2; // Ignore <TITLE> attributes
    ($$ = $1)->head.title.reset($3);
  }
  | document_ ISINDEX {
    ($$ = $1)->head.isindex_attributes.reset($2);
  }
  | document_ BASE {
    ($$ = $1)->head.base_attributes.reset($2);
  }
  | document_ META {
    ($$ = $1)->head.meta_attributes.reset($2);
  }
  | document_ LINK {
    ($$ = $1)->head.link_attributes.reset($2);
  }
  | document_ SCRIPT {
    /* The element's text is fetched through read_cdata(), not as tokens. */
    auto_ptr<Script> s(new Script);
    s->attributes.reset($2);
    if (!read_cdata("</SCRIPT>", &s->text)) {
      yyerror("CDATA terminal not found");
    }
    ($$ = $1)->head.scripts.push_back(s);
  }
  | document_ STYLE {
    /* Same handling as for <SCRIPT>. */
    auto_ptr<Style> s(new Style);
    s->attributes.reset($2);
    if (!read_cdata("</STYLE>", &s->text)) {
      yyerror("CDATA terminal not found");
    }
    ($$ = $1)->head.styles.push_back(s);
  }
  | document_ BODY {
    delete $2; // Ignore <BODY> attributes
    $$ = $1;
  }
  | document_ END_BODY {
    $$ = $1;
  }
  | document_ texts {
    /* Text directly at document level is wrapped into a Paragraph. */
    Paragraph *p = new Paragraph;
    p->texts.reset($2);
    ($$ = $1)->body.content->push_back(auto_ptr<Element>(p));
  }
  | document_ heading {
    ($$ = $1)->body.content->push_back(auto_ptr<Element>($2));
  }
  | document_ block {
    ($$ = $1)->body.content->push_back(auto_ptr<Element>($2));
  }
  | document_ address {
    ($$ = $1)->body.content->push_back(auto_ptr<Element>($2));
  }
  ;

/* Copy the scanned string into a new PCData and free the scanner's copy. */
pcdata:
  PCDATA {
    $$ = new PCData;
    $$->text = *$1;
    delete $1;
  }
  ;

/*
 * Content of the block-level containers (DIV, CENTER, BLOCKQUOTE, FORM,
 * table cells): a possibly empty list of paragraphs, headings, blocks and
 * addresses. The list object always exists, even when it is empty.
 */
body_content:
  /* empty */ {
    $$ = new list<auto_ptr<Element> >;
  }
  | body_content error {
    $$ = $1;
  }
  | body_content SCRIPT {
    /*
     * The script is read so that the scanner gets past it, but it is not
     * stored anywhere: "s" destroys it at the end of this action. The list
     * is passed on explicitly (the original never assigned "$$" here).
     */
    auto_ptr<Script> s(new Script);
    s->attributes.reset($2);
    if (!read_cdata("</SCRIPT>", &s->text)) {
      yyerror("CDATA terminal not found");
    }
    $$ = $1;
  }
  | body_content STYLE {
    /* Read and dropped exactly like <SCRIPT> above. */
    auto_ptr<Style> s(new Style);
    s->attributes.reset($2);
    if (!read_cdata("</STYLE>", &s->text)) {
      yyerror("CDATA terminal not found");
    }
    $$ = $1;
  }
  | body_content texts {
    Paragraph *p = new Paragraph;
    p->texts = auto_ptr<list<auto_ptr<Element> > >($2);
    ($$ = $1)->push_back(auto_ptr<Element>(p));
  }
  | body_content heading {
    ($$ = $1)->push_back(auto_ptr<Element>($2));
  }
  | body_content block {
    ($$ = $1)->push_back(auto_ptr<Element>($2));
  }
  | body_content address {
    ($$ = $1)->push_back(auto_ptr<Element>($2));
  }
  ;

/*
 * A heading. A mismatch between the opening and the closing level is
 * reported through yyerror(), but the heading is still produced with the
 * level of its opening tag.
 */
heading:
  HX paragraph_content END_HX {
    /* EXTENSION: Allow paragraph content in heading, not only texts */
    if ($1->level != $3) {
      yyerror ("Levels of opening and closing headings don't match");
    }
    $$ = $1;
    $$->content.reset($2);
  }
  ;

/* Any block-level element: a paragraph or one of "block_except_p". */
block:
  block_except_p {
    $$ = $1;
  }
  | P paragraph_content opt_END_P {
    Paragraph *p = new Paragraph;
    p->attributes.reset($1);
    p->texts.reset($2);
    $$ = p;
  }
  ;

paragraph_content: /* EXTENSION: Allow blocks (except "<P>") in paragraphs. */
  /* empty */ {
    $$ = new list<auto_ptr<Element> >;
  }
  | paragraph_content error {
    $$ = $1;
  }
  | paragraph_content texts {
    /* Move the text elements into the paragraph's own list... */
    $$ = $1;
    $$->splice($$->end(), *$2);
    delete $2; /* ...and free the list object they came in. */
  }
  | paragraph_content block_except_p {
    ($$ = $1)->push_back(auto_ptr<Element>($2));
  }
  ;

/* All block-level elements other than <P>. */
block_except_p:
  list {
    $$ = $1;
  }
  | preformatted {
    $$ = $1;
  }
  | definition_list {
    $$ = $1;
  }
  | DIV body_content opt_END_DIV {
    Division *p = new Division;
    p->attributes.reset($1);
    p->body_content.reset($2);
    $$ = p;
  }
  | CENTER body_content opt_END_CENTER {
    Center *p = new Center;
    delete $1; // CENTER has no attributes.
    p->body_content.reset($2);
    $$ = p;
  }
  | BLOCKQUOTE body_content opt_END_BLOCKQUOTE {
    delete $1; // BLOCKQUOTE has no attributes!
    BlockQuote *bq = new BlockQuote;
    bq->content.reset($2);
    $$ = bq;
  }
  | FORM body_content opt_END_FORM {
    Form *f = new Form;
    f->attributes.reset($1);
    f->content.reset($2);
    $$ = f;
  }
  | HR {
    HorizontalRule *h = new HorizontalRule;
    h->attributes.reset($1);
    $$ = h;
  }
  | TABLE opt_caption table_rows END_TABLE {
    Table *t = new Table;
    t->attributes.reset($1);
    t->caption.reset($2);
    t->rows.reset($3);
    $$ = t;
  }
  ;

/*
 * The four list flavours. The mid-rule action counts as "$2", so the list
 * content is "$3". "list_nesting" is incremented when the opening tag has
 * been seen and decremented again when the list is complete; the decremented
 * value is stored as the list's nesting, so an outermost list gets 0.
 */
list:
  OL { ++list_nesting; } list_content END_OL {
    OrderedList *ol = new OrderedList;
    ol->attributes.reset($1);
    ol->items.reset($3);
    ol->nesting = --list_nesting;
    $$ = ol;
  }
  | UL { ++list_nesting; } list_content opt_END_UL {
    UnorderedList *ul = new UnorderedList;
    ul->attributes.reset($1);
    ul->items.reset($3);
    ul->nesting = --list_nesting;
    $$ = ul;
  }
  | DIR { ++list_nesting; } list_content END_DIR {
    Dir *d = new Dir;
    d->attributes.reset($1);
    d->items.reset($3);
    d->nesting = --list_nesting;
    $$ = d;
  }
  | MENU { ++list_nesting; } list_content END_MENU {
    Menu *m = new Menu;
    m->attributes.reset($1);
    m->items.reset($3);
    m->nesting = --list_nesting;
    $$ = m;
  }
  ;

/* The item list is created lazily; a list without items yields 0. */
list_content:
  /* empty */ {
    $$ = 0;
  }
  | list_content error {
    $$ = $1;
  }
  | list_content list_item {
    $$ = $1 ? $1 : new list<auto_ptr<ListItem> >;
    $$->push_back(auto_ptr<ListItem>($2));
  }
  ;

list_item:
  LI opt_flow opt_END_LI {
    ListNormalItem *lni = new ListNormalItem;
    lni->attributes.reset($1);
    lni->flow.reset($2);
    $$ = lni;
  }
  | block { /* EXTENSION: Handle a "block" in a list as an indented block. */
    ListBlockItem *lbi = new ListBlockItem;
    lbi->block.reset($1);
    $$ = lbi;
  }
  | texts { /* EXTENSION: Treat "texts" in a list as an "<LI>". */
    ListNormalItem *lni = new ListNormalItem;
    lni->flow.reset($1);
    $$ = lni;
  }
  ;

definition_list:
  /* EXTENSION: Allow nested <DL>s. */
  /* EXTENSION: "</DL>" optional. */
  DL opt_flow opt_error definition_list opt_END_DL {
    /* The outer <DL> and its preamble are dropped; the inner list is kept. */
    delete $1;
    delete $2; /* Kludge */
    $$ = $4;
  }
  /* EXTENSION: Accept a "preamble" in the DL */
  | DL opt_flow opt_error definition_list_content END_DL {
    DefinitionList *dl = new DefinitionList;
    dl->attributes.reset($1);
    dl->preamble.reset($2);
    dl->items.reset($4);
    $$ = dl;
  }
  ;

/* The item list is created lazily; a <DL> without items yields 0. */
definition_list_content:
  /* empty */ {
    $$ = 0;
  }
  /*
   * NOTE(review): the next alternative derives the nonterminal from itself
   * and so makes the grammar cyclic. It looks like a leftover of a former
   * "definition_list_content error" alternative. It is kept unchanged here
   * because removing it alters the generated tables -- confirm against the
   * parser generator's conflict report before dropping it.
   */
  | definition_list_content {
    $$ = $1;
  }
  | definition_list_content term_name {
    $$ = $1 ? $1 : new list<auto_ptr<DefinitionListItem> >;
    $$->push_back(auto_ptr<DefinitionListItem>($2));
  }
  | definition_list_content term_definition {
    $$ = $1 ? $1 : new list<auto_ptr<DefinitionListItem> >;
    $$->push_back(auto_ptr<DefinitionListItem>($2));
  }
  ;

/* <DT>: its attributes are discarded, only the content is kept. */
term_name:
  DT opt_flow opt_error { /* EXTENSION: Allow "flow" instead of "texts" */
    delete $1;
    $$ = new TermName;
    $$->flow.reset($2);
  }
  | DT opt_flow END_DT opt_P opt_error {/* EXTENSION: Ignore <P> after </DT> */
    delete $1;
    delete $4;
    $$ = new TermName;
    $$->flow.reset($2);
  }
  ;

/* <DD>: its attributes are discarded, only the content is kept. */
term_definition:
  DD opt_flow opt_error {
    delete $1;
    $$ = new TermDefinition;
    $$->flow.reset($2);
  }
  | DD opt_flow END_DD opt_P opt_error {/* EXTENSION: Ignore <P> after </DD> */
    delete $1;
    delete $4;
    $$ = new TermDefinition;
    $$->flow.reset($2);
  }
  ;

/* A non-empty sequence of text, heading and block elements. */
flow:
  flow_ {
    $$ = new list<auto_ptr<Element> >;
    $$->push_back(auto_ptr<Element>($1));
  }
  | flow error {
    $$ = $1;
  }
  | flow flow_ {
    ($$ = $1)->push_back(auto_ptr<Element>($2));
  }
  ;

flow_:
  text {
    $$ = $1;
  }
  | heading { /* EXTENSION: Allow headings in "flow", i.e. in lists */
    $$ = $1;
  }
  | block {
    $$ = $1;
  }
  ;

preformatted:
  PRE opt_texts opt_END_PRE {
    $$ = new Preformatted;
    $$->attributes.reset($1);
    $$->texts.reset($2);
  }
  ;

caption:
  CAPTION opt_texts END_CAPTION {
    $$ = new Caption;
    $$->attributes.reset($1);
    $$->texts.reset($2);
  }
  ;

/* Unlike the list contents, the row list always exists, even when empty. */
table_rows:
  /* empty */ {
    $$ = new list<auto_ptr<TableRow> >;
  }
  | table_rows error {
    $$ = $1;
  }
  | table_rows TR table_cells opt_END_TR {
    TableRow *tr = new TableRow;
    tr->attributes.reset($2);
    tr->cells.reset($3);
    ($$ = $1)->push_back(auto_ptr<TableRow>(tr));
  }
  ;

/* The cell list always exists, even when empty. */
table_cells:
  /* empty */ {
    $$ = new list<auto_ptr<TableCell> >;
  }
  | table_cells error {
    $$ = $1;
  }
  | table_cells TD body_content opt_END_TD {
    TableCell *tc = new TableCell;
    tc->attributes.reset($2);
    tc->content.reset($3);
    ($$ = $1)->push_back(auto_ptr<TableCell>(tc));
  }
  | table_cells TH body_content opt_END_TH opt_END_TD {
    /* EXTENSION: Allow "</TD>" in place of "</TH>". */
    TableHeadingCell *thc = new TableHeadingCell;
    thc->attributes.reset($2);
    thc->content.reset($3);
    ($$ = $1)->push_back(auto_ptr<TableCell>(thc));
  }
  | table_cells INPUT { /* EXTENSION: Ignore <INPUT> between table cells. */
    delete $2;
    $$ = $1;
  }
  ;

address:
  ADDRESS opt_texts END_ADDRESS { /* Should be "address_content"... */
    delete $1; /* The attributes of <ADDRESS> are not kept. */
    $$ = new Address;
    $$->content.reset($2);
  }
  ;

/* ------------------------------------------------------------------------- */

/* A non-empty sequence of text-level elements. */
texts:
  text {
    $$ = new list<auto_ptr<Element> >;
    $$->push_back(auto_ptr<Element>($1));
  }
  | texts text {
    ($$ = $1)->push_back(auto_ptr<Element>($2));
  }
  ;

/* One text-level element, optionally followed by an error token. */
text:
  pcdata opt_error { $$ = $1; }
  | font opt_error { $$ = $1; }
  | phrase opt_error { $$ = $1; }
  | special opt_error { $$ = $1; }
  | form opt_error { $$ = $1; }
  | NOBR opt_texts END_NOBR opt_error { /* EXTENSION: NS 1.1 / IE 2.0 */
    NoBreak *nb = new NoBreak;
    delete $1;
    nb->content.reset($2);
    $$ = nb;
  }
  ;

/*
 * Font style elements. The tag's attributes are discarded; the opening
 * tag's token code tells the resulting Font which element it stands for.
 */
font:
  TT opt_texts opt_END_TT { delete $1; $$ = new Font(TT, $2); }
  | I opt_texts opt_END_I { delete $1; $$ = new Font(I, $2); }
  | B opt_texts opt_END_B { delete $1; $$ = new Font(B, $2); }
  | U opt_texts opt_END_U { delete $1; $$ = new Font(U, $2); }
  | STRIKE opt_texts opt_END_STRIKE { delete $1; $$ = new Font(STRIKE, $2); }
  | BIG opt_texts opt_END_BIG { delete $1; $$ = new Font(BIG, $2); }
  | SMALL opt_texts opt_END_SMALL { delete $1; $$ = new Font(SMALL, $2); }
  | SUB opt_texts opt_END_SUB { delete $1; $$ = new Font(SUB, $2); }
  | SUP opt_texts opt_END_SUP { delete $1; $$ = new Font(SUP, $2); }
  ;

/* Phrase elements, built the same way as the font style elements. */
phrase:
  EM opt_texts opt_END_EM { delete $1; $$ = new Phrase(EM, $2); }
  | STRONG opt_texts opt_END_STRONG { delete $1; $$ = new Phrase(STRONG, $2); }
  | DFN opt_texts opt_END_DFN { delete $1; $$ = new Phrase(DFN, $2); }
  | CODE opt_texts opt_END_CODE { delete $1; $$ = new Phrase(CODE, $2); }
  | SAMP opt_texts opt_END_SAMP { delete $1; $$ = new Phrase(SAMP, $2); }
  | KBD opt_texts opt_END_KBD { delete $1; $$ = new Phrase(KBD, $2); }
  | VAR opt_texts opt_END_VAR { delete $1; $$ = new Phrase(VAR, $2); }
  | CITE opt_texts opt_END_CITE { delete $1; $$ = new Phrase(CITE, $2); }
  ;

/* Anchors, images, applets, <FONT>, <BASEFONT>, line breaks and maps. */
special:
  /* EXTENSION: Allow "flow" in <A>, not only "texts". */
  /* EXTENSION: Allow useless <LI> in anchor. */
  /* EXTENSION: "</A>" optional.*/
  A opt_LI opt_flow opt_END_A {
    delete $2; /* attributes of the stray <LI>, if there was one */
    Anchor *anchor = new Anchor;
    anchor->attributes.reset($1);
    anchor->texts.reset($3);
    $$ = anchor;
  }
  | IMG {
    Image *img = new Image;
    img->attributes.reset($1);
    $$ = img;
  }
  | APPLET applet_content END_APPLET {
    Applet *applet = new Applet;
    applet->attributes.reset($1);
    applet->content.reset($2);
    $$ = applet;
  }
  /* EXTENSION: "flow" in <FONT> allowed, not only "texts". */
  /* EXTENSION: "</FONT>" optional. */
  | FONT opt_flow opt_END_FONT {
    Font2 *fnt = new Font2;
    fnt->attributes.reset($1);
    fnt->elements.reset($2);
    $$ = fnt;
  }
  | BASEFONT {
    BaseFont *base = new BaseFont;
    base->attributes.reset($1);
    $$ = base;
  }
  | BR {
    LineBreak *brk = new LineBreak;
    brk->attributes.reset($1);
    $$ = brk;
  }
  | MAP map_content END_MAP {
    Map *mp = new Map;
    mp->attributes.reset($1);
    mp->areas.reset($2);
    $$ = mp;
  }
  ;

/* Text and <PARAM>s of an applet; the list is allocated on first use. */
applet_content:
  /* empty */ {
    $$ = 0;
  }
  | applet_content text {
    $$ = $1;
    if (!$$) {
      $$ = new list<auto_ptr<Element> >;
    }
    $$->push_back(auto_ptr<Element>($2));
  }
  | applet_content PARAM {
    $$ = $1;
    if (!$$) {
      $$ = new list<auto_ptr<Element> >;
    }
    Param *param = new Param;
    param->attributes.reset($2);
    $$->push_back(auto_ptr<Element>(param));
  }
  ;

/* Attribute lists of a map's <AREA>s; the list is allocated on first use. */
map_content:
  /* empty */ {
    $$ = 0;
  }
  | map_content error {
    $$ = $1;
  }
  | map_content AREA {
    $$ = $1;
    if (!$$) {
      $$ = new list<auto_ptr<list<TagAttribute> > >;
    }
    $$->push_back(auto_ptr<list<TagAttribute> >($2));
  }
  ;

/* Form controls. */
form:
  INPUT {
    Input *inp = new Input;
    inp->attributes.reset($1);
    $$ = inp;
  }
  | SELECT select_content END_SELECT {
    Select *sel = new Select;
    sel->attributes.reset($1);
    sel->content.reset($2);
    $$ = sel;
  }
  | TEXTAREA pcdata END_TEXTAREA {
    TextArea *area = new TextArea;
    area->attributes.reset($1);
    area->pcdata.reset($2);
    $$ = area;
  }
  ;

/* One or more <OPTION>s. */
select_content:
  option {
    list<auto_ptr<Option> > *options = new list<auto_ptr<Option> >;
    options->push_back(auto_ptr<Option>($1));
    $$ = options;
  }
  | select_content option {
    $$ = $1;
    $$->push_back(auto_ptr<Option>($2));
  }
  ;

option:
  OPTION pcdata opt_END_OPTION {
    Option *opt = new Option;
    opt->attributes.reset($1);
    opt->pcdata.reset($2);
    $$ = opt;
  }
  ;

/* ------------------------------------------------------------------------- */

/* Opening heading tag: a new Heading that knows its level and attributes. */
HX:
  H1 {
    Heading *h = new Heading;
    h->level = 1;
    h->attributes.reset($1);
    $$ = h;
  }
  | H2 {
    Heading *h = new Heading;
    h->level = 2;
    h->attributes.reset($1);
    $$ = h;
  }
  | H3 {
    Heading *h = new Heading;
    h->level = 3;
    h->attributes.reset($1);
    $$ = h;
  }
  | H4 {
    Heading *h = new Heading;
    h->level = 4;
    h->attributes.reset($1);
    $$ = h;
  }
  | H5 {
    Heading *h = new Heading;
    h->level = 5;
    h->attributes.reset($1);
    $$ = h;
  }
  | H6 {
    Heading *h = new Heading;
    h->level = 6;
    h->attributes.reset($1);
    $$ = h;
  }
  ;

/* Closing heading tag: just the level. */
END_HX:
  END_H1 {
    $$ = 1;
  }
  | END_H2 {
    $$ = 2;
  }
  | END_H3 {
    $$ = 3;
  }
  | END_H4 {
    $$ = 4;
  }
  | END_H5 {
    $$ = 5;
  }
  | END_H6 {
    $$ = 6;
  }
  ;

/* ------------------------------------------------------------------------- */

/* Optional items that carry a value: 0 when absent. */
opt_pcdata:
  /* empty */ {
    $$ = 0;
  }
  | pcdata {
    $$ = $1;
  }
  ;

opt_caption:
  /* empty */ {
    $$ = 0;
  }
  | caption {
    $$ = $1;
  }
  ;

opt_texts:
  /* empty */ {
    $$ = 0;
  }
  | texts {
    $$ = $1;
  }
  ;

opt_flow:
  /* empty */ {
    $$ = 0;
  }
  | flow {
    $$ = $1;
  }
  ;

opt_LI:
  /* empty */ {
    $$ = 0;
  }
  | LI {
    $$ = $1;
  }
  ;

opt_P:
  /* empty */ {
    $$ = 0;
  }
  | P {
    $$ = $1;
  }
  ;

/* Optional closing tags; they have no value. */
opt_END_A:          /* empty */ | END_A;
opt_END_B:          /* empty */ | END_B;
opt_END_BLOCKQUOTE: /* empty */ | END_BLOCKQUOTE;
opt_END_BIG:        /* empty */ | END_BIG;
opt_END_CENTER:     /* empty */ | END_CENTER;
opt_END_CITE:       /* empty */ | END_CITE;
opt_END_CODE:       /* empty */ | END_CODE;
opt_END_DFN:        /* empty */ | END_DFN;
opt_END_DIV:        /* empty */ | END_DIV;
opt_END_DL:         /* empty */ | END_DL;
opt_END_EM:         /* empty */ | END_EM;
opt_END_FONT:       /* empty */ | END_FONT;
opt_END_FORM:       /* empty */ | END_FORM;
opt_END_I:          /* empty */ | END_I;
opt_END_KBD:        /* empty */ | END_KBD;
opt_END_LI:         /* empty */ | END_LI;
opt_END_OPTION:     /* empty */ | END_OPTION;
opt_END_P:          /* empty */ | END_P;
opt_END_PRE:        /* empty */ | END_PRE;
opt_END_SAMP:       /* empty */ | END_SAMP;
opt_END_SMALL:      /* empty */ | END_SMALL;
opt_END_STRIKE:     /* empty */ | END_STRIKE;
opt_END_STRONG:     /* empty */ | END_STRONG;
opt_END_SUB:        /* empty */ | END_SUB;
opt_END_SUP:        /* empty */ | END_SUP;
opt_END_TD:         /* empty */ | END_TD;
opt_END_TH:         /* empty */ | END_TH;
opt_END_TITLE:      /* empty */ | END_TITLE;
opt_END_TR:         /* empty */ | END_TR;
opt_END_TT:         /* empty */ | END_TT;
opt_END_U:          /* empty */ | END_U;
opt_END_UL:         /* empty */ | END_UL;
opt_END_VAR:        /* empty */ | END_VAR;

/* Optional error token. */
opt_error:          /* empty */ | error;

%% /* } */

/*
 * The destructor is deliberately defined out of line although its body is
 * empty: some C++ compilers (e.g. EGCS 2.91.66) have problems with a class
 * whose virtual methods are all either inline or pure virtual, and the
 * destructor is the only virtual method of this class that is not pure.
 */

HTMLParser::~HTMLParser() {}

/* ------------------------------------------------------------------------- */
