1
2 /***************************************************************************/
3
4 /*
5 * Portions Copyright (c) 1999 GMRS Software GmbH
6 * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de
7 * All rights reserved.
8 *
9 * Author: Arno Unkrig <arno@unkrig.de>
10 */
11
12 /* This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License in the file COPYING for more details.
21 */
22
23 /***************************************************************************/
24
25 /*
26 * Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de>
27 * Dates and reasons of modifications:
28 * Fre Jun 8 17:43:02 CEST 2001: new method
29 * Wed Jul 2 22:07:12 CEST 2003: ported to g++ 3.3
30 */
31
32 /***************************************************************************/
33
34
35 #include <stdlib.h>
36 #include <stdarg.h>
37 #include <iostream>
38
39 #include "html.h"
40 #include "HTMLParser.h"
41 #include "cmp_nocase.h"
42
43 /* ------------------------------------------------------------------------- */
44
45 /*
46 * Define some helpers.
47 */
48
49 #define define_foreach(T, args, action) \
50 void foreach args { \
51 for (T::const_iterator i = l.begin(); i != l.end(); ++i) { \
52 action; \
53 } \
54 }
55
56 #define pack(T) \
57 define_foreach(list<auto_ptr<T> >, ( \
58 const list<auto_ptr<T> > &l, \
59 ostream &os, \
60 ostream_manipulator separator \
61 ), (*i)->unparse(os, separator))
62
63 static pack(Element)
64 static pack(TableCell)
65 static pack(TableRow)
66 static pack(ListItem)
67 static pack(Option)
68 static pack(DefinitionListItem)
69 static pack(Script)
70 static pack(Style)
71
72 #undef pack
73
74 /*
75 * Special helper for "const auto_ptr<list<TagAttribute> > &".
76 */
77 static ostream &operator<<(ostream &os, const auto_ptr<list<TagAttribute> > &a)
78 {
79 if (a.get()) {
80 const list<TagAttribute> &al(*a);
81 list<TagAttribute>::const_iterator i;
82 for (i = al.begin(); i != al.end(); ++i) {
83 os << " " << (*i).first << "=\"" << (*i).second << "\"";
84 }
85 }
86 return os;
87 }
88
89 /* ------------------------------------------------------------------------- */
90
91 /*
92 * Brothers of "endl".
93 */
none(ostream & os)94 static ostream &none(ostream &os) { return os; }
95
96 /* ------------------------------------------------------------------------- */
97
98 /*
99 * Some C++ compilers (e.g. EGCS 2.91.66) have problems if all virtual
100 * methods of a class are inline or pure virtual, so we define the virtual
101 * "Element::~Element()", which is the only virtual method, non-inline,
102 * although it is empty.
103 */
~Element()104 Element::~Element()
105 {
106 }
107
108 /* ------------------------------------------------------------------------- */
109
110 void
unparse(ostream & os,ostream_manipulator separator)111 Document::unparse(ostream &os, ostream_manipulator separator) const
112 {
113 os << "<HTML" << attributes << ">" << separator;
114 head.unparse(os, separator);
115 body.unparse(os, separator);
116 os << "</HTML>" << separator;
117 }
118
119 void
unparse(ostream & os,ostream_manipulator separator)120 Head::unparse(ostream &os, ostream_manipulator separator) const
121 {
122 os << "<HEAD>" << separator;
123 if (title.get()) {
124 os << "<TITLE>" << separator;
125 title->unparse(os, separator);
126 os << "</TITLE>" << separator;
127 }
128 if (isindex_attributes.get()) {
129 os << "<ISINDEX"<< isindex_attributes << ">" << std::endl;
130 }
131 if (base_attributes.get()) os << "<BASE" << base_attributes << ">" << std::endl;
132 foreach(scripts, os, separator);
133 foreach(styles, os, separator);
134 if (meta_attributes.get()) os << "<META" << meta_attributes << ">" << std::endl;
135 if (link_attributes.get()) os << "<LINK" << link_attributes << ">" << std::endl;
136 os << "</HEAD>" << separator;
137 }
138
139 void
unparse(ostream & os,ostream_manipulator separator)140 Script::unparse(ostream &os, ostream_manipulator separator) const
141 {
142 os
143 << "<SCRIPT" << attributes << ">" << separator
144 << text
145 << "</SCRIPT>" << separator;
146 }
147
148 void
unparse(ostream & os,ostream_manipulator separator)149 Style::unparse(ostream &os, ostream_manipulator separator) const
150 {
151 os
152 << "<STYLE" << attributes << ">" << separator
153 << text
154 << "</STYLE>" << separator;
155 }
156
157 void
unparse(ostream & os,ostream_manipulator separator)158 Body::unparse(ostream &os, ostream_manipulator separator) const
159 {
160 os << "<BODY" << attributes << ">" << separator;
161 if (content.get()) foreach(*content, os, separator);
162 os << "</BODY>" << separator;
163 }
164
165 void
unparse(ostream & os,ostream_manipulator separator)166 PCData::unparse(ostream &os, ostream_manipulator separator) const
167 {
168 for (string::size_type j = 0; j < text.length(); ++j) {
169 char c = text[j];
170 switch (((int) c) & 255) {
171 case LATIN1_nbsp: os << " "; break;
172 case '&': os << "&"; break;
173 case '<': os << "<"; break;
174 case '>': os << ">"; break;
175 case '"': os << """; break;
176 default:
177 if (c & 0x80) {
178 os << "&#" << (((int) c) & 255) << ";";
179 } else {
180 os << c;
181 }
182 break;
183 }
184 }
185
186 os << separator;
187 }
188
189 void
unparse(ostream & os,ostream_manipulator separator)190 Heading::unparse(ostream &os, ostream_manipulator separator) const
191 {
192 os << "<H" << level << attributes << ">" << separator;
193 if (content.get()) foreach(*content, os, separator);
194 os << "</H" << level << ">" << separator;
195 }
196
197 void
unparse(ostream & os,ostream_manipulator separator)198 Paragraph::unparse(ostream &os, ostream_manipulator separator) const
199 {
200 os << "<P" << attributes << ">" << separator;
201 if (texts.get()) foreach(*texts, os, separator);
202 os << "</P>" << separator;
203 }
204
205 void
unparse(ostream & os,ostream_manipulator separator)206 Image::unparse(ostream &os, ostream_manipulator separator) const
207 {
208 os << "<IMG" << attributes << ">" << separator;
209 }
210
211 void
unparse(ostream & os,ostream_manipulator separator)212 Applet::unparse(ostream &os, ostream_manipulator separator) const
213 {
214 os << "<APPLET" << attributes << ">" << separator;
215 if (content.get()) foreach(*content, os, separator);
216 os << "</APPLET>" << separator;
217 }
218
219 void
unparse(ostream & os,ostream_manipulator separator)220 Param::unparse(ostream &os, ostream_manipulator separator) const
221 {
222 os << "<PARAM" << attributes << ">" << separator;
223 }
224
225 void
unparse(ostream & os,ostream_manipulator separator)226 Division::unparse(ostream &os, ostream_manipulator separator) const
227 {
228 os << "<DIV" << attributes << ">" << separator;
229 if (body_content.get()) foreach(*body_content, os, separator);
230 os << "</DIV>" << separator;
231 }
232
233 void
unparse(ostream & os,ostream_manipulator separator)234 Center::unparse(ostream &os, ostream_manipulator separator) const
235 {
236 os << "<CENTER>" << separator;
237 if (body_content.get()) foreach(*body_content, os, separator);
238 os << "</CENTER>" << separator;
239 }
240
241 void
unparse(ostream & os,ostream_manipulator separator)242 BlockQuote::unparse(ostream &os, ostream_manipulator separator) const
243 {
244 os << "<BLOCKQUOTE>" << separator;
245 if (content.get()) foreach(*content, os, separator);
246 os << "</BLOCKQUOTE>" << separator;
247 }
248
249 void
unparse(ostream & os,ostream_manipulator separator)250 Address::unparse(ostream &os, ostream_manipulator separator) const
251 {
252 os << "<ADDRESS>" << separator;
253 if (content.get()) foreach(*content, os, separator);
254 os << "</ADDRESS>" << separator;
255 }
256
257 void
unparse(ostream & os,ostream_manipulator separator)258 Form::unparse(ostream &os, ostream_manipulator separator) const
259 {
260 os << "<FORM" << attributes << ">" << separator;
261 if (content.get()) foreach(*content, os, separator);
262 os << "</FORM>" << separator;
263 }
264
265 void
unparse(ostream & os,ostream_manipulator separator)266 Preformatted::unparse(ostream &os, ostream_manipulator separator) const
267 {
268 os << "<PRE" << attributes << ">" << separator;
269 if (texts.get()) { foreach(*texts, os, none); os << separator; }
270 os << "</PRE>" << separator;
271 }
272
273 void
unparse(ostream & os,ostream_manipulator separator)274 HorizontalRule::unparse(ostream &os, ostream_manipulator separator) const
275 {
276 os << "<HR" << attributes << ">" << separator;
277 }
278
279 void
unparse(ostream & os,ostream_manipulator separator)280 Input::unparse(ostream &os, ostream_manipulator separator) const
281 {
282 os << "<INPUT" << attributes << ">" << separator;
283 }
284
285 void
unparse(ostream & os,ostream_manipulator separator)286 Option::unparse(ostream &os, ostream_manipulator separator) const
287 {
288 os << "<OPTION" << attributes << ">" << separator;
289 if (pcdata.get()) pcdata->unparse(os, separator);
290 os << "</OPTION>" << std::endl;
291 }
292
293 void
unparse(ostream & os,ostream_manipulator separator)294 Select::unparse(ostream &os, ostream_manipulator separator) const
295 {
296 os << "<SELECT" << attributes << ">" << separator;
297 if (content.get()) foreach(*content, os, separator);
298 os << "</SELECT>" << std::endl;
299 }
300
301 void
unparse(ostream & os,ostream_manipulator separator)302 TextArea::unparse(ostream &os, ostream_manipulator separator) const
303 {
304 os << "<TEXTAREA" << attributes << ">" << separator;
305 if (pcdata.get()) pcdata->unparse(os, separator);
306 os << "</TEXTAREA>" << std::endl;
307 }
308
309 /* ------------------------------------------------------------------------- */
310
311 void
unparse(ostream & os,ostream_manipulator separator)312 OrderedList::unparse(ostream &os, ostream_manipulator separator) const
313 {
314 os << "<OL" << attributes << ">" << separator;
315 if (items.get()) foreach(*items, os, separator);
316 os << "</OL>" << separator;
317 }
318
319 void
unparse(ostream & os,ostream_manipulator separator)320 UnorderedList::unparse(ostream &os, ostream_manipulator separator) const
321 {
322 os << "<UL" << attributes << ">" << separator;
323 if (items.get()) foreach(*items, os, separator);
324 os << "</UL>" << separator;
325 }
326
327 void
unparse(ostream & os,ostream_manipulator separator)328 Dir::unparse(ostream &os, ostream_manipulator separator) const
329 {
330 os << "<DIR" << attributes << ">" << separator;
331 if (items.get()) foreach(*items, os, separator);
332 os << "</DIR>" << separator;
333 }
334
335 void
unparse(ostream & os,ostream_manipulator separator)336 Menu::unparse(ostream &os, ostream_manipulator separator) const
337 {
338 os << "<MENU" << attributes << ">" << separator;
339 if (items.get()) foreach(*items, os, separator);
340 os << "</MENU>" << separator;
341 }
342
343 void
unparse(ostream & os,ostream_manipulator separator)344 ListNormalItem::unparse(ostream &os, ostream_manipulator separator) const
345 {
346 os << "<LI" <<attributes << ">" << separator;
347 if (flow.get()) foreach(*flow, os, separator);
348 os << "</LI>" << separator;
349 }
350
351 void
unparse(ostream & os,ostream_manipulator separator)352 ListBlockItem::unparse(ostream &os, ostream_manipulator separator) const
353 {
354 if (block.get()) block->unparse(os, separator);
355 }
356
357 /* ------------------------------------------------------------------------- */
358
359 void
unparse(ostream & os,ostream_manipulator separator)360 DefinitionList::unparse(ostream &os, ostream_manipulator separator) const
361 {
362 os << "<DL" << attributes << ">" << separator;
363 if (items.get()) foreach(*items, os, separator);
364 os << "</DL>" << separator;
365 }
366
367 void
unparse(ostream & os,ostream_manipulator separator)368 TermName::unparse(ostream &os, ostream_manipulator separator) const
369 {
370 os << "<DT>" << separator;
371 if (flow.get()) foreach(*flow, os, separator);
372 os << "</DT>" << separator;
373 }
374
375 void
unparse(ostream & os,ostream_manipulator separator)376 TermDefinition::unparse(ostream &os, ostream_manipulator separator) const
377 {
378 os << "<DD>" << separator;
379 if (flow.get()) foreach(*flow, os, separator);
380 os << "</DD>" << separator;
381 }
382
383 /* ------------------------------------------------------------------------- */
384
385 void
unparse(ostream & os,ostream_manipulator separator)386 Table::unparse(ostream &os, ostream_manipulator separator) const
387 {
388 os << "<TABLE" << attributes << ">" << separator;
389 if (caption.get()) caption->unparse(os, separator);
390 if (rows.get()) foreach(*rows, os, separator);
391 os << "</TABLE>" << separator;
392 }
393
394 void
unparse(ostream & os,ostream_manipulator separator)395 NoBreak::unparse(ostream &os, ostream_manipulator separator) const
396 {
397 os << "<NOBR>" << separator;
398 if (content.get()) foreach(*content, os, separator);
399 os << "</NOBR>" << separator;
400 }
401
402 void
unparse(ostream & os,ostream_manipulator separator)403 Font::unparse(ostream &os, ostream_manipulator separator) const
404 {
405 const char *tag = (
406 attribute == HTMLParser::TT ? "TT" :
407 attribute == HTMLParser::I ? "I" :
408 attribute == HTMLParser::B ? "B" :
409 attribute == HTMLParser::U ? "U" :
410 attribute == HTMLParser::STRIKE ? "STRIKE" :
411 attribute == HTMLParser::BIG ? "BIG" :
412 attribute == HTMLParser::SMALL ? "SMALL" :
413 attribute == HTMLParser::SUB ? "SUB" :
414 attribute == HTMLParser::SUP ? "SUP" :
415 "???"
416 );
417
418 os << "<" << tag << ">" << separator;
419 if (texts.get()) foreach(*texts, os, separator);
420 os << "</" << tag << ">" << separator;
421 }
422
423 void
unparse(ostream & os,ostream_manipulator separator)424 Phrase::unparse(ostream &os, ostream_manipulator separator) const
425 {
426 const char *tag = (
427 attribute == HTMLParser::EM ? "EM" :
428 attribute == HTMLParser::STRONG ? "STRONG" :
429 attribute == HTMLParser::DFN ? "DFN" :
430 attribute == HTMLParser::CODE ? "CODE" :
431 attribute == HTMLParser::SAMP ? "SAMP" :
432 attribute == HTMLParser::KBD ? "KBD" :
433 attribute == HTMLParser::VAR ? "VAR" :
434 attribute == HTMLParser::CITE ? "CITE" :
435 "???"
436 );
437
438 os << "<" << tag << ">" << separator;
439 if (texts.get()) foreach(*texts, os, separator);
440 os << "</" << tag << ">" << separator;
441 }
442
443 void
unparse(ostream & os,ostream_manipulator separator)444 Anchor::unparse(ostream &os, ostream_manipulator separator) const
445 {
446 os << "<A" << attributes << ">" << separator;
447 if (texts.get()) foreach(*texts, os, separator);
448 os << "</A>" << separator;
449 }
450
451 void
unparse(ostream & os,ostream_manipulator separator)452 BaseFont::unparse(ostream &os, ostream_manipulator separator) const
453 {
454 os << "<BASEFONT" << attributes << ">" << separator;
455 }
456
457 void
unparse(ostream & os,ostream_manipulator separator)458 LineBreak::unparse(ostream &os, ostream_manipulator separator) const
459 {
460 os << "<BR" << attributes << ">" << separator;
461 }
462
463 void
unparse(ostream & os,ostream_manipulator separator)464 Map::unparse(ostream &os, ostream_manipulator separator) const
465 {
466 os << "<MAP" << attributes << ">" << separator;
467 if (areas.get()) {
468 const list<auto_ptr<list<TagAttribute> > > &al(*areas);
469 list<auto_ptr<list<TagAttribute> > >::const_iterator i;
470 for (i = al.begin(); i != al.end(); ++i) {
471 os << "<AREA" << *i << ">" << separator;
472 }
473 }
474 os << "</MAP>" << separator;
475 }
476
477 void
unparse(ostream & os,ostream_manipulator separator)478 Font2::unparse(ostream &os, ostream_manipulator separator) const
479 {
480 os << "<FONT" << attributes << ">" << separator;
481 if (elements.get()) foreach(*elements, os, separator);
482 os << "</FONT>" << separator;
483 }
484
485 void
unparse(ostream & os,ostream_manipulator separator)486 TableRow::unparse(ostream &os, ostream_manipulator separator) const
487 {
488 os << "<TR" << attributes << ">" << separator;
489 if (cells.get()) foreach(*cells, os, separator);
490 os << "</TR>" << separator;
491 }
492
493 void
unparse(ostream & os,ostream_manipulator separator)494 TableCell::unparse(ostream &os, ostream_manipulator separator) const
495 {
496 os << "<TD" << attributes << ">" << separator;
497 if (content.get()) foreach(*content, os, separator);
498 os << "</TD>" << separator;
499 }
500
501 void
unparse(ostream & os,ostream_manipulator separator)502 TableHeadingCell::unparse(ostream &os, ostream_manipulator separator) const
503 {
504 os << "<TH" << attributes << ">" << separator;
505 if (content.get()) foreach(*content, os, separator);
506 os << "</TH>" << separator;
507 }
508
509 void
unparse(ostream & os,ostream_manipulator separator)510 Caption::unparse(ostream &os, ostream_manipulator separator) const
511 {
512 os << "<CAPTION" << attributes << ">" << separator;
513 if (texts.get()) foreach(*texts, os, separator);
514 os << "</CAPTION>" << separator;
515 }
516
517 /* ------------------------------------------------------------------------- */
518
519 string
get_attribute(const list<TagAttribute> * as,const char * name,const char * dflt)520 get_attribute(
521 const list<TagAttribute> *as,
522 const char *name,
523 const char *dflt
524 )
525 {
526 if (as) {
527 list<TagAttribute>::const_iterator i;
528 for (i = as->begin(); i != as->end(); ++i) {
529 if (cmp_nocase((*i).first, name) == 0) return string((*i).second);
530 }
531 }
532 return string(dflt);
533 }
534
535 // *exists is set to false if attribute *name does not exist - Johannes Geiger
536
537 string
get_attribute(const list<TagAttribute> * as,const char * name,bool * exists)538 get_attribute(
539 const list<TagAttribute> *as,
540 const char *name,
541 bool *exists
542 )
543 {
544 *exists = true;
545 if (as) {
546 list<TagAttribute>::const_iterator i;
547 for (i = as->begin(); i != as->end(); ++i) {
548 if (cmp_nocase((*i).first, name) == 0) return string((*i).second);
549 }
550 }
551 *exists = false;
552 return string("");
553 }
554
555
556 int
get_attribute(const list<TagAttribute> * as,const char * name,int dflt)557 get_attribute(
558 const list<TagAttribute> *as,
559 const char *name,
560 int dflt
561 )
562 {
563 if (as) {
564 list<TagAttribute>::const_iterator i;
565 for (i = as->begin(); i != as->end(); ++i) {
566 if (cmp_nocase((*i).first, name) == 0) return atoi((*i).second.c_str());
567 }
568 }
569 return dflt;
570 }
571
572 int
get_attribute(const list<TagAttribute> * as,const char * name,int dflt,const char * s1,int v1,...)573 get_attribute(
574 const list<TagAttribute> *as,
575 const char *name,
576 int dflt,
577 const char *s1,
578 int v1,
579 ...
580 )
581 {
582 if (as) {
583 list<TagAttribute>::const_iterator i;
584 for (i = as->begin(); i != as->end(); ++i) {
585 if (cmp_nocase((*i).first, name) == 0) {
586 const char *s = s1;
587 int v = v1;
588
589 va_list va;
590 va_start(va, v1);
591 for (;;) {
592 if (cmp_nocase(s, (*i).second) == 0) break;
593 s = va_arg(va, const char *);
594 if (!s) { v = dflt; break; }
595 v = va_arg(va, int);
596 }
597 va_end(va);
598 return v;
599 }
600 }
601 }
602 return dflt;
603 }
604
605 int
get_attribute(const list<TagAttribute> * as,const char * name,const char * dflt1,int dflt2,const char * s1,int v1,...)606 get_attribute(
607 const list<TagAttribute> *as,
608 const char *name, // Attribute name
609 const char *dflt1, // If attribute not specified
610 int dflt2, // If string value does not match s1, ...
611 const char *s1,
612 int v1,
613 ...
614 )
615 {
616 if (as) {
617 list<TagAttribute>::const_iterator i;
618 for (i = as->begin(); i != as->end(); ++i) {
619 if (cmp_nocase((*i).first, name) == 0) {
620 dflt1 = (*i).second.c_str();
621 break;
622 }
623 }
624 }
625
626 if (!dflt1) return dflt2;
627
628 const char *s = s1;
629 int v = v1;
630
631 va_list va;
632 va_start(va, v1);
633 for (;;) {
634 if (cmp_nocase(s, dflt1) == 0) break;
635 s = va_arg(va, const char *);
636 if (!s) break;
637 v = va_arg(va, int);
638 }
639 va_end(va);
640
641 return s ? v : dflt2;
642 }
643
644 /* ------------------------------------------------------------------------- */
645
646