1 
2  /***************************************************************************/
3 
4 /*
5  * Portions Copyright (c) 1999 GMRS Software GmbH
6  * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de
7  * All rights reserved.
8  *
9  * Author: Arno Unkrig <arno@unkrig.de>
10  */
11 
12 /* This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 2 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  * GNU General Public License in the file COPYING for more details.
21  */
22 
23  /***************************************************************************/
24 
25 /*
26  * Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de>
27  * Dates and reasons of modifications:
28  * Fre Jun  8 17:43:02 CEST 2001: new method
29  * Wed Jul  2 22:07:12 CEST 2003: ported to g++ 3.3
30  */
31 
32  /***************************************************************************/
33 
34 
35 #include <stdlib.h>
36 #include <stdarg.h>
37 #include <iostream>
38 
39 #include "html.h"
40 #include "HTMLParser.h"
41 #include "cmp_nocase.h"
42 
43 /* ------------------------------------------------------------------------- */
44 
45 /*
46  * Define some helpers.
47  */
48 
49 #define define_foreach(T, args, action) \
50 void foreach args { \
51   for (T::const_iterator i = l.begin(); i != l.end(); ++i) { \
52     action; \
53   } \
54 }
55 
56 #define pack(T) \
57 define_foreach(list<auto_ptr<T> >, ( \
58   const list<auto_ptr<T> > &l, \
59   ostream                  &os, \
60   ostream_manipulator      separator \
61 ), (*i)->unparse(os, separator))
62 
63 static pack(Element)
64 static pack(TableCell)
65 static pack(TableRow)
66 static pack(ListItem)
67 static pack(Option)
68 static pack(DefinitionListItem)
69 static pack(Script)
70 static pack(Style)
71 
72 #undef pack
73 
74 /*
75  * Special helper for "const auto_ptr<list<TagAttribute> > &".
76  */
77 static ostream &operator<<(ostream &os, const auto_ptr<list<TagAttribute> > &a)
78 {
79   if (a.get()) {
80     const list<TagAttribute> &al(*a);
81     list<TagAttribute>::const_iterator i;
82     for (i = al.begin(); i != al.end(); ++i) {
83       os << " " << (*i).first << "=\"" << (*i).second << "\"";
84     }
85   }
86   return os;
87 }
88 
89 /* ------------------------------------------------------------------------- */
90 
/*
 * A sibling of "endl" that inserts nothing: it hands the stream back
 * untouched, so it can be passed where a separator is required but no
 * output is wanted.
 */
static ostream &none(ostream &os)
{
  return os;
}
95 
96 /* ------------------------------------------------------------------------- */
97 
98 /*
99  * Some C++ compilers (e.g. EGCS 2.91.66) have problems if all virtual
100  * methods of a class are inline or pure virtual, so we define the virtual
101  * "Element::~Element()", which is the only virtual method, non-inline,
102  * although it is empty.
103  */
~Element()104 Element::~Element()
105 {
106 }
107 
108 /* ------------------------------------------------------------------------- */
109 
110 void
unparse(ostream & os,ostream_manipulator separator)111 Document::unparse(ostream &os, ostream_manipulator separator) const
112 {
113   os << "<HTML" << attributes << ">" << separator;
114   head.unparse(os, separator);
115   body.unparse(os, separator);
116   os << "</HTML>" << separator;
117 }
118 
119 void
unparse(ostream & os,ostream_manipulator separator)120 Head::unparse(ostream &os, ostream_manipulator separator) const
121 {
122   os << "<HEAD>" << separator;
123   if (title.get()) {
124     os << "<TITLE>" << separator;
125     title->unparse(os, separator);
126     os << "</TITLE>" << separator;
127   }
128   if (isindex_attributes.get()) {
129     os << "<ISINDEX"<< isindex_attributes << ">" << std::endl;
130   }
131   if (base_attributes.get()) os << "<BASE" << base_attributes << ">" << std::endl;
132   foreach(scripts, os, separator);
133   foreach(styles, os, separator);
134   if (meta_attributes.get()) os << "<META" << meta_attributes << ">" << std::endl;
135   if (link_attributes.get()) os << "<LINK" << link_attributes << ">" << std::endl;
136   os << "</HEAD>" << separator;
137 }
138 
139 void
unparse(ostream & os,ostream_manipulator separator)140 Script::unparse(ostream &os, ostream_manipulator separator) const
141 {
142   os
143     << "<SCRIPT" << attributes << ">" << separator
144     << text
145     << "</SCRIPT>" << separator;
146 }
147 
148 void
unparse(ostream & os,ostream_manipulator separator)149 Style::unparse(ostream &os, ostream_manipulator separator) const
150 {
151   os
152     << "<STYLE" << attributes << ">" << separator
153     << text
154     << "</STYLE>" << separator;
155 }
156 
157 void
unparse(ostream & os,ostream_manipulator separator)158 Body::unparse(ostream &os, ostream_manipulator separator) const
159 {
160   os << "<BODY" << attributes << ">" << separator;
161   if (content.get()) foreach(*content, os, separator);
162   os << "</BODY>" << separator;
163 }
164 
165 void
unparse(ostream & os,ostream_manipulator separator)166 PCData::unparse(ostream &os, ostream_manipulator separator) const
167 {
168   for (string::size_type j = 0; j < text.length(); ++j) {
169     char c = text[j];
170     switch (((int) c) & 255) {
171     case LATIN1_nbsp: os << "&nbsp;"; break;
172     case '&':         os << "&amp;";  break;
173     case '<':         os << "&lt;";   break;
174     case '>':         os << "&gt;";   break;
175     case '"':         os << "&quot;"; break;
176     default:
177       if (c & 0x80) {
178         os << "&#" << (((int) c) & 255) << ";";
179       } else {
180         os << c;
181       }
182       break;
183     }
184   }
185 
186   os << separator;
187 }
188 
189 void
unparse(ostream & os,ostream_manipulator separator)190 Heading::unparse(ostream &os, ostream_manipulator separator) const
191 {
192   os << "<H" << level << attributes << ">" << separator;
193   if (content.get()) foreach(*content, os, separator);
194   os << "</H" << level << ">" << separator;
195 }
196 
197 void
unparse(ostream & os,ostream_manipulator separator)198 Paragraph::unparse(ostream &os, ostream_manipulator separator) const
199 {
200   os << "<P" << attributes << ">" << separator;
201   if (texts.get()) foreach(*texts, os, separator);
202   os << "</P>" << separator;
203 }
204 
205 void
unparse(ostream & os,ostream_manipulator separator)206 Image::unparse(ostream &os, ostream_manipulator separator) const
207 {
208   os << "<IMG" << attributes << ">" << separator;
209 }
210 
211 void
unparse(ostream & os,ostream_manipulator separator)212 Applet::unparse(ostream &os, ostream_manipulator separator) const
213 {
214   os << "<APPLET" << attributes << ">" << separator;
215   if (content.get()) foreach(*content, os, separator);
216   os << "</APPLET>" << separator;
217 }
218 
219 void
unparse(ostream & os,ostream_manipulator separator)220 Param::unparse(ostream &os, ostream_manipulator separator) const
221 {
222   os << "<PARAM" << attributes << ">" << separator;
223 }
224 
225 void
unparse(ostream & os,ostream_manipulator separator)226 Division::unparse(ostream &os, ostream_manipulator separator) const
227 {
228   os << "<DIV" << attributes << ">" << separator;
229   if (body_content.get()) foreach(*body_content, os, separator);
230   os << "</DIV>" << separator;
231 }
232 
233 void
unparse(ostream & os,ostream_manipulator separator)234 Center::unparse(ostream &os, ostream_manipulator separator) const
235 {
236   os << "<CENTER>" << separator;
237   if (body_content.get()) foreach(*body_content, os, separator);
238   os << "</CENTER>" << separator;
239 }
240 
241 void
unparse(ostream & os,ostream_manipulator separator)242 BlockQuote::unparse(ostream &os, ostream_manipulator separator) const
243 {
244   os << "<BLOCKQUOTE>" << separator;
245   if (content.get()) foreach(*content, os, separator);
246   os << "</BLOCKQUOTE>" << separator;
247 }
248 
249 void
unparse(ostream & os,ostream_manipulator separator)250 Address::unparse(ostream &os, ostream_manipulator separator) const
251 {
252   os << "<ADDRESS>" << separator;
253   if (content.get()) foreach(*content, os, separator);
254   os << "</ADDRESS>" << separator;
255 }
256 
257 void
unparse(ostream & os,ostream_manipulator separator)258 Form::unparse(ostream &os, ostream_manipulator separator) const
259 {
260   os << "<FORM" << attributes << ">" << separator;
261   if (content.get()) foreach(*content, os, separator);
262   os << "</FORM>" << separator;
263 }
264 
265 void
unparse(ostream & os,ostream_manipulator separator)266 Preformatted::unparse(ostream &os, ostream_manipulator separator) const
267 {
268   os << "<PRE" << attributes << ">" << separator;
269   if (texts.get()) { foreach(*texts, os, none); os << separator; }
270   os << "</PRE>" << separator;
271 }
272 
273 void
unparse(ostream & os,ostream_manipulator separator)274 HorizontalRule::unparse(ostream &os, ostream_manipulator separator) const
275 {
276   os << "<HR" << attributes << ">" << separator;
277 }
278 
279 void
unparse(ostream & os,ostream_manipulator separator)280 Input::unparse(ostream &os, ostream_manipulator separator) const
281 {
282   os << "<INPUT" << attributes << ">" << separator;
283 }
284 
285 void
unparse(ostream & os,ostream_manipulator separator)286 Option::unparse(ostream &os, ostream_manipulator separator) const
287 {
288   os << "<OPTION" << attributes << ">" << separator;
289   if (pcdata.get()) pcdata->unparse(os, separator);
290   os << "</OPTION>" << std::endl;
291 }
292 
293 void
unparse(ostream & os,ostream_manipulator separator)294 Select::unparse(ostream &os, ostream_manipulator separator) const
295 {
296   os << "<SELECT" << attributes << ">" << separator;
297   if (content.get()) foreach(*content, os, separator);
298   os << "</SELECT>" << std::endl;
299 }
300 
301 void
unparse(ostream & os,ostream_manipulator separator)302 TextArea::unparse(ostream &os, ostream_manipulator separator) const
303 {
304   os << "<TEXTAREA" << attributes << ">" << separator;
305   if (pcdata.get()) pcdata->unparse(os, separator);
306   os << "</TEXTAREA>" << std::endl;
307 }
308 
309 /* ------------------------------------------------------------------------- */
310 
311 void
unparse(ostream & os,ostream_manipulator separator)312 OrderedList::unparse(ostream &os, ostream_manipulator separator) const
313 {
314   os << "<OL" << attributes << ">" << separator;
315   if (items.get()) foreach(*items, os, separator);
316   os << "</OL>" << separator;
317 }
318 
319 void
unparse(ostream & os,ostream_manipulator separator)320 UnorderedList::unparse(ostream &os, ostream_manipulator separator) const
321 {
322   os << "<UL" << attributes << ">" << separator;
323   if (items.get()) foreach(*items, os, separator);
324   os << "</UL>" << separator;
325 }
326 
327 void
unparse(ostream & os,ostream_manipulator separator)328 Dir::unparse(ostream &os, ostream_manipulator separator) const
329 {
330   os << "<DIR" << attributes << ">" << separator;
331   if (items.get()) foreach(*items, os, separator);
332   os << "</DIR>" << separator;
333 }
334 
335 void
unparse(ostream & os,ostream_manipulator separator)336 Menu::unparse(ostream &os, ostream_manipulator separator) const
337 {
338   os << "<MENU" << attributes << ">" << separator;
339   if (items.get()) foreach(*items, os, separator);
340   os << "</MENU>" << separator;
341 }
342 
343 void
unparse(ostream & os,ostream_manipulator separator)344 ListNormalItem::unparse(ostream &os, ostream_manipulator separator) const
345 {
346   os << "<LI" <<attributes << ">" << separator;
347   if (flow.get()) foreach(*flow, os, separator);
348   os << "</LI>" << separator;
349 }
350 
351 void
unparse(ostream & os,ostream_manipulator separator)352 ListBlockItem::unparse(ostream &os, ostream_manipulator separator) const
353 {
354   if (block.get()) block->unparse(os, separator);
355 }
356 
357 /* ------------------------------------------------------------------------- */
358 
359 void
unparse(ostream & os,ostream_manipulator separator)360 DefinitionList::unparse(ostream &os, ostream_manipulator separator) const
361 {
362   os << "<DL" << attributes << ">" << separator;
363   if (items.get()) foreach(*items, os, separator);
364   os << "</DL>" << separator;
365 }
366 
367 void
unparse(ostream & os,ostream_manipulator separator)368 TermName::unparse(ostream &os, ostream_manipulator separator) const
369 {
370   os << "<DT>" << separator;
371   if (flow.get()) foreach(*flow, os, separator);
372   os << "</DT>" << separator;
373 }
374 
375 void
unparse(ostream & os,ostream_manipulator separator)376 TermDefinition::unparse(ostream &os, ostream_manipulator separator) const
377 {
378   os << "<DD>" << separator;
379   if (flow.get()) foreach(*flow, os, separator);
380   os << "</DD>" << separator;
381 }
382 
383 /* ------------------------------------------------------------------------- */
384 
385 void
unparse(ostream & os,ostream_manipulator separator)386 Table::unparse(ostream &os, ostream_manipulator separator) const
387 {
388   os << "<TABLE" << attributes << ">" << separator;
389   if (caption.get()) caption->unparse(os, separator);
390   if (rows.get()) foreach(*rows, os, separator);
391   os << "</TABLE>" << separator;
392 }
393 
394 void
unparse(ostream & os,ostream_manipulator separator)395 NoBreak::unparse(ostream &os, ostream_manipulator separator) const
396 {
397   os << "<NOBR>" << separator;
398   if (content.get()) foreach(*content, os, separator);
399   os << "</NOBR>" << separator;
400 }
401 
402 void
unparse(ostream & os,ostream_manipulator separator)403 Font::unparse(ostream &os, ostream_manipulator separator) const
404 {
405   const char *tag = (
406     attribute == HTMLParser::TT     ? "TT" :
407     attribute == HTMLParser::I      ? "I" :
408     attribute == HTMLParser::B      ? "B" :
409     attribute == HTMLParser::U      ? "U" :
410     attribute == HTMLParser::STRIKE ? "STRIKE" :
411     attribute == HTMLParser::BIG    ? "BIG" :
412     attribute == HTMLParser::SMALL  ? "SMALL" :
413     attribute == HTMLParser::SUB    ? "SUB" :
414     attribute == HTMLParser::SUP    ? "SUP" :
415     "???"
416   );
417 
418   os << "<" << tag << ">" << separator;
419   if (texts.get()) foreach(*texts, os, separator);
420   os << "</" << tag << ">" << separator;
421 }
422 
423 void
unparse(ostream & os,ostream_manipulator separator)424 Phrase::unparse(ostream &os, ostream_manipulator separator) const
425 {
426   const char *tag = (
427     attribute == HTMLParser::EM     ? "EM" :
428     attribute == HTMLParser::STRONG ? "STRONG" :
429     attribute == HTMLParser::DFN    ? "DFN" :
430     attribute == HTMLParser::CODE   ? "CODE" :
431     attribute == HTMLParser::SAMP   ? "SAMP" :
432     attribute == HTMLParser::KBD    ? "KBD" :
433     attribute == HTMLParser::VAR    ? "VAR" :
434     attribute == HTMLParser::CITE   ? "CITE" :
435     "???"
436   );
437 
438   os << "<" << tag << ">" << separator;
439   if (texts.get()) foreach(*texts, os, separator);
440   os << "</" << tag << ">" << separator;
441 }
442 
443 void
unparse(ostream & os,ostream_manipulator separator)444 Anchor::unparse(ostream &os, ostream_manipulator separator) const
445 {
446   os << "<A" << attributes << ">" << separator;
447   if (texts.get()) foreach(*texts, os, separator);
448   os << "</A>" << separator;
449 }
450 
451 void
unparse(ostream & os,ostream_manipulator separator)452 BaseFont::unparse(ostream &os, ostream_manipulator separator) const
453 {
454   os << "<BASEFONT" << attributes << ">" << separator;
455 }
456 
457 void
unparse(ostream & os,ostream_manipulator separator)458 LineBreak::unparse(ostream &os, ostream_manipulator separator) const
459 {
460   os << "<BR" << attributes << ">" << separator;
461 }
462 
463 void
unparse(ostream & os,ostream_manipulator separator)464 Map::unparse(ostream &os, ostream_manipulator separator) const
465 {
466   os << "<MAP" << attributes << ">" << separator;
467   if (areas.get()) {
468     const list<auto_ptr<list<TagAttribute> > >           &al(*areas);
469     list<auto_ptr<list<TagAttribute> > >::const_iterator i;
470     for (i = al.begin(); i != al.end(); ++i) {
471       os << "<AREA" << *i << ">" << separator;
472     }
473   }
474   os << "</MAP>" << separator;
475 }
476 
477 void
unparse(ostream & os,ostream_manipulator separator)478 Font2::unparse(ostream &os, ostream_manipulator separator) const
479 {
480   os << "<FONT" << attributes << ">" << separator;
481   if (elements.get()) foreach(*elements, os, separator);
482   os << "</FONT>" << separator;
483 }
484 
485 void
unparse(ostream & os,ostream_manipulator separator)486 TableRow::unparse(ostream &os, ostream_manipulator separator) const
487 {
488   os << "<TR" << attributes << ">" << separator;
489   if (cells.get()) foreach(*cells, os, separator);
490   os << "</TR>" << separator;
491 }
492 
493 void
unparse(ostream & os,ostream_manipulator separator)494 TableCell::unparse(ostream &os, ostream_manipulator separator) const
495 {
496   os << "<TD" << attributes << ">" << separator;
497   if (content.get()) foreach(*content, os, separator);
498   os << "</TD>" << separator;
499 }
500 
501 void
unparse(ostream & os,ostream_manipulator separator)502 TableHeadingCell::unparse(ostream &os, ostream_manipulator separator) const
503 {
504   os << "<TH" << attributes << ">" << separator;
505   if (content.get()) foreach(*content, os, separator);
506   os << "</TH>" << separator;
507 }
508 
509 void
unparse(ostream & os,ostream_manipulator separator)510 Caption::unparse(ostream &os, ostream_manipulator separator) const
511 {
512   os << "<CAPTION" << attributes << ">" << separator;
513   if (texts.get()) foreach(*texts, os, separator);
514   os << "</CAPTION>" << separator;
515 }
516 
517 /* ------------------------------------------------------------------------- */
518 
519 string
get_attribute(const list<TagAttribute> * as,const char * name,const char * dflt)520 get_attribute(
521   const list<TagAttribute> *as,
522   const char               *name,
523   const char               *dflt
524 )
525 {
526   if (as) {
527     list<TagAttribute>::const_iterator i;
528     for (i = as->begin(); i != as->end(); ++i) {
529       if (cmp_nocase((*i).first, name) == 0) return string((*i).second);
530     }
531   }
532   return string(dflt);
533 }
534 
535 // *exists is set to false if attribute *name does not exist - Johannes Geiger
536 
537 string
get_attribute(const list<TagAttribute> * as,const char * name,bool * exists)538 get_attribute(
539   const list<TagAttribute> *as,
540   const char               *name,
541   bool                     *exists
542 )
543 {
544   *exists = true;
545   if (as) {
546     list<TagAttribute>::const_iterator i;
547     for (i = as->begin(); i != as->end(); ++i) {
548       if (cmp_nocase((*i).first, name) == 0) return string((*i).second);
549     }
550   }
551   *exists = false;
552   return string("");
553 }
554 
555 
556 int
get_attribute(const list<TagAttribute> * as,const char * name,int dflt)557 get_attribute(
558   const list<TagAttribute> *as,
559   const char               *name,
560   int                      dflt
561 )
562 {
563   if (as) {
564     list<TagAttribute>::const_iterator i;
565     for (i = as->begin(); i != as->end(); ++i) {
566       if (cmp_nocase((*i).first, name) == 0) return atoi((*i).second.c_str());
567     }
568   }
569   return dflt;
570 }
571 
572 int
get_attribute(const list<TagAttribute> * as,const char * name,int dflt,const char * s1,int v1,...)573 get_attribute(
574   const list<TagAttribute> *as,
575   const char               *name,
576   int                      dflt,
577   const char               *s1,
578   int                      v1,
579   ...
580 )
581 {
582   if (as) {
583     list<TagAttribute>::const_iterator i;
584     for (i = as->begin(); i != as->end(); ++i) {
585       if (cmp_nocase((*i).first, name) == 0) {
586         const char *s = s1;
587         int        v = v1;
588 
589         va_list va;
590         va_start(va, v1);
591         for (;;) {
592           if (cmp_nocase(s, (*i).second) == 0) break;
593           s = va_arg(va, const char *);
594           if (!s) { v = dflt; break; }
595           v = va_arg(va, int);
596         }
597         va_end(va);
598         return v;
599       }
600     }
601   }
602   return dflt;
603 }
604 
605 int
get_attribute(const list<TagAttribute> * as,const char * name,const char * dflt1,int dflt2,const char * s1,int v1,...)606 get_attribute(
607   const list<TagAttribute> *as,
608   const char               *name,   // Attribute name
609   const char               *dflt1,  // If attribute not specified
610   int                      dflt2,   // If string value does not match s1, ...
611   const char               *s1,
612   int                      v1,
613   ...
614 )
615 {
616   if (as) {
617     list<TagAttribute>::const_iterator i;
618     for (i = as->begin(); i != as->end(); ++i) {
619       if (cmp_nocase((*i).first, name) == 0) {
620 	dflt1 = (*i).second.c_str();
621         break;
622       }
623     }
624   }
625 
626   if (!dflt1) return dflt2;
627 
628   const char *s = s1;
629   int        v = v1;
630 
631   va_list va;
632   va_start(va, v1);
633   for (;;) {
634     if (cmp_nocase(s, dflt1) == 0) break;
635     s = va_arg(va, const char *);
636     if (!s) break;
637     v = va_arg(va, int);
638   }
639   va_end(va);
640 
641   return s ? v : dflt2;
642 }
643 
644 /* ------------------------------------------------------------------------- */
645 
646