1 /*									 HTML.c
2 **	SIMPLE HTML PARSER WITHOUT ANY PRESENTATION CODE
3 **
4 **	(c) COPYRIGHT MIT 1995.
5 **	Please first read the full copyright statement in the file COPYRIGH.
6 **	@(#) $Id$
7 **
**	This generates a hypertext object.  It converts from the
**	structured stream interface of HTML events into the style-
**	oriented interface of the HText interface.
11 **
12 ** HISTORY:
13 **	 8 Jul 94  FM	Insulate free() from _free structure element.
14 */
15 
16 /* Library include files */
17 #include "wwwsys.h"
18 #include "WWWUtil.h"
19 #include "WWWCore.h"
20 #include "WWWHTML.h"
21 #include "HTML.h"
22 #include "HTextImp.h"
23 
/*
**	Shorthands for calling the methods of the target stream of a
**	structured object `t' through the target's class structure (isa).
**	The first macro argument is always the structured object itself.
*/
#define PUTC(t,c)	(*(t)->target->isa->put_character)((t)->target, (c))
#define PUTS(t,s)	(*(t)->target->isa->put_string)((t)->target, (s))
#define PUTB(t,b,l)	(*(t)->target->isa->put_block)((t)->target, (b), (l))
#define FLUSH_TARGET(t)	(*(t)->target->isa->flush)((t)->target)
#define FREE_TARGET(t)	(*(t)->target->isa->_free)((t)->target)

/* NOTE: relies on a variable named `e' being in scope where it is used */
#define ABORT_TARGET(t)	(*(t)->target->isa->abort)((t)->target, e)

/* Number of slots in the element stack of struct _HTStructured */
#define MAX_NESTING 40
32 
33 struct _HTStream {
34     const HTStreamClass *	isa;
35     /* .... */
36 };
37 
38 struct _HTStructured {
39     const HTStructuredClass * 	isa;
40     HTRequest *			request;
41     HTParentAnchor * 		node_anchor;
42     HTextImp * 			text;
43     HTStream *			target;
44     HTChunk * 			title;
45     BOOL			in_word;
46     SGML_dtd *			dtd;
47     char *			comment_start;	/* for literate programming */
48     char *			comment_end;
49     BOOL			started;
50 
51     int				overflow;
52     int * 			sp;
53     int	 			stack[MAX_NESTING];
54 };
55 
56 /*
57 ** 	Entity values -- for ISO Latin 1 local representation
58 **	This MUST match exactly the table referred to in the DTD!
59 */
60 static char * ISO_Latin1[HTML_ENTITIES] = {
61 /* 00 */
62   	"\306",	/* capital AE diphthong (ligature) */
63   	"\301",	/* capital A, acute accent */
64   	"\302",	/* capital A, circumflex accent */
65   	"\300",	/* capital A, grave accent */
66   	"\305",	/* capital A, ring */
67   	"\303",	/* capital A, tilde */
68   	"\304",	/* capital A, dieresis or umlaut mark */
69   	"\307",	/* capital C, cedilla */
70   	"\320",	/* capital Eth, Icelandic */
71   	"\311",	/* capital E, acute accent */
72 /* 10 */
73   	"\312",	/* capital E, circumflex accent */
74   	"\310",	/* capital E, grave accent */
75   	"\313",	/* capital E, dieresis or umlaut mark */
76   	"\315",	/* capital I, acute accent */
77   	"\316",	/* capital I, circumflex accent */
78   	"\314",	/* capital I, grave accent */
79   	"\317",	/* capital I, dieresis or umlaut mark */
80   	"\321",	/* capital N, tilde */
81   	"\323",	/* capital O, acute accent */
82   	"\324",	/* capital O, circumflex accent */
83 /* 20 */
84   	"\322",	/* capital O, grave accent */
85   	"\330",	/* capital O, slash */
86   	"\325",	/* capital O, tilde */
87   	"\326",	/* capital O, dieresis or umlaut mark */
88   	"\336",	/* capital THORN, Icelandic */
89   	"\332",	/* capital U, acute accent */
90   	"\333",	/* capital U, circumflex accent */
91   	"\331",	/* capital U, grave accent */
92   	"\334",	/* capital U, dieresis or umlaut mark */
93   	"\335",	/* capital Y, acute accent */
94 /* 30 */
95   	"\341",	/* small a, acute accent */
96   	"\342",	/* small a, circumflex accent */
97   	"\264",	/* acute accent */
98   	"\346",	/* small ae diphthong (ligature) */
99   	"\340",	/* small a, grave accent */
100   	"\046",	/* ampersand */
101   	"\345",	/* small a, ring */
102   	"\343",	/* small a, tilde */
103   	"\344",	/* small a, dieresis or umlaut mark */
104         "\246",	/* broken vertical bar */
105 /* 40 */
106   	"\347",	/* small c, cedilla */
107 	"\270",	/* cedilla */
108 	"\242", /* cent sign */
109         "\251",	/* copyright */
110         "\244",	/* general currency sign */
111   	"\260",	/* degree sign */
112   	"\367",	/* division sign */
113   	"\351",	/* small e, acute accent */
114   	"\352",	/* small e, circumflex accent */
115   	"\350",	/* small e, grave accent */
116 /* 50 */
117   	"\360",	/* small eth, Icelandic */
118   	"\353",	/* small e, dieresis or umlaut mark */
119   	"\275",	/* fraction one-half */
120   	"\274",	/* fraction one-fourth */
121   	"\276",	/* fraction three-fourth */
122   	"\076",	/* greater than */
123   	"\355",	/* small i, acute accent */
124   	"\356",	/* small i, circumflex accent */
125 	"\241", /* inverted exclamation */
126   	"\354",	/* small i, grave accent */
127 /* 60 */
128   	"\277",	/* inverted question mark */
129   	"\357",	/* small i, dieresis or umlaut mark */
130   	"\253",	/* left angle quote */
131   	"\074",	/* less than */
132   	"\257",	/* macron accent */
133   	"\265",	/* micro sign (greek mu) */
134   	"\267",	/* middle dot */
135 	"\040", /* non-breaking space */
136   	"\254",	/* not sign */
137   	"\361",	/* small n, tilde */
138 /* 70 */
139   	"\363",	/* small o, acute accent */
140   	"\364",	/* small o, circumflex accent */
141   	"\362",	/* small o, grave accent */
142   	"\252",	/* feminine ordinal */
143   	"\272",	/* masculine ordinal */
144   	"\370",	/* small o, slash */
145   	"\365",	/* small o, tilde */
146   	"\366",	/* small o, dieresis or umlaut mark */
147   	"\266",	/* paragraph sign */
148   	"\261",	/* plus or minus */
149 /* 80 */
150 	"\243", /* pound sign */
151         "\042", /* double quote sign - June 94 */
152 	"\273",	/* right angle quote */
153   	"\256",	/* registered trademark */
154 	"\247", /* section sign */
155   	"\255",	/* soft hyphen */
156   	"\271",	/* superscript 1 */
157   	"\262",	/* superscript 2 */
158   	"\263",	/* superscript 3 */
159   	"\337",	/* small sharp s, German (sz ligature) */
160 /* 90 */
161   	"\376",	/* small thorn, Icelandic */
162   	"\327",	/* multiply sign */
163   	"\372",	/* small u, acute accent */
164   	"\373",	/* small u, circumflex accent */
165   	"\371",	/* small u, grave accent */
166         "\250",	/* dieresis or umlaut mark */
167   	"\374",	/* small u, dieresis or umlaut mark */
168   	"\375",	/* small y, acute accent */
169 	"\245", /* yen sign */
170   	"\377"	/* small y, dieresis or umlaut mark */
171 /* 100 */
172 };
173 
174 PRIVATE char ** CurrentEntityValues = ISO_Latin1;
175 
HTMLUseCharacterSet(HTMLCharacterSet i)176 PUBLIC BOOL HTMLUseCharacterSet (HTMLCharacterSet i)
177 {
178     if (i == HTML_ISO_LATIN1) {
179 	CurrentEntityValues = ISO_Latin1;
180 	return YES;
181     } else {
182 	HTTRACE(SGML_TRACE, "HTML Parser. Doesn't support this character set\n");
183 	return NO;
184     }
185 }
186 
HTML_write(HTStructured * me,const char * b,int l)187 PRIVATE int HTML_write (HTStructured * me, const char * b, int l)
188 {
189     if (!me->started) {
190 	HTextImp_build(me->text, HTEXT_BEGIN);
191 	me->started = YES;
192     }
193 
194     /* Look at what we got */
195     switch (me->sp[0]) {
196 
197     case HTML_TITLE:
198 	HTChunk_putb(me->title, b, l);
199 	/* Fall through */
200 
201     default:
202 	HTextImp_addText(me->text, b, l);
203     }
204     return HT_OK;
205 }
206 
HTML_put_character(HTStructured * me,char c)207 PRIVATE int HTML_put_character (HTStructured * me, char c)
208 {
209     return HTML_write(me, &c, sizeof(char));
210 }
211 
HTML_put_string(HTStructured * me,const char * s)212 PRIVATE int HTML_put_string (HTStructured * me, const char* s)
213 {
214     return HTML_write(me, s, (int) strlen(s));
215 }
216 
HTML_start_element(HTStructured * me,int element_number,const BOOL * present,const char ** value)217 PRIVATE void HTML_start_element (HTStructured *	me,
218 				 int		element_number,
219 				 const BOOL * 	present,
220 				 const char **	value)
221 {
222     HTChildAnchor * address = NULL;
223     if (!me->started) {
224 	HTextImp_build(me->text, HTEXT_BEGIN);
225 	me->started = YES;
226     }
227 
228     /* Look at what element was started */
229     switch (element_number) {
230     case HTML_A:
231 	if (present[HTML_A_HREF] && value[HTML_A_HREF]) {
232 	    address = HTAnchor_findChildAndLink(
233 		me->node_anchor,					/* parent */
234 		present[HTML_A_NAME] ? value[HTML_A_NAME] : NULL,	/* Tag */
235 		value[HTML_A_HREF],					/* Addresss */
236 		present[HTML_A_REL] && value[HTML_A_REL] ?
237 		(HTLinkType) HTAtom_caseFor(value[HTML_A_REL]) : NULL);
238 
239 	    if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
240 		HTLink * link = HTAnchor_mainLink((HTAnchor *) address);
241 		HTParentAnchor * dest = HTAnchor_parent(HTLink_destination(link));
242 		if (!HTAnchor_title(dest)) HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
243 	    }
244 	    HTextImp_foundLink(me->text, element_number, HTML_A_HREF,
245 			       address, present, value);
246 	    HTTRACE(SGML_TRACE, "HTML Parser. Anchor `%s\'\n" _ value[HTML_A_HREF]);
247 	}
248 	break;
249 
250     case HTML_AREA:
251 	if (present[HTML_AREA_HREF] && value[HTML_AREA_HREF]) {
252 	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
253 						value[HTML_AREA_HREF], NULL);
254 	    HTextImp_foundLink(me->text, element_number, HTML_AREA_HREF,
255 			       address, present, value);
256 	    HTTRACE(SGML_TRACE, "HTML Parser. Image map area `%s\'\n" _ value[HTML_AREA_HREF]);
257 	}
258 	break;
259 
260     case HTML_BASE:
261 	if (present[HTML_BASE_HREF] && value[HTML_BASE_HREF]) {
262 	    HTAnchor_setBase(me->node_anchor, (char *) value[HTML_BASE_HREF]);
263 	    HTTRACE(SGML_TRACE, "HTML Parser. New base `%s\'\n" _ value[HTML_BASE_HREF]);
264 	}
265 	break;
266 
267     case HTML_BODY:
268 	if (present[HTML_BODY_BACKGROUND] && value[HTML_BODY_BACKGROUND]) {
269 	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
270 						value[HTML_BODY_BACKGROUND], NULL);
271 	    HTextImp_foundLink(me->text, element_number, HTML_BODY_BACKGROUND,
272 			       address, present, value);
273 	    HTTRACE(SGML_TRACE, "HTML Parser. Background `%s\'\n" _ value[HTML_BODY_BACKGROUND]);
274 	}
275 	break;
276 
277     case HTML_FORM:
278 	if (present[HTML_FORM_ACTION] && value[HTML_FORM_ACTION]) {
279 	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
280 						value[HTML_FORM_ACTION], NULL);
281 	    HTextImp_foundLink(me->text, element_number, HTML_FORM_ACTION,
282 			       address, present, value);
283 	}
284 	break;
285 
286     case HTML_FRAME:
287 	if (present[HTML_FRAME_SRC] && value[HTML_FRAME_SRC]) {
288 	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
289 						value[HTML_FRAME_SRC], NULL);
290 	    HTextImp_foundLink(me->text, element_number, HTML_FRAME_SRC,
291 			       address, present, value);
292 	    HTTRACE(SGML_TRACE, "HTML Parser. Frame `%s\'\n" _ value[HTML_FRAME_SRC]);
293 	}
294 	break;
295 
296     case HTML_INPUT:
297 	if (present[HTML_INPUT_SRC] && value[HTML_INPUT_SRC]) {
298 	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
299 						value[HTML_INPUT_SRC], NULL);
300 	    HTextImp_foundLink(me->text, element_number, HTML_INPUT_SRC,
301 			       address, present, value);
302 	}
303 	break;
304 
305     case HTML_IMG:
306 	if (present[HTML_IMG_SRC] && value[HTML_IMG_SRC]) {
307 	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
308 						value[HTML_IMG_SRC], NULL);
309 	    HTextImp_foundLink(me->text, element_number, HTML_IMG_SRC,
310 			       address, present, value);
311 	}
312 	break;
313 
314     case HTML_ISINDEX:
315    	HTAnchor_setIndex(me->node_anchor);
316 	break;
317 
318     case HTML_LINK:
319 	if (present[HTML_LINK_HREF] && value[HTML_LINK_HREF]) {
320 	    HTParentAnchor * dest = NULL;
321 	    address = HTAnchor_findChildAndLink(
322 		me->node_anchor,					/* parent */
323 		present[HTML_A_NAME] ? value[HTML_A_NAME] : NULL,	/* Tag */
324 		present[HTML_A_HREF] ? value[HTML_A_HREF] : NULL,	/* Addresss */
325 		NULL);							/* Rels */
326 	    dest = HTAnchor_parent(HTAnchor_followMainLink((HTAnchor *) address));
327 
328 	    /* If forward reference */
329 	    if ((present[HTML_LINK_REL] && value[HTML_LINK_REL])) {
330 		char * strval = NULL;
331 		char * ptr = NULL;
332 		char * relation = NULL;
333 		StrAllocCopy(strval, value[HTML_LINK_REL]);
334 		ptr = strval;
335 		while ((relation = HTNextLWSToken(&ptr)) != NULL) {
336 		    HTLink_add((HTAnchor *) me->node_anchor, (HTAnchor *) dest,
337 			       (HTLinkType) HTAtom_caseFor(relation),
338 			       METHOD_INVALID);
339 		}
340 		HT_FREE(strval);
341 	    }
342 
343 	    /* If reverse reference */
344 	    if ((present[HTML_LINK_REV] && value[HTML_LINK_REV])) {
345 		char * strval = NULL;
346 		char * ptr = NULL;
347 		char * relation = NULL;
348 		StrAllocCopy(strval, value[HTML_LINK_REV]);
349 		ptr = strval;
350 		while ((relation = HTNextLWSToken(&ptr)) != NULL) {
351 		    HTLink_add((HTAnchor *) dest, (HTAnchor *) me->node_anchor,
352 			       (HTLinkType) HTAtom_caseFor(relation),
353 			       METHOD_INVALID);
354 		}
355 		HT_FREE(strval);
356 	    }
357 
358 	    /* If we got any type information as well */
359 	    if (present[HTML_LINK_TYPE] && value[HTML_LINK_TYPE]) {
360 		if (HTAnchor_format(dest) == WWW_UNKNOWN)
361 		    HTAnchor_setFormat(dest,
362 				       (HTFormat) HTAtom_caseFor(value[HTML_LINK_TYPE]));
363 	    }
364 
365 	    /* Call out to the layout engine */
366 	    HTextImp_foundLink(me->text, element_number, HTML_LINK_HREF,
367 			       address, present, value);
368 	}
369 	break;
370 
371     case HTML_META:
372 	if (present[HTML_META_NAME] && value[HTML_META_NAME]) {
373 	    HTAnchor_addMeta (me->node_anchor,
374 			      value[HTML_META_NAME],
375 			      (present[HTML_META_CONTENT] && value[HTML_META_CONTENT]) ?
376 			      value[HTML_META_CONTENT] : "");
377 	}
378 	break;
379 
380     case HTML_OBJECT:
381 	if (present[HTML_OBJECT_CLASSID] && value[HTML_OBJECT_CLASSID]) {
382 	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
383 						value[HTML_OBJECT_CLASSID], NULL);
384 	    HTextImp_foundLink(me->text, element_number, HTML_OBJECT_CLASSID,
385 			       address, present, value);
386 	}
387 
388 	if (present[HTML_OBJECT_CODEBASE] && value[HTML_OBJECT_CODEBASE]) {
389 	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
390 						value[HTML_OBJECT_CODEBASE], NULL);
391 	    HTextImp_foundLink(me->text, element_number, HTML_OBJECT_CODEBASE,
392 			       address, present, value);
393 	}
394 
395 	if (present[HTML_OBJECT_DATA] && value[HTML_OBJECT_DATA]) {
396 	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
397 						value[HTML_OBJECT_DATA], NULL);
398 	    HTextImp_foundLink(me->text, element_number, HTML_OBJECT_DATA,
399 			       address, present, value);
400 	}
401 
402 	if (present[HTML_OBJECT_ARCHIVE] && value[HTML_OBJECT_ARCHIVE]) {
403 	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
404 						value[HTML_OBJECT_ARCHIVE], NULL);
405 	    HTextImp_foundLink(me->text, element_number, HTML_OBJECT_ARCHIVE,
406 			       address, present, value);
407 	}
408 
409 	if (present[HTML_OBJECT_USEMAP] && value[HTML_OBJECT_USEMAP]) {
410 	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
411 						value[HTML_OBJECT_USEMAP], NULL);
412 	    HTextImp_foundLink(me->text, element_number, HTML_OBJECT_USEMAP,
413 			       address, present, value);
414 	}
415 	break;
416 
417     case HTML_PRE:
418     	if (me->comment_end)
419 	    HTextImp_addText(me->text, me->comment_end, strlen(me->comment_end));
420 	break;
421 
422     case HTML_TITLE:
423         HTChunk_truncate(me->title,0);
424 	break;
425     }
426 
427     /* Update our parse stack */
428     if (SGML_findTagContents(me->dtd, element_number) != SGML_EMPTY) {
429         if (me->sp == me->stack) {
430 	    HTTRACE(SGML_TRACE, "HTML Parser. Maximum nesting of %d exceded!\n" _ MAX_NESTING);
431 	    me->overflow++;
432 	    return;
433 	}
434     	--(me->sp);
435 	me->sp[0] = element_number;
436     }
437 
438     /* Call out to the layout engine */
439     HTextImp_beginElement(me->text, element_number, present, value);
440 }
441 
HTML_end_element(HTStructured * me,int element_number)442 PRIVATE void HTML_end_element (HTStructured * me, int element_number)
443 {
444     if (!me->started) {
445 	HTextImp_build(me->text, HTEXT_BEGIN);
446 	me->started = YES;
447     }
448 
449     /* Update our parse stack */
450     if (me->overflow > 0) {
451 	me->overflow--;
452 	return;
453     }
454     me->sp++;
455     if (me->sp > me->stack + MAX_NESTING - 1) {
456 	HTTRACE(SGML_TRACE, "HTML Parser. Bottom of parse stack reached\n");
457 	me->sp = me->stack + MAX_NESTING - 1;
458     }
459 
460     /* Look at what element was closed */
461     switch(element_number) {
462     case HTML_TITLE:
463     	HTAnchor_setTitle(me->node_anchor, HTChunk_data(me->title));
464 	break;
465 
466     case HTML_PRE:
467     	if (me->comment_start)
468 	    HTextImp_addText(me->text, me->comment_start, strlen(me->comment_start));
469 	break;
470     }
471 
472     /* Call out to the layout engine */
473     HTextImp_endElement(me->text, element_number);
474 }
475 
HTML_put_entity(HTStructured * me,int entity_number)476 PRIVATE void HTML_put_entity (HTStructured * me, int entity_number)
477 {
478     if (!me->started) {
479 	HTextImp_build(me->text, HTEXT_BEGIN);
480 	me->started = YES;
481     }
482     if (entity_number>=0 && entity_number<HTML_ENTITIES)
483 	HTML_put_string(me, *(CurrentEntityValues+entity_number));
484 }
485 
HTML_flush(HTStructured * me)486 PUBLIC int HTML_flush (HTStructured * me)
487 {
488     if (!me->started) {
489 	HTextImp_build(me->text, HTEXT_BEGIN);
490 	me->started = YES;
491     }
492     if (me->comment_end) HTML_put_string(me, me->comment_end);
493     return me->target ? FLUSH_TARGET(me) : HT_OK;
494 }
495 
HTML_unparsedBeginElement(HTStructured * me,const char * b,int l)496 PRIVATE int HTML_unparsedBeginElement (HTStructured * me, const char * b, int l)
497 {
498     if (!me->started) {
499 	HTextImp_build(me->text, HTEXT_BEGIN);
500 	me->started = YES;
501     }
502     HTextImp_unparsedBeginElement(me->text, b, l);
503     return HT_OK;
504 }
505 
HTML_unparsedEndElement(HTStructured * me,const char * b,int l)506 PRIVATE int HTML_unparsedEndElement (HTStructured * me, const char * b, int l)
507 {
508     if (!me->started) {
509 	HTextImp_build(me->text, HTEXT_BEGIN);
510 	me->started = YES;
511     }
512     HTextImp_unparsedEndElement(me->text, b, l);
513     return HT_OK;
514 }
515 
HTML_unparsedEntity(HTStructured * me,const char * b,int l)516 PRIVATE int HTML_unparsedEntity (HTStructured * me, const char * b, int l)
517 {
518     if (!me->started) {
519 	HTextImp_build(me->text, HTEXT_BEGIN);
520 	me->started = YES;
521     }
522     HTextImp_unparsedEntity(me->text, b, l);
523     return HT_OK;
524 }
525 
HTML_free(HTStructured * me)526 PUBLIC int HTML_free (HTStructured * me)
527 {
528     if (!me->started) HTextImp_build(me->text, HTEXT_BEGIN);
529     if (me->comment_end) HTML_put_string(me, me->comment_end);
530     HTextImp_build(me->text, HTEXT_END);
531     HTextImp_delete(me->text);
532     HTChunk_delete(me->title);
533     if (me->target) FREE_TARGET(me);
534     HT_FREE(me);
535     return HT_OK;
536 }
537 
HTML_abort(HTStructured * me,HTList * e)538 PRIVATE int HTML_abort (HTStructured * me, HTList * e)
539 {
540     if (!me->started) HTextImp_build(me->text, HTEXT_BEGIN);
541     HTextImp_build(me->text, HTEXT_ABORT);
542     HTextImp_delete(me->text);
543     HTChunk_delete(me->title);
544     if (me->target) ABORT_TARGET(me);
545     HT_FREE(me);
546     return HT_ERROR;
547 }
548 
549 /*	Structured Object Class
550 **	-----------------------
551 */
552 PRIVATE const HTStructuredClass HTMLPresentation = /* As opposed to print etc */
553 {
554     "text/html",
555     HTML_flush,
556     HTML_free,
557     HTML_abort,
558     HTML_put_character,
559     HTML_put_string,
560     HTML_write,
561     HTML_start_element,
562     HTML_end_element,
563     HTML_put_entity,
564     HTML_unparsedBeginElement,
565     HTML_unparsedEndElement,
566     HTML_unparsedEntity
567 };
568 
569 /*	Structured Text object
570 **	----------------------
571 **
572 **	The structured stream can generate either presentation,
573 **	or plain text, or HTML.
574 */
HTML_new(HTRequest * request,void * param,HTFormat input_format,HTFormat output_format,HTStream * output_stream)575 PRIVATE HTStructured * HTML_new (HTRequest *	request,
576 				 void *		param,
577 				 HTFormat	input_format,
578 				 HTFormat	output_format,
579 				 HTStream *	output_stream)
580 {
581     HTStructured * me = NULL;
582     if (request) {
583 	if ((me = (HTStructured *) HT_CALLOC(1, sizeof(HTStructured))) == NULL)
584 	    HT_OUTOFMEM("HTML_new");
585 	me->isa = &HTMLPresentation;
586 	me->dtd = HTML_dtd();
587 	me->request = request;
588 	me->node_anchor =  HTRequest_anchor(request);
589 	me->title = HTChunk_new(128);
590 	me->comment_start = NULL;
591 	me->comment_end = NULL;
592 	me->target = output_stream;
593 	me->sp = me->stack + MAX_NESTING - 1;
594 
595 	/* Create the text object */
596 	me->text = HTextImp_new(me->request, me->node_anchor, me->target);
597     }
598     return me;
599 }
600 
601 /*	HTConverter for HTML to plain text
602 **	----------------------------------
603 **
604 **	This will convert from HTML to presentation or plain text.
605 */
HTMLToPlain(HTRequest * request,void * param,HTFormat input_format,HTFormat output_format,HTStream * output_stream)606 PUBLIC HTStream * HTMLToPlain (HTRequest *	request,
607 			       void *		param,
608 			       HTFormat		input_format,
609 			       HTFormat		output_format,
610 			       HTStream *	output_stream)
611 {
612     return SGML_new(HTML_dtd(), HTML_new(
613     	request, NULL, input_format, output_format, output_stream));
614 }
615 
616 
617 /*	HTConverter for HTML to C code
618 **	------------------------------
619 **
620 **	C code is like plain text but all non-preformatted code
621 **	is commented out.
622 **	This will convert from HTML to presentation or plain text.
623 */
HTMLToC(HTRequest * request,void * param,HTFormat input_format,HTFormat output_format,HTStream * output_stream)624 PUBLIC HTStream * HTMLToC (HTRequest *	request,
625 			   void *	param,
626 			   HTFormat	input_format,
627 			   HTFormat	output_format,
628 			   HTStream *	output_stream)
629 {
630     if (output_stream) {
631 	HTStructured * html = NULL;
632 	(*output_stream->isa->put_string)(output_stream, "/* "); /* Before title */
633 	html = HTML_new(request, NULL, input_format, output_format, output_stream);
634 	html->comment_start = "\n/* ";
635 	html->dtd = HTML_dtd();
636 	html->comment_end = " */\n";	/* Must start in col 1 for cpp */
637 	return SGML_new(HTML_dtd(), html);
638     } else
639 	return HTErrorStream();
640 }
641 
642 
643 /*	Presenter for HTML
644 **	------------------
645 **
646 **	This will convert from HTML to presentation or plain text.
647 **
648 **	Override this if you have a windows version
649 */
HTMLPresent(HTRequest * request,void * param,HTFormat input_format,HTFormat output_format,HTStream * output_stream)650 PUBLIC HTStream * HTMLPresent (HTRequest *	request,
651 			       void *		param,
652 			       HTFormat		input_format,
653 			       HTFormat		output_format,
654 			       HTStream *	output_stream)
655 {
656     return SGML_new(HTML_dtd(), HTML_new(
657     	request, NULL, input_format, output_format, output_stream));
658 }
659 
660