1 #if !defined(lint) && !defined(DOS)
2 static char rcsid[] = "$Id: filter.c 1266 2009-07-14 18:39:12Z hubert@u.washington.edu $";
3 #endif
4 
5 /*
6  * ========================================================================
7  * Copyright 2013-2021 Eduardo Chappa
8  * Copyright 2006-2008 University of Washington
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  *     http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * ========================================================================
17  */
18 
19 /*======================================================================
20      filter.c
21 
22      This code provides a generalized, flexible way to allow
23      piping of data thru filters.  Each filter is passed a structure
24      that it will use to hold its static data while it operates on
25      the stream of characters that are passed to it.  After processing
26      it will either return or call the next filter in
27      the pipe with any character (or characters) it has ready to go. This
28      means some terminal type of filter has to be the last in the
29      chain (i.e., one that writes the passed char someplace, but doesn't
30      call another filter).
31 
32      See below for more details.
33 
34      The motivation is to handle MIME decoding, richtext conversion,
35      iso_code stripping and anything else that may come down the
36      pike (e.g., PEM) in an elegant fashion.  mikes (920811)
37 
38    TODO:
39        reasonable error handling
40 
41   ====*/
42 
43 
44 #include "../pith/headers.h"
45 #include "../pith/filter.h"
46 #include "../pith/conf.h"
47 #include "../pith/store.h"
48 #include "../pith/color.h"
49 #include "../pith/escapes.h"
50 #include "../pith/pipe.h"
51 #include "../pith/status.h"
52 #include "../pith/string.h"
53 #include "../pith/util.h"
54 #include "../pith/url.h"
55 #include "../pith/init.h"
56 #include "../pith/help.h"
57 #include "../pico/keydefs.h"
58 
59 #ifdef _WINDOWS
60 #include "../pico/osdep/mswin.h"
61 #endif
62 
63 
64 /*
65  * Internal prototypes
66  */
67 int	gf_so_writec(int);
68 int	gf_so_readc(unsigned char *);
69 int	gf_freadc(unsigned char *);
70 int	gf_freadc_locale(unsigned char *);
71 int	gf_freadc_getchar(unsigned char *, void *);
72 int	gf_fwritec(int);
73 int	gf_fwritec_locale(int);
74 #ifdef _WINDOWS
75 int	gf_freadc_windows(unsigned char *);
76 #endif /* _WINDOWS */
77 int	gf_preadc(unsigned char *);
78 int	gf_preadc_locale(unsigned char *);
79 int	gf_preadc_getchar(unsigned char *, void *);
80 int	gf_pwritec(int);
81 int	gf_pwritec_locale(int);
82 int	gf_sreadc(unsigned char *);
83 int	gf_sreadc_locale(unsigned char *);
84 int	gf_sreadc_getchar(unsigned char *, void *);
85 int	gf_swritec(int);
86 int	gf_swritec_locale(int);
87 void	gf_terminal(FILTER_S *, int);
88 void    gf_error(char *);
89 char   *gf_filter_puts(char *);
90 void	gf_filter_eod(void);
91 
92 void	gf_8bit_put(FILTER_S *, int);
93 
94 
95 
96 /*
97  * System specific options
98  */
99 #ifdef _WINDOWS
100 #define CRLF_NEWLINES
101 #endif
102 
103 
/*
 * Hooks for callers to adjust behavior.
 *
 * Both are function pointers that this part of the file neither assigns
 * nor calls.  Presumably a front end installs them to supply display
 * names for variables and features -- TODO confirm against the code
 * that actually uses them.
 */
char *(*pith_opt_pretty_var_name)(char *);
char *(*pith_opt_pretty_feature_name)(char *, int);
109 
110 
/*
 * State shared by the filter pipe routines:
 *
 *   gf_master       -- head of the linked list of filters; filters are
 *                      appended by gf_link_filter() and the list is
 *                      freed by gf_filter_init()
 *   last_filter     -- output function installed by gf_set_terminal();
 *                      the terminal filter hands every octet to it
 *   gf_error_string -- message recorded by gf_error()
 *   gf_byte_count   -- number of octets gf_pipe() has read so far
 *   gf_error_state  -- jump buffer gf_error() uses to get back to
 *                      whoever called setjmp()
 */
FILTER_S         *gf_master = NULL;
static	gf_io_t   last_filter;
static	char     *gf_error_string;
static	long	  gf_byte_count;
static	jmp_buf   gf_error_state;
119 
120 
/*
 * Flags handed to a filter function as its second argument to tell it
 * why it is being called.
 */
#define	GF_NOOP		0x01
#define GF_EOD		0x02		/* end of data, dump what's buffered */
#define GF_DATA		0x04		/* data is waiting in the queue      */
#define GF_ERROR	0x08
#define GF_RESET	0x10		/* set up initial state              */


/*
 * A list of states used by the various filters.  Reused in many filters.
 */
#define	DFL	0
#define	EQUAL	1
#define	HEX	2
#define	WSPACE	3
#define	CCR	4
#define	CLF	5
#define	TOKEN	6
#define	TAG	7
#define	HANDLE	8
#define	HDATA	9
#define	ESC	10
#define	ESCDOL	11
#define	ESCPAR	12
#define	EUC	13
#define	BOL	14
#define	FL_QLEV	15
#define	FL_STF	16
#define	FL_SIG	17
#define	STOP_DECODING	18
#define	SPACECR	19
151 
152 
153 
/*
 * Macros to reduce function call overhead associated with calling
 * each filter for each byte filtered, and to minimize filter structure
 * dereferences.  NOTE: "queuein" has to do with putting chars into the
 * filter structs data queue.  So, writing at the queuein offset is
 * what a filter does to pass processed data out of itself.  Ditto for
 * queueout.  This explains the FI --> queueout init stuff below.
 *
 * Most of these macros silently use the local variables op, eob, ip
 * and eib, which GF_INIT declares:
 *
 *    op  -- next octet to take out of the input filter's queue
 *    eob -- end of the data waiting in the input filter's queue
 *    ip  -- next free slot in the output filter's queue
 *    eib -- last slot of the output filter's queue
 */

/* first and last slot of a filter's queue */
#define	GF_QUE_START(F)	(&(F)->queue[0])
#define	GF_QUE_END(F)	(&(F)->queue[GF_MAXBUF - 1])

/* load the local pointers from a filter's queue offsets (NULL filter ok) */
#define	GF_IP_INIT(F)	ip  = (F) ? &(F)->queue[(F)->queuein] : NULL
#define	GF_IP_INIT_GLO(F)  (*ipp)  = (F) ? &(F)->queue[(F)->queuein] : NULL
#define	GF_EIB_INIT(F)	eib = (F) ? GF_QUE_END(F) : NULL
#define	GF_EIB_INIT_GLO(F)  (*eibp) = (F) ? GF_QUE_END(F) : NULL
#define	GF_OP_INIT(F)	op  = (F) ? &(F)->queue[(F)->queueout] : NULL
#define	GF_EOB_INIT(F)	eob = (F) ? &(F)->queue[(F)->queuein] : NULL

/* store the local pointers back into a filter's queue offsets */
#define	GF_IP_END(F)	(F)->queuein  = ip - GF_QUE_START(F)
#define	GF_IP_END_GLO(F)  (F)->queuein  = (unsigned char *)(*ipp) - (unsigned char *)GF_QUE_START(F)
#define	GF_OP_END(F)	(F)->queueout = op - GF_QUE_START(F)

/*
 * Declare and initialize op/eob from the input filter FI and ip/eib
 * from the output filter FO.  This expands to declarations, so it has
 * to appear where declarations are allowed.
 */
#define	GF_INIT(FI, FO)	unsigned char *GF_OP_INIT(FI);	 \
			unsigned char *GF_EOB_INIT(FI); \
			unsigned char *GF_IP_INIT(FO);  \
			unsigned char *GF_EIB_INIT(FO);

/* mark the queue of F as empty; the expression's value is 0 */
#define	GF_CH_RESET(F)	(op = eob = GF_QUE_START(F), \
					    (F)->queueout = (F)->queuein = 0)

/* save both sets of local pointers back into their filters */
#define	GF_END(FI, FO)	(GF_OP_END(FI), GF_IP_END(FO))

/*
 * Hand the queued octets to filter F by calling it with GF_DATA, then
 * reload the local pointers from F's queue offsets.
 */
#define	GF_FLUSH(F)	((GF_IP_END(F), (*(F)->f)((F), GF_DATA), \
			       GF_IP_INIT(F), GF_EIB_INIT(F)) ? 1 : 0)
#define	GF_FLUSH_GLO(F)	((GF_IP_END_GLO(F), (*(F)->f)((F), GF_DATA), \
			       GF_IP_INIT_GLO(F), GF_EIB_INIT_GLO(F)) ? 1 : 0)

/*
 * Queue octet C for filter F, flushing as soon as the write pointer
 * reaches the last slot of the queue.
 */
#define	GF_PUTC(F, C)	((int)(*ip++ = (C), (ip >= eib) ? GF_FLUSH(F) : 1))
#define	GF_PUTC_GLO(F, C) ((int)(*(*ipp)++ = (C), ((*ipp) >= (*eibp)) ? GF_FLUSH_GLO(F) : 1))

/*
 * The *_GLO macros exist so that the big macros can be split out
 * into functions (wrap_flush, wrap_eol).  A separate set of macros is
 * needed because of the vars ip, eib, op, and eob, which are
 * set up locally in a call to GF_INIT.  To preserve these variables
 * in the new functions, pointers to these four vars are passed.  Each
 * of these new functions expects the presence of pointer vars
 * ipp, eibp, opp, and eobp.
 */

/*
 * Fetch the next queued octet of filter F into C and yield 1.  When the
 * queue is drained it is reset instead and the expression yields 0.
 */
#define	GF_GETC(F, C)	((op < eob) ? (((C) = *op++), 1) : GF_CH_RESET(F))
205 
/*
 * Write the color pair C to the filter following F as two embedded
 * tags: TAG_EMBED TAG_FGCOLOR followed by the ASCII RGB form of C->fg,
 * then TAG_EMBED TAG_BGCOLOR followed by the ASCII RGB form of C->bg.
 *
 * Built on GF_PUTC_GLO, so the pointer vars ipp and eibp have to be in
 * scope where it is used.  The expansion is a bare { } block rather
 * than a do { } while(0), so take care when using it as the body of an
 * if that has an else.
 */
#define GF_COLOR_PUTC(F, C) {                                            \
                              char *p;                                   \
                              char cb[RGBLEN+1];                         \
                              GF_PUTC_GLO((F)->next, TAG_EMBED);         \
                              GF_PUTC_GLO((F)->next, TAG_FGCOLOR);       \
			      strncpy(cb, color_to_asciirgb((C)->fg), sizeof(cb)); \
                              cb[sizeof(cb)-1] = '\0';                   \
			      p = cb;                                    \
                              for(; *p; p++)                             \
                                GF_PUTC_GLO((F)->next, *p);              \
                              GF_PUTC_GLO((F)->next, TAG_EMBED);         \
                              GF_PUTC_GLO((F)->next, TAG_BGCOLOR);       \
			      strncpy(cb, color_to_asciirgb((C)->bg), sizeof(cb)); \
                              cb[sizeof(cb)-1] = '\0';                   \
			      p = cb;                                    \
                              for(; *p; p++)                             \
                                GF_PUTC_GLO((F)->next, *p);              \
                            }
224 
/*
 * Generalized getc and putc routines, provided here so they don't
 * need to be re-done elsewhere.
 */
229 
/*
 * Objects used by the generic getc and putc functions.  gf_in
 * describes the current input (filled in by gf_set_readc) and gf_out
 * the current output (filled in by gf_set_writec).
 */
static struct gf_io_struct {
    FILE          *file;	/* the object when it is a FileStar       */
    PIPE_S        *pipe;	/* the object when it is a PipeStar       */
    char          *txtp;	/* next octet for any other object type   */
    unsigned long  n;		/* length given at setup; the string      */
				/* reader and writer count it down        */
    int            flags;	/* flags given at setup                   */
    CBUF_S         cb;		/* charset conversion state between calls */
} gf_in, gf_out;

/*
 * Stacks of storage objects for the storage object reader and writer.
 * gf_so_readc and gf_so_writec always use the entry on top.
 */
#define	GF_SO_STACK	struct gf_so_stack
static GF_SO_STACK {
    STORE_S	*so;		/* the storage object                     */
    GF_SO_STACK *next;		/* entry pushed before this one           */
} *gf_so_in, *gf_so_out;
248 
249 
250 
251 /*
252  * Returns 1 if pc will write into a PicoText object, 0 otherwise.
253  *
254  * The purpose of this routine is so that we can avoid setting SIGALARM
255  * when writing into a PicoText object, because that type of object uses
256  * unprotected malloc/free/realloc, which can't be interrupted.
257  */
258 int
pc_is_picotext(gf_io_t pc)259 pc_is_picotext(gf_io_t pc)
260 {
261     return(pc == gf_so_writec && gf_so_out && gf_so_out->so &&
262 	   gf_so_out->so->src == ExternalText);
263 }
264 
265 
266 
267 /*
268  * setup to use and return a pointer to the generic
269  * getc function
270  */
271 void
gf_set_readc(gf_io_t * gc,void * txt,long unsigned int len,SourceType src,int flags)272 gf_set_readc(gf_io_t *gc, void *txt, long unsigned int len, SourceType src, int flags)
273 {
274     gf_in.n = len;
275     gf_in.flags = flags;
276     gf_in.cb.cbuf[0] = '\0';
277     gf_in.cb.cbufp   = gf_in.cb.cbuf;
278     gf_in.cb.cbufend = gf_in.cb.cbuf;
279 
280     if(src == FileStar){
281 	gf_in.file = (FILE *)txt;
282 	fseek(gf_in.file, 0L, 0);
283 #ifdef _WINDOWS
284 	*gc = (flags & READ_FROM_LOCALE) ? gf_freadc_windows
285 					 : gf_freadc;
286 #else /* UNIX */
287 	*gc = (flags & READ_FROM_LOCALE) ? gf_freadc_locale
288 					 : gf_freadc;
289 #endif /* UNIX */
290     }
291     else if(src == PipeStar){
292 	gf_in.pipe = (PIPE_S *)txt;
293 	*gc = gf_preadc;
294 	*gc = (flags & READ_FROM_LOCALE) ? gf_preadc_locale
295 					 : gf_preadc;
296     }
297     else{
298 	gf_in.txtp = (char *)txt;
299 	*gc = (flags & READ_FROM_LOCALE) ? gf_sreadc_locale
300 					 : gf_sreadc;
301     }
302 }
303 
304 
305 /*
306  * setup to use and return a pointer to the generic
307  * putc function
308  */
309 void
gf_set_writec(gf_io_t * pc,void * txt,long unsigned int len,SourceType src,int flags)310 gf_set_writec(gf_io_t *pc, void *txt, long unsigned int len, SourceType src, int flags)
311 {
312     gf_out.n = len;
313     gf_out.flags = flags;
314     gf_out.cb.cbuf[0] = '\0';
315     gf_out.cb.cbufp   = gf_out.cb.cbuf;
316     gf_out.cb.cbufend = gf_out.cb.cbuf;
317 
318     if(src == FileStar){
319 	gf_out.file = (FILE *)txt;
320 #ifdef _WINDOWS
321 	*pc =                             gf_fwritec;
322 #else /* UNIX */
323 	*pc = (flags & WRITE_TO_LOCALE) ? gf_fwritec_locale
324 					: gf_fwritec;
325 #endif /* UNIX */
326     }
327     else if(src == PipeStar){
328 	gf_out.pipe = (PIPE_S *)txt;
329 	*pc = (flags & WRITE_TO_LOCALE) ? gf_pwritec_locale
330 					: gf_pwritec;
331     }
332     else{
333 	gf_out.txtp = (char *)txt;
334 	*pc = (flags & WRITE_TO_LOCALE) ? gf_swritec_locale
335 					: gf_swritec;
336     }
337 }
338 
339 
340 /*
341  * setup to use and return a pointer to the generic
342  * getc function
343  */
344 void
gf_set_so_readc(gf_io_t * gc,STORE_S * so)345 gf_set_so_readc(gf_io_t *gc, STORE_S *so)
346 {
347     GF_SO_STACK *sp = (GF_SO_STACK *) fs_get(sizeof(GF_SO_STACK));
348 
349     sp->so   = so;
350     sp->next = gf_so_in;
351     gf_so_in = sp;
352     *gc      = gf_so_readc;
353 }
354 
355 
356 void
gf_clear_so_readc(STORE_S * so)357 gf_clear_so_readc(STORE_S *so)
358 {
359     GF_SO_STACK *sp;
360 
361     if((sp = gf_so_in) != NULL){
362 	if(so == sp->so){
363 	    gf_so_in = gf_so_in->next;
364 	    fs_give((void **) &sp);
365 	}
366 	else
367 	  alpine_panic("Programmer botch: Can't unstack store readc");
368     }
369     else
370       alpine_panic("Programmer botch: NULL store clearing store readc");
371 }
372 
373 
374 /*
375  * setup to use and return a pointer to the generic
376  * putc function
377  */
378 void
gf_set_so_writec(gf_io_t * pc,STORE_S * so)379 gf_set_so_writec(gf_io_t *pc, STORE_S *so)
380 {
381     GF_SO_STACK *sp = (GF_SO_STACK *) fs_get(sizeof(GF_SO_STACK));
382 
383     sp->so    = so;
384     sp->next  = gf_so_out;
385     gf_so_out = sp;
386     *pc       = gf_so_writec;
387 }
388 
389 
390 void
gf_clear_so_writec(STORE_S * so)391 gf_clear_so_writec(STORE_S *so)
392 {
393     GF_SO_STACK *sp;
394 
395     if((sp = gf_so_out) != NULL){
396 	if(so == sp->so){
397 	    gf_so_out = gf_so_out->next;
398 	    fs_give((void **) &sp);
399 	}
400 	else
401 	  alpine_panic("Programmer botch: Can't unstack store writec");
402     }
403     else
404       alpine_panic("Programmer botch: NULL store clearing store writec");
405 }
406 
407 
408 /*
409  * put the character to the object previously defined
410  */
411 int
gf_so_writec(int c)412 gf_so_writec(int c)
413 {
414     return(so_writec(c, gf_so_out->so));
415 }
416 
417 
418 /*
419  * get a character from an object previously defined
420  */
421 int
gf_so_readc(unsigned char * c)422 gf_so_readc(unsigned char *c)
423 {
424     return(so_readc(c, gf_so_in->so));
425 }
426 
427 
428 /* get a character from a file */
429 /* assumes gf_out struct is filled in */
430 int
gf_freadc(unsigned char * c)431 gf_freadc(unsigned char *c)
432 {
433     int rv = 0;
434 
435     do {
436 	errno = 0;
437 	clearerr(gf_in.file);
438 	rv = fread(c, sizeof(unsigned char), (size_t)1, gf_in.file);
439     } while(!rv && ferror(gf_in.file) && errno == EINTR);
440 
441     return(rv);
442 }
443 
444 
445 int
gf_freadc_locale(unsigned char * c)446 gf_freadc_locale(unsigned char *c)
447 {
448     return(generic_readc_locale(c, gf_freadc_getchar, (void *) gf_in.file, &gf_in.cb));
449 }
450 
451 
/*
 * gf_freadc_getchar - file reader in the shape generic_readc_locale
 *                     wants
 *
 * Args:        c -- where to put the character read
 *       extraarg -- the FILE * to read from
 *
 * A read that fails with EINTR is retried.
 *
 * Returns 1 if a character was stored in *c, 0 otherwise.
 */
int
gf_freadc_getchar(unsigned char *c, void *extraarg)
{
    FILE *fp = (FILE *) extraarg;
    int   got;

    for(;;){
	errno = 0;
	clearerr(fp);
	got = fread(c, sizeof(unsigned char), (size_t)1, fp);

	/* done unless the read was merely interrupted */
	if(got || !ferror(fp) || errno != EINTR)
	  break;
    }

    return(got);
}
471 
472 
473 /*
474  * Put a character to a file.
475  * Assumes gf_out struct is filled in.
476  * Returns 1 on success, <= 0 on failure.
477  */
478 int
gf_fwritec(int c)479 gf_fwritec(int c)
480 {
481     unsigned char ch = (unsigned char)c;
482     int rv = 0;
483 
484     do
485       rv = fwrite(&ch, sizeof(unsigned char), (size_t)1, gf_out.file);
486     while(!rv && ferror(gf_out.file) && errno == EINTR);
487 
488     return(rv);
489 }
490 
491 
492 /*
493  * The locale version converts from UTF-8 to user's locale charset
494  * before writing the characters.
495  */
496 int
gf_fwritec_locale(int c)497 gf_fwritec_locale(int c)
498 {
499     int rv = 1;
500     int i, outchars;
501     unsigned char obuf[MAX(MB_LEN_MAX,32)];
502 
503     if((outchars = utf8_to_locale(c, &gf_out.cb, obuf, sizeof(obuf))) != 0){
504 	for(i = 0; i < outchars; i++)
505 	  if(gf_fwritec(obuf[i]) != 1){
506 	      rv = 0;
507 	      break;
508 	  }
509     }
510 
511     return(rv);
512 }
513 
514 
515 #ifdef _WINDOWS
516 /*
517  * Read unicode characters from windows filesystem and return
518  * them as a stream of UTF-8 characters. The stream is assumed
519  * opened so that it will know how to put together the unicode.
520  *
521  * (This is totally untested, copied loosely from so_file_readc_windows
522  *  which may or may not be appropriate.)
523  */
524 int
gf_freadc_windows(unsigned char * c)525 gf_freadc_windows(unsigned char *c)
526 {
527     int rv = 0;
528     UCS ucs;
529 
530     /* already got some from previous call? */
531     if(gf_in.cb.cbufend > gf_in.cb.cbuf){
532 	*c = *gf_in.cb.cbufp;
533 	gf_in.cb.cbufp++;
534 	rv++;
535 	if(gf_in.cb.cbufp >= gf_in.cb.cbufend){
536 	    gf_in.cb.cbufend = gf_in.cb.cbuf;
537 	    gf_in.cb.cbufp   = gf_in.cb.cbuf;
538 	}
539 
540 	return(rv);
541     }
542 
543     if(gf_in.file){
544 	/* windows only so second arg is ignored */
545 	ucs = read_a_wide_char(gf_in.file, NULL);
546 	rv = (ucs == CCONV_EOF) ? 0 : 1;
547     }
548 
549     if(rv){
550 	/*
551 	 * Now we need to convert the UCS character to UTF-8
552 	 * and dole out the UTF-8 one char at a time.
553 	 */
554 	gf_in.cb.cbufend = utf8_put(gf_in.cb.cbuf, (unsigned long) ucs);
555 	gf_in.cb.cbufp = gf_in.cb.cbuf;
556 	if(gf_in.cb.cbufend > gf_in.cb.cbuf){
557 	    *c = *gf_in.cb.cbufp;
558 	    gf_in.cb.cbufp++;
559 	    if(gf_in.cb.cbufp >= gf_in.cb.cbufend){
560 		gf_in.cb.cbufend = gf_in.cb.cbuf;
561 		gf_in.cb.cbufp   = gf_in.cb.cbuf;
562 	    }
563 	}
564 	else
565 	  *c = '?';
566     }
567 
568     return(rv);
569 }
570 #endif /* _WINDOWS */
571 
572 
573 int
gf_preadc(unsigned char * c)574 gf_preadc(unsigned char *c)
575 {
576     return(pipe_readc(c, gf_in.pipe));
577 }
578 
579 
580 int
gf_preadc_locale(unsigned char * c)581 gf_preadc_locale(unsigned char *c)
582 {
583     return(generic_readc_locale(c, gf_preadc_getchar, (void *) gf_in.pipe, &gf_in.cb));
584 }
585 
586 
587 /*
588  * This is just to make it work with generic_readc_locale.
589  */
590 int
gf_preadc_getchar(unsigned char * c,void * extraarg)591 gf_preadc_getchar(unsigned char *c, void *extraarg)
592 {
593     PIPE_S *pipe;
594 
595     pipe = (PIPE_S *) extraarg;
596 
597     return(pipe_readc(c, pipe));
598 }
599 
600 
601 /*
602  * Put a character to a pipe.
603  * Assumes gf_out struct is filled in.
604  * Returns 1 on success, <= 0 on failure.
605  */
606 int
gf_pwritec(int c)607 gf_pwritec(int c)
608 {
609     return(pipe_writec(c, gf_out.pipe));
610 }
611 
612 
613 /*
614  * The locale version converts from UTF-8 to user's locale charset
615  * before writing the characters.
616  */
617 int
gf_pwritec_locale(int c)618 gf_pwritec_locale(int c)
619 {
620     int rv = 1;
621     int i, outchars;
622     unsigned char obuf[MAX(MB_LEN_MAX,32)];
623 
624     if((outchars = utf8_to_locale(c, &gf_out.cb, obuf, sizeof(obuf))) != 0){
625 	for(i = 0; i < outchars; i++)
626 	  if(gf_pwritec(obuf[i]) != 1){
627 	      rv = 0;
628 	      break;
629 	  }
630     }
631 
632     return(rv);
633 }
634 
635 
636 /* get a character from a string, return nonzero if things OK */
637 /* assumes gf_out struct is filled in */
638 int
gf_sreadc(unsigned char * c)639 gf_sreadc(unsigned char *c)
640 {
641     return((gf_in.n) ? *c = *(gf_in.txtp)++, gf_in.n-- : 0);
642 }
643 
644 
645 int
gf_sreadc_locale(unsigned char * c)646 gf_sreadc_locale(unsigned char *c)
647 {
648     return(generic_readc_locale(c, gf_sreadc_getchar, NULL, &gf_in.cb));
649 }
650 
651 
/*
 * gf_sreadc_getchar - string reader in the shape generic_readc_locale
 *                     wants
 *
 * extraarg is ignored and gf_sreadc just uses globals instead.
 * That's ok as long as we don't call it more than once at a time.
 *
 * Returns whatever gf_sreadc returns.
 */
int
gf_sreadc_getchar(unsigned char *c, void *extraarg)
{
    int got;

    got = gf_sreadc(c);

    return(got);
}
661 
662 
663 /*
664  * Put a character to a string.
665  * Assumes gf_out struct is filled in.
666  * Returns 1 on success, <= 0 on failure.
667  */
668 int
gf_swritec(int c)669 gf_swritec(int c)
670 {
671     return((gf_out.n) ? *(gf_out.txtp)++ = c, gf_out.n-- : 0);
672 }
673 
674 
675 /*
676  * The locale version converts from UTF-8 to user's locale charset
677  * before writing the characters.
678  */
679 int
gf_swritec_locale(int c)680 gf_swritec_locale(int c)
681 {
682     int rv = 1;
683     int i, outchars;
684     unsigned char obuf[MAX(MB_LEN_MAX,32)];
685 
686     if((outchars = utf8_to_locale(c, &gf_out.cb, obuf, sizeof(obuf))) != 0){
687 	for(i = 0; i < outchars; i++)
688 	  if(gf_swritec(obuf[i]) != 1){
689 	      rv = 0;
690 	      break;
691 	  }
692     }
693 
694     return(rv);
695 }
696 
697 
698 /*
699  * output the given string with the given function
700  */
701 int
gf_puts(register char * s,gf_io_t pc)702 gf_puts(register char *s, gf_io_t pc)
703 {
704     while(*s != '\0')
705       if(!(*pc)((unsigned char)*s++))
706 	return(0);		/* ERROR putting char ! */
707 
708     return(1);
709 }
710 
711 
712 /*
713  * output the given string with the given function
714  */
715 int
gf_nputs(register char * s,long int n,gf_io_t pc)716 gf_nputs(register char *s, long int n, gf_io_t pc)
717 {
718     while(n--)
719       if(!(*pc)((unsigned char)*s++))
720 	return(0);		/* ERROR putting char ! */
721 
722     return(1);
723 }
724 
725 
726 /*
727  * Read a stream of multi-byte characters from the
728  * user's locale charset and return a stream of
729  * UTF-8 characters, one at a time. The input characters
730  * are obtained by using the get_a_char function.
731  *
732  * Args        c -- the returned octet
733  *    get_a_char -- function to get a single octet of the multibyte
734  *                  character. The first arg of that function is the
735  *                  returned value and the second arg is for the
736  *                  functions use. The second arg is replaced with
737  *                  extraarg when it is called.
738  *      extraarg -- The second arg to get_a_char.
739  *            cb -- Storage area for state between calls to this func.
740  */
741 int
generic_readc_locale(unsigned char * c,int (* get_a_char)(unsigned char *,void *),void * extraarg,CBUF_S * cb)742 generic_readc_locale(unsigned char *c,
743 		     int (*get_a_char)(unsigned char *, void *),
744 		     void *extraarg,
745 		     CBUF_S *cb)
746 {
747     unsigned long octets_so_far = 0, remaining_octets;
748     unsigned char *inputp;
749     unsigned char ch;
750     UCS ucs;
751     unsigned char inputbuf[20];
752     int rv = 0;
753     int got_one = 0;
754 
755     /* already got some from previous call? */
756     if(cb->cbufend > cb->cbuf){
757 	*c = *cb->cbufp;
758 	cb->cbufp++;
759 	rv++;
760 	if(cb->cbufp >= cb->cbufend){
761 	    cb->cbufend = cb->cbuf;
762 	    cb->cbufp   = cb->cbuf;
763 	}
764 
765 	return(rv);
766     }
767 
768     memset(inputbuf, 0, sizeof(inputbuf));
769     if((*get_a_char)(&ch, extraarg) == 0)
770       return(0);
771 
772     inputbuf[octets_so_far++] = ch;
773 
774     while(!got_one){
775 	remaining_octets = octets_so_far;
776 	inputp = inputbuf;
777 	ucs = mbtow(ps_global->input_cs, &inputp, &remaining_octets);
778 	switch(ucs){
779 	  case CCONV_BADCHAR:
780 	    return(rv);
781 
782 	  case CCONV_NEEDMORE:
783 /*
784  * Do we need to do something with the characters we've
785  * collected that don't form a valid UCS character?
786  * Probably need to try discarding them one at a time
787  * from the front instead of just throwing them all out.
788  */
789 	    if(octets_so_far >= sizeof(inputbuf))
790 	      return(rv);
791 
792 	    if((*get_a_char)(&ch, extraarg) == 0)
793 	      return(rv);
794 
795 	    inputbuf[octets_so_far++] = ch;
796 	    break;
797 
798 	  default:
799 	    /* got a good UCS-4 character */
800 	    got_one++;
801 	    break;
802 	}
803     }
804 
805     /*
806      * Now we need to convert the UCS character to UTF-8
807      * and dole out the UTF-8 one char at a time.
808      */
809     rv++;
810     cb->cbufend = utf8_put(cb->cbuf, (unsigned long) ucs);
811     cb->cbufp = cb->cbuf;
812     if(cb->cbufend > cb->cbuf){
813 	*c = *cb->cbufp;
814 	cb->cbufp++;
815 	if(cb->cbufp >= cb->cbufend){
816 	    cb->cbufend = cb->cbuf;
817 	    cb->cbufp   = cb->cbuf;
818 	}
819     }
820     else
821       *c = '?';
822 
823     return(rv);
824 }
825 
826 
827 /*
828  * Start of generalized filter routines
829  */
830 
831 /*
832  * initializing function to make sure list of filters is empty.
833  */
834 void
gf_filter_init(void)835 gf_filter_init(void)
836 {
837     FILTER_S *flt, *fltn = gf_master;
838 
839     while((flt = fltn) != NULL){	/* free list of old filters */
840 	fltn = flt->next;
841 	fs_give((void **)&flt);
842     }
843 
844     gf_master = NULL;
845     gf_error_string = NULL;		/* clear previous errors */
846     gf_byte_count = 0L;			/* reset counter */
847 }
848 
849 
850 
851 /*
852  * link the given filter into the filter chain
853  */
854 void
gf_link_filter(filter_t f,void * data)855 gf_link_filter(filter_t f, void *data)
856 {
857     FILTER_S *new, *tail;
858 
859 #ifdef CRLF_NEWLINES
860     /*
861      * If the system's native EOL convention is CRLF, then there's no
862      * point in passing data thru a filter that's not doing anything
863      */
864     if(f == gf_nvtnl_local || f == gf_local_nvtnl)
865       return;
866 #endif
867 
868     new = (FILTER_S *)fs_get(sizeof(FILTER_S));
869     memset(new, 0, sizeof(FILTER_S));
870 
871     new->f = f;				/* set the function pointer     */
872     new->opt = data;			/* set any optional parameter data */
873     (*f)(new, GF_RESET);		/* have it setup initial state  */
874 
875     if((tail = gf_master) != NULL){	/* or add it to end of existing  */
876 	while(tail->next)		/* list  */
877 	  tail = tail->next;
878 
879 	tail->next = new;
880     }
881     else				/* attach new struct to list    */
882       gf_master = new;			/* start a new list */
883 }
884 
885 
886 /*
887  * terminal filter, doesn't call any other filters, typically just does
888  * something with the output
889  */
890 void
gf_terminal(FILTER_S * f,int flg)891 gf_terminal(FILTER_S *f, int flg)
892 {
893     if(flg == GF_DATA){
894 	GF_INIT(f, f);
895 
896 	while(op < eob)
897 	  if((*last_filter)(*op++) <= 0) /* generic terminal filter */
898 	    gf_error(errno ? error_description(errno) : "Error writing pipe");
899 
900 	GF_CH_RESET(f);
901     }
902     else if(flg == GF_RESET)
903       errno = 0;			/* prepare for problems */
904 }
905 
906 
/*
 * gf_set_terminal - install the function the terminal filter writes
 *                   with
 *
 * Args: f -- some outside gf_io_t function, for example a function to
 *            write a char to a file or into a buffer
 *
 * gf_terminal() hands every octet that reaches the end of the filter
 * chain to this function.
 */
void
gf_set_terminal(gf_io_t f)
{
    last_filter = f;
}
917 
918 
/*
 * gf_error - common function for filters to make it known that an
 *            error has occurred
 *
 * Args: s -- the error message.  Only the pointer is kept, so the
 *            string has to stay around after we're gone.
 *
 * Does not return.  Jumps back to where gf_error_state was set up
 * with setjmp(), which gf_pipe() does.
 */
void
gf_error(char *s)
{
    /* let the user know the error passed in s */
    gf_error_string = s;
    longjmp(gf_error_state, 1);
}
930 
931 
/*
 * gf_pipe - the routine that shoves each byte through the chain of
 *           filters
 *
 * Args: gc -- how to get a character
 *       pc -- how to put a character that made it thru the filters
 *
 * It sets up error handling, and the terminal function.
 * Then loops getting bytes with the given function, and passing
 * it on to the first filter in the chain.
 *
 * Returns: NULL if everything went OK, otherwise the message a filter
 *          passed to gf_error().
 */
char *
gf_pipe(gf_io_t gc, gf_io_t pc)
{
    unsigned char c;

    dprint((4, "-- gf_pipe: "));

    /*
     * set up for any errors a filter may encounter.  gf_error()
     * jumps back to here, making setjmp return non-zero.
     */
    if(setjmp(gf_error_state)){
	dprint((4, "ERROR: %s\n",
		   gf_error_string ? gf_error_string : "NULL"));
	return(gf_error_string);
    }

    /*
     * set and link in the terminal filter
     */
    gf_set_terminal(pc);
    gf_link_filter(gf_terminal, NULL);

    /*
     * while there are chars to process, send them thru the pipe.
     * NOTE: it's necessary to enclose the loop below in a block
     * as the GF_INIT macro calls some automatic var's into
     * existence.  It can't be placed at the start of gf_pipe
     * because it's useful for us to be called without filters loaded
     * when we're just being used to copy bytes between storage
     * objects.
     */
    {
	GF_INIT(gf_master, gf_master);

	while((*gc)(&c)){
	    gf_byte_count++;

#ifdef	_WINDOWS
	    /* every 1024 bytes */
	    if(!(gf_byte_count & 0x3ff))
	      /* Under windows we yield to allow event processing.
	       * Progress display is handled through the alarm()
	       * mechanism.
	       */
	      mswin_yield ();
#endif

	    GF_PUTC(gf_master, c & 0xff);
	}

	/*
	 * push whatever is still queued into the first filter, then
	 * toss an end-of-data marker down the pipe to give filters
	 * that have any buffered data the opportunity to dump it
	 */
	(void) GF_FLUSH(gf_master);
	(*gf_master->f)(gf_master, GF_EOD);
    }

    dprint((4, "done.\n"));
    return(NULL);			/* everything went OK */
}
999 
1000 
/*
 * gf_bytes_piped - return the number of bytes piped so far
 *
 * That is the number of bytes gf_pipe() has read since the count was
 * last reset by gf_filter_init().
 */
long
gf_bytes_piped(void)
{
    return(gf_byte_count);
}
1009 
1010 
1011 /*
1012  * filter the given input with the given command
1013  *
1014  *  Args: cmd -- command string to execute
1015  *	prepend -- string to prepend to filtered input
1016  *	source_so -- storage object containing data to be filtered
1017  *	pc -- function to write filtered output with
1018  *	aux_filters -- additional filters to pass data thru after "cmd"
1019  *
1020  *  Returns: NULL on success, reason for failure (not alloc'd!) on error
1021  */
1022 char *
gf_filter(char * cmd,char * prepend,STORE_S * source_so,gf_io_t pc,FILTLIST_S * aux_filters,int silent,int disable_reset,void (* pipecb_f)(PIPE_S *,int,void *))1023 gf_filter(char *cmd, char *prepend, STORE_S *source_so, gf_io_t pc,
1024 	  FILTLIST_S *aux_filters, int silent, int disable_reset,
1025 	  void (*pipecb_f)(PIPE_S *, int, void *))
1026 {
1027     unsigned char c, obuf[MAX(MB_LEN_MAX,32)];
1028     int	     flags, outchars, i;
1029     char   *errstr = NULL, buf[MAILTMPLEN];
1030     PIPE_S *fpipe;
1031     CBUF_S  cb;
1032 #ifdef	NON_BLOCKING_IO
1033     int     n;
1034 #endif
1035 
1036     dprint((4, "so_filter: \"%s\"\n", cmd ? cmd : "?"));
1037 
1038     gf_filter_init();
1039 
1040     /*
1041      * After coming back from user's pipe command we need to convert
1042      * the output from the pipe back to UTF-8.
1043      */
1044     if(ps_global->keyboard_charmap && strucmp("UTF-8", ps_global->keyboard_charmap))
1045       gf_link_filter(gf_utf8, gf_utf8_opt(ps_global->keyboard_charmap));
1046 
1047     for( ; aux_filters && aux_filters->filter; aux_filters++)
1048       gf_link_filter(aux_filters->filter, aux_filters->data);
1049 
1050     gf_set_terminal(pc);
1051     gf_link_filter(gf_terminal, NULL);
1052 
1053     cb.cbuf[0] = '\0';
1054     cb.cbufp   = cb.cbuf;
1055     cb.cbufend = cb.cbuf;
1056 
1057     /*
1058      * Spawn filter feeding it data, and reading what it writes.
1059      */
1060     so_seek(source_so, 0L, 0);
1061     flags = PIPE_WRITE | PIPE_READ | PIPE_NOSHELL
1062 			| (silent ? PIPE_SILENT : 0)
1063 			| (!disable_reset ? PIPE_RESET : 0);
1064 
1065     if((fpipe = open_system_pipe(cmd, NULL, NULL, flags, 0, pipecb_f, pipe_report_error)) != NULL){
1066 
1067 #ifdef	NON_BLOCKING_IO
1068 
1069 	if(fcntl(fileno(fpipe->in.f), F_SETFL, NON_BLOCKING_IO) == -1)
1070 	  errstr = "Can't set up non-blocking IO";
1071 
1072 	if(prepend && (fputs(prepend, fpipe->out.f) == EOF
1073 		       || fputc('\n', fpipe->out.f) == EOF))
1074 	  errstr = error_description(errno);
1075 
1076 	while(!errstr){
1077 	    /* if the pipe can't hold a K we're sunk (too bad PIPE_MAX
1078 	     * isn't ubiquitous ;).
1079 	     */
1080 	    for(n = 0; !errstr && fpipe->out.f && n < 1024; n++)
1081 	      if(!so_readc(&c, source_so)){
1082 		  fclose(fpipe->out.f);
1083 		  fpipe->out.f = NULL;
1084 	      }
1085 	      else{
1086 		  /*
1087 		   * Got a UTF-8 character from source_so.
1088 		   * We need to convert it to the user's locale charset
1089 		   * and then send the result to the pipe.
1090 		   */
1091 		  if((outchars = utf8_to_locale((int) c, &cb, obuf, sizeof(obuf))) != 0)
1092 		    for(i = 0; i < outchars && !errstr; i++)
1093 		      if(fputc(obuf[i], fpipe->out.f) == EOF)
1094 		        errstr = error_description(errno);
1095 	      }
1096 
1097 	    /*
1098 	     * Note: We clear errno here and test below, before ferror,
1099 	     *	     because *some* stdio implementations consider
1100 	     *	     EAGAIN and EWOULDBLOCK equivalent to EOF...
1101 	     */
1102 	    errno = 0;
1103 	    clearerr(fpipe->in.f); /* fix from <cananian@cananian.mit.edu> */
1104 
1105 	    while(!errstr && fgets(buf, sizeof(buf), fpipe->in.f))
1106 	      errstr = gf_filter_puts(buf);
1107 
1108 	    /* then fgets failed! */
1109 	    if(!errstr && !(errno == EAGAIN || errno == EWOULDBLOCK)){
1110 		if(feof(fpipe->in.f))		/* nothing else interesting! */
1111 		  break;
1112 		else if(ferror(fpipe->in.f))	/* bummer. */
1113 		  errstr = error_description(errno);
1114 	    }
1115 	    else if(errno == EAGAIN || errno == EWOULDBLOCK)
1116 	      clearerr(fpipe->in.f);
1117 	}
1118 
1119 #else /* !NON_BLOCKING_IO */
1120 
1121 	if(prepend && (pipe_puts(prepend, fpipe) == EOF
1122 		       || pipe_putc('\n', fpipe) == EOF))
1123 	  errstr = error_description(errno);
1124 
1125 	/*
1126 	 * Well, do the best we can, and hope the pipe we're writing
1127 	 * doesn't fill up before we start reading...
1128 	 */
1129 	while(!errstr && so_readc(&c, source_so))
1130 	  if((outchars = utf8_to_locale((int) c, &cb, obuf, sizeof(obuf))) != 0)
1131 	    for(i = 0; i < outchars && !errstr; i++)
1132 	      if(pipe_putc(obuf[i], fpipe) == EOF)
1133 		errstr = error_description(errno);
1134 
1135 	if(pipe_close_write(fpipe))
1136 	  errstr = _("Pipe command returned error.");
1137 
1138 	while(!errstr && pipe_gets(buf, sizeof(buf), fpipe))
1139 	  errstr = gf_filter_puts(buf);
1140 
1141 #endif /* !NON_BLOCKING_IO */
1142 
1143 	if(close_system_pipe(&fpipe, NULL, pipecb_f) && !errstr)
1144 	  errstr = _("Pipe command returned error.");
1145 
1146 	gf_filter_eod();
1147     }
1148     else
1149       errstr = _("Error setting up pipe command.");
1150 
1151     return(errstr);
1152 }
1153 
1154 
1155 /*
1156  * gf_filter_puts - write the given string down the filter's pipe
1157  */
1158 char *
gf_filter_puts(register char * s)1159 gf_filter_puts(register char *s)
1160 {
1161     GF_INIT(gf_master, gf_master);
1162 
1163     /*
1164      * set up for any errors a filter may encounter
1165      */
1166     if(setjmp(gf_error_state)){
1167 	dprint((4, "ERROR: gf_filter_puts: %s\n",
1168 		   gf_error_string ? gf_error_string : "NULL"));
1169 	return(gf_error_string);
1170     }
1171 
1172     while(*s)
1173       GF_PUTC(gf_master, (*s++) & 0xff);
1174 
1175     GF_END(gf_master, gf_master);
1176     return(NULL);
1177 }
1178 
1179 
1180 /*
1181  * gf_filter_eod - flush pending data filter's input queue and deliver
1182  *		   the GF_EOD marker.
1183  */
1184 void
gf_filter_eod(void)1185 gf_filter_eod(void)
1186 {
1187     GF_INIT(gf_master, gf_master);
1188     (void) GF_FLUSH(gf_master);
1189     (*gf_master->f)(gf_master, GF_EOD);
1190 }
1191 
1192 
1193 /*
1194  * END OF PIPE SUPPORT ROUTINES, BEGINNING OF FILTERS
1195  *
1196  * Filters MUST use the specified interface (pointer to filter
1197  * structure, the unsigned character buffer in that struct, and a
1198  * cmd flag), and pass each resulting octet to the next filter in the
1199  * chain.  Only the terminal filter need not call another filter.
1200  * As a result, filters share a pretty general structure.
1201  * Typically three main conditionals separate initialization from
1202  * data from end-of-data command processing.
1203  *
1204  * Lastly, being character-at-a-time, they're a little more complex
1205  * to write than filters operating on buffers because some state
1206  * must typically be kept between characters.  However, for a
1207  * little bit of complexity here, much convenience is gained later
1208  * as they can be arbitrarily chained together at run time and
1209  * consume few resources (especially memory or disk) as they work.
1210  * (NOTE 951005: even less cpu now that data between filters is passed
1211  *  via a vector.)
1212  *
1213  * A few notes about implementing filters:
1214  *
1215  *  - A generic filter template looks like:
1216  *
1217  *    void
1218  *    gf_xxx_filter(f, flg)
1219  *        FILTER_S *f;
1220  *        int       flg;
1221  *    {
1222  *	  GF_INIT(f, f->next);		// def's var's to speed queue drain
1223  *
1224  *        if(flg == GF_DATA){
1225  *	      register unsigned char c;
1226  *
1227  *	      while(GF_GETC(f, c)){	// macro taking data off input queue
1228  *	          // operate on c and pass it on here
1229  *                GF_PUTC(f->next, c);	// macro writing output queue
1230  *	      }
1231  *
1232  *	      GF_END(f, f->next);	// macro to sync pointers/offsets
1233  *	      //WARNING: DO NOT RETURN BEFORE ALL INCOMING DATA'S PROCESSED
1234  *        }
1235  *        else if(flg == GF_EOD){
1236  *            // process any buffered data here and pass it on
1237  *	      GF_FLUSH(f->next);	// flush pending data to next filter
1238  *            (*f->next->f)(f->next, GF_EOD);
1239  *        }
1240  *        else if(flg == GF_RESET){
1241  *            // initialize any data in the struct here
1242  *        }
1243  *    }
1244  *
1245  *  - Any free storage allocated during initialization (typically tied
1246  *    to the "line" pointer in FILTER_S) is the filter's responsibility
1247  *    to clean up when the GF_EOD command comes through.
1248  *
 *  - Filters must pass any GF_EOD they receive on to the next
1250  *    filter in the chain so it has the opportunity to flush
1251  *    any buffered data.
1252  *
1253  *  - All filters expect NVT end-of-lines.  The idea is to prepend
1254  *    or append either the gf_local_nvtnl or gf_nvtnl_local
 *    OS-dependent filters to the data on the appropriate end of the
1256  *    pipe for the task at hand.
1257  *
1258  *  - NOTE: As of 951004, filters no longer take their input as a single
1259  *    char argument, but rather get data to operate on via a vector
1260  *    representing the input queue in the FILTER_S structure.
1261  *
1262  */
1263 
1264 
1265 
1266 /*
1267  * BASE64 TO BINARY encoding and decoding routines below
1268  */
1269 
1270 
1271 /*
1272  * BINARY to BASE64 filter (encoding described in rfc1341)
1273  */
1274 void
gf_binary_b64(FILTER_S * f,int flg)1275 gf_binary_b64(FILTER_S *f, int flg)
1276 {
1277     static char *v =
1278             "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1279     GF_INIT(f, f->next);
1280 
1281     if(flg == GF_DATA){
1282 	register unsigned char c;
1283 	register unsigned char t = f->t;
1284 	register long n = f->n;
1285 
1286 	while(GF_GETC(f, c)){
1287 
1288 	    switch(n++){
1289 	      case 0 : case 3 : case 6 : case 9 : case 12: case 15: case 18:
1290 	      case 21: case 24: case 27: case 30: case 33: case 36: case 39:
1291 	      case 42: case 45:
1292 		GF_PUTC(f->next, v[c >> 2]);
1293 					/* byte 1: high 6 bits (1) */
1294 		t = c << 4;		/* remember high 2 bits for next */
1295 		break;
1296 
1297 	      case 1 : case 4 : case 7 : case 10: case 13: case 16: case 19:
1298 	      case 22: case 25: case 28: case 31: case 34: case 37: case 40:
1299 	      case 43:
1300 		GF_PUTC(f->next, v[(t|(c>>4)) & 0x3f]);
1301 		t = c << 2;
1302 		break;
1303 
1304 	      case 2 : case 5 : case 8 : case 11: case 14: case 17: case 20:
1305 	      case 23: case 26: case 29: case 32: case 35: case 38: case 41:
1306 	      case 44:
1307 		GF_PUTC(f->next, v[(t|(c >> 6)) & 0x3f]);
1308 		GF_PUTC(f->next, v[c & 0x3f]);
1309 		break;
1310 	    }
1311 
1312 	    if(n == 45){			/* start a new line? */
1313 		GF_PUTC(f->next, '\015');
1314 		GF_PUTC(f->next, '\012');
1315 		n = 0L;
1316 	    }
1317 	}
1318 
1319 	f->n = n;
1320 	f->t = t;
1321 	GF_END(f, f->next);
1322     }
1323     else if(flg == GF_EOD){		/* no more data */
1324 	switch (f->n % 3) {		/* handle trailing bytes */
1325 	  case 0:			/* no trailing bytes */
1326 	    break;
1327 
1328 	  case 1:
1329 	    GF_PUTC(f->next, v[(f->t) & 0x3f]);
1330 	    GF_PUTC(f->next, '=');	/* byte 3 */
1331 	    GF_PUTC(f->next, '=');	/* byte 4 */
1332 	    break;
1333 
1334 	  case 2:
1335 	    GF_PUTC(f->next, v[(f->t) & 0x3f]);
1336 	    GF_PUTC(f->next, '=');	/* byte 4 */
1337 	    break;
1338 	}
1339 
1340 	/* end with CRLF */
1341 	if(f->n){
1342 	    GF_PUTC(f->next, '\015');
1343 	    GF_PUTC(f->next, '\012');
1344 	}
1345 
1346 	(void) GF_FLUSH(f->next);
1347 	(*f->next->f)(f->next, GF_EOD);
1348     }
1349     else if(flg == GF_RESET){
1350 	dprint((9, "-- gf_reset binary_b64\n"));
1351 	f->n = 0L;
1352     }
1353 }
1354 
1355 
1356 
1357 /*
1358  * BASE64 to BINARY filter (encoding described in rfc1341)
1359  */
1360 void
gf_b64_binary(FILTER_S * f,int flg)1361 gf_b64_binary(FILTER_S *f, int flg)
1362 {
1363     static char v[] = {65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,
1364 		       65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,
1365 		       65,65,65,65,65,65,65,65,65,65,65,62,65,65,65,63,
1366 		       52,53,54,55,56,57,58,59,60,61,65,65,65,64,65,65,
1367 		       65, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,
1368 		       15,16,17,18,19,20,21,22,23,24,25,65,65,65,65,65,
1369 		       65,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
1370 		       41,42,43,44,45,46,47,48,49,50,51,65,65,65,65,65};
1371     GF_INIT(f, f->next);
1372 
1373     if(flg == GF_DATA){
1374 	register unsigned char c;
1375 	register unsigned char t = f->t;
1376 	register int n = (int) f->n;
1377 	register int state = f->f1;
1378 
1379 	while(GF_GETC(f, c)){
1380 
1381 	    if(state){
1382 		state = 0;
1383 		if (c != '=') {
1384 		    gf_error("Illegal '=' in base64 text");
1385 		    /* NO RETURN */
1386 		}
1387 	    }
1388 
1389 	    /* in range, and a valid value? */
1390 	    if((c & ~0x7f) || (c = v[c]) > 63){
1391 		if(c == 64){
1392 		    switch (n++) {	/* check quantum position */
1393 		      case 2:
1394 			state++;	/* expect an equal as next char */
1395 			break;
1396 
1397 		      case 3:
1398 			n = 0L;		/* restart quantum */
1399 			break;
1400 
1401 		      default:		/* impossible quantum position */
1402 			gf_error("Internal base64 decoder error");
1403 			/* NO RETURN */
1404 		    }
1405 		}
1406 	    }
1407 	    else{
1408 		switch (n++) {		/* install based on quantum position */
1409 		  case 0:		/* byte 1: high 6 bits */
1410 		    t = c << 2;
1411 		    break;
1412 
1413 		  case 1:		/* byte 1: low 2 bits */
1414 		    GF_PUTC(f->next, (t|(c >> 4)));
1415 		    t = c << 4;		/* byte 2: high 4 bits */
1416 		    break;
1417 
1418 		  case 2:		/* byte 2: low 4 bits */
1419 		    GF_PUTC(f->next, (t|(c >> 2)));
1420 		    t = c << 6;		/* byte 3: high 2 bits */
1421 		    break;
1422 
1423 		  case 3:
1424 		    GF_PUTC(f->next, t | c);
1425 		    n = 0L;		/* reinitialize mechanism */
1426 		    break;
1427 		}
1428 	    }
1429 	}
1430 
1431 	f->f1 = state;
1432 	f->t = t;
1433 	f->n = n;
1434 	GF_END(f, f->next);
1435     }
1436     else if(flg == GF_EOD){
1437 	(void) GF_FLUSH(f->next);
1438 	(*f->next->f)(f->next, GF_EOD);
1439     }
1440     else if(flg == GF_RESET){
1441 	dprint((9, "-- gf_reset b64_binary\n"));
1442 	f->n  = 0L;			/* quantum position */
1443 	f->f1 = 0;			/* state holder: equal seen? */
1444     }
1445 }
1446 
1447 
1448 
1449 
1450 /*
1451  * QUOTED-PRINTABLE ENCODING AND DECODING filters below.
1452  * encoding described in rfc1341
1453  */
1454 
#define	GF_MAXLINE	80		/* good buffer size */

/*
 * default action for QUOTED-PRINTABLE to 8BIT decoder
 *
 * A space starts a fresh run in the filter's white space buffer
 * ((f)->line) and moves to WSPACE, '=' moves to EQUAL, and anything
 * else is passed straight to the next filter.
 *
 * The expanding scope must provide an lvalue named "state".  The body
 * is wrapped in do{}while(0) so the macro acts as a single statement,
 * even as the unbraced body of an if that has an else.
 */
#define	GF_QP_DEFAULT(f, c)	do{ \
				    if((c) == ' '){ \
					state = WSPACE; \
						/* reset white space! */ \
					(f)->linep = (f)->line; \
					*((f)->linep)++ = ' '; \
				    } \
				    else if((c) == '='){ \
					state = EQUAL; \
				    } \
				    else \
				      GF_PUTC((f)->next, (c)); \
				}while(0)
1473 
1474 
1475 /*
1476  * QUOTED-PRINTABLE to 8BIT filter
1477  */
1478 void
gf_qp_8bit(FILTER_S * f,int flg)1479 gf_qp_8bit(FILTER_S *f, int flg)
1480 {
1481 
1482     GF_INIT(f, f->next);
1483 
1484     if(flg == GF_DATA){
1485 	register unsigned char c;
1486 	register int state = f->f1;
1487 
1488 	while(GF_GETC(f, c)){
1489 
1490 	    switch(state){
1491 	      case DFL :		/* default case */
1492 	      default:
1493 		GF_QP_DEFAULT(f, c);
1494 		break;
1495 
1496 	      case CCR    :		/* non-significant space */
1497 		state = DFL;
1498 		if(c == '\012')
1499 		  continue;		/* go on to next char */
1500 
1501 		GF_QP_DEFAULT(f, c);
1502 		break;
1503 
1504 	      case EQUAL  :
1505 		if(c == '\015'){	/* "=\015" is a soft EOL */
1506 		    state = CCR;
1507 		    break;
1508 		}
1509 
1510 		if(c == '='){		/* compatibility clause for old guys */
1511 		    GF_PUTC(f->next, '=');
1512 		    state = DFL;
1513 		    break;
1514 		}
1515 
1516 		if(!isxdigit((unsigned char)c)){	/* must be hex! */
1517 		    /*
1518 		     * First character after '=' not a hex digit.
1519 		     * This ain't right, but we're going to treat it as
1520 		     * plain old text instead of an '=' followed by hex.
1521 		     * In other words, they forgot to encode the '='.
1522 		     * Before 4.60 we just bailed with an error here, but now
1523 		     * we keep going as long as we are just displaying
1524 		     * the result (and not saving it or something).
1525 		     *
1526 		     * Wait! The users don't like that. They want to be able
1527 		     * to use it even if it might be wrong. So just plow
1528 		     * ahead even if displaying.
1529 		     *
1530 		     * Better have this be a constant string so that if we
1531 		     * get multiple instances of it in a single message we
1532 		     * can avoid the too many error messages problem. It
1533 		     * better be the same message as the one a few lines
1534 		     * below, as well.
1535 		     *
1536 		     * Turn off decoding after encountering such an error and
1537 		     * just dump the rest of the text as is.
1538 		     */
1539 		    state = STOP_DECODING;
1540 		    GF_PUTC(f->next, '=');
1541 		    GF_PUTC(f->next, c);
1542 		    q_status_message(SM_ORDER,3,3,
1543 			_("Warning: Non-hexadecimal character in QP encoding!"));
1544 
1545 		    dprint((2, "gf_qp_8bit: warning: non-hex char in QP encoding: char \"%c\" (%d) follows =\n", c, c));
1546 		    break;
1547 		}
1548 
1549 		if (isdigit ((unsigned char)c))
1550 		  f->t = c - '0';
1551 		else
1552 		  f->t = c - (isupper((unsigned char)c) ? 'A' - 10 : 'a' - 10);
1553 
1554 		f->f2 = c;	/* store character in case we have to
1555 				   back out in !isxdigit below */
1556 
1557 		state = HEX;
1558 		break;
1559 
1560 	      case HEX :
1561 		state = DFL;
1562 		if(!isxdigit((unsigned char)c)){	/* must be hex! */
1563 		    state = STOP_DECODING;
1564 		    GF_PUTC(f->next, '=');
1565 		    GF_PUTC(f->next, f->f2);
1566 		    GF_PUTC(f->next, c);
1567 		    q_status_message(SM_ORDER,3,3,
1568 			_("Warning: Non-hexadecimal character in QP encoding!"));
1569 
1570 		    dprint((2, "gf_qp_8bit: warning: non-hex char in QP encoding: char \"%c\" (%d) follows =%c\n", c, c, f->f2));
1571 		    break;
1572 		}
1573 
1574 		if (isdigit((unsigned char)c))
1575 		  c -= '0';
1576 		else
1577 		  c -= (isupper((unsigned char)c) ? 'A' - 10 : 'a' - 10);
1578 
1579 		GF_PUTC(f->next, c + (f->t << 4));
1580 		break;
1581 
1582 	      case WSPACE :
1583 		if(c == ' '){		/* toss it in with other spaces */
1584 		    if(f->linep - f->line < GF_MAXLINE)
1585 		      *(f->linep)++ = ' ';
1586 		    break;
1587 		}
1588 
1589 		state = DFL;
1590 		if(c == '\015'){	/* not our white space! */
1591 		    f->linep = f->line;	/* reset buffer */
1592 		    GF_PUTC(f->next, '\015');
1593 		    break;
1594 		}
1595 
1596 		/* the spaces are ours, write 'em */
1597 		f->n = f->linep - f->line;
1598 		while((f->n)--)
1599 		  GF_PUTC(f->next, ' ');
1600 
1601 		GF_QP_DEFAULT(f, c);	/* take care of 'c' in default way */
1602 		break;
1603 
1604 	      case STOP_DECODING :
1605 		GF_PUTC(f->next, c);
1606 		break;
1607 	    }
1608 	}
1609 
1610 	f->f1 = state;
1611 	GF_END(f, f->next);
1612     }
1613     else if(flg == GF_EOD){
1614 	fs_give((void **)&(f->line));
1615 	(void) GF_FLUSH(f->next);
1616 	(*f->next->f)(f->next, GF_EOD);
1617     }
1618     else if(flg == GF_RESET){
1619 	dprint((9, "-- gf_reset qp_8bit\n"));
1620 	f->f1 = DFL;
1621 	f->linep = f->line = (char *)fs_get(GF_MAXLINE * sizeof(char));
1622     }
1623 }
1624 
1625 
1626 
/*
 * USEFUL MACROS TO HELP WITH QP ENCODING
 *
 * Every macro below parenthesizes its arguments and is wrapped in
 * do{}while(0), so each acts as a single statement, even as the
 * unbraced body of an if that has an else.  (f)->n is the number of
 * characters already written on the current output line.
 */

#define	QP_MAXL	75			/* 76th place only for continuation */

/*
 * Write a soft line break ("=" CRLF).  The caller resets the line count.
 */
#define	GF_8BIT_WRAP(f)		do{ \
				    GF_PUTC((f)->next, '='); \
				    GF_PUTC((f)->next, '\015'); \
				    GF_PUTC((f)->next, '\012'); \
				}while(0)

/*
 * write a quoted octet in QUOTED-PRINTABLE encoding, adding soft
 * line break if needed.
 */
#define	GF_8BIT_PUT_QUOTE(f, c)	do{ \
				    if(((f)->n += 3) > QP_MAXL){ \
					GF_8BIT_WRAP(f); \
					(f)->n = 3;	/* set line count */ \
				    } \
				    GF_PUTC((f)->next, '='); \
				    GF_PUTC((f)->next, HEX_CHAR1(c)); \
				    GF_PUTC((f)->next, HEX_CHAR2(c)); \
				}while(0)

/*
 * just write an ordinary octet in QUOTED-PRINTABLE, wrapping line
 * if needed.  A '.' that would be the first character on a line is
 * written quoted instead.
 */
#define	GF_8BIT_PUT(f, c)	do{ \
				    if((++((f)->n)) > QP_MAXL){ \
					GF_8BIT_WRAP(f); \
					(f)->n = 1L; \
				    } \
				    if((f)->n == 1L && (c) == '.'){ \
					GF_8BIT_PUT_QUOTE(f, c); \
					(f)->n = 3; \
				    } \
				    else \
				      GF_PUTC((f)->next, (c)); \
				}while(0)


/*
 * default action for 8bit to quoted printable encoder
 *
 * Space and CR are only noted (in the caller's "state" variable, which
 * must be in scope); control characters, DEL, 8-bit octets and '=' are
 * written quoted; everything else is written as is.
 */
#define	GF_8BIT_DEFAULT(f, c)	do{ \
				    if((c) == ' '){ \
					state = WSPACE; \
				    } \
				    else if((c) == '\015'){ \
					state = CCR; \
				    } \
				    else if(iscntrl((c) & 0x7f) \
					    || ((c) == 0x7f) \
					    || ((c) & 0x80) \
					    || ((c) == '=')){ \
					GF_8BIT_PUT_QUOTE(f, c); \
				    } \
				    else{ \
					GF_8BIT_PUT(f, c); \
				    } \
				}while(0)
1690 
1691 
1692 /*
1693  * 8BIT to QUOTED-PRINTABLE filter
1694  */
1695 void
gf_8bit_qp(FILTER_S * f,int flg)1696 gf_8bit_qp(FILTER_S *f, int flg)
1697 {
1698     short dummy_dots = 0, dummy_dmap = 1;
1699     GF_INIT(f, f->next);
1700 
1701     if(flg == GF_DATA){
1702 	 register unsigned char c;
1703 	 register int state = f->f1;
1704 
1705 	 while(GF_GETC(f, c)){
1706 
1707 	     /* keep track of "^JFrom " */
1708 	     Find_Froms(f->t, dummy_dots, f->f2, dummy_dmap, c);
1709 
1710 	     switch(state){
1711 	       case DFL :		/* handle ordinary case */
1712 		 GF_8BIT_DEFAULT(f, c);
1713 		 break;
1714 
1715 	       case CCR :		/* true line break? */
1716 		 state = DFL;
1717 		 if(c == '\012'){
1718 		     GF_PUTC(f->next, '\015');
1719 		     GF_PUTC(f->next, '\012');
1720 		     f->n = 0L;
1721 		 }
1722 		 else{			/* nope, quote the CR */
1723 		     GF_8BIT_PUT_QUOTE(f, '\015');
1724 		     GF_8BIT_DEFAULT(f, c); /* and don't forget about c! */
1725 		 }
1726 		 break;
1727 
1728 	       case WSPACE:
1729 		 state = DFL;
1730 		 if(c == '\015' || f->t){ /* handle the space */
1731 		     GF_8BIT_PUT_QUOTE(f, ' ');
1732 		     f->t = 0;		/* reset From flag */
1733 		 }
1734 		 else
1735 		   GF_8BIT_PUT(f, ' ');
1736 
1737 		 GF_8BIT_DEFAULT(f, c);	/* handle 'c' in the default way */
1738 		 break;
1739 	     }
1740 	 }
1741 
1742 	 f->f1 = state;
1743 	 GF_END(f, f->next);
1744     }
1745     else if(flg == GF_EOD){
1746 	 switch(f->f1){
1747 	   case CCR :
1748 	     GF_8BIT_PUT_QUOTE(f, '\015'); /* write the last cr */
1749 	     break;
1750 
1751 	   case WSPACE :
1752 	     GF_8BIT_PUT_QUOTE(f, ' ');	/* write the last space */
1753 	     break;
1754 	 }
1755 
1756 	 (void) GF_FLUSH(f->next);
1757 	 (*f->next->f)(f->next, GF_EOD);
1758     }
1759     else if(flg == GF_RESET){
1760 	 dprint((9, "-- gf_reset 8bit_qp\n"));
1761 	 f->f1 = DFL;			/* state from last character        */
1762 	 f->f2 = 1;			/* state of "^NFrom " bitmap        */
1763 	 f->t  = 0;
1764 	 f->n  = 0L;			/* number of chars in current line  */
1765     }
1766 }
1767 
1768 /*
1769  * This filter converts characters in one character set (the character
1770  * set of a message, for example) to another (the user's character set).
1771  */
1772 void
gf_convert_8bit_charset(FILTER_S * f,int flg)1773 gf_convert_8bit_charset(FILTER_S *f, int flg)
1774 {
1775     static unsigned char *conv_table = NULL;
1776     GF_INIT(f, f->next);
1777 
1778     if(flg == GF_DATA){
1779 	 register unsigned char c;
1780 
1781 	 while(GF_GETC(f, c)){
1782 	   GF_PUTC(f->next, conv_table ? conv_table[c] : c);
1783 	 }
1784 
1785 	 GF_END(f, f->next);
1786     }
1787     else if(flg == GF_EOD){
1788 	 (void) GF_FLUSH(f->next);
1789 	 (*f->next->f)(f->next, GF_EOD);
1790     }
1791     else if(flg == GF_RESET){
1792 	 dprint((9, "-- gf_reset convert_8bit_charset\n"));
1793 	 conv_table = (f->opt) ? (unsigned char *) (f->opt) : NULL;
1794 
1795     }
1796 }
1797 
1798 
/*
 * Options for gf_convert_utf8_charset, built by
 * gf_convert_utf8_charset_opt and handed over through the filter's
 * opt pointer.
 */
typedef	struct _utf8c_s {
    void *conv_table;		/* table the filter indexes by code point */
    int   report_err;		/* non-zero: an unconvertible character
				   is reported as an error */
} UTF8C_S;
1803 
1804 
1805 /*
1806  * This filter converts characters in UTF-8 to an 8-bit or 16-bit charset.
1807  * Characters missing from the destination set, and invalid UTF-8 sequences,
1808  * will be converted to "?".
1809  */
1810 void
gf_convert_utf8_charset(FILTER_S * f,int flg)1811 gf_convert_utf8_charset(FILTER_S *f, int flg)
1812 {
1813     static unsigned short *conv_table = NULL;
1814     static int report_err = 0;
1815     register int more = f->f2;
1816     register long u = f->n;
1817 
1818     /*
1819      * "more" is the number of subsequent octets needed to complete a character,
1820      * it is stored in f->f2.
1821      * "u" is the accumulated Unicode character, it is stored in f->n
1822      */
1823 
1824     GF_INIT(f, f->next);
1825 
1826     if(flg == GF_DATA){
1827 	register unsigned char c;
1828 
1829 	while(GF_GETC(f, c)){
1830 	    if(!conv_table){	/* can't do much if no conversion table */
1831 		GF_PUTC(f->next, c);
1832 	    }
1833 				/* UTF-8 continuation? */
1834 	    else if((c > 0x7f) && (c < 0xc0)){
1835 		if(more){
1836 		    u <<= 6;	/* shift current value by 6 bits */
1837 		    u |= c & 0x3f;
1838 		    if (!--more){ /* last octet? */
1839 			if(u >= 0xffff || (u = conv_table[u]) == NOCHAR){
1840 			    /*
1841 			     * non-BMP character or a UTF-8 character
1842 			     * which is not representable in the
1843 			     * charset we're converting to.
1844 			     */
1845 			    c = '?';
1846 			    if(report_err){
1847 				if(f->opt)
1848 				  fs_give((void **) &f->opt);
1849 
1850 				/* TRANSLATORS: error while translating from one
1851 				   character set to another, for example from UTF-8
1852 				   to ISO-2022-JP or something like that. */
1853 				gf_error(_("translation error"));
1854 				/* NO RETURN */
1855 			    }
1856 			}
1857 			else{
1858 			    if(u > 0xff){
1859 				c = (unsigned char) (u >> 8);
1860 				GF_PUTC(f->next, c);
1861 			    }
1862 
1863 			    c = (unsigned char) u & 0xff;
1864 			}
1865 
1866 			GF_PUTC(f->next, c);
1867 		    }
1868 		}
1869 		else{		/* continuation when not in progress */
1870 		    GF_PUTC(f->next, '?');
1871 		}
1872 	    }
1873 	    else{
1874 	        if(more){	/* incomplete UTF-8 character */
1875 		    GF_PUTC(f->next, '?');
1876 		    more = 0;
1877 		}
1878 		if(c < 0x80){ /* U+0000 - U+007f */
1879 		    GF_PUTC(f->next, c);
1880 		}
1881 		else if(c < 0xe0){ /* U+0080 - U+07ff */
1882 		    u = c & 0x1f; /* first 5 bits of 12 */
1883 		    more = 1;
1884 		}
1885 		else if(c < 0xf0){ /* U+1000 - U+ffff */
1886 		    u = c & 0x0f; /* first 4 bits of 16 */
1887 		    more = 2;
1888 		}
1889 				/* in case we ever support non-BMP Unicode */
1890 		else if (c < 0xf8){ /* U+10000 - U+10ffff */
1891 		    u = c & 0x07; /* first 3 bits of 20.5 */
1892 		    more = 3;
1893 		}
1894 #if 0	/* ISO 10646 not in Unicode */
1895 		else if (c < 0xfc){ /* ISO 10646 20000 - 3ffffff */
1896 		    u = c & 0x03; /* first 2 bits of 26 */
1897 		    more = 4;
1898 		}
1899 		else if (c < 0xfe){ /* ISO 10646 4000000 - 7fffffff */
1900 		    u = c & 0x03; /* first 2 bits of 26 */
1901 		    more = 5;
1902 		}
1903 #endif
1904 		else{		/* not in Unicode */
1905 		    GF_PUTC(f->next, '?');
1906 		}
1907 	    }
1908 	}
1909 
1910 	f->f2 = more;
1911 	f->n = u;
1912 	GF_END(f, f->next);
1913     }
1914     else if(flg == GF_EOD){
1915 	(void) GF_FLUSH(f->next);
1916 	if(f->opt)
1917 	  fs_give((void **) &f->opt);
1918 	(*f->next->f)(f->next, GF_EOD);
1919     }
1920     else if(flg == GF_RESET){
1921 	dprint((9, "-- gf_reset convert_utf8_charset\n"));
1922 	conv_table = ((UTF8C_S *) f->opt)->conv_table;
1923 	report_err = ((UTF8C_S *) f->opt)->report_err;
1924 	f->f2 = 0;
1925 	f->n = 0L;
1926     }
1927 }
1928 
1929 
1930 void *
gf_convert_utf8_charset_opt(void * table,int report_err)1931 gf_convert_utf8_charset_opt(void *table, int report_err)
1932 {
1933     UTF8C_S *utf8c;
1934 
1935     utf8c = (UTF8C_S *) fs_get(sizeof(UTF8C_S));
1936     utf8c->conv_table = table;
1937     utf8c->report_err = report_err;
1938     return((void *) utf8c);
1939 }
1940 
1941 
1942 /*
1943  * ISO-2022-JP to EUC (on Unix) or Shift-JIS (on PC) filter
1944  *
1945  * The routine is call ..._to_euc but it is really to either euc (unix Pine)
1946  * or to Shift-JIS (if PC-Pine).
1947  */
1948 void
gf_2022_jp_to_euc(FILTER_S * f,int flg)1949 gf_2022_jp_to_euc(FILTER_S *f, int flg)
1950 {
1951     register unsigned char c;
1952     register int state = f->f1;
1953 
1954     /*
1955      * f->t lit means we're in middle of decoding a sequence of characters.
1956      * f->f2 keeps track of first character of pair for Shift-JIS.
1957      * f->f1 is the state.
1958      */
1959 
1960     GF_INIT(f, f->next);
1961 
1962     if(flg == GF_DATA){
1963 	while(GF_GETC(f, c)){
1964 	    switch(state){
1965 	      case ESC:				/* saw ESC */
1966 	        if(!f->t && c == '$')
1967 		  state = ESCDOL;
1968 	        else if(f->t && c == '(')
1969 		  state = ESCPAR;
1970 		else{
1971 		    GF_PUTC(f->next, '\033');
1972 		    GF_PUTC(f->next, c);
1973 		    state = DFL;
1974 		}
1975 
1976 	        break;
1977 
1978 	      case ESCDOL:			/* saw ESC $ */
1979 	        if(c == 'B' || c == '@'){
1980 		    state = EUC;
1981 		    f->t = 1;			/* filtering into euc */
1982 		    f->f2 = -1;			/* first character of pair */
1983 		}
1984 		else{
1985 		    GF_PUTC(f->next, '\033');
1986 		    GF_PUTC(f->next, '$');
1987 		    GF_PUTC(f->next, c);
1988 		    state = DFL;
1989 		}
1990 
1991 	        break;
1992 
1993 	      case ESCPAR:			/* saw ESC ( */
1994 	        if(c == 'B' || c == 'J' || c == 'H'){
1995 		    state = DFL;
1996 		    f->t = 0;			/* done filtering */
1997 		}
1998 		else{
1999 		    GF_PUTC(f->next, '\033');	/* Don't set hibit for     */
2000 		    GF_PUTC(f->next, '(');	/* escape sequences, which */
2001 		    GF_PUTC(f->next, c);	/* this appears to be.     */
2002 		}
2003 
2004 	        break;
2005 
2006 	      case EUC:				/* filtering into euc */
2007 		if(c == '\033')
2008 		  state = ESC;
2009 		else{
2010 #ifdef _WINDOWS					/* Shift-JIS */
2011 		    c &= 0x7f;			/* 8-bit can't win */
2012 		    if (f->f2 >= 0){		/* second of a pair? */
2013 			int rowOffset = (f->f2 < 95) ? 112 : 176;
2014 			int cellOffset = (f->f2 % 2) ? ((c > 95) ? 32 : 31)
2015 						     : 126;
2016 
2017 			GF_PUTC(f->next, ((f->f2 + 1) >> 1) + rowOffset);
2018 			GF_PUTC(f->next, c + cellOffset);
2019 			f->f2 = -1;		/* restart */
2020 		    }
2021 		    else if(c > 0x20 && c < 0x7f)
2022 		      f->f2 = c;		/* first of pair */
2023 		    else{
2024 			GF_PUTC(f->next, c);	/* write CTL as itself */
2025 			f->f2 = -1;
2026 		    }
2027 #else						/* EUC */
2028 		    GF_PUTC(f->next, (c > 0x20 && c < 0x7f) ? c | 0x80 : c);
2029 #endif
2030 		}
2031 
2032 	        break;
2033 
2034 	      case DFL:
2035 	      default:
2036 		if(c == '\033')
2037 		  state = ESC;
2038 		else
2039 		  GF_PUTC(f->next, c);
2040 
2041 		break;
2042 	    }
2043 	}
2044 
2045 	f->f1 = state;
2046 	GF_END(f, f->next);
2047     }
2048     else if(flg == GF_EOD){
2049 	switch(state){
2050 	  case ESC:
2051 	    GF_PUTC(f->next, '\033');
2052 	    break;
2053 
2054 	  case ESCDOL:
2055 	    GF_PUTC(f->next, '\033');
2056 	    GF_PUTC(f->next, '$');
2057 	    break;
2058 
2059 	  case ESCPAR:
2060 	    GF_PUTC(f->next, '\033');	/* Don't set hibit for     */
2061 	    GF_PUTC(f->next, '(');	/* escape sequences.       */
2062 	    break;
2063 	}
2064 
2065 	(void) GF_FLUSH(f->next);
2066 	(*f->next->f)(f->next, GF_EOD);
2067     }
2068     else if(flg == GF_RESET){
2069 	dprint((9, "-- gf_reset jp_to_euc\n"));
2070 	f->f1 = DFL;		/* state */
2071 	f->t = 0;		/* not translating to euc */
2072     }
2073 }
2074 
2075 
2076 /*
2077  * EUC (on Unix) or Shift-JIS (on PC) to ISO-2022-JP filter
2078  */
2079 void
gf_native8bitjapanese_to_2022_jp(FILTER_S * f,int flg)2080 gf_native8bitjapanese_to_2022_jp(FILTER_S *f, int flg)
2081 {
2082 #ifdef _WINDOWS
2083     gf_sjis_to_2022_jp(f, flg);
2084 #else
2085     gf_euc_to_2022_jp(f, flg);
2086 #endif
2087 }
2088 
2089 
2090 void
gf_euc_to_2022_jp(FILTER_S * f,int flg)2091 gf_euc_to_2022_jp(FILTER_S *f, int flg)
2092 {
2093     register unsigned char c;
2094 
2095     /*
2096      * f->t lit means we've sent the start esc seq but not the end seq.
2097      * f->f2 keeps track of first character of pair for Shift-JIS.
2098      */
2099 
2100     GF_INIT(f, f->next);
2101 
2102     if(flg == GF_DATA){
2103 	while(GF_GETC(f, c)){
2104 	    if(f->t){
2105 		if(c & 0x80){
2106 		    GF_PUTC(f->next, c & 0x7f);
2107 		}
2108 		else{
2109 		    GF_PUTC(f->next, '\033');
2110 		    GF_PUTC(f->next, '(');
2111 		    GF_PUTC(f->next, 'B');
2112 		    GF_PUTC(f->next, c);
2113 		    f->f2 = -1;
2114 		    f->t = 0;
2115 		}
2116 	    }
2117 	    else{
2118 		if(c & 0x80){
2119 		    GF_PUTC(f->next, '\033');
2120 		    GF_PUTC(f->next, '$');
2121 		    GF_PUTC(f->next, 'B');
2122 		    GF_PUTC(f->next, c & 0x7f);
2123 		    f->t = 1;
2124 		}
2125 		else{
2126 		    GF_PUTC(f->next, c);
2127 		}
2128 	    }
2129 	}
2130 
2131 	GF_END(f, f->next);
2132     }
2133     else if(flg == GF_EOD){
2134 	if(f->t){
2135 	    GF_PUTC(f->next, '\033');
2136 	    GF_PUTC(f->next, '(');
2137 	    GF_PUTC(f->next, 'B');
2138 	    f->t = 0;
2139 	    f->f2 = -1;
2140 	}
2141 
2142 	(void) GF_FLUSH(f->next);
2143 	(*f->next->f)(f->next, GF_EOD);
2144     }
2145     else if(flg == GF_RESET){
2146 	dprint((9, "-- gf_reset euc_to_jp\n"));
2147 	f->t = 0;
2148 	f->f2 = -1;
2149     }
2150 }
2151 
2152 void
gf_sjis_to_2022_jp(FILTER_S * f,int flg)2153 gf_sjis_to_2022_jp(FILTER_S *f, int flg)
2154 {
2155     register unsigned char c;
2156 
2157     /*
2158      * f->t lit means we've sent the start esc seq but not the end seq.
2159      * f->f2 keeps track of first character of pair for Shift-JIS.
2160      */
2161 
2162     GF_INIT(f, f->next);
2163 
2164     if(flg == GF_DATA){
2165 	while(GF_GETC(f, c)){
2166 	    if(f->t){
2167 		if(f->f2 >= 0){			/* second of a pair? */
2168 		    int adjust = c < 159;
2169 		    int rowOffset = f->f2 < 160 ? 112 : 176;
2170 		    int cellOffset = adjust ? (c > 127 ? 32 : 31) : 126;
2171 
2172 		    GF_PUTC(f->next, ((f->f2 - rowOffset) << 1) - adjust);
2173 		    GF_PUTC(f->next, c - cellOffset);
2174 		    f->f2 = -1;
2175 		}
2176 		else if(c & 0x80){
2177 		    f->f2 = c;			/* remember first of pair */
2178 		}
2179 		else{
2180 		    GF_PUTC(f->next, '\033');
2181 		    GF_PUTC(f->next, '(');
2182 		    GF_PUTC(f->next, 'B');
2183 		    GF_PUTC(f->next, c);
2184 		    f->f2 = -1;
2185 		    f->t = 0;
2186 		}
2187 	    }
2188 	    else{
2189 		if(c & 0x80){
2190 		    GF_PUTC(f->next, '\033');
2191 		    GF_PUTC(f->next, '$');
2192 		    GF_PUTC(f->next, 'B');
2193 		    f->f2 = c;
2194 		    f->t = 1;
2195 		}
2196 		else{
2197 		    GF_PUTC(f->next, c);
2198 		}
2199 	    }
2200 	}
2201 
2202 	GF_END(f, f->next);
2203     }
2204     else if(flg == GF_EOD){
2205 	if(f->t){
2206 	    GF_PUTC(f->next, '\033');
2207 	    GF_PUTC(f->next, '(');
2208 	    GF_PUTC(f->next, 'B');
2209 	    f->t = 0;
2210 	    f->f2 = -1;
2211 	}
2212 
2213 	(void) GF_FLUSH(f->next);
2214 	(*f->next->f)(f->next, GF_EOD);
2215     }
2216     else if(flg == GF_RESET){
2217 	dprint((9, "-- gf_reset sjis_to_jp\n"));
2218 	f->t = 0;
2219 	f->f2 = -1;
2220     }
2221 }
2222 
2223 
2224 
2225 /*
2226  * Various charset to UTF-8 Translation filter
2227  */
2228 
2229 /*
2230  * utf8 conversion options
2231  */
/*
 * Built by gf_utf8_opt() and read by gf_utf8() through f->opt.
 */
typedef	struct _utf8_s {
    CHARSET	  *charset;	/* source charset; NULL makes GF_UTF8_FLUSH turn 8-bit bytes into '?' */
    unsigned long  ucsc;	/* not referenced by the code in this part of the file */
} UTF8_S;
2236 
/* amount, in bytes, by which the line buffer is allocated and grown */
#define	UTF8_BLOCK	1024
/* address of the last byte of f->line, given that f->f2 holds its size */
#define	UTF8_EOB(f)	((f)->line + (f)->f2 - 1)
/*
 * UTF8_ADD - append c to the line buffer, growing it by UTF8_BLOCK
 *            when the write pointer has reached the end.
 *            Uses (and updates) the caller's locals "p" (write
 *            pointer) and "eobuf" (end of buffer).
 */
#define	UTF8_ADD(f, c) \
			{ \
			    if(p >= eobuf){ \
				f->f2 += UTF8_BLOCK; \
				fs_resize((void **)&f->line, \
				      (size_t) f->f2 * sizeof(char)); \
				eobuf = UTF8_EOB(f); \
				p = eobuf - UTF8_BLOCK; \
			    } \
			    *p++ = c; \
			}
/*
 * GF_UTF8_FLUSH - convert the bytes between f->line and the caller's
 *                 local "p" and write the result to f->next.
 *                   - no charset in f->opt: bytes with the high bit
 *                     set are written as '?', the rest unchanged
 *                   - utf8_text_cs() succeeds: its output is written,
 *                     and freed unless it is the input buffer itself
 *                   - utf8_text_cs() fails: one '?' per input byte
 *                 The caller is responsible for resetting "p".
 */
#define	GF_UTF8_FLUSH(f)	{ \
				    register long n; \
				    SIZEDTEXT     intext, outtext; \
				    intext.data = (unsigned char *) f->line; \
				    intext.size = p - f->line; \
				    memset(&outtext, 0, sizeof(SIZEDTEXT)); \
				    if(!((UTF8_S *) f->opt)->charset){ \
					for(n = 0; n < intext.size; n++) \
					  GF_PUTC(f->next, (intext.data[n] & 0x80) ? '?' : intext.data[n]); \
				    } \
				    else if(utf8_text_cs(&intext, ((UTF8_S *) f->opt)->charset, &outtext, NULL, NULL)){ \
					for(n = 0; n < outtext.size; n++) \
					  GF_PUTC(f->next, outtext.data[n]); \
					if(outtext.data && intext.data != outtext.data) \
					  fs_give((void **) &outtext.data); \
				    } \
				    else{ \
					for(n = 0; n < intext.size; n++) \
					  GF_PUTC(f->next, '?'); \
				    } \
				}
2271 
2272 
2273 /*
2274  * gf_utf8 - text in specified charset to to UTF-8 filter
2275  *           Process line-at-a-time rather than character
2276  *           because ISO-2022-JP.  Call utf8_text_cs by hand
2277  *           rather than utf8_text to reduce the cost of
2278  *           utf8_charset() for each line.
2279  */
2280 void
gf_utf8(FILTER_S * f,int flg)2281 gf_utf8(FILTER_S *f, int flg)
2282 {
2283     register char *p = f->linep;
2284     register char *eobuf = UTF8_EOB(f);
2285     GF_INIT(f, f->next);
2286 
2287     if(flg == GF_DATA){
2288 	register int state = f->f1;
2289 	register unsigned char c;
2290 
2291 	while(GF_GETC(f, c)){
2292 
2293 	    switch(state){
2294 	      case CCR :
2295 		state = DFL;
2296 		if(c == '\012'){
2297 		    GF_UTF8_FLUSH(f);
2298 		    p = f->line;
2299 		    GF_PUTC(f->next, '\015');
2300 		    GF_PUTC(f->next, '\012');
2301 		}
2302 		else{
2303 		    UTF8_ADD(f, '\015');
2304 		    UTF8_ADD(f, c);
2305 		}
2306 
2307 		break;
2308 
2309 	      default :
2310 		if(c == '\015'){
2311 		    state = CCR;
2312 		}
2313 		else
2314 		  UTF8_ADD(f, c);
2315 	    }
2316 	}
2317 
2318 	f->f1 = state;
2319 	GF_END(f, f->next);
2320     }
2321     else if(flg == GF_EOD){
2322 
2323 	if(p != f->line)
2324 	  GF_UTF8_FLUSH(f);
2325 
2326 	fs_give((void **) &f->line);
2327 	fs_give((void **) &f->opt);
2328 	(void) GF_FLUSH(f->next);
2329 	(*f->next->f)(f->next, GF_EOD);
2330     }
2331     else if(GF_RESET){
2332 	dprint((9, "-- gf_reset utf8\n"));
2333 	f->f1 = DFL;
2334 	f->f2 = UTF8_BLOCK;		/* input buffer length */
2335 	f->line = p = (char *) fs_get(f->f2 * sizeof(char));
2336     }
2337 
2338     f->linep = p;
2339 }
2340 
2341 
2342 void *
gf_utf8_opt(char * charset)2343 gf_utf8_opt(char *charset)
2344 {
2345     UTF8_S *utf8;
2346 
2347     utf8 = (UTF8_S *) fs_get(sizeof(UTF8_S));
2348 
2349     utf8->charset = (CHARSET *) utf8_charset(charset);
2350 
2351     /*
2352      * When we get 8-bit non-ascii characters but it is supposed to
2353      * be ascii we want it to turn into question marks, not
2354      * just behave as if it is UTF-8 which is what happens
2355      * with ascii because there is no translation table.
2356      * So we need to catch the ascii special case here.
2357      */
2358     if(utf8->charset && utf8->charset->type == CT_ASCII)
2359       utf8->charset = NULL;
2360 
2361     return((void *) utf8);
2362 }
2363 
2364 
2365 /*
2366  * RICHTEXT-TO-PLAINTEXT filter
2367  */
2368 
2369 /*
2370  * option to be used by rich2plain (NOTE: if this filter is ever
2371  * used more than once in a pipe, all instances will have the same
2372  * option value)
2373  */
2374 
2375 
2376 /*----------------------------------------------------------------------
2377       richtext to plaintext filter
2378 
2379  Args: f --
2380 	flg  --
2381 
2382   This basically removes all richtext formatting. A cute hack is used
2383   to get bold and underlining to work.
2384   Further work could be done to handle things like centering and right
2385   and left flush, but then it could no longer be done in place. This
2386   operates on text *with* CRLF's.
2387 
2388   WARNING: does not wrap lines!
2389  ----*/
2390 void
gf_rich2plain(FILTER_S * f,int flg)2391 gf_rich2plain(FILTER_S *f, int flg)
2392 {
2393     static int rich_bold_on = 0, rich_uline_on = 0;
2394 
2395 /* BUG: quote incoming \255 values */
2396     GF_INIT(f, f->next);
2397 
2398     if(flg == GF_DATA){
2399 	 register unsigned char c;
2400 	 register int state = f->f1;
2401 	 register int plain;
2402 
2403 	 plain = f->opt ? (*(int *) f->opt) : 0;
2404 
2405 	 while(GF_GETC(f, c)){
2406 
2407 	     switch(state){
2408 	       case TOKEN :		/* collect a richtext token */
2409 		 if(c == '>'){		/* what should we do with it? */
2410 		     state       = DFL;	/* return to default next time */
2411 		     *(f->linep) = '\0';	/* cap off token */
2412 		     if(f->line[0] == 'l' && f->line[1] == 't'){
2413 			 GF_PUTC(f->next, '<'); /* literal '<' */
2414 		     }
2415 		     else if(f->line[0] == 'n' && f->line[1] == 'l'){
2416 			 GF_PUTC(f->next, '\015');/* newline! */
2417 			 GF_PUTC(f->next, '\012');
2418 		     }
2419 		     else if(!strcmp("comment", f->line)){
2420 			 (f->f2)++;
2421 		     }
2422 		     else if(!strcmp("/comment", f->line)){
2423 			 f->f2 = 0;
2424 		     }
2425 		     else if(!strcmp("/paragraph", f->line)) {
2426 			 GF_PUTC(f->next, '\r');
2427 			 GF_PUTC(f->next, '\n');
2428 			 GF_PUTC(f->next, '\r');
2429 			 GF_PUTC(f->next, '\n');
2430 		     }
2431 		     else if(!plain /* gf_rich_plain */){
2432 			 if(!strcmp(f->line, "bold")) {
2433 			     GF_PUTC(f->next, TAG_EMBED);
2434 			     GF_PUTC(f->next, TAG_BOLDON);
2435 			     rich_bold_on = 1;
2436 			 } else if(!strcmp(f->line, "/bold")) {
2437 			     GF_PUTC(f->next, TAG_EMBED);
2438 			     GF_PUTC(f->next, TAG_BOLDOFF);
2439 			     rich_bold_on = 0;
2440 			 } else if(!strcmp(f->line, "italic")) {
2441 			     GF_PUTC(f->next, TAG_EMBED);
2442 			     GF_PUTC(f->next, TAG_ULINEON);
2443 			     rich_uline_on = 1;
2444 			 } else if(!strcmp(f->line, "/italic")) {
2445 			     GF_PUTC(f->next, TAG_EMBED);
2446 			     GF_PUTC(f->next, TAG_ULINEOFF);
2447 			     rich_uline_on = 0;
2448 			 } else if(!strcmp(f->line, "underline")) {
2449 			     GF_PUTC(f->next, TAG_EMBED);
2450 			     GF_PUTC(f->next, TAG_ULINEON);
2451 			     rich_uline_on = 1;
2452 			 } else if(!strcmp(f->line, "/underline")) {
2453 			     GF_PUTC(f->next, TAG_EMBED);
2454 			     GF_PUTC(f->next, TAG_ULINEOFF);
2455 			     rich_uline_on = 0;
2456 			 }
2457 		     }
2458 		     /* else we just ignore the token! */
2459 
2460 		     f->linep = f->line;	/* reset token buffer */
2461 		 }
2462 		 else{			/* add char to token */
2463 		     if(f->linep - f->line > 40){
2464 			 /* What? rfc1341 says 40 char tokens MAX! */
2465 			 fs_give((void **)&(f->line));
2466 			 gf_error("Richtext token over 40 characters");
2467 			 /* NO RETURN */
2468 		     }
2469 
2470 		     *(f->linep)++ = isupper((unsigned char)c) ? c-'A'+'a' : c;
2471 		 }
2472 		 break;
2473 
2474 	       case CCR   :
2475 		 state = DFL;		/* back to default next time */
2476 		 if(c == '\012'){	/* treat as single space?    */
2477 		     GF_PUTC(f->next, ' ');
2478 		     break;
2479 		 }
2480 		 /* fall thru to process c */
2481 
2482 	       case DFL   :
2483 	       default:
2484 		 if(c == '<')
2485 		   state = TOKEN;
2486 		 else if(c == '\015')
2487 		   state = CCR;
2488 		 else if(!f->f2)		/* not in comment! */
2489 		   GF_PUTC(f->next, c);
2490 
2491 		 break;
2492 	     }
2493 	 }
2494 
2495 	 f->f1 = state;
2496 	 GF_END(f, f->next);
2497     }
2498     else if(flg == GF_EOD){
2499 	 if((f->f1 = (f->linep != f->line)) != 0){
2500 	     /* incomplete token!! */
2501 	     gf_error("Incomplete token in richtext");
2502 	     /* NO RETURN */
2503 	 }
2504 
2505 	 if(rich_uline_on){
2506 	     GF_PUTC(f->next, TAG_EMBED);
2507 	     GF_PUTC(f->next, TAG_ULINEOFF);
2508 	     rich_uline_on = 0;
2509 	 }
2510 	 if(rich_bold_on){
2511 	     GF_PUTC(f->next, TAG_EMBED);
2512 	     GF_PUTC(f->next, TAG_BOLDOFF);
2513 	     rich_bold_on = 0;
2514 	 }
2515 
2516 	 fs_give((void **)&(f->line));
2517 	 (void) GF_FLUSH(f->next);
2518 	 (*f->next->f)(f->next, GF_EOD);
2519     }
2520     else if(flg == GF_RESET){
2521 	 dprint((9, "-- gf_reset rich2plain\n"));
2522 	 f->f1 = DFL;			/* state */
2523 	 f->f2 = 0;			/* set means we're in a comment */
2524 	 f->linep = f->line = (char *)fs_get(45 * sizeof(char));
2525     }
2526 }
2527 
2528 
/*
 * gf_rich2plain_opt - hand back, as an opaque pointer, the option
 *                     the richtext filter should use.  The pointer
 *                     is passed thru untouched; nothing is copied.
 */
void *
gf_rich2plain_opt(int *plain)
{
    void *opt = (void *) plain;

    return(opt);
}
2538 
2539 
2540 
2541 /*
2542  * ENRICHED-TO-PLAIN text filter
2543  */
2544 
/* bits kept in f->f2 by gf_enriched2plain */
#define	TEF_QUELL	0x01	/* suppress output: set by <param> and after a repeated CRLF */
#define	TEF_NOFILL	0x02	/* inside <nofill>: CRs are passed thru rather than folded */
2547 
2548 
2549 
2550 /*----------------------------------------------------------------------
2551       enriched text to plain text filter (ala rfc1523)
2552 
2553  Args: f -- state and input data
2554 	flg --
2555 
2556   This basically removes all enriched formatting. A cute hack is used
2557   to get bold and underlining to work.
2558 
2559   Further work could be done to handle things like centering and right
2560   and left flush, but then it could no longer be done in place. This
2561   operates on text *with* CRLF's.
2562 
2563   WARNING: does not wrap lines!
2564  ----*/
2565 void
gf_enriched2plain(FILTER_S * f,int flg)2566 gf_enriched2plain(FILTER_S *f, int flg)
2567 {
2568     static int enr_uline_on = 0, enr_bold_on = 0;
2569 
2570 /* BUG: quote incoming \255 values */
2571     GF_INIT(f, f->next);
2572 
2573     if(flg == GF_DATA){
2574 	 register unsigned char c;
2575 	 register int state = f->f1;
2576 	 register int plain;
2577 
2578 	 plain = f->opt ? (*(int *) f->opt) : 0;
2579 
2580 	 while(GF_GETC(f, c)){
2581 
2582 	     switch(state){
2583 	       case TOKEN :		/* collect a richtext token */
2584 		 if(c == '>'){		/* what should we do with it? */
2585 		     int   off   = *f->line == '/';
2586 		     char *token = f->line + (off ? 1 : 0);
2587 		     state	= DFL;
2588 		     *f->linep   = '\0';
2589 		     if(!strcmp("param", token)){
2590 			 if(off)
2591 			   f->f2 &= ~TEF_QUELL;
2592 			 else
2593 			   f->f2 |= TEF_QUELL;
2594 		     }
2595 		     else if(!strcmp("nofill", token)){
2596 			 if(off)
2597 			   f->f2 &= ~TEF_NOFILL;
2598 			 else
2599 			   f->f2 |= TEF_NOFILL;
2600 		     }
2601 		     else if(!plain /* gf_enriched_plain */){
2602 			 /* Following is a cute hack or two to get
2603 			    bold and underline on the screen.
2604 			    See Putline0n() where these codes are
2605 			    interpreted */
2606 			 if(!strcmp("bold", token)) {
2607 			     GF_PUTC(f->next, TAG_EMBED);
2608 			     GF_PUTC(f->next, off ? TAG_BOLDOFF : TAG_BOLDON);
2609 			     enr_bold_on = off ? 0 : 1;
2610 			 } else if(!strcmp("italic", token)) {
2611 			     GF_PUTC(f->next, TAG_EMBED);
2612 			     GF_PUTC(f->next, off ? TAG_ULINEOFF : TAG_ULINEON);
2613 			     enr_uline_on = off ? 0 : 1;
2614 			 } else if(!strcmp("underline", token)) {
2615 			     GF_PUTC(f->next, TAG_EMBED);
2616 			     GF_PUTC(f->next, off ? TAG_ULINEOFF : TAG_ULINEON);
2617 			     enr_uline_on = off ? 0 : 1;
2618 			 }
2619 		     }
2620 		     /* else we just ignore the token! */
2621 
2622 		     f->linep = f->line;	/* reset token buffer */
2623 		 }
2624 		 else if(c == '<'){		/* literal '<'? */
2625 		     if(f->linep == f->line){
2626 			 GF_PUTC(f->next, '<');
2627 			 state = DFL;
2628 		     }
2629 		     else{
2630 			 fs_give((void **)&(f->line));
2631 			 gf_error("Malformed Enriched text: unexpected '<'");
2632 			 /* NO RETURN */
2633 		     }
2634 		 }
2635 		 else{			/* add char to token */
2636 		     if(f->linep - f->line > 60){ /* rfc1523 says 60 MAX! */
2637 			 fs_give((void **)&(f->line));
2638 			 gf_error("Malformed Enriched text: token too long");
2639 			 /* NO RETURN */
2640 		     }
2641 
2642 		     *(f->linep)++ = isupper((unsigned char)c) ? c-'A'+'a' : c;
2643 		 }
2644 		 break;
2645 
2646 	       case CCR   :
2647 		 if(c != '\012'){	/* treat as single space?    */
2648 		     state = DFL;	/* lone cr? */
2649 		     f->f2 &= ~TEF_QUELL;
2650 		     GF_PUTC(f->next, '\015');
2651 		     goto df;
2652 		 }
2653 
2654 		 state = CLF;
2655 		 break;
2656 
2657 	       case CLF   :
2658 		 if(c == '\015'){	/* treat as single space?    */
2659 		     state = CCR;	/* repeat crlf's mean real newlines */
2660 		     f->f2 |= TEF_QUELL;
2661 		     GF_PUTC(f->next, '\r');
2662 		     GF_PUTC(f->next, '\n');
2663 		     break;
2664 		 }
2665 		 else{
2666 		     state = DFL;
2667 		     if(!((f->f2) & TEF_QUELL))
2668 		       GF_PUTC(f->next, ' ');
2669 
2670 		     f->f2 &= ~TEF_QUELL;
2671 		 }
2672 
2673 		 /* fall thru to take care of 'c' */
2674 
2675 	       case DFL   :
2676 	       default :
2677 	       df :
2678 		 if(c == '<')
2679 		   state = TOKEN;
2680 		 else if(c == '\015' && (!((f->f2) & TEF_NOFILL)))
2681 		   state = CCR;
2682 		 else if(!((f->f2) & TEF_QUELL))
2683 		   GF_PUTC(f->next, c);
2684 
2685 		 break;
2686 	     }
2687 	 }
2688 
2689 	 f->f1 = state;
2690 	 GF_END(f, f->next);
2691     }
2692     else if(flg == GF_EOD){
2693 	 if((f->f1 = (f->linep != f->line)) != 0){
2694 	     /* incomplete token!! */
2695 	     gf_error("Incomplete token in richtext");
2696 	     /* NO RETURN */
2697 	 }
2698 	 if(enr_uline_on){
2699 	     GF_PUTC(f->next, TAG_EMBED);
2700 	     GF_PUTC(f->next, TAG_ULINEOFF);
2701 	     enr_uline_on = 0;
2702 	 }
2703 	 if(enr_bold_on){
2704 	     GF_PUTC(f->next, TAG_EMBED);
2705 	     GF_PUTC(f->next, TAG_BOLDOFF);
2706 	     enr_bold_on = 0;
2707 	 }
2708 
2709 	 /* Make sure we end with a newline so everything gets flushed */
2710 	 GF_PUTC(f->next, '\015');
2711 	 GF_PUTC(f->next, '\012');
2712 
2713 	 fs_give((void **)&(f->line));
2714 
2715 	 (void) GF_FLUSH(f->next);
2716 	 (*f->next->f)(f->next, GF_EOD);
2717     }
2718     else if(flg == GF_RESET){
2719 	 dprint((9, "-- gf_reset enriched2plain\n"));
2720 	 f->f1 = DFL;			/* state */
2721 	 f->f2 = 0;			/* set means we're in a comment */
2722 	 f->linep = f->line = (char *)fs_get(65 * sizeof(char));
2723     }
2724 }
2725 
2726 
/*
 * gf_enriched2plain_opt - hand back, as an opaque pointer, the option
 *                         the enriched text filter should use.  The
 *                         pointer is passed thru untouched; nothing
 *                         is copied.
 */
void *
gf_enriched2plain_opt(int *plain)
{
    void *opt = (void *) plain;

    return(opt);
}
2736 
2737 
2738 
2739 /*
2740  * HTML-TO-PLAIN text filter
2741  */
2742 
2743 
2744 /* OK, here's the plan:
2745 
2746  * a universal output function handles writing  chars and worries
2747  *    about wrapping.
2748 
 * a universal element collector reads chars and collects params
2750  * and dispatches the appropriate element handler.
2751 
2752  * element handlers are stacked.  The most recently dispatched gets
2753  * first crack at the incoming character stream.  It passes bytes it's
2754  * done with or not interested in to the next
2755 
2756  * installs that handler as the current one collecting data...
2757 
2758  * stacked handlers take their params from the element collector and
2759  * accept chars or do whatever they need to do.  Sort of a vertical
2760  * piping? recursion-like? hmmm.
2761 
2762  * at least I think this is how it'll work. tres simple, non?
2763 
2764  */
2765 
2766 
/*
 * Some important constants
 *
 * NOTE(review): HTML_LITERAL and HTML_DOBOLD are both 0x0400 --
 * confirm the two are never tested against the same value.
 */
#define	HTML_BUF_LEN	2048		/* max scratch buffer length */
#define	MAX_ENTITY	20		/* maximum length of an entity */
#define	MAX_ELEMENT	72		/* maximum length of an element */
#define HTML_MOREDATA	0		/* expect more entity data */
#define HTML_ENTITY	1		/* valid entity collected */
#define	HTML_BADVALUE	0x0100		/* good data, but bad entity value */
#define	HTML_BADDATA	0x0200		/* bad data found looking for entity */
#define	HTML_LITERAL	0x0400		/* Literal character value */
#define	HTML_NEWLINE	0x010A		/* hard newline */
#define	HTML_DOBOLD	0x0400		/* Start Bold display */
#define	HTML_ID_GET	0		/* indent func: return current val */
#define	HTML_ID_SET	1		/* indent func: set to absolute val */
#define	HTML_ID_INC	2		/* indent func: increment by val */
#define	HTML_HX_CENTER	0x0001
#define	HTML_HX_ULINE	0x0002
#define	RSS_ITEM_LIMIT	20		/* RSS 2.0 ITEM depth limit */


/* types of lists that we will support (all values are of type long) */
#define LIST_DECIMAL  (long) 0
#define LIST_ALPHALO  (long) 1
#define LIST_ALPHAUP  (long) 2
#define LIST_ROMANLO  (long) 3
#define LIST_ROMANUP  (long) 4
#define LIST_UNKNOWN  (long) 10
2795 
/*
 * Handler data, state information including function that uses it
 *
 * One of these exists per stacked element handler; HANDLERS(f) is the
 * top of the stack.
 */
typedef struct handler_s {
    FILTER_S	      *html_data;	/* filter this handler belongs to */
    void	      *element;		/* read as an ELPROP_S * via EL() */
    long	       x, y, z;		/* presumably per-handler scratch values -- usage not visible here */
    void	      *dp;		/* presumably per-handler data pointer -- usage not visible here */
    unsigned char     *s;		/* presumably per-handler string -- usage not visible here */
    struct handler_s  *below;		/* presumably the next handler down the stack */
} HANDLER_S;
2807 
/*
 * Element Property structure
 *
 * Describes one markup element; the table of these in use is reached
 * through ELEMENTS(f).
 */
typedef struct _element_properties {
    char      *element;			/* element name */
    size_t     len;			/* presumably strlen(element) -- TODO confirm */
    int	     (*handler)(HANDLER_S *, int, int); /* called as (handler data, char, command) */
    unsigned   blocklevel:1;		/* presumably set for block-level elements */
    unsigned   alternate:1;		/* meaning not visible in this part of the file */
} ELPROP_S;
2818 
/*
 * Types used to manage HTML parsing
 */
/* forward declaration; file-local, defined elsewhere in this file */
static void html_handoff(HANDLER_S *, int);
2823 
2824 
/*
 * to help manage line wrapping.
 *
 * One growable text buffer together with its displayed width; used in
 * pairs (line and word) by CENTER_S.
 */
typedef	struct _wrap_line {
    char *buf;				/* buf to collect wrapped text */
    int	  used,				/* number of chars in buf */
	   width,			/* text's width as displayed  */
	   len;				/* length of allocated buf */
} WRAPLINE_S;
2834 
2835 
/*
 * to help manage centered text
 *
 * Hangs off HTML_DATA_S.centered; WRAPPED_LEN() adds up the widths of
 * line and word.
 */
typedef	struct _center_s {
    WRAPLINE_S line;			/* buf to assemble centered text */
    WRAPLINE_S word;			/* word about to be appended to line */
    int	       anchor;			/* usage not visible in this part of the file */
    short      space;			/* usage not visible in this part of the file */
} CENTER_S;
2845 
2846 
/*
 * Collector data and state information
 *
 * Allocated zeroed by NEW_CLCTR() when a '<' is seen and released,
 * together with element and the attribs list, by FREE_CLCTR().
 */
typedef	struct collector_s {
    char        buf[HTML_BUF_LEN];	/* buffer to collect data */
    int		len;			/* length of that buffer (HTML_PROC dumps this many bytes of buf) */
    unsigned	unquoted_data:1;	/* parameter is not quoted... */
    unsigned    end_tag:1;		/* collecting a closing tag */
    unsigned    hit_equal:1;		/* collecting right half of attrib */
    unsigned	mkup_decl:1;		/* markup declaration */
    unsigned	start_comment:1;	/* markup declaration comment */
    unsigned	end_comment:1;		/* legit comment format */
    unsigned	hyphen:1;		/* markup hyphen read */
    unsigned	badform:1;		/* malformed markup element */
    unsigned	overrun:1;		/* Overran buf above */
    unsigned	proc_inst:1;		/* XML processing instructions */
    unsigned	empty:1;		/* empty element */
    unsigned	was_quoted:1;		/* basically to catch null string */
    char	quoted;			/* quoted element param value */
    char       *element;		/* element's collected name */
    PARAMETER  *attribs;		/* element's collected attributes */
    PARAMETER  *cur_attrib;		/* attribute now being collected */
} CLCTR_S;
2870 
2871 
/*
 * State information for all element handlers
 *
 * Reached from a filter through HD(f), i.e. f->data.
 */
typedef struct html_data {
    HANDLER_S  *h_stack;		/* handler list */
    CLCTR_S    *el_data;		/* element collector data */
    CENTER_S   *centered;		/* struct to manage centered text */
    int	      (*token)(FILTER_S *, int); /* collector fed each char while set; see HTML_PROC */
    char	quoted;			/* quoted, by either ' or ", text */
    short	indent_level;		/* levels of indention */
    int		in_anchor;		/* text now being written to anchor */
    int		blanks;			/* Consecutive blank line count */
    int		wrapcol;		/* column to wrap lines on */
    int	       *prefix;			/* buffer containing Anchor prefix */
    int		prefix_used;
    long        line_bufsize;           /* current size of the line buffer */
    COLOR_PAIR *color;
    struct {
	 int   state;			/* embedded data state */
	 char *color;			/* embedded color pointer */
    } embedded;
    CBUF_S      cb;			/* utf8->ucs4 conversion state */
    unsigned	wrapstate:1;		/* whether or not to wrap output */
    unsigned	li_pending:1;		/* <LI> next token expected */
    unsigned	de_pending:1;		/* <DT> or <DD> next token expected */
    unsigned	bold_on:1;		/* currently bolding text */
    unsigned	uline_on:1;		/* currently underlining text */
    unsigned	center:1;		/* center output text */
    unsigned	bitbucket:1;		/* Ignore input */
    unsigned	head:1;			/* In doc's HEAD */
    unsigned	body:1;			/* In doc's BODY */
    unsigned	alt_entity:1;		/* use alternative entity values */
    unsigned	wrote:1;		/* anything written yet? */
} HTML_DATA_S;
2906 
2907 
/*
 * HTML filter options
 *
 * Reached from a filter through f->opt; see the accessor macros
 * (WRAP_COLS, HTML_INDENT, STRIP, ...) that read these members.
 */
typedef	struct _html_opts {
    char	*base;			/* Base URL for this html file */
    int		 columns,		/* Display columns (excluding margins) */
		 indent;		/* Left margin */
    HANDLE_S   **handlesp;		/* Head of handles */
    htmlrisk_t   warnrisk_f;		/* Nasty link warning call */
    ELPROP_S	*element_table;		/* markup element table */
    RSS_FEED_S **feedp;			/* hook for RSS feed response */
    unsigned	strip:1;		/* Hilite TAGs allowed */
    unsigned	handles_loc:1;		/* Local handles requested? */
    unsigned	showserver:1;		/* Display server after anchors */
    unsigned	outputted:1;		/* any (presumably: any output produced yet -- TODO confirm) */
    unsigned	no_relative_links:1;	/* Disable embedded relative links */
    unsigned	related_content:1;	/* Embedded related content */
    unsigned	html:1;			/* Output content in HTML */
    unsigned	html_imgs:1;		/* Output IMG tags in HTML content */
} HTML_OPT_S;
2928 
2929 
2930 
/*
 * Some macros to make life a little easier
 *
 * In the accessors below X is a filter whose ->opt is read as an
 * HTML_OPT_S and whose ->data is read as an HTML_DATA_S.  Those
 * written "(X)->opt ? ..." or "(X)->opt && ..." tolerate a NULL opt;
 * HANDLESP, ELEMENTS and RSS_FEED dereference opt unconditionally.
 */
#define	WRAP_COLS(X)	((X)->opt ? ((HTML_OPT_S *)(X)->opt)->columns : 80)
#define	HTML_INDENT(X)	((X)->opt ? ((HTML_OPT_S *)(X)->opt)->indent : 0)
#define	HTML_WROTE(X)	(HD(X)->wrote)
#define	HTML_BASE(X)	((X)->opt ? ((HTML_OPT_S *)(X)->opt)->base : NULL)
#define	STRIP(X)	((X)->opt && ((HTML_OPT_S *)(X)->opt)->strip)
#define	PASS_HTML(X)	((X)->opt && ((HTML_OPT_S *)(X)->opt)->html)
#define	PASS_IMAGES(X)	((X)->opt && ((HTML_OPT_S *)(X)->opt)->html_imgs)
#define	HANDLESP(X)	(((HTML_OPT_S *)(X)->opt)->handlesp)
#define	DO_HANDLES(X)	((X)->opt && HANDLESP(X))
#define	HANDLES_LOC(X)	((X)->opt && ((HTML_OPT_S *)(X)->opt)->handles_loc)
#define	SHOWSERVER(X)	((X)->opt && ((HTML_OPT_S *)(X)->opt)->showserver)
#define	NO_RELATIVE(X)	((X)->opt && ((HTML_OPT_S *)(X)->opt)->no_relative_links)
#define	RELATED_OK(X)	((X)->opt && ((HTML_OPT_S *)(X)->opt)->related_content)
#define	ELEMENTS(X)	(((HTML_OPT_S *)(X)->opt)->element_table)
#define	RSS_FEED(X)	(*(((HTML_OPT_S *)(X)->opt)->feedp))
/* tag the low 8 bits of C with HTML_LITERAL / test for that tag */
#define	MAKE_LITERAL(C)	(HTML_LITERAL | ((C) & 0xff))
#define	IS_LITERAL(C)	(HTML_LITERAL & (C))
/* X is a filter for HD and ED, a HANDLER_S * for EL */
#define	HD(X)		((HTML_DATA_S *)(X)->data)
#define	ED(X)		(HD(X)->el_data)
#define	EL(X)		((ELPROP_S *) (X)->element)
/* isspace() is only consulted for values below 0x80 */
#define	ASCII_ISSPACE(C) ((C) < 0x80 && isspace((unsigned char) (C)))
/* literal-tagged values never count as space; HTML_NEWLINE always does */
#define	HTML_ISSPACE(C)	(IS_LITERAL(C) == 0 && ((C) == HTML_NEWLINE || ASCII_ISSPACE(C)))
/*
 * NEW_CLCTR - allocate a zeroed element collector for filter X and
 *             install html_element_collector as its token function
 */
#define	NEW_CLCTR(X)	{						\
			   ED(X) = (CLCTR_S *)fs_get(sizeof(CLCTR_S));  \
			   memset(ED(X), 0, sizeof(CLCTR_S));	\
			   HD(X)->token = html_element_collector;	\
			 }

/*
 * FREE_CLCTR - free filter X's collector: every attribute (name,
 *              value and node), the element name, then the collector
 *              itself; finally clear the token function.
 *              ED(X) must not be NULL when this is used.
 */
#define	FREE_CLCTR(X)	{						\
			   if(ED(X)->attribs){				\
			       PARAMETER *p;				\
			       while((p = ED(X)->attribs) != NULL){	\
				   ED(X)->attribs = ED(X)->attribs->next; \
				   if(p->attribute)			\
				     fs_give((void **)&p->attribute);	\
				   if(p->value)				\
				     fs_give((void **)&p->value);	\
				   fs_give((void **)&p);		\
			       }					\
			   }						\
			   if(ED(X)->element)				\
			     fs_give((void **) &ED(X)->element);	\
			   fs_give((void **) &ED(X));			\
			   HD(X)->token = NULL;				\
			 }
/* shorthand for members of filter X's HTML_DATA_S */
#define	HANDLERS(X)	(HD(X)->h_stack)
#define	BOLD_BIT(X)	(HD(X)->bold_on)
#define	ULINE_BIT(X)	(HD(X)->uline_on)
#define	CENTER_BIT(X)	(HD(X)->center)
/*
 * HTML_FLUSH - hand the text collected in X->line to html_write(),
 *              then empty the line buffer and zero X->f2
 */
#define	HTML_FLUSH(X)	{						    \
			   html_write(X, (X)->line, (X)->linep - (X)->line); \
			   (X)->linep = (X)->line;			    \
			   (X)->f2 = 0L;   				    \
			 }
/*
 * HTML_BOLD, HTML_ULINE, HTML_ITALIC, HTML_STRIKE, HTML_BIG and
 * HTML_SMALL all follow one pattern: unless STRIP(X) is set, write
 * TAG_EMBED followed by the matching "on" tag when S is true or the
 * matching "off" tag when it is false.
 *
 * NOTE(review): each expands to a bare "if" statement, so using one
 * as the unbraced body of an if/else will capture the else.
 */
#define	HTML_BOLD(X, S) if(! STRIP(X)){					\
			   if((S)){					\
			       html_output((X), TAG_EMBED);		\
			       html_output((X), TAG_BOLDON);		\
			   }						\
			   else if(!(S)){				\
			       html_output((X), TAG_EMBED);		\
			       html_output((X), TAG_BOLDOFF);		\
			   }						\
			 }
#define	HTML_ULINE(X, S)						\
			 if(! STRIP(X)){				\
			   if((S)){					\
			       html_output((X), TAG_EMBED);		\
			       html_output((X), TAG_ULINEON);		\
			   }						\
			   else if(!(S)){				\
			       html_output((X), TAG_EMBED);		\
			       html_output((X), TAG_ULINEOFF);		\
			   }						\
			 }
#define	HTML_ITALIC(X, S)						\
			 if(! STRIP(X)){				\
			   if(S){					\
			       html_output((X), TAG_EMBED);		\
			       html_output((X), TAG_ITALICON);		\
			   }						\
			   else if(!(S)){				\
			       html_output((X), TAG_EMBED);		\
			       html_output((X), TAG_ITALICOFF);		\
			   }						\
			 }
#define	HTML_STRIKE(X, S)						\
			 if(! STRIP(X)){				\
			   if(S){					\
			       html_output((X), TAG_EMBED);		\
			       html_output((X), TAG_STRIKEON);		\
			   }						\
			   else if(!(S)){				\
			       html_output((X), TAG_EMBED);		\
			       html_output((X), TAG_STRIKEOFF);		\
			   }						\
			 }
#define	HTML_BIG(X, S)							\
			 if(! STRIP(X)){				\
			   if(S){					\
			       html_output((X), TAG_EMBED);		\
			       html_output((X), TAG_BIGON);		\
			   }						\
			   else if(!(S)){				\
			       html_output((X), TAG_EMBED);		\
			       html_output((X), TAG_BIGOFF);		\
			   }						\
			 }
#define	HTML_SMALL(X, S)							\
			 if(! STRIP(X)){				\
			   if(S){					\
			       html_output((X), TAG_EMBED);		\
			       html_output((X), TAG_SMALLON);		\
			   }						\
			   else if(!(S)){				\
			       html_output((X), TAG_EMBED);		\
			       html_output((X), TAG_SMALLOFF);		\
			   }						\
			 }
/*
 * WRAPPED_LEN - displayed width of the centered text held for filter
 *               X: the line's width plus the pending word's width,
 *               plus one for the separating space when both are
 *               non-empty.  Zero when X has no centering data.
 *
 *               The expansion uses its argument rather than assuming
 *               the caller has a variable named "f" in scope.
 */
#define WRAPPED_LEN(X)	((HD(X)->centered) \
			    ? (HD(X)->centered->line.width \
				+ HD(X)->centered->word.width \
				+ ((HD(X)->centered->line.width \
				    && HD(X)->centered->word.width) \
				    ? 1 : 0)) \
			    : 0)
/*
 * HTML_DUMP_LIT - send the L bytes at S thru HTML_TEXT; bytes that
 *                 are not ASCII white space are tagged as literals
 *                 first (see MAKE_LITERAL)
 */
#define	HTML_DUMP_LIT(F, S, L)	{					    \
				   int i, c;				    \
				   for(i = 0; i < (L); i++){		    \
				       c = ASCII_ISSPACE((unsigned char)(S)[i])   \
					     ? (S)[i]			    \
					     : MAKE_LITERAL((S)[i]);	    \
				       HTML_TEXT(F, c);			    \
				   }					    \
				 }
/*
 * HTML_PROC - process one input char C for filter F.
 *             While a token function is installed it gets the char:
 *               zero result     -- nothing more happens for this char
 *               negative result -- what was collected is written out
 *                                  as literal text ('<', element name,
 *                                  collected buffer, then C) and the
 *                                  collector is freed
 *               positive result -- the collector is freed
 *             With no token function, '<' starts a new collector and
 *             anything else goes to HTML_TEXT.
 */
#define	HTML_PROC(F, C) {						    \
			   if(HD(F)->token){				    \
			       int i;					    \
			       if((i = (*(HD(F)->token))(F, C)) != 0){	    \
				   if(i < 0){				    \
				       HTML_DUMP_LIT(F, "<", 1);	    \
				       if(HD(F)->el_data->element){	    \
					   HTML_DUMP_LIT(F,		    \
					    HD(F)->el_data->element,	    \
					    strlen(HD(F)->el_data->element));\
				       }				    \
				       if(HD(F)->el_data->len){		    \
					   HTML_DUMP_LIT(F,		    \
						    HD(F)->el_data->buf,    \
						    HD(F)->el_data->len);   \
				       }				    \
				       HTML_TEXT(F, C);			    \
				   }					    \
				   FREE_CLCTR(F);			    \
			       }					    \
			    }						    \
			    else if((C) == '<'){			    \
				NEW_CLCTR(F);				    \
			    }						    \
			    else					    \
			      HTML_TEXT(F, C);				    \
			  }
/*
 * HTML_LINEP_PUTC - append C to F->line, doubling the buffer (and
 *                   HD(F)->line_bufsize) when only one byte is left
 */
#define HTML_LINEP_PUTC(F, C) {						    \
		   if((F)->linep - (F)->line >= (HD(F)->line_bufsize - 1)){ \
		       size_t offset = (F)->linep - (F)->line;		    \
		       fs_resize((void **) &(F)->line,			    \
				 (HD(F)->line_bufsize * 2) * sizeof(char)); \
		       HD(F)->line_bufsize *= 2;			    \
		       (F)->linep = &(F)->line[offset];			    \
		   }							    \
		   *(F)->linep++ = (C);					    \
	       }
/*
 * HTML_TEXT - pass text char C along, coalescing white space.
 *             F->f1 is WSPACE while a run of white space is being
 *             swallowed; the run is replaced by a single ' ' when the
 *             next non-space char shows up.  In DFL state nothing is
 *             written while HD(F)->bitbucket is set, and white space
 *             only starts a run when HD(F)->wrapstate is set.
 */
#define	HTML_TEXT(F, C)	switch((F)->f1){				    \
			     case WSPACE :				    \
			       if(HTML_ISSPACE(C)) /* ignore repeated WS */  \
				 break;					    \
			       HTML_TEXT_OUT(F, ' ');	    \
			       (F)->f1 = DFL;/* stop sending chars here */   \
			       /* fall thru to process 'c' */		    \
			     case DFL:					    \
			       if(HD(F)->bitbucket)			    \
				 (F)->f1 = DFL;	/* no op */		    \
			       else if(HTML_ISSPACE(C) && HD(F)->wrapstate)  \
				 (F)->f1 = WSPACE;/* coalesce white space */ \
			       else HTML_TEXT_OUT(F, C);		    \
			       break;					    \
			 }
/*
 * HTML_TEXT_OUT - give C to the handler on top of the stack (as
 *                 GF_DATA) if there is one, else to html_output().
 *                 Note the expansion already ends in a semicolon.
 */
#define	HTML_TEXT_OUT(F, C) if(HANDLERS(F)) /* let handlers see C */	    \
			      (*EL(HANDLERS(F))->handler)(HANDLERS(F),(C),GF_DATA); \
			     else					    \
			       html_output(F, C);
#ifdef	DEBUG
/*
 * HTML_DEBUG_EL - debug trace for a collected element.
 *
 * Logs label S (or "?" when S is NULL) together with (D)->element at
 * debug level 5.  When the global debug level exceeds 5, each entry of
 * the (D)->attribs list is also logged at level 6, stopping at the
 * first entry without an attribute name.  Expands to nothing unless
 * DEBUG is defined.
 */
#define	HTML_DEBUG_EL(S, D)   {						    \
	dprint((5, "-- html %s: %s\n",					    \
		S ? S : "?",						    \
		(D)->element ? (D)->element : "NULL"));			    \
	if(debug > 5){							    \
	    PARAMETER *p = (D)->attribs;				    \
	    while(p && p->attribute){					    \
		dprint((6, " PARM: %s%s%s\n",				    \
			p->attribute ? p->attribute : "NULL",		    \
			p->value ? "=" : "",				    \
			p->value ? p->value : ""));			    \
		p = p->next;						    \
	    }								    \
	}								    \
    }
#else
#define	HTML_DEBUG_EL(S, D)
#endif
3147 
/* Fallback location of the info file when the build does not supply one */
#ifndef SYSTEM_PINE_INFO_PATH
#define SYSTEM_PINE_INFO_PATH "/usr/local/lib/pine.info"
#endif

/*
 * CHTML_VAR_EXPAND - map the variable name "PINE_INFO_PATH" to
 * SYSTEM_PINE_INFO_PATH; any other string is returned unchanged.
 * S must be a non-NULL string and is evaluated twice.
 */
#define CHTML_VAR_EXPAND(S) (strcmp((S), "PINE_INFO_PATH") == 0	    \
			     ? SYSTEM_PINE_INFO_PATH : (S))
3153 
3154 /*
3155  * Protos for Tag handlers
3156  */
3157 int	html_head(HANDLER_S *, int, int);
3158 int	html_base(HANDLER_S *, int, int);
3159 int	html_title(HANDLER_S *, int, int);
3160 int	html_body(HANDLER_S *, int, int);
3161 int	html_a(HANDLER_S *, int, int);
3162 int	html_br(HANDLER_S *, int, int);
3163 int	html_hr(HANDLER_S *, int, int);
3164 int	html_p(HANDLER_S *, int, int);
3165 int	html_table(HANDLER_S *, int, int);
3166 int	html_caption(HANDLER_S *, int, int);
3167 int	html_tr(HANDLER_S *, int, int);
3168 int	html_td(HANDLER_S *, int, int);
3169 int	html_th(HANDLER_S *, int, int);
3170 int	html_thead(HANDLER_S *, int, int);
3171 int	html_tbody(HANDLER_S *, int, int);
3172 int	html_tfoot(HANDLER_S *, int, int);
3173 int	html_col(HANDLER_S *, int, int);
3174 int	html_colgroup(HANDLER_S *, int, int);
3175 int	html_b(HANDLER_S *, int, int);
3176 int	html_u(HANDLER_S *, int, int);
3177 int	html_i(HANDLER_S *, int, int);
3178 int	html_em(HANDLER_S *, int, int);
3179 int	html_strong(HANDLER_S *, int, int);
3180 int	html_s(HANDLER_S *, int, int);
3181 int	html_big(HANDLER_S *, int, int);
3182 int	html_small(HANDLER_S *, int, int);
3183 int	html_font(HANDLER_S *, int, int);
3184 int	html_img(HANDLER_S *, int, int);
3185 int	html_map(HANDLER_S *, int, int);
3186 int	html_area(HANDLER_S *, int, int);
3187 int	html_form(HANDLER_S *, int, int);
3188 int	html_input(HANDLER_S *, int, int);
3189 int	html_option(HANDLER_S *, int, int);
3190 int	html_optgroup(HANDLER_S *, int, int);
3191 int	html_button(HANDLER_S *, int, int);
3192 int	html_select(HANDLER_S *, int, int);
3193 int	html_textarea(HANDLER_S *, int, int);
3194 int	html_label(HANDLER_S *, int, int);
3195 int	html_fieldset(HANDLER_S *, int, int);
3196 int	html_ul(HANDLER_S *, int, int);
3197 int	html_ol(HANDLER_S *, int, int);
3198 int	html_menu(HANDLER_S *, int, int);
3199 int	html_dir(HANDLER_S *, int, int);
3200 int	html_li(HANDLER_S *, int, int);
3201 int	html_h1(HANDLER_S *, int, int);
3202 int	html_h2(HANDLER_S *, int, int);
3203 int	html_h3(HANDLER_S *, int, int);
3204 int	html_h4(HANDLER_S *, int, int);
3205 int	html_h5(HANDLER_S *, int, int);
3206 int	html_h6(HANDLER_S *, int, int);
3207 int	html_blockquote(HANDLER_S *, int, int);
3208 int	html_address(HANDLER_S *, int, int);
3209 int	html_pre(HANDLER_S *, int, int);
3210 int	html_center(HANDLER_S *, int, int);
3211 int	html_div(HANDLER_S *, int, int);
3212 int	html_span(HANDLER_S *, int, int);
3213 int	html_dl(HANDLER_S *, int, int);
3214 int	html_dt(HANDLER_S *, int, int);
3215 int	html_dd(HANDLER_S *, int, int);
3216 int	html_script(HANDLER_S *, int, int);
3217 int	html_applet(HANDLER_S *, int, int);
3218 int	html_style(HANDLER_S *, int, int);
3219 int	html_kbd(HANDLER_S *, int, int);
3220 int	html_dfn(HANDLER_S *, int, int);
3221 int	html_var(HANDLER_S *, int, int);
3222 int	html_tt(HANDLER_S *, int, int);
3223 int	html_samp(HANDLER_S *, int, int);
3224 int	html_sub(HANDLER_S *, int, int);
3225 int	html_sup(HANDLER_S *, int, int);
3226 int	html_cite(HANDLER_S *, int, int);
3227 int	html_code(HANDLER_S *, int, int);
3228 int	html_ins(HANDLER_S *, int, int);
3229 int	html_del(HANDLER_S *, int, int);
3230 int	html_abbr(HANDLER_S *, int, int);
3231 char   *cid_tempfile_name(char *, long, int *);
3232 
3233 /*
3234  * Protos for RSS 2.0 Tag handlers
3235  */
3236 int	rss_rss(HANDLER_S *, int, int);
3237 int	rss_channel(HANDLER_S *, int, int);
3238 int	rss_title(HANDLER_S *, int, int);
3239 int	rss_image(HANDLER_S *, int, int);
3240 int	rss_link(HANDLER_S *, int, int);
3241 int	rss_description(HANDLER_S *, int, int);
3242 int	rss_ttl(HANDLER_S *, int, int);
3243 int	rss_item(HANDLER_S *, int, int);
3244 
3245 /*
3246  * Proto's for support routines
3247  */
3248 void	  html_pop(FILTER_S *, ELPROP_S *);
3249 int	  html_push(FILTER_S *, ELPROP_S *);
3250 int	  html_element_collector(FILTER_S *, int);
3251 int	  html_element_flush(CLCTR_S *);
3252 void	  html_element_comment(FILTER_S *, char *);
3253 void	  html_element_output(FILTER_S *, int);
3254 int	  html_entity_collector(FILTER_S *, int, UCS *, char **);
3255 void	  html_a_prefix(FILTER_S *);
3256 void	  html_a_finish(HANDLER_S *);
3257 void	  html_a_output_prefix(FILTER_S *, int);
3258 void	  html_a_output_info(HANDLER_S *);
3259 void	  html_a_relative(char *, char *, HANDLE_S *);
3260 int	  html_href_relative(char *);
3261 int	  html_indent(FILTER_S *, int, int);
3262 void	  html_blank(FILTER_S *, int);
3263 void	  html_newline(FILTER_S *);
3264 void	  html_output(FILTER_S *, int);
3265 void	  html_output_string(FILTER_S *, char *);
3266 void	  html_output_raw_tag(FILTER_S *, char *);
3267 void	  html_output_normal(FILTER_S *, int, int, int);
3268 void	  html_output_flush(FILTER_S *);
3269 void	  html_output_centered(FILTER_S *, int, int, int);
3270 void	  html_centered_handle(int *, char *, int);
3271 void	  html_centered_putc(WRAPLINE_S *, int);
3272 void	  html_centered_flush(FILTER_S *);
3273 void	  html_centered_flush_line(FILTER_S *);
3274 void	  html_write_anchor(FILTER_S *, int);
3275 void	  html_write_newline(FILTER_S *);
3276 void	  html_write_indent(FILTER_S *, int);
3277 void	  html_write(FILTER_S *, char *, int);
3278 void	  html_putc(FILTER_S *, int);
3279 int	  html_event_attribute(char *);
3280 char	 *rss_skip_whitespace(char *s);
3281 ELPROP_S *element_properties(FILTER_S *, char *);
3282 
3283 
3284 /*
3285  * Named entity table -- most from HTML 2.0 (rfc1866) plus some from
3286  *			 W3C doc "Additional named entities for HTML"
3287  */
3288 static struct html_entities {
3289     char *name;			/* entity name */
3290     UCS   value;		/* UCS entity value */
3291     char  *plain;		/* US-ASCII representation */
3292 } entity_tab[] = {
3293     {"quot",		0x0022},	    /* 34 - quotation mark */
3294     {"amp",		0x0026},	    /* 38 - ampersand */
3295     {"apos",		0x0027},	    /* 39 - apostrophe */
3296     {"lt",		0x003C},	    /* 60 - less-than sign */
3297     {"gt",		0x003E},	    /* 62 - greater-than sign */
3298     {"nbsp",		0x00A0, " "},	    /* 160 - no-break space */
3299     {"iexcl",		0x00A1},	    /* 161 - inverted exclamation mark */
3300     {"cent",		0x00A2},	    /* 162 - cent sign */
3301     {"pound",		0x00A3},	    /* 163 - pound sign */
3302     {"curren",		0x00A4, "CUR"},	    /* 164 - currency sign */
3303     {"yen",		0x00A5},	    /* 165 - yen sign */
3304     {"brvbar",		0x00A6, "|"},	    /* 166 - broken bar */
3305     {"sect",		0x00A7},	    /* 167 - section sign */
3306     {"uml",		0x00A8, "\""},	    /* 168 - diaeresis */
3307     {"copy",		0x00A9, "(C)"},	    /* 169 - copyright sign */
3308     {"ordf",		0x00AA, "a"},	    /* 170 - feminine ordinal indicator */
3309     {"laquo",		0x00AB, "<<"},	    /* 171 - left-pointing double angle quotation mark */
3310     {"not",		0x00AC, "NOT"},	    /* 172 - not sign */
3311     {"shy",		0x00AD, "-"},	    /* 173 - soft hyphen */
3312     {"reg",		0x00AE, "(R)"},	    /* 174 - registered sign */
3313     {"macr",		0x00AF},	    /* 175 - macron */
3314     {"deg",		0x00B0, "DEG"},	    /* 176 - degree sign */
3315     {"plusmn",		0x00B1, "+/-"},	    /* 177 - plus-minus sign */
3316     {"sup2",		0x00B2},	    /* 178 - superscript two */
3317     {"sup3",		0x00B3},	    /* 179 - superscript three */
3318     {"acute",		0x00B4, "'"},	    /* 180 - acute accent */
3319     {"micro",		0x00B5},	    /* 181 - micro sign */
3320     {"para",		0x00B6},	    /* 182 - pilcrow sign */
3321     {"middot",		0x00B7},	    /* 183 - middle dot */
3322     {"cedil",		0x00B8},	    /* 184 - cedilla */
3323     {"sup1",		0x00B9},	    /* 185 - superscript one */
3324     {"ordm",		0x00BA, "o"},	    /* 186 - masculine ordinal indicator */
3325     {"raquo",		0x00BB, ">>"},	    /* 187 - right-pointing double angle quotation mark */
3326     {"frac14",		0x00BC, " 1/4"},    /* 188 - vulgar fraction one quarter */
3327     {"frac12",		0x00BD, " 1/2"},    /* 189 - vulgar fraction one half */
3328     {"frac34",		0x00BE, " 3/4"},    /* 190 - vulgar fraction three quarters */
3329     {"iquest",		0x00BF},	    /* 191 - inverted question mark */
3330     {"Agrave",		0x00C0, "A"},	    /* 192 - latin capital letter a with grave */
3331     {"Aacute",		0x00C1, "A"},	    /* 193 - latin capital letter a with acute */
3332     {"Acirc",		0x00C2, "A"},	    /* 194 - latin capital letter a with circumflex */
3333     {"Atilde",		0x00C3, "A"},	    /* 195 - latin capital letter a with tilde */
3334     {"Auml",		0x00C4, "AE"},	    /* 196 - latin capital letter a with diaeresis */
3335     {"Aring",		0x00C5, "A"},	    /* 197 - latin capital letter a with ring above */
3336     {"AElig",		0x00C6, "AE"},	    /* 198 - latin capital letter ae */
3337     {"Ccedil",		0x00C7, "C"},	    /* 199 - latin capital letter c with cedilla */
3338     {"Egrave",		0x00C8, "E"},	    /* 200 - latin capital letter e with grave */
3339     {"Eacute",		0x00C9, "E"},	    /* 201 - latin capital letter e with acute */
3340     {"Ecirc",		0x00CA, "E"},	    /* 202 - latin capital letter e with circumflex */
3341     {"Euml",		0x00CB, "E"},	    /* 203 - latin capital letter e with diaeresis */
3342     {"Igrave",		0x00CC, "I"},	    /* 204 - latin capital letter i with grave */
3343     {"Iacute",		0x00CD, "I"},	    /* 205 - latin capital letter i with acute */
3344     {"Icirc",		0x00CE, "I"},	    /* 206 - latin capital letter i with circumflex */
3345     {"Iuml",		0x00CF, "I"},	    /* 207 - latin capital letter i with diaeresis */
3346     {"ETH",		0x00D0, "DH"},	    /* 208 - latin capital letter eth */
3347     {"Ntilde",		0x00D1, "N"},	    /* 209 - latin capital letter n with tilde */
3348     {"Ograve",		0x00D2, "O"},	    /* 210 - latin capital letter o with grave */
3349     {"Oacute",		0x00D3, "O"},	    /* 211 - latin capital letter o with acute */
3350     {"Ocirc",		0x00D4, "O"},	    /* 212 - latin capital letter o with circumflex */
3351     {"Otilde",		0x00D5, "O"},	    /* 213 - latin capital letter o with tilde */
3352     {"Ouml",		0x00D6, "O"},	    /* 214 - latin capital letter o with diaeresis */
3353     {"times",		0x00D7, "x"},	    /* 215 - multiplication sign */
3354     {"Oslash",		0x00D8, "O"},	    /* 216 - latin capital letter o with stroke */
3355     {"Ugrave",		0x00D9, "U"},	    /* 217 - latin capital letter u with grave */
3356     {"Uacute",		0x00DA, "U"},	    /* 218 - latin capital letter u with acute */
3357     {"Ucirc",		0x00DB, "U"},	    /* 219 - latin capital letter u with circumflex */
3358     {"Uuml",		0x00DC, "UE"},	    /* 220 - latin capital letter u with diaeresis */
3359     {"Yacute",		0x00DD, "Y"},	    /* 221 - latin capital letter y with acute */
3360     {"THORN",		0x00DE, "P"},	    /* 222 - latin capital letter thorn */
3361     {"szlig",		0x00DF, "ss"},	    /* 223 - latin small letter sharp s (German <a href="/wiki/Eszett" title="Eszett">Eszett</a>) */
3362     {"agrave",		0x00E0, "a"},	    /* 224 - latin small letter a with grave */
3363     {"aacute",		0x00E1, "a"},	    /* 225 - latin small letter a with acute */
3364     {"acirc",		0x00E2, "a"},	    /* 226 - latin small letter a with circumflex */
3365     {"atilde",		0x00E3, "a"},	    /* 227 - latin small letter a with tilde */
3366     {"auml",		0x00E4, "ae"},	    /* 228 - latin small letter a with diaeresis */
3367     {"aring",		0x00E5, "a"},	    /* 229 - latin small letter a with ring above */
3368     {"aelig",		0x00E6, "ae"},	    /* 230 - latin lowercase ligature ae */
3369     {"ccedil",		0x00E7, "c"},	    /* 231 - latin small letter c with cedilla */
3370     {"egrave",		0x00E8, "e"},	    /* 232 - latin small letter e with grave */
3371     {"eacute",		0x00E9, "e"},	    /* 233 - latin small letter e with acute */
3372     {"ecirc",		0x00EA, "e"},	    /* 234 - latin small letter e with circumflex */
3373     {"euml",		0x00EB, "e"},	    /* 235 - latin small letter e with diaeresis */
3374     {"igrave",		0x00EC, "i"},	    /* 236 - latin small letter i with grave */
3375     {"iacute",		0x00ED, "i"},	    /* 237 - latin small letter i with acute */
3376     {"icirc",		0x00EE, "i"},	    /* 238 - latin small letter i with circumflex */
3377     {"iuml",		0x00EF, "i"},	    /* 239 - latin small letter i with diaeresis */
3378     {"eth",		0x00F0, "dh"},	    /* 240 - latin small letter eth */
3379     {"ntilde",		0x00F1, "n"},	    /* 241 - latin small letter n with tilde */
3380     {"ograve",		0x00F2, "o"},	    /* 242 - latin small letter o with grave */
3381     {"oacute",		0x00F3, "o"},	    /* 243 - latin small letter o with acute */
3382     {"ocirc",		0x00F4, "o"},	    /* 244 - latin small letter o with circumflex */
3383     {"otilde",		0x00F5, "o"},	    /* 245 - latin small letter o with tilde */
3384     {"ouml",		0x00F6, "oe"},	    /* 246 - latin small letter o with diaeresis */
3385     {"divide",		0x00F7, "/"},	    /* 247 - division sign */
3386     {"oslash",		0x00F8, "o"},	    /* 248 - latin small letter o with stroke */
3387     {"ugrave",		0x00F9, "u"},	    /* 249 - latin small letter u with grave */
3388     {"uacute",		0x00FA, "u"},	    /* 250 - latin small letter u with acute */
3389     {"ucirc",		0x00FB, "u"},	    /* 251 - latin small letter u with circumflex */
3390     {"uuml",		0x00FC, "ue"},	    /* 252 - latin small letter u with diaeresis */
3391     {"yacute",		0x00FD, "y"},	    /* 253 - latin small letter y with acute */
3392     {"thorn",		0x00FE, "p"},	    /* 254 - latin small letter thorn */
3393     {"yuml",		0x00FF, "y"},	    /* 255 - latin small letter y with diaeresis */
3394     {"OElig",		0x0152, "OE"},	    /* 338 - latin capital ligature oe */
3395     {"oelig",		0x0153, "oe"},	    /* 339 - latin small ligature oe */
3396     {"Scaron",		0x0160, "S"},	    /* 352 - latin capital letter s with caron */
3397     {"scaron",		0x0161, "s"},	    /* 353 - latin small letter s with caron */
3398     {"Yuml",		0x0178, "Y"},	    /* 376 - latin capital letter y with diaeresis */
3399     {"fnof",		0x0192, "f"},	    /* 402 - latin small letter f with hook */
3400     {"circ",		0x02C6},	    /* 710 - modifier letter circumflex accent */
3401     {"tilde",		0x02DC, "~"},	    /* 732 - small tilde */
3402     {"Alpha",		0x0391},	    /* 913 - greek capital letter alpha */
3403     {"Beta",		0x0392},	    /* 914 - greek capital letter beta */
3404     {"Gamma",		0x0393},	    /* 915 - greek capital letter gamma */
3405     {"Delta",		0x0394},	    /* 916 - greek capital letter delta */
3406     {"Epsilon",		0x0395},	    /* 917 - greek capital letter epsilon */
3407     {"Zeta",		0x0396},	    /* 918 - greek capital letter zeta */
3408     {"Eta",		0x0397},	    /* 919 - greek capital letter eta */
3409     {"Theta",		0x0398},	    /* 920 - greek capital letter theta */
3410     {"Iota",		0x0399},	    /* 921 - greek capital letter iota */
3411     {"Kappa",		0x039A},	    /* 922 - greek capital letter kappa */
3412     {"Lambda",		0x039B},	    /* 923 - greek capital letter lamda */
3413     {"Mu",		0x039C},	    /* 924 - greek capital letter mu */
3414     {"Nu",		0x039D},	    /* 925 - greek capital letter nu */
3415     {"Xi",		0x039E},	    /* 926 - greek capital letter xi */
3416     {"Omicron",		0x039F},	    /* 927 - greek capital letter omicron */
3417     {"Pi",		0x03A0},	    /* 928 - greek capital letter pi */
3418     {"Rho",		0x03A1},	    /* 929 - greek capital letter rho */
3419     {"Sigma",		0x03A3},	    /* 931 - greek capital letter sigma */
3420     {"Tau",		0x03A4},	    /* 932 - greek capital letter tau */
3421     {"Upsilon",		0x03A5},	    /* 933 - greek capital letter upsilon */
3422     {"Phi",		0x03A6},	    /* 934 - greek capital letter phi */
3423     {"Chi",		0x03A7},	    /* 935 - greek capital letter chi */
3424     {"Psi",		0x03A8},	    /* 936 - greek capital letter psi */
3425     {"Omega",		0x03A9},	    /* 937 - greek capital letter omega */
3426     {"alpha",		0x03B1},	    /* 945 - greek small letter alpha */
3427     {"beta",		0x03B2},	    /* 946 - greek small letter beta */
3428     {"gamma",		0x03B3},	    /* 947 - greek small letter gamma */
3429     {"delta",		0x03B4},	    /* 948 - greek small letter delta */
3430     {"epsilon",		0x03B5},	    /* 949 - greek small letter epsilon */
3431     {"zeta",		0x03B6},	    /* 950 - greek small letter zeta */
3432     {"eta",		0x03B7},	    /* 951 - greek small letter eta */
3433     {"theta",		0x03B8},	    /* 952 - greek small letter theta */
3434     {"iota",		0x03B9},	    /* 953 - greek small letter iota */
3435     {"kappa",		0x03BA},	    /* 954 - greek small letter kappa */
3436     {"lambda",		0x03BB},	    /* 955 - greek small letter lamda */
3437     {"mu",		0x03BC},	    /* 956 - greek small letter mu */
3438     {"nu",		0x03BD},	    /* 957 - greek small letter nu */
3439     {"xi",		0x03BE},	    /* 958 - greek small letter xi */
3440     {"omicron",		0x03BF},	    /* 959 - greek small letter omicron */
3441     {"pi",		0x03C0},	    /* 960 - greek small letter pi */
3442     {"rho",		0x03C1},	    /* 961 - greek small letter rho */
3443     {"sigmaf",		0x03C2},	    /* 962 - greek small letter final sigma */
3444     {"sigma",		0x03C3},	    /* 963 - greek small letter sigma */
3445     {"tau",		0x03C4},	    /* 964 - greek small letter tau */
3446     {"upsilon",		0x03C5},	    /* 965 - greek small letter upsilon */
3447     {"phi",		0x03C6},	    /* 966 - greek small letter phi */
3448     {"chi",		0x03C7},	    /* 967 - greek small letter chi */
3449     {"psi",		0x03C8},	    /* 968 - greek small letter psi */
3450     {"omega",		0x03C9},	    /* 969 - greek small letter omega */
3451     {"thetasym",	0x03D1},	    /* 977 - greek theta symbol */
3452     {"upsih",		0x03D2},	    /* 978 - greek upsilon with hook symbol */
3453     {"piv",		0x03D6},	    /* 982 - greek pi symbol */
3454     {"ensp",		0x2002},	    /* 8194 - en space */
3455     {"emsp",		0x2003},	    /* 8195 - em space */
3456     {"thinsp",		0x2009},	    /* 8201 - thin space */
3457     {"zwnj",		0x200C},	    /* 8204 - zero width non-joiner */
3458     {"zwj",		0x200D},	    /* 8205 - zero width joiner */
3459     {"lrm",		0x200E},	    /* 8206 - left-to-right mark */
3460     {"rlm",		0x200F},	    /* 8207 - right-to-left mark */
3461     {"ndash",		0x2013},	    /* 8211 - en dash */
3462     {"mdash",		0x2014},	    /* 8212 - em dash */
3463     {"#8213",		 0x2015, "--"},	    /* 2015 - horizontal bar */
3464     {"#8214",		 0x2016, "||"},	    /* 2016 - double vertical line */
3465     {"#8215",		 0x2017, "__"},	    /* 2017 - double low line */
3466     {"lsquo",		0x2018},	    /* 8216 - left single quotation mark */
3467     {"rsquo",		0x2019},	    /* 8217 - right single quotation mark */
3468     {"sbquo",		0x201A},	    /* 8218 - single low-9 quotation mark */
3469     {"ldquo",		0x201C},	    /* 8220 - left double quotation mark */
3470     {"rdquo",		0x201D},	    /* 8221 - right double quotation mark */
3471     {"bdquo",		0x201E, ",,"},	    /* 8222 - double low-9 quotation mark */
3472     {"#8223",		0x201F, "``"},	    /* 201F -  double high reversed-9 quotation mark  */
3473     {"dagger",		0x2020},	    /* 8224 - dagger */
3474     {"Dagger",		0x2021},	    /* 8225 - double dagger */
3475     {"bull",		0x2022, "*"},	    /* 8226 - bullet */
3476     {"hellip",		0x2026},	    /* 8230 - horizontal ellipsis */
3477     {"permil",		0x2030},	    /* 8240 - per mille sign */
3478     {"prime",		0x2032, "\'"},	    /* 8242 - prime */
3479     {"Prime",		0x2033, "\'\'"},    /* 8243 - double prime */
3480     {"#8244",		0x2034, "\'\'\'"},  /* 2034 - triple prime */
3481     {"lsaquo",		0x2039},	    /* 8249 - single left-pointing angle quotation mark */
3482     {"rsaquo",		0x203A},	    /* 8250 - single right-pointing angle quotation mark */
3483     {"#8252",		0x203C, "!!"},	    /* 203C - double exclamation mark */
3484     {"oline",		0x203E, "-"},	    /* 8254 - overline */
3485     {"frasl",		0x2044},	    /* 8260 - fraction slash */
3486     {"#8263",		0x2047, "??"},	    /* 2047 - double question mark */
3487     {"#8264",		0x2048, "?!"},	    /* 2048 - question exclamation mark */
3488     {"#8265",		0x2049, "!?"},	    /* 2049 - exclamation question mark */
3489     {"#8279",		0x2057, "\'\'\'\'"}, /* 2057 - quad prime */
3490     {"euro",		0x20AC, "EUR"},	    /* 8364 - euro sign */
3491     {"image",		0x2111},	    /* 8465 - black-letter capital i */
3492     {"weierp",		0x2118},	    /* 8472 - script capital p (<a href="/wiki/Weierstrass" title="Weierstrass">Weierstrass</a> p) */
3493     {"real",		0x211C},	    /* 8476 - black-letter capital r */
3494     {"trade",		0x2122, "[tm]"},    /* 8482 - trademark sign */
3495     {"alefsym",		0x2135},	    /* 8501 - alef symbol */
3496     {"larr",		0x2190},	    /* 8592 - leftwards arrow */
3497     {"uarr",		0x2191},	    /* 8593 - upwards arrow */
3498     {"rarr",		0x2192},	    /* 8594 - rightwards arrow */
3499     {"darr",		0x2193},	    /* 8595 - downwards arrow */
3500     {"harr",		0x2194},	    /* 8596 - left right arrow */
3501     {"crarr",		0x21B5},	    /* 8629 - downwards arrow with corner leftwards */
3502     {"lArr",		0x21D0},	    /* 8656 - leftwards double arrow */
3503     {"uArr",		0x21D1},	    /* 8657 - upwards double arrow */
3504     {"rArr",		0x21D2},	    /* 8658 - rightwards double arrow */
3505     {"dArr",		0x21D3},	    /* 8659 - downwards double arrow */
3506     {"hArr",		0x21D4},	    /* 8660 - left right double arrow */
3507     {"forall",		0x2200},	    /* 8704 - for all */
3508     {"part",		0x2202},	    /* 8706 - partial differential */
3509     {"exist",		0x2203},	    /* 8707 - there exists */
3510     {"empty",		0x2205},	    /* 8709 - empty set */
3511     {"nabla",		0x2207},	    /* 8711 - nabla */
3512     {"isin",		0x2208},	    /* 8712 - element of */
3513     {"notin",		0x2209},	    /* 8713 - not an element of */
3514     {"ni",		0x220B},	    /* 8715 - contains as member */
3515     {"prod",		0x220F},	    /* 8719 - n-ary product */
3516     {"sum",		0x2211},	    /* 8721 - n-ary summation */
3517     {"minus",		0x2212},	    /* 8722 - minus sign */
3518     {"lowast",		0x2217},	    /* 8727 - asterisk operator */
3519     {"radic",		0x221A},	    /* 8730 - square root */
3520     {"prop",		0x221D},	    /* 8733 - proportional to */
3521     {"infin",		0x221E},	    /* 8734 - infinity */
3522     {"ang",		0x2220},	    /* 8736 - angle */
3523     {"and",		0x2227},	    /* 8743 - logical and */
3524     {"or",		0x2228},	    /* 8744 - logical or */
3525     {"cap",		0x2229},	    /* 8745 - intersection */
3526     {"cup",		0x222A},	    /* 8746 - union */
3527     {"int",		0x222B},	    /* 8747 - integral */
3528     {"there4",		0x2234},	    /* 8756 - therefore */
3529     {"sim",		0x223C},	    /* 8764 - tilde operator */
3530     {"cong",		0x2245},	    /* 8773 - congruent to */
3531     {"asymp",		0x2248},	    /* 8776 - almost equal to */
3532     {"ne",		0x2260},	    /* 8800 - not equal to */
3533     {"equiv",		0x2261},	    /* 8801 - identical to (equivalent to) */
3534     {"le",		0x2264},	    /* 8804 - less-than or equal to */
3535     {"ge",		0x2265},	    /* 8805 - greater-than or equal to */
3536     {"sub",		0x2282},	    /* 8834 - subset of */
3537     {"sup",		0x2283},	    /* 8835 - superset of */
3538     {"nsub",		0x2284},	    /* 8836 - not a subset of */
3539     {"sube",		0x2286},	    /* 8838 - subset of or equal to */
3540     {"supe",		0x2287},	    /* 8839 - superset of or equal to */
3541     {"oplus",		0x2295},	    /* 8853 - circled plus */
3542     {"otimes",		0x2297},	    /* 8855 - circled times */
3543     {"perp",		0x22A5},	    /* 8869 - up tack */
3544     {"sdot",		0x22C5},	    /* 8901 - dot operator */
3545     {"lceil",		0x2308},	    /* 8968 - left ceiling */
3546     {"rceil",		0x2309},	    /* 8969 - right ceiling */
3547     {"lfloor",		0x230A},	    /* 8970 - left floor */
3548     {"rfloor",		0x230B},	    /* 8971 - right floor */
3549     {"lang",		0x2329},	    /* 9001 - left-pointing angle bracket */
3550     {"rang",		0x232A},	    /* 9002 - right-pointing angle bracket */
3551     {"loz",		0x25CA},	    /* 9674 - lozenge */
3552     {"spades",		0x2660},	    /* 9824 - black spade suit */
3553     {"clubs",		0x2663},	    /* 9827 - black club suit */
3554     {"hearts",		0x2665},	    /* 9829 - black heart suit */
3555     {"diams",		0x2666}		    /* 9830 - black diamond suit */
3556 };
3557 
3558 
3559 /*
3560  * Table of supported elements and corresponding handlers
3561  */
3562 static ELPROP_S html_element_table[] = {
3563     {"HTML", 4},					/* HTML ignore if seen? */
3564     {"HEAD", 4,		html_head},		/* slurp until <BODY> ? */
3565     {"TITLE", 5,	html_title},		/* Document Title */
3566     {"BASE", 4,		html_base},		/* HREF base */
3567     {"BODY", 4,		html_body},		/* HTML BODY */
3568     {"A", 1,		html_a},		/* Anchor */
3569     {"ABBR", 4,		html_abbr},		/* Abbreviation */
3570     {"IMG", 3,		html_img},		/* Image */
3571     {"MAP", 3,		html_map},		/* Image Map */
3572     {"AREA", 4,		html_area},		/* Image Map Area */
3573     {"HR", 2,		html_hr, 1, 1},		/* Horizontal Rule */
3574     {"BR", 2,		html_br, 0, 1},		/* Line Break */
3575     {"P", 1,		html_p, 1},		/* Paragraph */
3576     {"OL", 2,		html_ol, 1},		/* Ordered List */
3577     {"UL", 2,		html_ul, 1},		/* Unordered List */
3578     {"MENU", 4,		html_menu},		/* Menu List */
3579     {"DIR", 3,		html_dir},		/* Directory List */
3580     {"LI", 2,		html_li},		/*  ... List Item */
3581     {"DL", 2,		html_dl, 1},		/* Definition List */
3582     {"DT", 2,		html_dt},		/*  ... Def. Term */
3583     {"DD", 2,		html_dd},		/*  ... Def. Definition */
3584     {"I", 1,		html_i},		/* Italic Text */
3585     {"EM", 2, 		html_em},		/* Typographic Emphasis */
3586     {"STRONG", 6,	html_strong},		/* STRONG Typo Emphasis */
3587     {"VAR", 3,		html_i},		/* Variable Name */
3588     {"B", 1,		html_b},		/* Bold Text */
3589     {"U", 1,		html_u},		/* Underline Text */
3590     {"S", 1,		html_s},		/* Strike-Through Text */
3591     {"STRIKE", 6,	html_s},		/* Strike-Through Text */
3592     {"BIG", 3,		html_big},		/* Big Font Text */
3593     {"SMALL", 5,	html_small},		/* Small Font Text */
3594     {"FONT", 4,		html_font},		/* Font display directives */
3595     {"BLOCKQUOTE", 10, 	html_blockquote, 1}, 	/* Blockquote */
3596     {"ADDRESS",	7,	html_address, 1},	/* Address */
3597     {"CENTER", 6,	html_center},		/* Centered Text v3.2 */
3598     {"DIV", 3,		html_div, 1},		/* Document Division 3.2 */
3599     {"SPAN", 4,		html_span},		/* Text Span */
3600     {"H1", 2,		html_h1, 1},		/* Headings... */
3601     {"H2", 2,		html_h2, 1},
3602     {"H3", 2,		html_h3,1},
3603     {"H4", 2,		html_h4, 1},
3604     {"H5", 2,		html_h5, 1},
3605     {"H6", 2,		html_h6, 1},
3606     {"PRE", 3,		html_pre, 1},		/* Preformatted Text */
3607     {"KBD", 3,		html_kbd},		/* Keyboard Input (NO OP) */
3608     {"DFN", 3,		html_dfn},		/* Definition (NO OP) */
3609     {"VAR", 3,		html_var},		/* Variable (NO OP) */
3610     {"TT", 2,		html_tt},		/* Typetype (NO OP) */
3611     {"SAMP", 4,		html_samp},		/* Sample Text (NO OP) */
3612     {"CITE", 4,		html_cite},		/* Citation (NO OP) */
3613     {"CODE", 4,		html_code},		/* Code Text (NO OP) */
3614     {"INS", 3,		html_ins},		/* Text Inserted (NO OP) */
3615     {"DEL", 3,		html_del},		/* Text Deleted (NO OP) */
3616     {"SUP", 3,		html_sup},		/* Text Superscript (NO OP) */
3617     {"SUB", 3,		html_sub},		/* Text Superscript (NO OP) */
3618     {"STYLE", 5,	html_style},		/* CSS Definitions */
3619 
3620 /*----- Handlers below UNIMPLEMENTED (and won't until later) -----*/
3621 
3622     {"FORM", 4,		html_form, 1},		/* form within a document */
3623     {"INPUT", 5,	html_input},		/* One input field, options */
3624     {"BUTTON", 6,	html_button},		/* Push Button */
3625     {"OPTION", 6,	html_option},		/* One option within Select */
3626     {"OPTION", 6,	html_optgroup},		/* Option Group Definition */
3627     {"SELECT", 6,	html_select},		/* Selection from a set */
3628     {"TEXTAREA", 8,	html_textarea},		/* A multi-line input field */
3629     {"LABEL", 5,	html_label},		/* Control Label */
3630     {"FIELDSET", 8,	html_fieldset, 1},	/* Fieldset Control Group */
3631 
3632 /*----- Handlers below NEVER TO BE IMPLEMENTED -----*/
3633     {"SCRIPT", 6,	html_script},		/* Embedded scripting statements */
3634     {"APPLET", 6,	NULL},			/* Embedded applet statements */
3635     {"OBJECT", 6,	NULL},			/* Embedded object statements */
3636     {"LINK", 4,		NULL},			/* References to external data */
3637     {"PARAM", 5,	NULL},			/* Applet/Object parameters */
3638 
3639 /*----- Handlers below provide limited support for RFC 1942 Tables -----*/
3640 
3641     {"TABLE", 5,	html_table, 1},		/* Table */
3642     {"CAPTION", 7,		html_caption},		/* Table Caption */
3643     {"TR", 2,		html_tr},		/* Table Table Row */
3644     {"TD", 2,		html_td},		/* Table Table Data */
3645     {"TH", 2,		html_th},		/* Table Table Head */
3646     {"THEAD", 5,	html_thead},		/* Table Table Head */
3647     {"TBODY", 5,	html_tbody},		/* Table Table Body */
3648     {"TFOOT", 5,	html_tfoot},		/* Table Table Foot */
3649     {"COL", 3,		html_col},		/* Table Column Attributes */
3650     {"COLGROUP", 8,	html_colgroup},		/* Table Column Group Attributes */
3651 
3652     {NULL, 0,		NULL}
3653 };
3654 
3655 
3656 /*
3657  * Table of supported RSS 2.0 elements
3658  */
3659 static ELPROP_S rss_element_table[] = {
3660     {"RSS", 3,		rss_rss},		/* RSS 2.0 version */
3661     {"CHANNEL", 7,	rss_channel},		/* RSS 2.0 Channel */
3662     {"TITLE", 5,	rss_title},		/* RSS 2.0 Title */
3663     {"IMAGE", 5,	rss_image},		/* RSS 2.0 Channel Image */
3664     {"LINK", 4,		rss_link},		/* RSS 2.0 Channel/Item Link */
3665     {"DESCRIPTION", 11,	rss_description},	/* RSS 2.0 Channel/Item Description */
3666     {"ITEM", 4,		rss_item},		/* RSS 2.0 Channel ITEM */
3667     {"TTL", 3,		rss_ttl},		/* RSS 2.0 Item TTL */
3668     {NULL, 0,		NULL}
3669 };
3670 
3671 
3672 /*
3673  * Initialize the given handler, and add it to the stack if it
3674  * requests it.
3675  *
3676  * Returns: 1 if handler chose to get pushed on stack
3677  *          0 if handler declined
3678  */
3679 int
html_push(FILTER_S * fd,ELPROP_S * ep)3680 html_push(FILTER_S *fd, ELPROP_S *ep)
3681 {
3682     HANDLER_S *new;
3683 
3684     new = (HANDLER_S *)fs_get(sizeof(HANDLER_S));
3685     memset(new, 0, sizeof(HANDLER_S));
3686     new->html_data = fd;
3687     new->element   = ep;
3688     if((*ep->handler)(new, 0, GF_RESET)){ /* stack the handler? */
3689 	 new->below   = HANDLERS(fd);
3690 	 HANDLERS(fd) = new;		/* push */
3691 	 return(1);
3692     }
3693 
3694     fs_give((void **) &new);
3695     return(0);
3696 }
3697 
3698 
3699 /*
3700  * Remove the most recently installed the given handler
3701  * after letting it accept its demise.
3702  */
3703 void
html_pop(FILTER_S * fd,ELPROP_S * ep)3704 html_pop(FILTER_S *fd, ELPROP_S *ep)
3705 {
3706     HANDLER_S *tp;
3707 
3708     for(tp = HANDLERS(fd); tp && ep != EL(tp); tp = tp->below){
3709 	HANDLER_S *tp2;
3710 
3711 	dprint((3, "-- html error: bad nesting: given /%s expected /%s", ep->element, EL(tp)->element));
3712 	/* if no evidence of opening tag, ignore given closing tag */
3713 	for(tp2 = HANDLERS(fd); tp2 && ep != EL(tp2); tp2 = tp2->below)
3714 	  ;
3715 
3716 	if(!tp2){
3717 	    dprint((3, "-- html error: no opening tag for given tag /%s", ep->element));
3718 	    return;
3719 	}
3720 
3721 	(void) (*EL(tp)->handler)(tp, 0, GF_EOD);
3722 	HANDLERS(fd) = tp->below;
3723     }
3724 
3725     if(tp){
3726 	(void) (*EL(tp)->handler)(tp, 0, GF_EOD);	/* may adjust handler list */
3727 	if(tp != HANDLERS(fd)){
3728 	    HANDLER_S *p;
3729 
3730 	    for(p = HANDLERS(fd); p->below != tp; p = p->below)
3731 	      ;
3732 
3733 	    if(p)
3734 	      p->below = tp->below;	/* remove from middle of stack */
3735 	    /* BUG: else programming botch and we should die */
3736 	}
3737 	else
3738 	  HANDLERS(fd) = tp->below;	/* pop */
3739 
3740 	fs_give((void **)&tp);
3741     }
3742     else{
3743 	/* BUG: should MAKE SURE NOT TO EMIT IT */
3744 	dprint((3, "-- html error: end tag without a start: %s", ep->element));
3745     }
3746 }
3747 
3748 
3749 /*
3750  * Deal with data passed a handler in its GF_DATA state
3751  */
3752 static void
html_handoff(HANDLER_S * hd,int ch)3753 html_handoff(HANDLER_S *hd, int ch)
3754 {
3755     if(hd->below)
3756       (void) (*EL(hd->below)->handler)(hd->below, ch, GF_DATA);
3757     else
3758       html_output(hd->html_data, ch);
3759 }
3760 
3761 
3762 /*
3763  * HTML <BR> element handler
3764  */
3765 int
html_br(HANDLER_S * hd,int ch,int cmd)3766 html_br(HANDLER_S *hd, int ch, int cmd)
3767 {
3768     if(cmd == GF_RESET){
3769 	if(PASS_HTML(hd->html_data)){
3770 	    html_output_raw_tag(hd->html_data, "br");
3771 	}
3772 	else{
3773 	    html_output(hd->html_data, HTML_NEWLINE);
3774 	}
3775     }
3776 
3777     return(0);				/* don't get linked */
3778 }
3779 
3780 
3781 /*
3782  * HTML <HR> (Horizontal Rule) element handler
3783  */
3784 int
html_hr(HANDLER_S * hd,int ch,int cmd)3785 html_hr(HANDLER_S *hd, int ch, int cmd)
3786 {
3787     if(cmd == GF_RESET){
3788 	if(PASS_HTML(hd->html_data)){
3789 	    html_output_raw_tag(hd->html_data, "hr");
3790 	}
3791 	else{
3792 	    int	   i, old_wrap, width, align;
3793 	    PARAMETER *p;
3794 
3795 	    width = WRAP_COLS(hd->html_data);
3796 	    align = 0;
3797 	    for(p = HD(hd->html_data)->el_data->attribs;
3798 		p && p->attribute;
3799 		p = p->next)
3800 	      if(p->value){
3801 		  if(!strucmp(p->attribute, "ALIGN")){
3802 		      if(!strucmp(p->value, "LEFT"))
3803 			align = 1;
3804 		      else if(!strucmp(p->value, "RIGHT"))
3805 			align = 2;
3806 		  }
3807 		  else if(!strucmp(p->attribute, "WIDTH")){
3808 		      char *cp;
3809 
3810 		      width = 0;
3811 		      for(cp = p->value; *cp; cp++)
3812 			if(*cp == '%'){
3813 			    width = (WRAP_COLS(hd->html_data)*MIN(100,width))/100;
3814 			    break;
3815 			}
3816 			else if(isdigit((unsigned char) *cp))
3817 			  width = (width * 10) + (*cp - '0');
3818 
3819 		      width = MIN(width, WRAP_COLS(hd->html_data));
3820 		  }
3821 	      }
3822 
3823 	    html_blank(hd->html_data, 1);	/* at least one blank line */
3824 
3825 	    old_wrap = HD(hd->html_data)->wrapstate;
3826 	    HD(hd->html_data)->wrapstate = 0;
3827 	    if((i = MAX(0, WRAP_COLS(hd->html_data) - width))
3828 	       && ((align == 0) ? i /= 2 : (align == 2)))
3829 	      for(; i > 0; i--)
3830 		html_output(hd->html_data, ' ');
3831 
3832 	    for(i = 0; i < width; i++)
3833 	      html_output(hd->html_data, '_');
3834 
3835 	    html_blank(hd->html_data, 1);
3836 	    HD(hd->html_data)->wrapstate = old_wrap;
3837 	}
3838     }
3839 
3840     return(0);				/* don't get linked */
3841 }
3842 
3843 
3844 /*
3845  * HTML <P> (paragraph) element handler
3846  */
3847 int
html_p(HANDLER_S * hd,int ch,int cmd)3848 html_p(HANDLER_S *hd, int ch, int cmd)
3849 {
3850     if(cmd == GF_DATA){
3851 	html_handoff(hd, ch);
3852     }
3853     else if(cmd == GF_RESET){
3854 	if(PASS_HTML(hd->html_data)){
3855 	    html_output_raw_tag(hd->html_data, "p");
3856 	}
3857 	else{
3858 	    /* Make sure there's at least 1 blank line */
3859 	    html_blank(hd->html_data, 1);
3860 
3861 	    /* adjust indent level if needed */
3862 	    if(HD(hd->html_data)->li_pending){
3863 		html_indent(hd->html_data, 4, HTML_ID_INC);
3864 		HD(hd->html_data)->li_pending = 0;
3865 	    }
3866 	}
3867     }
3868     else if(cmd == GF_EOD){
3869 	if(PASS_HTML(hd->html_data)){
3870 	    html_output_string(hd->html_data, "</p>");
3871 	}
3872 	else{
3873 	    /* Make sure there's at least 1 blank line */
3874 	    html_blank(hd->html_data, 1);
3875 	}
3876     }
3877 
3878     return(1);				/* GET linked */
3879 }
3880 
3881 
3882 /*
3883  * HTML Table <TABLE> (paragraph) table row
3884  */
3885 int
html_table(HANDLER_S * hd,int ch,int cmd)3886 html_table(HANDLER_S *hd, int ch, int cmd)
3887 {
3888     if(cmd == GF_DATA){
3889 	if(PASS_HTML(hd->html_data)){
3890 	    html_handoff(hd, ch);
3891 	}
3892     }
3893     else if(cmd == GF_RESET){
3894 	if(PASS_HTML(hd->html_data)){
3895 	    html_output_raw_tag(hd->html_data, "table");
3896 	}
3897 	else
3898 	  /* Make sure there's at least 1 blank line */
3899 	  html_blank(hd->html_data, 0);
3900     }
3901     else if(cmd == GF_EOD){
3902 	if(PASS_HTML(hd->html_data)){
3903 	    html_output_string(hd->html_data, "</table>");
3904 	}
3905 	else
3906 	  /* Make sure there's at least 1 blank line */
3907 	  html_blank(hd->html_data, 0);
3908     }
3909     return(PASS_HTML(hd->html_data));		/* maybe get linked */
3910 }
3911 
3912 
3913 /*
3914  * HTML <CAPTION> (Table Caption) element handler
3915  */
3916 int
html_caption(HANDLER_S * hd,int ch,int cmd)3917 html_caption(HANDLER_S *hd, int ch, int cmd)
3918 {
3919     if(cmd == GF_DATA){
3920 	html_handoff(hd, ch);
3921     }
3922     else if(cmd == GF_RESET){
3923 	if(PASS_HTML(hd->html_data)){
3924 	    html_output_raw_tag(hd->html_data, "caption");
3925 	}
3926 	else{
3927 	    /* turn ON the centered bit */
3928 	    CENTER_BIT(hd->html_data) = 1;
3929 	}
3930     }
3931     else if(cmd == GF_EOD){
3932 	if(PASS_HTML(hd->html_data)){
3933 	    html_output_string(hd->html_data, "</caption>");
3934 	}
3935 	else{
3936 	    /* turn OFF the centered bit */
3937 	    CENTER_BIT(hd->html_data) = 0;
3938 	}
3939     }
3940 
3941     return(1);
3942 }
3943 
3944 
3945 /*
3946  * HTML Table <TR> (paragraph) table row
3947  */
3948 int
html_tr(HANDLER_S * hd,int ch,int cmd)3949 html_tr(HANDLER_S *hd, int ch, int cmd)
3950 {
3951     if(cmd == GF_DATA){
3952 	if(PASS_HTML(hd->html_data)){
3953 	    html_handoff(hd, ch);
3954 	}
3955     }
3956     else if(cmd == GF_RESET){
3957 	if(PASS_HTML(hd->html_data)){
3958 	    html_output_raw_tag(hd->html_data, "tr");
3959 	}
3960 	else
3961 	  /* Make sure there's at least 1 blank line */
3962 	  html_blank(hd->html_data, 0);
3963     }
3964     else if(cmd == GF_EOD){
3965 	if(PASS_HTML(hd->html_data)){
3966 	    html_output_string(hd->html_data, "</tr>");
3967 	}
3968 	else
3969 	  /* Make sure there's at least 1 blank line */
3970 	  html_blank(hd->html_data, 0);
3971     }
3972     return(PASS_HTML(hd->html_data));		/* maybe get linked */
3973 }
3974 
3975 
3976 /*
3977  * HTML Table <TD> (paragraph) table data
3978  */
3979 int
html_td(HANDLER_S * hd,int ch,int cmd)3980 html_td(HANDLER_S *hd, int ch, int cmd)
3981 {
3982     if(cmd == GF_DATA){
3983 	if(PASS_HTML(hd->html_data)){
3984 	    html_handoff(hd, ch);
3985 	}
3986     }
3987     else if(cmd == GF_RESET){
3988 	if(PASS_HTML(hd->html_data)){
3989 	    html_output_raw_tag(hd->html_data, "td");
3990 	}
3991 	else{
3992 	    PARAMETER *p;
3993 
3994 	    for(p = HD(hd->html_data)->el_data->attribs;
3995 		p && p->attribute;
3996 		p = p->next)
3997 	      if(!strucmp(p->attribute, "nowrap")
3998 		 && (hd->html_data->f2 || hd->html_data->n)){
3999 		  HTML_DUMP_LIT(hd->html_data, " | ", 3);
4000 		  break;
4001 	      }
4002 	}
4003     }
4004     else if(cmd == GF_EOD){
4005 	if(PASS_HTML(hd->html_data)){
4006 	    html_output_string(hd->html_data, "</td>");
4007 	}
4008     }
4009 
4010     return(PASS_HTML(hd->html_data));		/* maybe get linked */
4011 }
4012 
4013 
4014 /*
4015  * HTML Table <TH> (paragraph) table head
4016  */
4017 int
html_th(HANDLER_S * hd,int ch,int cmd)4018 html_th(HANDLER_S *hd, int ch, int cmd)
4019 {
4020     if(cmd == GF_DATA){
4021 	if(PASS_HTML(hd->html_data)){
4022 	    html_handoff(hd, ch);
4023 	}
4024     }
4025     else if(cmd == GF_RESET){
4026 	if(PASS_HTML(hd->html_data)){
4027 	    html_output_raw_tag(hd->html_data, "th");
4028 	}
4029 	else{
4030 	    PARAMETER *p;
4031 
4032 	    for(p = HD(hd->html_data)->el_data->attribs;
4033 		p && p->attribute;
4034 		p = p->next)
4035 	      if(!strucmp(p->attribute, "nowrap")
4036 		 && (hd->html_data->f2 || hd->html_data->n)){
4037 		  HTML_DUMP_LIT(hd->html_data, " | ", 3);
4038 		  break;
4039 	      }
4040 	  }
4041     }
4042     else if(cmd == GF_EOD){
4043 	if(PASS_HTML(hd->html_data)){
4044 	    html_output_string(hd->html_data, "</th>");
4045 	}
4046     }
4047 
4048     return(PASS_HTML(hd->html_data));		/* don't get linked */
4049 }
4050 
4051 
4052 /*
4053  * HTML Table <THEAD> table head
4054  */
4055 int
html_thead(HANDLER_S * hd,int ch,int cmd)4056 html_thead(HANDLER_S *hd, int ch, int cmd)
4057 {
4058     if(PASS_HTML(hd->html_data)){
4059 	if(cmd == GF_DATA){
4060 	    html_handoff(hd, ch);
4061 	}
4062 	else if(cmd == GF_RESET){
4063 	    html_output_raw_tag(hd->html_data, "thead");
4064 	}
4065 	else if(cmd == GF_EOD){
4066 	    html_output_string(hd->html_data, "</thead>");
4067 	}
4068 
4069 	return(1);		/* GET linked */
4070     }
4071 
4072     return(0);		/* don't get linked */
4073 }
4074 
4075 
4076 /*
4077  * HTML Table <TBODY> table body
4078  */
4079 int
html_tbody(HANDLER_S * hd,int ch,int cmd)4080 html_tbody(HANDLER_S *hd, int ch, int cmd)
4081 {
4082     if(PASS_HTML(hd->html_data)){
4083 	if(cmd == GF_DATA){
4084 	    html_handoff(hd, ch);
4085 	}
4086 	else if(cmd == GF_RESET){
4087 	    html_output_raw_tag(hd->html_data, "tbody");
4088 	}
4089 	else if(cmd == GF_EOD){
4090 	    html_output_string(hd->html_data, "</tbody>");
4091 	}
4092 
4093 	return(1);		/* GET linked */
4094     }
4095 
4096     return(0);		/* don't get linked */
4097 }
4098 
4099 
4100 /*
4101  * HTML Table <TFOOT> table body
4102  */
4103 int
html_tfoot(HANDLER_S * hd,int ch,int cmd)4104 html_tfoot(HANDLER_S *hd, int ch, int cmd)
4105 {
4106     if(PASS_HTML(hd->html_data)){
4107 	if(cmd == GF_DATA){
4108 	    html_handoff(hd, ch);
4109 	}
4110 	else if(cmd == GF_RESET){
4111 	    html_output_raw_tag(hd->html_data, "tfoot");
4112 	}
4113 	else if(cmd == GF_EOD){
4114 	    html_output_string(hd->html_data, "</tfoot>");
4115 	}
4116 
4117 	return(1);		/* GET linked */
4118     }
4119 
4120     return(0);		/* don't get linked */
4121 }
4122 
4123 
4124 /*
4125  * HTML <COL> (Table Column Attributes) element handler
4126  */
4127 int
html_col(HANDLER_S * hd,int ch,int cmd)4128 html_col(HANDLER_S *hd, int ch, int cmd)
4129 {
4130     if(cmd == GF_RESET){
4131 	if(PASS_HTML(hd->html_data)){
4132 	    html_output_raw_tag(hd->html_data, "col");
4133 	}
4134     }
4135 
4136     return(0);				/* don't get linked */
4137 }
4138 
4139 
4140 /*
4141  * HTML Table <COLGROUP> table body
4142  */
4143 int
html_colgroup(HANDLER_S * hd,int ch,int cmd)4144 html_colgroup(HANDLER_S *hd, int ch, int cmd)
4145 {
4146     if(PASS_HTML(hd->html_data)){
4147 	if(cmd == GF_DATA){
4148 	    html_handoff(hd, ch);
4149 	}
4150 	else if(cmd == GF_RESET){
4151 	    html_output_raw_tag(hd->html_data, "colgroup");
4152 	}
4153 	else if(cmd == GF_EOD){
4154 	    html_output_string(hd->html_data, "</colgroup>");
4155 	}
4156 
4157 	return(1);		/* GET linked */
4158     }
4159 
4160     return(0);		/* don't get linked */
4161 }
4162 
4163 
4164 /*
4165  * HTML <I> (italic text) element handler
4166  */
4167 int
html_i(HANDLER_S * hd,int ch,int cmd)4168 html_i(HANDLER_S *hd, int ch, int cmd)
4169 {
4170     if(cmd == GF_DATA){
4171 	/* include LITERAL in spaceness test! */
4172 	if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4173 	    HTML_ITALIC(hd->html_data, 1);
4174 	    hd->x = 0;
4175 	}
4176 
4177 	html_handoff(hd, ch);
4178     }
4179     else if(cmd == GF_RESET){
4180 	hd->x = 1;
4181     }
4182     else if(cmd == GF_EOD){
4183 	if(!hd->x)
4184 	  HTML_ITALIC(hd->html_data, 0);
4185     }
4186 
4187     return(1);				/* get linked */
4188 }
4189 
4190 
4191 /*
4192  * HTML <EM> element handler
4193  */
4194 int
html_em(HANDLER_S * hd,int ch,int cmd)4195 html_em(HANDLER_S *hd, int ch, int cmd)
4196 {
4197     if(cmd == GF_DATA){
4198 	if(!PASS_HTML(hd->html_data)){
4199 	    /* include LITERAL in spaceness test! */
4200 	    if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4201 		HTML_ITALIC(hd->html_data, 1);
4202 		hd->x = 0;
4203 	    }
4204 	}
4205 
4206 	html_handoff(hd, ch);
4207     }
4208     else if(cmd == GF_RESET){
4209 	if(PASS_HTML(hd->html_data)){
4210 	    html_output_raw_tag(hd->html_data, "em");
4211 	}
4212 	else{
4213 	    hd->x = 1;
4214 	}
4215     }
4216     else if(cmd == GF_EOD){
4217 	if(PASS_HTML(hd->html_data)){
4218 	    html_output_string(hd->html_data, "</em>");
4219 	}
4220 	else{
4221 	    if(!hd->x)
4222 	      HTML_ITALIC(hd->html_data, 0);
4223 	}
4224     }
4225 
4226     return(1);				/* get linked */
4227 }
4228 
4229 
4230 /*
4231  * HTML <STRONG> element handler
4232  */
4233 int
html_strong(HANDLER_S * hd,int ch,int cmd)4234 html_strong(HANDLER_S *hd, int ch, int cmd)
4235 {
4236     if(cmd == GF_DATA){
4237 	if(!PASS_HTML(hd->html_data)){
4238 	    /* include LITERAL in spaceness test! */
4239 	    if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4240 		HTML_ITALIC(hd->html_data, 1);
4241 		hd->x = 0;
4242 	    }
4243 	}
4244 
4245 	html_handoff(hd, ch);
4246     }
4247     else if(cmd == GF_RESET){
4248 	if(PASS_HTML(hd->html_data)){
4249 	    html_output_raw_tag(hd->html_data, "strong");
4250 	}
4251 	else{
4252 	    hd->x = 1;
4253 	}
4254     }
4255     else if(cmd == GF_EOD){
4256 	if(PASS_HTML(hd->html_data)){
4257 	    html_output_string(hd->html_data, "</strong>");
4258 	}
4259 	else{
4260 	    if(!hd->x)
4261 	      HTML_ITALIC(hd->html_data, 0);
4262 	}
4263     }
4264 
4265     return(1);				/* get linked */
4266 }
4267 
4268 
4269 /*
4270  * HTML <u> (Underline text) element handler
4271  */
4272 int
html_u(HANDLER_S * hd,int ch,int cmd)4273 html_u(HANDLER_S *hd, int ch, int cmd)
4274 {
4275     if(PASS_HTML(hd->html_data)){
4276 	if(cmd == GF_DATA){
4277 	    html_handoff(hd, ch);
4278 	}
4279 	else if(cmd == GF_RESET){
4280 	    html_output_raw_tag(hd->html_data, "u");
4281 	}
4282 	else if(cmd == GF_EOD){
4283 	    html_output_string(hd->html_data, "</u>");
4284 	}
4285 
4286 	return(1);		/* get linked */
4287     }
4288 
4289     return(0);			/* do NOT get linked */
4290 }
4291 
4292 
4293 /*
4294  * HTML <b> (Bold text) element handler
4295  */
4296 int
html_b(HANDLER_S * hd,int ch,int cmd)4297 html_b(HANDLER_S *hd, int ch, int cmd)
4298 {
4299     if(cmd == GF_DATA){
4300 	if(!PASS_HTML(hd->html_data)){
4301 	    /* include LITERAL in spaceness test! */
4302 	    if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4303 		HTML_BOLD(hd->html_data, 1);
4304 		hd->x = 0;
4305 	    }
4306 	}
4307 
4308 	html_handoff(hd, ch);
4309     }
4310     else if(cmd == GF_RESET){
4311 	if(PASS_HTML(hd->html_data)){
4312 	    html_output_raw_tag(hd->html_data, "b");
4313 	}
4314 	else{
4315 	    hd->x = 1;
4316 	}
4317     }
4318     else if(cmd == GF_EOD){
4319 	if(PASS_HTML(hd->html_data)){
4320 	    html_output_string(hd->html_data, "</b>");
4321 	}
4322 	else{
4323 	    if(!hd->x)
4324 	      HTML_BOLD(hd->html_data, 0);
4325 	}
4326     }
4327 
4328     return(1);				/* get linked */
4329 }
4330 
4331 
4332 /*
4333  * HTML <s> (strike-through text) element handler
4334  */
4335 int
html_s(HANDLER_S * hd,int ch,int cmd)4336 html_s(HANDLER_S *hd, int ch, int cmd)
4337 {
4338     if(cmd == GF_DATA){
4339 	if(!PASS_HTML(hd->html_data)){
4340 	    /* include LITERAL in spaceness test! */
4341 	    if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4342 		HTML_STRIKE(hd->html_data, 1);
4343 		hd->x = 0;
4344 	    }
4345 	}
4346 
4347 	html_handoff(hd, ch);
4348     }
4349     else if(cmd == GF_RESET){
4350 	if(PASS_HTML(hd->html_data)){
4351 	    html_output_raw_tag(hd->html_data, "s");
4352 	}
4353 	else{
4354 	    hd->x = 1;
4355 	}
4356     }
4357     else if(cmd == GF_EOD){
4358 	if(PASS_HTML(hd->html_data)){
4359 	    html_output_string(hd->html_data, "</s>");
4360 	}
4361 	else{
4362 	    if(!hd->x)
4363 	      HTML_STRIKE(hd->html_data, 0);
4364 	}
4365     }
4366 
4367     return(1);				/* get linked */
4368 }
4369 
4370 
4371 /*
4372  * HTML <big> (BIG text) element handler
4373  */
4374 int
html_big(HANDLER_S * hd,int ch,int cmd)4375 html_big(HANDLER_S *hd, int ch, int cmd)
4376 {
4377     if(cmd == GF_DATA){
4378 	/* include LITERAL in spaceness test! */
4379 	if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4380 	    HTML_BIG(hd->html_data, 1);
4381 	    hd->x = 0;
4382 	}
4383 
4384 	html_handoff(hd, ch);
4385     }
4386     else if(cmd == GF_RESET){
4387 	hd->x = 1;
4388     }
4389     else if(cmd == GF_EOD){
4390 	if(!hd->x)
4391 	  HTML_BIG(hd->html_data, 0);
4392     }
4393 
4394     return(1);				/* get linked */
4395 }
4396 
4397 
4398 /*
4399  * HTML <small> (SMALL text) element handler
4400  */
4401 int
html_small(HANDLER_S * hd,int ch,int cmd)4402 html_small(HANDLER_S *hd, int ch, int cmd)
4403 {
4404     if(cmd == GF_DATA){
4405 	/* include LITERAL in spaceness test! */
4406 	if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4407 	    HTML_SMALL(hd->html_data, 1);
4408 	    hd->x = 0;
4409 	}
4410 
4411 	html_handoff(hd, ch);
4412     }
4413     else if(cmd == GF_RESET){
4414 	hd->x = 1;
4415     }
4416     else if(cmd == GF_EOD){
4417 	if(!hd->x)
4418 	  HTML_SMALL(hd->html_data, 0);
4419     }
4420 
4421     return(1);				/* get linked */
4422 }
4423 
4424 
4425 /*
4426  * HTML <FONT> element handler
4427  */
4428 int
html_font(HANDLER_S * hd,int ch,int cmd)4429 html_font(HANDLER_S *hd, int ch, int cmd)
4430 {
4431     if(PASS_HTML(hd->html_data)){
4432 	if(cmd == GF_DATA){
4433 	    html_handoff(hd, ch);
4434 	}
4435 	else if(cmd == GF_RESET){
4436 	    html_output_raw_tag(hd->html_data, "font");
4437 	}
4438 	else if(cmd == GF_EOD){
4439 	    html_output_string(hd->html_data, "</font>");
4440 	}
4441 
4442 	return(1);				/* get linked */
4443     }
4444 
4445     return(0);
4446 }
4447 
4448 
4449 /*
4450  * HTML <IMG> element handler
4451  */
4452 int
html_img(HANDLER_S * hd,int ch,int cmd)4453 html_img(HANDLER_S *hd, int ch, int cmd)
4454 {
4455     PARAMETER *p;
4456     char      *alt = NULL, *src = NULL, *s;
4457 
4458     if(cmd == GF_RESET){
4459 	if(PASS_HTML(hd->html_data)){
4460 	    html_output_raw_tag(hd->html_data, "img");
4461 	}
4462 	else{
4463 	    for(p = HD(hd->html_data)->el_data->attribs;
4464 		p && p->attribute;
4465 		p = p->next)
4466 	      if(p->value && p->value[0]){
4467 		  if(!strucmp(p->attribute, "alt"))
4468 		    alt = p->value;
4469 		  if(!strucmp(p->attribute, "src"))
4470 		    src = p->value;
4471 	      }
4472 
4473 	    /*
4474 	     * Multipart/Related Content ID pointer
4475 	     * ONLY attached messages are recognized
4476 	     * if we ever decide web bugs aren't a problem
4477 	     * anymore then we might expand the scope
4478 	     */
4479 	    if(src
4480 	       && DO_HANDLES(hd->html_data)
4481 	       && RELATED_OK(hd->html_data)
4482 	       && struncmp(src, "cid:", 4) == 0){
4483 		char      buf[32];
4484 		int	      i, n;
4485 		HANDLE_S *h = new_handle(HANDLESP(hd->html_data));
4486 
4487 		h->type	 = IMG;
4488 		h->h.img.src = cpystr(src + 4);
4489 		h->h.img.alt = cpystr((alt) ? alt : "Attached Image");
4490 
4491 		HTML_TEXT(hd->html_data, TAG_EMBED);
4492 		HTML_TEXT(hd->html_data, TAG_HANDLE);
4493 
4494 		sprintf(buf, "%d", h->key);
4495 		n = strlen(buf);
4496 		HTML_TEXT(hd->html_data, n);
4497 		for(i = 0; i < n; i++){
4498 		    unsigned int uic = buf[i];
4499 		    HTML_TEXT(hd->html_data, uic);
4500 		}
4501 
4502 		return(0);
4503 	    }
4504 	    else if(alt && strlen(alt) < 256){ /* arbitrary "reasonable" limit */
4505 		HTML_DUMP_LIT(hd->html_data, alt, strlen(alt));
4506 		HTML_TEXT(hd->html_data, ' ');
4507 		return(0);
4508 	    }
4509 	    else if(src
4510 		    && (s = strrindex(src, '/'))
4511 		    && *++s != '\0'){
4512 		HTML_TEXT(hd->html_data, '[');
4513 		HTML_DUMP_LIT(hd->html_data, s, strlen(s));
4514 		HTML_TEXT(hd->html_data, ']');
4515 		HTML_TEXT(hd->html_data, ' ');
4516 		return(0);
4517 	    }
4518 
4519 	    /* text filler of last resort */
4520 	    HTML_DUMP_LIT(hd->html_data, "[IMAGE] ", 7);
4521 	}
4522     }
4523 
4524     return(0);				/* don't get linked */
4525 }
4526 
4527 
4528 /*
4529  * HTML <MAP> (Image Map) element handler
4530  */
4531 int
html_map(HANDLER_S * hd,int ch,int cmd)4532 html_map(HANDLER_S *hd, int ch, int cmd)
4533 {
4534     if(PASS_HTML(hd->html_data) && PASS_IMAGES(hd->html_data)){
4535 	if(cmd == GF_DATA){
4536 	    html_handoff(hd, ch);
4537 	}
4538 	else if(cmd == GF_RESET){
4539 	    html_output_raw_tag(hd->html_data, "map");
4540 	}
4541 	else if(cmd == GF_EOD){
4542 	    html_output_string(hd->html_data, "</map>");
4543 	}
4544 
4545 	return(1);
4546     }
4547 
4548     return(0);
4549 }
4550 
4551 
4552 /*
4553  * HTML <AREA> (Image Map Area) element handler
4554  */
4555 int
html_area(HANDLER_S * hd,int ch,int cmd)4556 html_area(HANDLER_S *hd, int ch, int cmd)
4557 {
4558     if(PASS_HTML(hd->html_data) && PASS_IMAGES(hd->html_data)){
4559 	if(cmd == GF_DATA){
4560 	    html_handoff(hd, ch);
4561 	}
4562 	else if(cmd == GF_RESET){
4563 	    html_output_raw_tag(hd->html_data, "area");
4564 	}
4565 	else if(cmd == GF_EOD){
4566 	    html_output_string(hd->html_data, "</area>");
4567 	}
4568 
4569 	return(1);
4570     }
4571 
4572     return(0);
4573 }
4574 
4575 
4576 /*
4577  * HTML <FORM> (Form) element handler
4578  */
4579 int
html_form(HANDLER_S * hd,int ch,int cmd)4580 html_form(HANDLER_S *hd, int ch, int cmd)
4581 {
4582     if(PASS_HTML(hd->html_data)){
4583 	if(cmd == GF_DATA){
4584 	    html_handoff(hd, ch);
4585 	}
4586 	else if(cmd == GF_RESET){
4587 	    PARAMETER **pp;
4588 
4589 	    /* SECURITY: make sure to redirect to new browser instance */
4590 	    for(pp = &(HD(hd->html_data)->el_data->attribs);
4591 		*pp && (*pp)->attribute;
4592 		pp = &(*pp)->next)
4593 	      if(!strucmp((*pp)->attribute, "target")){
4594 		  if((*pp)->value)
4595 		    fs_give((void **) &(*pp)->value);
4596 
4597 		  (*pp)->value = cpystr("_blank");
4598 	      }
4599 
4600 	    if(!*pp){
4601 		*pp = (PARAMETER *)fs_get(sizeof(PARAMETER));
4602 		memset(*pp, 0, sizeof(PARAMETER));
4603 		(*pp)->attribute = cpystr("target");
4604 		(*pp)->value = cpystr("_blank");
4605 	    }
4606 
4607 	    html_output_raw_tag(hd->html_data, "form");
4608 	}
4609 	else if(cmd == GF_EOD){
4610 	    html_output_string(hd->html_data, "</form>");
4611 	}
4612     }
4613     else{
4614 	if(cmd == GF_RESET){
4615 	    html_blank(hd->html_data, 0);
4616 	    HTML_DUMP_LIT(hd->html_data, "[FORM]", 6);
4617 	    html_blank(hd->html_data, 0);
4618 	}
4619     }
4620 
4621     return(PASS_HTML(hd->html_data));		/* maybe get linked */
4622 }
4623 
4624 
4625 /*
4626  * HTML <INPUT> (Form) element handler
4627  */
4628 int
html_input(HANDLER_S * hd,int ch,int cmd)4629 html_input(HANDLER_S *hd, int ch, int cmd)
4630 {
4631     if(PASS_HTML(hd->html_data)){
4632 	if(cmd == GF_RESET){
4633 	    html_output_raw_tag(hd->html_data, "input");
4634 	}
4635     }
4636 
4637     return(0);				/* don't get linked */
4638 }
4639 
4640 
4641 /*
4642  * HTML <BUTTON> (Form) element handler
4643  */
4644 int
html_button(HANDLER_S * hd,int ch,int cmd)4645 html_button(HANDLER_S *hd, int ch, int cmd)
4646 {
4647     if(PASS_HTML(hd->html_data)){
4648 	if(cmd == GF_DATA){
4649 	    html_handoff(hd, ch);
4650 	}
4651 	else if(cmd == GF_RESET){
4652 	    html_output_raw_tag(hd->html_data, "button");
4653 	}
4654 	else if(cmd == GF_EOD){
4655 	    html_output_string(hd->html_data, "</button>");
4656 	}
4657 
4658 	return(1);				/* get linked */
4659     }
4660 
4661     return(0);
4662 }
4663 
4664 
4665 /*
4666  * HTML <OPTION> (Form) element handler
4667  */
4668 int
html_option(HANDLER_S * hd,int ch,int cmd)4669 html_option(HANDLER_S *hd, int ch, int cmd)
4670 {
4671     if(PASS_HTML(hd->html_data)){
4672 	if(cmd == GF_DATA){
4673 	    html_handoff(hd, ch);
4674 	}
4675 	else if(cmd == GF_RESET){
4676 	    html_output_raw_tag(hd->html_data, "option");
4677 	}
4678 	else if(cmd == GF_EOD){
4679 	    html_output_string(hd->html_data, "</option>");
4680 	}
4681 
4682 	return(1);				/* get linked */
4683     }
4684 
4685     return(0);
4686 }
4687 
4688 
4689 /*
4690  * HTML <OPTGROUP> (Form) element handler
4691  */
4692 int
html_optgroup(HANDLER_S * hd,int ch,int cmd)4693 html_optgroup(HANDLER_S *hd, int ch, int cmd)
4694 {
4695     if(PASS_HTML(hd->html_data)){
4696 	if(cmd == GF_DATA){
4697 	    html_handoff(hd, ch);
4698 	}
4699 	else if(cmd == GF_RESET){
4700 	    html_output_raw_tag(hd->html_data, "optgroup");
4701 	}
4702 	else if(cmd == GF_EOD){
4703 	    html_output_string(hd->html_data, "</optgroup>");
4704 	}
4705 
4706 	return(1);				/* get linked */
4707     }
4708 
4709     return(0);
4710 }
4711 
4712 
4713 /*
4714  * HTML <SELECT> (Form) element handler
4715  */
4716 int
html_select(HANDLER_S * hd,int ch,int cmd)4717 html_select(HANDLER_S *hd, int ch, int cmd)
4718 {
4719     if(PASS_HTML(hd->html_data)){
4720 	if(cmd == GF_DATA){
4721 	    html_handoff(hd, ch);
4722 	}
4723 	else if(cmd == GF_RESET){
4724 	    html_output_raw_tag(hd->html_data, "select");
4725 	}
4726 	else if(cmd == GF_EOD){
4727 	    html_output_string(hd->html_data, "</select>");
4728 	}
4729 
4730 	return(1);				/* get linked */
4731     }
4732 
4733     return(0);
4734 }
4735 
4736 
4737 /*
4738  * HTML <TEXTAREA> (Form) element handler
4739  */
4740 int
html_textarea(HANDLER_S * hd,int ch,int cmd)4741 html_textarea(HANDLER_S *hd, int ch, int cmd)
4742 {
4743     if(PASS_HTML(hd->html_data)){
4744 	if(cmd == GF_DATA){
4745 	    html_handoff(hd, ch);
4746 	}
4747 	else if(cmd == GF_RESET){
4748 	    html_output_raw_tag(hd->html_data, "textarea");
4749 	}
4750 	else if(cmd == GF_EOD){
4751 	    html_output_string(hd->html_data, "</textarea>");
4752 	}
4753 
4754 	return(1);				/* get linked */
4755     }
4756 
4757     return(0);
4758 }
4759 
4760 
4761 /*
4762  * HTML <LABEL> (Form) element handler
4763  */
4764 int
html_label(HANDLER_S * hd,int ch,int cmd)4765 html_label(HANDLER_S *hd, int ch, int cmd)
4766 {
4767     if(PASS_HTML(hd->html_data)){
4768 	if(cmd == GF_DATA){
4769 	    html_handoff(hd, ch);
4770 	}
4771 	else if(cmd == GF_RESET){
4772 	    html_output_raw_tag(hd->html_data, "label");
4773 	}
4774 	else if(cmd == GF_EOD){
4775 	    html_output_string(hd->html_data, "</label>");
4776 	}
4777 
4778 	return(1);				/* get linked */
4779     }
4780 
4781     return(0);
4782 }
4783 
4784 
4785 /*
4786  * HTML <FIELDSET> (Form) element handler
4787  */
4788 int
html_fieldset(HANDLER_S * hd,int ch,int cmd)4789 html_fieldset(HANDLER_S *hd, int ch, int cmd)
4790 {
4791     if(PASS_HTML(hd->html_data)){
4792 	if(cmd == GF_DATA){
4793 	    html_handoff(hd, ch);
4794 	}
4795 	else if(cmd == GF_RESET){
4796 	    html_output_raw_tag(hd->html_data, "fieldset");
4797 	}
4798 	else if(cmd == GF_EOD){
4799 	    html_output_string(hd->html_data, "</fieldset>");
4800 	}
4801 
4802 	return(1);				/* get linked */
4803     }
4804 
4805     return(0);
4806 }
4807 
4808 
4809 /*
4810  * HTML <HEAD> element handler
4811  */
4812 int
html_head(HANDLER_S * hd,int ch,int cmd)4813 html_head(HANDLER_S *hd, int ch, int cmd)
4814 {
4815     if(cmd == GF_DATA){
4816 	html_handoff(hd, ch);
4817     }
4818     else if(cmd == GF_RESET){
4819 	HD(hd->html_data)->head = 1;
4820     }
4821     else if(cmd == GF_EOD){
4822 	HD(hd->html_data)->head = 0;
4823     }
4824 
4825     return(1);				/* get linked */
4826 }
4827 
4828 
4829 /*
4830  * HTML <BASE> element handler
4831  */
4832 int
html_base(HANDLER_S * hd,int ch,int cmd)4833 html_base(HANDLER_S *hd, int ch, int cmd)
4834 {
4835     if(cmd == GF_RESET){
4836 	if(HD(hd->html_data)->head && !HTML_BASE(hd->html_data)){
4837 	    PARAMETER *p;
4838 
4839 	    for(p = HD(hd->html_data)->el_data->attribs;
4840 		p && p->attribute && strucmp(p->attribute, "HREF");
4841 		p = p->next)
4842 	      ;
4843 
4844 	    if(p && p->value && !((HTML_OPT_S *)(hd->html_data)->opt)->base)
4845 	      ((HTML_OPT_S *)(hd->html_data)->opt)->base = cpystr(p->value);
4846 	}
4847     }
4848 
4849     return(0);				/* DON'T get linked */
4850 }
4851 
4852 
4853 /*
4854  * HTML <TITLE> element handler
4855  */
4856 int
html_title(HANDLER_S * hd,int ch,int cmd)4857 html_title(HANDLER_S *hd, int ch, int cmd)
4858 {
4859     if(cmd == GF_DATA){
4860 	if(hd->x + 1 >= hd->y){
4861 	    hd->y += 80;
4862 	    fs_resize((void **)&hd->s, (size_t)hd->y * sizeof(unsigned char));
4863 	}
4864 
4865 	hd->s[hd->x++] = (unsigned char) ch;
4866     }
4867     else if(cmd == GF_RESET){
4868 	hd->x = 0L;
4869 	hd->y = 80L;
4870 	hd->s = (unsigned char *)fs_get((size_t)hd->y * sizeof(unsigned char));
4871     }
4872     else if(cmd == GF_EOD){
4873 	/* Down the road we probably want to give these bytes to
4874 	 * someone...
4875 	 */
4876 	hd->s[hd->x] = '\0';
4877 	fs_give((void **)&hd->s);
4878     }
4879 
4880     return(1);				/* get linked */
4881 }
4882 
4883 
4884 /*
4885  * HTML <BODY> element handler
4886  */
4887 int
html_body(HANDLER_S * hd,int ch,int cmd)4888 html_body(HANDLER_S *hd, int ch, int cmd)
4889 {
4890     if(cmd == GF_DATA){
4891 	html_handoff(hd, ch);
4892     }
4893     else if(cmd == GF_RESET){
4894 	if(PASS_HTML(hd->html_data)){
4895 	    PARAMETER  *p, *tp;
4896 	    char      **style = NULL, *text = NULL, *bgcolor = NULL, *pcs;
4897 
4898 	    /* modify any attributes in a useful way? */
4899 	    for(p = HD(hd->html_data)->el_data->attribs;
4900 		p && p->attribute;
4901 		p = p->next)
4902 	      if(p->value){
4903 		  if(!strucmp(p->attribute, "style"))
4904 		    style = &p->value;
4905 		  else if(!strucmp(p->attribute, "text"))
4906 		    text = p->value;
4907 		  /*
4908 		   * bgcolor NOT passed since user setting takes precedence
4909 		   *
4910 		  else if(!strucmp(p->attribute, "bgcolor"))
4911 		    bgcolor = p->value;
4912 		  */
4913 	      }
4914 
4915 	    /* colors pretty much it */
4916 	    if(text || bgcolor){
4917 		if(!style){
4918 		    tp = (PARAMETER *)fs_get(sizeof(PARAMETER));
4919 		    memset(tp, 0, sizeof(PARAMETER));
4920 		    tp->next = HD(hd->html_data)->el_data->attribs;
4921 		    HD(hd->html_data)->el_data->attribs = tp;
4922 		    tp->attribute = cpystr("style");
4923 
4924 		    tmp_20k_buf[0] = '\0';
4925 		    style = &tp->value;
4926 		    pcs = "%s%s%s%s%s";
4927 		}
4928 		else{
4929 		    snprintf(tmp_20k_buf, SIZEOF_20KBUF, "%s", *style);
4930 		    fs_give((void **) style);
4931 		    pcs = "; %s%s%s%s%s";
4932 		}
4933 
4934 		snprintf(tmp_20k_buf + strlen(tmp_20k_buf),
4935 			 SIZEOF_20KBUF - strlen(tmp_20k_buf),
4936 			 pcs,
4937 			 (text) ? "color: " : "", (text) ? text : "",
4938 			 (text && bgcolor) ? ";" : "",
4939 			 (bgcolor) ? "background-color: " : "", (bgcolor) ? bgcolor : "");
4940 		*style = cpystr(tmp_20k_buf);
4941 	    }
4942 
4943 	    html_output_raw_tag(hd->html_data, "div");
4944 	}
4945 
4946 	HD(hd->html_data)->body = 1;
4947     }
4948     else if(cmd == GF_EOD){
4949 	if(PASS_HTML(hd->html_data)){
4950 	    html_output_string(hd->html_data, "</div>");
4951 	}
4952 
4953 	HD(hd->html_data)->body = 0;
4954     }
4955 
4956     return(1);				/* get linked */
4957 }
4958 
4959 
4960 /*
4961  * HTML <A> (Anchor) element handler
4962  */
4963 int
html_a(HANDLER_S * hd,int ch,int cmd)4964 html_a(HANDLER_S *hd, int ch, int cmd)
4965 {
4966     if(cmd == GF_DATA){
4967 	html_handoff(hd, ch);
4968 
4969 	if(hd->dp)		/* remember text within anchor tags */
4970 	  so_writec(ch, (STORE_S *) hd->dp);
4971     }
4972     else if(cmd == GF_RESET){
4973 	int	   i, n, x;
4974 	char	   buf[256];
4975 	HANDLE_S  *h;
4976 	PARAMETER *p, *href = NULL, *name = NULL;
4977 
4978 	/*
4979 	 * Pending Anchor!?!?
4980 	 * space insertion/line breaking that's yet to get done...
4981 	 */
4982 	if(HD(hd->html_data)->prefix){
4983 	    dprint((2, "-- html error: nested or unterminated anchor\n"));
4984 	    html_a_finish(hd);
4985 	}
4986 
4987 	/*
4988 	 * Look for valid Anchor data vis the filter installer's parms
4989 	 * (e.g., Only allow references to our internal URLs if asked)
4990 	 */
4991 	for(p = HD(hd->html_data)->el_data->attribs;
4992 	    p && p->attribute;
4993 	    p = p->next)
4994 	  if(!strucmp(p->attribute, "HREF")
4995 	     && p->value
4996 	     && (HANDLES_LOC(hd->html_data)
4997 		 || struncmp(p->value, "x-alpine-", 9)
4998 		 || struncmp(p->value, "x-pine-help", 11)
4999 		 || p->value[0] == '#'))
5000 	    href = p;
5001 	  else if(!strucmp(p->attribute, "NAME"))
5002 	    name = p;
5003 
5004 	if(DO_HANDLES(hd->html_data) && (href || name)){
5005 	    h = new_handle(HANDLESP(hd->html_data));
5006 
5007 	    /*
5008 	     * Enhancement: we might want to get fancier and parse the
5009 	     * href a bit further such that we can launch images using
5010 	     * our image viewer, or browse local files or directories
5011 	     * with our internal tools.  Of course, having the jump-off
5012 	     * point into text/html always be the defined "web-browser",
5013 	     * just might be the least confusing UI-wise...
5014 	     */
5015 	    h->type = URL;
5016 
5017 	    if(name && name->value)
5018 	      h->h.url.name = cpystr(name->value);
5019 
5020 	    /*
5021 	     * Prepare to build embedded prefix...
5022 	     */
5023 	    HD(hd->html_data)->prefix = (int *) fs_get(64 * sizeof(int));
5024 	    x = 0;
5025 
5026 	    /*
5027 	     * Is this something that looks like a URL?  If not and
5028 	     * we were giving some "base" string, proceed ala RFC1808...
5029 	     */
5030 	    if(href){
5031 		if(href->value)
5032 		   removing_leading_and_trailing_white_space(href->value);
5033 		if(HTML_BASE(hd->html_data) && !rfc1738_scan(href->value, &n)){
5034 		    html_a_relative(HTML_BASE(hd->html_data), href->value, h);
5035 		}
5036 		else if(!(NO_RELATIVE(hd->html_data) && html_href_relative(href->value)))
5037 		  h->h.url.path = cpystr(href->value);
5038 
5039 		if(pico_usingcolor()){
5040 		    char *fg = NULL, *bg = NULL, *q;
5041 
5042 		    if(ps_global->VAR_SLCTBL_FORE_COLOR
5043 		       && colorcmp(ps_global->VAR_SLCTBL_FORE_COLOR,
5044 				   ps_global->VAR_NORM_FORE_COLOR))
5045 		      fg = ps_global->VAR_SLCTBL_FORE_COLOR;
5046 
5047 		    if(ps_global->VAR_SLCTBL_BACK_COLOR
5048 		       && colorcmp(ps_global->VAR_SLCTBL_BACK_COLOR,
5049 				   ps_global->VAR_NORM_BACK_COLOR))
5050 		      bg = ps_global->VAR_SLCTBL_BACK_COLOR;
5051 
5052 		    if(fg || bg){
5053 			COLOR_PAIR *tmp;
5054 
5055 			/*
5056 			 * The blacks are just known good colors for testing
5057 			 * whether the other color is good.
5058 			 */
5059 			tmp = new_color_pair(fg ? fg : colorx(COL_BLACK),
5060 					     bg ? bg : colorx(COL_BLACK));
5061 			if(pico_is_good_colorpair(tmp)){
5062 			    q = color_embed(fg, bg);
5063 
5064 			    for(i = 0; q[i]; i++)
5065 			      HD(hd->html_data)->prefix[x++] = q[i];
5066 			}
5067 
5068 			if(tmp)
5069 			  free_color_pair(&tmp);
5070 		    }
5071 
5072 		    if(F_OFF(F_SLCTBL_ITEM_NOBOLD, ps_global))
5073 		      HD(hd->html_data)->prefix[x++] = HTML_DOBOLD;
5074 		}
5075 		else
5076 		  HD(hd->html_data)->prefix[x++] = HTML_DOBOLD;
5077 	    }
5078 
5079 	    HD(hd->html_data)->prefix[x++] = TAG_EMBED;
5080 	    HD(hd->html_data)->prefix[x++] = TAG_HANDLE;
5081 
5082 	    snprintf(buf, sizeof(buf), "%ld", hd->x = h->key);
5083 	    HD(hd->html_data)->prefix[x++] = n = strlen(buf);
5084 	    for(i = 0; i < n; i++)
5085 	      HD(hd->html_data)->prefix[x++] = buf[i];
5086 
5087 	    HD(hd->html_data)->prefix_used = x;
5088 
5089 	    hd->dp = (void *) so_get(CharStar, NULL, EDIT_ACCESS);
5090 	}
5091     }
5092     else if(cmd == GF_EOD){
5093 	html_a_finish(hd);
5094     }
5095 
5096     return(1);				/* get linked */
5097 }
5098 
5099 
5100 void
html_a_prefix(FILTER_S * f)5101 html_a_prefix(FILTER_S *f)
5102 {
5103     int *prefix, n;
5104 
5105     /* Do this so we don't visit from html_output... */
5106     prefix = HD(f)->prefix;
5107     HD(f)->prefix = NULL;
5108 
5109     for(n = 0; n < HD(f)->prefix_used; n++)
5110       html_a_output_prefix(f, prefix[n]);
5111 
5112     fs_give((void **) &prefix);
5113 }
5114 
5115 
5116 /*
5117  * html_a_finish - house keeping associated with end of link tag
5118  */
5119 void
html_a_finish(HANDLER_S * hd)5120 html_a_finish(HANDLER_S *hd)
5121 {
5122     if(DO_HANDLES(hd->html_data)){
5123 	if(HD(hd->html_data)->prefix){
5124 	    if(!PASS_HTML(hd->html_data)){
5125 		char *empty_link = "[LINK]";
5126 		int   i;
5127 
5128 		html_a_prefix(hd->html_data);
5129 		for(i = 0; empty_link[i]; i++)
5130 		  html_output(hd->html_data, empty_link[i]);
5131 	    }
5132 	}
5133 
5134 	if(pico_usingcolor()){
5135 	    char *fg = NULL, *bg = NULL, *p;
5136 	    int   i;
5137 
5138 	    if(ps_global->VAR_SLCTBL_FORE_COLOR
5139 	       && colorcmp(ps_global->VAR_SLCTBL_FORE_COLOR,
5140 			   ps_global->VAR_NORM_FORE_COLOR))
5141 	      fg = ps_global->VAR_NORM_FORE_COLOR;
5142 
5143 	    if(ps_global->VAR_SLCTBL_BACK_COLOR
5144 	       && colorcmp(ps_global->VAR_SLCTBL_BACK_COLOR,
5145 			   ps_global->VAR_NORM_BACK_COLOR))
5146 	      bg = ps_global->VAR_NORM_BACK_COLOR;
5147 
5148 	    if(F_OFF(F_SLCTBL_ITEM_NOBOLD, ps_global))
5149 	      HTML_BOLD(hd->html_data, 0);	/* turn OFF bold */
5150 
5151 	    if(fg || bg){
5152 		COLOR_PAIR *tmp;
5153 
5154 		/*
5155 		 * The blacks are just known good colors for testing
5156 		 * whether the other color is good.
5157 		 */
5158 		tmp = new_color_pair(fg ? fg : colorx(COL_BLACK),
5159 				     bg ? bg : colorx(COL_BLACK));
5160 		if(pico_is_good_colorpair(tmp)){
5161 		    p = color_embed(fg, bg);
5162 
5163 		    for(i = 0; p[i]; i++)
5164 		      html_output(hd->html_data, p[i]);
5165 		}
5166 
5167 		if(tmp)
5168 		  free_color_pair(&tmp);
5169 	    }
5170 	}
5171 	else
5172 	  HTML_BOLD(hd->html_data, 0);	/* turn OFF bold */
5173 
5174 	html_output(hd->html_data, TAG_EMBED);
5175 	html_output(hd->html_data, TAG_HANDLEOFF);
5176 
5177 	html_a_output_info(hd);
5178     }
5179 }
5180 
5181 
5182 /*
5183  * html_output_a_prefix - dump Anchor prefix data
5184  */
5185 void
html_a_output_prefix(FILTER_S * f,int c)5186 html_a_output_prefix(FILTER_S *f, int c)
5187 {
5188     switch(c){
5189       case HTML_DOBOLD :
5190 	HTML_BOLD(f, 1);
5191 	break;
5192 
5193       default :
5194 	html_output(f, c);
5195 	break;
5196     }
5197 }
5198 
5199 
5200 
5201 /*
5202  * html_a_output_info - dump possibly deceptive link info into text.
5203  *                      phark the phishers.
5204  */
5205 void
html_a_output_info(HANDLER_S * hd)5206 html_a_output_info(HANDLER_S *hd)
5207 {
5208     int	      l, risky = 0, hl = 0, tl;
5209     char     *url = NULL, *hn = NULL, *txt;
5210     HANDLE_S *h;
5211 
5212     /* find host anchor references */
5213     if((h = get_handle(*HANDLESP(hd->html_data), (int) hd->x)) != NULL
5214        && h->h.url.path != NULL
5215        && (hn = rfc1738_scan(rfc1738_str(url = cpystr(h->h.url.path)), &l)) != NULL
5216        && (hn = srchstr(hn,"://")) != NULL){
5217 
5218 	for(hn += 3, hl = 0; hn[hl] && hn[hl] != '/' && hn[hl] != '?'; hl++)
5219 	  ;
5220     }
5221 
5222     if(hn && hl){
5223 	/*
5224 	 * look over anchor's text to see if there's a
5225 	 * mismatch between href target and url-ish
5226 	 * looking text.  throw a red flag if so.
5227 	 * similarly, toss one if the target's referenced
5228 	 * by a
5229 	 */
5230 	if(hd->dp){
5231 	    so_writec('\0', (STORE_S *) hd->dp);
5232 
5233 	    if((txt = (char *) so_text((STORE_S *) hd->dp)) != NULL
5234 	       && (txt = rfc1738_scan(txt, &tl)) != NULL
5235 	       && (txt = srchstr(txt,"://")) != NULL){
5236 
5237 		for(txt += 3, tl = 0; txt[tl] && txt[tl] != '/' && txt[tl] != '?'; tl++)
5238 		  ;
5239 
5240 		if(tl != hl)
5241 		  risky++;
5242 		else
5243 		  /* look for non matching text */
5244 		  for(l = 0; l < tl && l < hl; l++)
5245 		    if(tolower((unsigned char) txt[l]) != tolower((unsigned char) hn[l])){
5246 			risky++;
5247 			break;
5248 		    }
5249 	    }
5250 
5251 	    so_give((STORE_S **) &hd->dp);
5252 	}
5253 
5254 	/* look for literal IP, anything possibly encoded or auth specifier */
5255 	if(!risky){
5256 	    int digits = 1;
5257 
5258 	    for(l = 0; l < hl; l++){
5259 		if(hn[l] == '@' || hn[l] == '%'){
5260 		    risky++;
5261 		    break;
5262 		}
5263 		else if(!(hn[l] == '.' || isdigit((unsigned char) hn[l])))
5264 		  digits = 0;
5265 	    }
5266 
5267 	    if(digits)
5268 	      risky++;
5269 	}
5270 
5271 	/* Insert text of link's domain */
5272 	if(SHOWSERVER(hd->html_data)){
5273 	    char *q;
5274 	    COLOR_PAIR *col = NULL, *colnorm = NULL;
5275 
5276 	    html_output(hd->html_data, ' ');
5277 	    html_output(hd->html_data, '[');
5278 
5279 	    if(pico_usingcolor()
5280 	       && ps_global->VAR_METAMSG_FORE_COLOR
5281 	       && ps_global->VAR_METAMSG_BACK_COLOR
5282 	       && (col = new_color_pair(ps_global->VAR_METAMSG_FORE_COLOR,
5283 					ps_global->VAR_METAMSG_BACK_COLOR))){
5284 		if(!pico_is_good_colorpair(col))
5285 		  free_color_pair(&col);
5286 
5287 		if(col){
5288 		    q = color_embed(col->fg, col->bg);
5289 
5290 		    for(l = 0; q[l]; l++)
5291 		      html_output(hd->html_data, q[l]);
5292 		}
5293 	    }
5294 
5295 	    for(l = 0; l < hl; l++)
5296 	      html_output(hd->html_data, hn[l]);
5297 
5298 	    if(col){
5299 		if(ps_global->VAR_NORM_FORE_COLOR
5300 		   && ps_global->VAR_NORM_BACK_COLOR
5301 		   && (colnorm = new_color_pair(ps_global->VAR_NORM_FORE_COLOR,
5302 						ps_global->VAR_NORM_BACK_COLOR))){
5303 		    if(!pico_is_good_colorpair(colnorm))
5304 		      free_color_pair(&colnorm);
5305 
5306 		    if(colnorm){
5307 			q = color_embed(colnorm->fg, colnorm->bg);
5308 			free_color_pair(&colnorm);
5309 
5310 			for(l = 0; q[l]; l++)
5311 			  html_output(hd->html_data, q[l]);
5312 		    }
5313 		}
5314 
5315 		free_color_pair(&col);
5316 	    }
5317 
5318 	    html_output(hd->html_data, ']');
5319 	}
5320     }
5321 
5322     /*
5323      * if things look OK so far, make sure nothing within
5324      * the url looks too fishy...
5325      */
5326     while(!risky && hn
5327 	  && (hn = rfc1738_scan(hn, &l)) != NULL
5328 	  && (hn = srchstr(hn,"://")) != NULL){
5329 	int digits = 1;
5330 
5331 	for(hn += 3, hl = 0; hn[hl] && hn[hl] != '/' && hn[hl] != '?'; hl++){
5332 	    /*
5333 	     * auth spec, encoded characters, or possibly non-standard port
5334 	     * should raise a red flag
5335 	     */
5336 	    if(hn[hl] == '@' || hn[hl] == '%' || hn[hl] == ':'){
5337 		risky++;
5338 		break;
5339 	    }
5340 	    else if(!(hn[hl] == '.' || isdigit((unsigned char) hn[hl])))
5341 	      digits = 0;
5342 	}
5343 
5344 	/* dotted-dec/raw-int address should cause suspicion as well */
5345 	if(digits)
5346 	  risky++;
5347     }
5348 
5349     if(risky && ((HTML_OPT_S *) hd->html_data->opt)->warnrisk_f)
5350       (*((HTML_OPT_S *) hd->html_data->opt)->warnrisk_f)();
5351 
5352     if(hd->dp)
5353        so_give((STORE_S **) &hd->dp);
5354 
5355 
5356     fs_give((void **) &url);
5357 }
5358 
5359 
5360 
5361 /*
5362  * relative_url - put full url path in h based on base and relative url
5363  */
5364 void
html_a_relative(char * base_url,char * rel_url,HANDLE_S * h)5365 html_a_relative(char *base_url, char *rel_url, HANDLE_S *h)
5366 {
5367     size_t  len;
5368     char    tmp[MAILTMPLEN], *p, *q;
5369     char   *scheme = NULL, *net = NULL, *path = NULL,
5370 	   *parms = NULL, *query = NULL, *frag = NULL,
5371 	   *base_scheme = NULL, *base_net_loc = NULL,
5372 	   *base_path = NULL, *base_parms = NULL,
5373 	   *base_query = NULL, *base_frag = NULL,
5374 	   *rel_scheme = NULL, *rel_net_loc = NULL,
5375 	   *rel_path = NULL, *rel_parms = NULL,
5376 	   *rel_query = NULL, *rel_frag = NULL;
5377 
5378     /* Rough parse of base URL */
5379     rfc1808_tokens(base_url, &base_scheme, &base_net_loc, &base_path,
5380 		   &base_parms, &base_query, &base_frag);
5381 
5382     /* Rough parse of this URL */
5383     rfc1808_tokens(rel_url, &rel_scheme, &rel_net_loc, &rel_path,
5384 		   &rel_parms, &rel_query, &rel_frag);
5385 
5386     scheme = rel_scheme;	/* defaults */
5387     net    = rel_net_loc;
5388     path   = rel_path;
5389     parms  = rel_parms;
5390     query  = rel_query;
5391     frag   = rel_frag;
5392     if(!scheme && base_scheme){
5393 	scheme = base_scheme;
5394 	if(!net){
5395 	    net = base_net_loc;
5396 	    if(path){
5397 		if(*path != '/'){
5398 		    if(base_path){
5399 			for(p = q = base_path;	/* Drop base path's tail */
5400 			    (p = strchr(p, '/'));
5401 			    q = ++p)
5402 			  ;
5403 
5404 			len = q - base_path;
5405 		    }
5406 		    else
5407 		      len = 0;
5408 
5409 		    if(len + strlen(rel_path) < sizeof(tmp)-1){
5410 			if(len)
5411 			  snprintf(path = tmp, sizeof(tmp), "%.*s", (int) len, base_path);
5412 
5413 			strncpy(tmp + len, rel_path, sizeof(tmp)-len);
5414 			tmp[sizeof(tmp)-1] = '\0';
5415 
5416 			/* Follow RFC 1808 "Step 6" */
5417 			for(p = tmp; (p = strchr(p, '.')); )
5418 			  switch(*(p+1)){
5419 			      /*
5420 			       * a) All occurrences of "./", where "." is a
5421 			       *    complete path segment, are removed.
5422 			       */
5423 			    case '/' :
5424 			      if(p > tmp)
5425 				for(q = p; (*q = *(q+2)) != '\0'; q++)
5426 				  ;
5427 			      else
5428 				p++;
5429 
5430 			      break;
5431 
5432 			      /*
5433 			       * b) If the path ends with "." as a
5434 			       *    complete path segment, that "." is
5435 			       *    removed.
5436 			       */
5437 			    case '\0' :
5438 			      if(p == tmp || *(p-1) == '/')
5439 				*p = '\0';
5440 			      else
5441 				p++;
5442 
5443 			      break;
5444 
5445 			      /*
5446 			       * c) All occurrences of "<segment>/../",
5447 			       *    where <segment> is a complete path
5448 			       *    segment not equal to "..", are removed.
5449 			       *    Removal of these path segments is
5450 			       *    performed iteratively, removing the
5451 			       *    leftmost matching pattern on each
5452 			       *    iteration, until no matching pattern
5453 			       *    remains.
5454 			       *
5455 			       * d) If the path ends with "<segment>/..",
5456 			       *    where <segment> is a complete path
5457 			       *    segment not equal to "..", that
5458 			       *    "<segment>/.." is removed.
5459 			       */
5460 			    case '.' :
5461 			      if(p > tmp + 1){
5462 				  for(q = p - 2; q > tmp && *q != '/'; q--)
5463 				    ;
5464 
5465 				  if(*q == '/')
5466 				    q++;
5467 
5468 				  if(q + 1 == p		/* no "//.." */
5469 				     || (*q == '.'	/* and "../.." */
5470 					 && *(q+1) == '.'
5471 					 && *(q+2) == '/')){
5472 				      p += 2;
5473 				      break;
5474 				  }
5475 
5476 				  switch(*(p+2)){
5477 				    case '/' :
5478 				      len = (p - q) + 3;
5479 				      p = q;
5480 				      for(; (*q = *(q+len)) != '\0'; q++)
5481 					;
5482 
5483 				      break;
5484 
5485 				    case '\0':
5486 				      *(p = q) = '\0';
5487 				      break;
5488 
5489 				    default:
5490 				      p += 2;
5491 				      break;
5492 				  }
5493 			      }
5494 			      else
5495 				p += 2;
5496 
5497 			      break;
5498 
5499 			    default :
5500 			      p++;
5501 			      break;
5502 			  }
5503 		    }
5504 		    else
5505 		      path = "";		/* lame. */
5506 		}
5507 	    }
5508 	    else{
5509 		path = base_path;
5510 		if(!parms){
5511 		    parms = base_parms;
5512 		    if(!query)
5513 		      query = base_query;
5514 		}
5515 	    }
5516 	}
5517     }
5518 
5519     len = (scheme ? strlen(scheme) : 0) + (net ? strlen(net) : 0)
5520 	  + (path ? strlen(path) : 0) + (parms ? strlen(parms) : 0)
5521 	  + (query ? strlen(query) : 0) + (frag  ? strlen(frag ) : 0) + 8;
5522 
5523     h->h.url.path = (char *) fs_get(len * sizeof(char));
5524     snprintf(h->h.url.path, len, "%s%s%s%s%s%s%s%s%s%s%s%s",
5525 	    scheme ? scheme : "", scheme ? ":" : "",
5526 	    net ? "//" : "", net ? net : "",
5527 	    (path && *path == '/') ? "" : ((path && net) ? "/" : ""),
5528 	    path ? path : "",
5529 	    parms ? ";" : "", parms ? parms : "",
5530 	    query ? "?" : "", query ? query : "",
5531 	    frag ? "#" : "", frag ? frag : "");
5532 
5533     if(base_scheme)
5534       fs_give((void **) &base_scheme);
5535 
5536     if(base_net_loc)
5537       fs_give((void **) &base_net_loc);
5538 
5539     if(base_path)
5540       fs_give((void **) &base_path);
5541 
5542     if(base_parms)
5543       fs_give((void **) &base_parms);
5544 
5545     if(base_query)
5546       fs_give((void **) &base_query);
5547 
5548     if(base_frag)
5549       fs_give((void **) &base_frag);
5550 
5551     if(rel_scheme)
5552       fs_give((void **) &rel_scheme);
5553 
5554     if(rel_net_loc)
5555       fs_give((void **) &rel_net_loc);
5556 
5557     if(rel_parms)
5558       fs_give((void **) &rel_parms);
5559 
5560     if(rel_query)
5561       fs_give((void **) &rel_query);
5562 
5563     if(rel_frag)
5564       fs_give((void **) &rel_frag);
5565 
5566     if(rel_path)
5567       fs_give((void **) &rel_path);
5568 }
5569 
5570 
/*
 * html_href_relative - tell whether an href lacks a leading scheme
 *
 * Walks at most the first 32 characters.  Letters, '_' and '-' are
 * skipped; the first other character decides: a ':' means a scheme
 * was found and FALSE is returned, anything else (or running out of
 * characters, or a NULL url) gives TRUE.
 */
int
html_href_relative(char *url)
{
    char *s;

    if(!url)
      return(TRUE);

    for(s = url; s - url < 32 && *s; s++){
	if(isalpha((unsigned char) *s) || *s == '_' || *s == '-')
	  continue;

	return((*s == ':') ? FALSE : TRUE);
    }

    return(TRUE);
}
5590 
5591 
5592 /*
5593  * HTML <UL> (Unordered List) element handler
5594  */
5595 int
html_ul(HANDLER_S * hd,int ch,int cmd)5596 html_ul(HANDLER_S *hd, int ch, int cmd)
5597 {
5598     if(cmd == GF_DATA){
5599 	html_handoff(hd, ch);
5600     }
5601     else if(cmd == GF_RESET){
5602 	if(PASS_HTML(hd->html_data)){
5603 	    html_output_raw_tag(hd->html_data, "ul");
5604 	}
5605 	else{
5606 	    HD(hd->html_data)->li_pending = 1;
5607 	    html_blank(hd->html_data, 0);
5608 	}
5609     }
5610     else if(cmd == GF_EOD){
5611 	if(PASS_HTML(hd->html_data)){
5612 	    html_output_string(hd->html_data, "</ul>");
5613 	}
5614 	else{
5615 	    html_blank(hd->html_data, 0);
5616 
5617 	    if(!HD(hd->html_data)->li_pending)
5618 	      html_indent(hd->html_data, -4, HTML_ID_INC);
5619 	    else
5620 	      HD(hd->html_data)->li_pending = 0;
5621 	}
5622     }
5623 
5624     return(1);				/* get linked */
5625 }
5626 
5627 
5628 /*
5629  * HTML <OL> (Ordered List) element handler
5630  */
5631 int
html_ol(HANDLER_S * hd,int ch,int cmd)5632 html_ol(HANDLER_S *hd, int ch, int cmd)
5633 {
5634     if(cmd == GF_DATA){
5635 	html_handoff(hd, ch);
5636     }
5637     else if(cmd == GF_RESET){
5638 	if(PASS_HTML(hd->html_data)){
5639 	    html_output_raw_tag(hd->html_data, "ol");
5640 	}
5641 	else{
5642 	    PARAMETER *p;
5643 	    /*
5644 	     * Signal that we're expecting to see <LI> as our next element
5645 	     * and set the the initial ordered count.
5646 	     */
5647 	    hd->x = 1L;			/* set default */
5648 	    hd->y = LIST_DECIMAL;	/* set default */
5649 	    for(p = HD(hd->html_data)->el_data->attribs;
5650 		p && p->attribute;
5651 		p = p->next)
5652 	    if(p->value){
5653 		if(!strucmp(p->attribute, "TYPE")){
5654 		   if(!strucmp(p->value, "a"))	/* alpha, lowercase */
5655 		      hd->y = LIST_ALPHALO;
5656 		   else if(!strucmp(p->value, "A"))	/* alpha, uppercase */
5657 		      hd->y = LIST_ALPHAUP;
5658 		   else if(!strucmp(p->value, "i"))	/* roman, lowercase */
5659 		      hd->y = LIST_ROMANLO;
5660 		   else if(!strucmp(p->value, "I"))	/* roman, uppercase */
5661 		      hd->y = LIST_ROMANUP;
5662 		   else if(strucmp(p->value, "1"))	/* decimal, the default */
5663 		      hd->y = LIST_UNKNOWN;
5664 		}
5665 		else if(!strucmp(p->attribute, "START"))
5666 			hd->x = atol(p->value);
5667 //		else ADD SUPPORT FOR OTHER ATTRIBUTES... LATER
5668 //		this is not so simple. The main missing support
5669 //		is for the STYLE attribute, but implementing that
5670 //		correctly will take time, so will be implemented
5671 //		after version 2.21 is released.
5672 	    }
5673 	    HD(hd->html_data)->li_pending = 1;
5674 	    html_blank(hd->html_data, 0);
5675 	}
5676     }
5677     else if(cmd == GF_EOD){
5678 	if(PASS_HTML(hd->html_data)){
5679 	    html_output_string(hd->html_data, "</ol>");
5680 	}
5681 	else{
5682 	    html_blank(hd->html_data, 0);
5683 
5684 	    if(!HD(hd->html_data)->li_pending)
5685 	      html_indent(hd->html_data, -4, HTML_ID_INC);
5686 	    else
5687 	      HD(hd->html_data)->li_pending = 0;
5688 	}
5689     }
5690 
5691     return(1);				/* get linked */
5692 }
5693 
5694 
5695 /*
5696  * HTML <MENU> (Menu List) element handler
5697  */
5698 int
html_menu(HANDLER_S * hd,int ch,int cmd)5699 html_menu(HANDLER_S *hd, int ch, int cmd)
5700 {
5701     if(cmd == GF_DATA){
5702 	html_handoff(hd, ch);
5703     }
5704     else if(cmd == GF_RESET){
5705 	if(PASS_HTML(hd->html_data)){
5706 	    html_output_raw_tag(hd->html_data, "menu");
5707 	}
5708 	else{
5709 	    HD(hd->html_data)->li_pending = 1;
5710 	}
5711     }
5712     else if(cmd == GF_EOD){
5713 	if(PASS_HTML(hd->html_data)){
5714 	    html_output_string(hd->html_data, "</menu>");
5715 	}
5716 	else{
5717 	    html_blank(hd->html_data, 0);
5718 
5719 	    if(!HD(hd->html_data)->li_pending)
5720 	      html_indent(hd->html_data, -4, HTML_ID_INC);
5721 	    else
5722 	      HD(hd->html_data)->li_pending = 0;
5723 	}
5724     }
5725 
5726     return(1);				/* get linked */
5727 }
5728 
5729 
5730 /*
5731  * HTML <DIR> (Directory List) element handler
5732  */
5733 int
html_dir(HANDLER_S * hd,int ch,int cmd)5734 html_dir(HANDLER_S *hd, int ch, int cmd)
5735 {
5736     if(cmd == GF_DATA){
5737 	html_handoff(hd, ch);
5738     }
5739     else if(cmd == GF_RESET){
5740 	if(PASS_HTML(hd->html_data)){
5741 	    html_output_raw_tag(hd->html_data, "dir");
5742 	}
5743 	else{
5744 	    HD(hd->html_data)->li_pending = 1;
5745 	}
5746     }
5747     else if(cmd == GF_EOD){
5748 	if(PASS_HTML(hd->html_data)){
5749 	    html_output_string(hd->html_data, "</dir>");
5750 	}
5751 	else{
5752 	    html_blank(hd->html_data, 0);
5753 
5754 	    if(!HD(hd->html_data)->li_pending)
5755 	      html_indent(hd->html_data, -4, HTML_ID_INC);
5756 	    else
5757 	      HD(hd->html_data)->li_pending = 0;
5758 	}
5759     }
5760 
5761     return(1);				/* get linked */
5762 }
5763 
5764 
5765 /*
5766  * HTML <LI> (List Item) element handler
5767  */
5768 int
html_li(HANDLER_S * hd,int ch,int cmd)5769 html_li(HANDLER_S *hd, int ch, int cmd)
5770 {
5771     if(cmd == GF_DATA){
5772 	if(PASS_HTML(hd->html_data)){
5773 	    html_handoff(hd, ch);
5774 	}
5775     }
5776     else if(cmd == GF_RESET){
5777 	HANDLER_S *p, *found = NULL;
5778 
5779 	/*
5780 	 * There better be a an unordered list, ordered list,
5781 	 * Menu or Directory handler installed
5782 	 * or else we crap out...
5783 	 */
5784 	for(p = HANDLERS(hd->html_data); p; p = p->below)
5785 	  if(EL(p)->handler == html_ul
5786 	     || EL(p)->handler == html_ol
5787 	     || EL(p)->handler == html_menu
5788 	     || EL(p)->handler == html_dir){
5789 	      found = p;
5790 	      break;
5791 	  }
5792 
5793 	if(found){
5794 	    if(PASS_HTML(hd->html_data)){
5795 	    }
5796 	    else{
5797 		char buf[20], tmp[16], *p;
5798 		int  wrapstate;
5799 
5800 		/* Start a new line */
5801 		html_blank(hd->html_data, 0);
5802 
5803 		/* adjust indent level if needed */
5804 		if(HD(hd->html_data)->li_pending){
5805 		    html_indent(hd->html_data, 4, HTML_ID_INC);
5806 		    HD(hd->html_data)->li_pending = 0;
5807 		}
5808 
5809 		if(EL(found)->handler == html_ul){
5810 		    int l = html_indent(hd->html_data, 0, HTML_ID_GET);
5811 
5812 		    strncpy(buf, "   ", sizeof(buf));
5813 		    buf[1] = (l < 5) ? '*' : (l < 9) ? '+' : (l < 17) ? 'o' : '#';
5814 		}
5815 		else if(EL(found)->handler == html_ol){
5816 		  if(found->y == LIST_DECIMAL || found->y == LIST_UNKNOWN)
5817 		    snprintf(tmp, sizeof(tmp), "%ld", found->x++);
5818 		  else if(found->y == LIST_ALPHALO)
5819 		    convert_decimal_to_alpha(tmp, sizeof(tmp), found->x++, 'a');
5820 		  else if(found->y == LIST_ALPHAUP)
5821 		    convert_decimal_to_alpha(tmp, sizeof(tmp), found->x++, 'A');
5822 		  else if(found->y == LIST_ROMANLO)
5823 		    convert_decimal_to_roman(tmp, sizeof(tmp), found->x++, 'i');
5824 		  else if(found->y == LIST_ROMANUP)
5825 		    convert_decimal_to_roman(tmp, sizeof(tmp), found->x++, 'I');
5826 		  snprintf(buf, sizeof(buf), " %s.", tmp);
5827 		  buf[sizeof(buf)-1] = '\0';
5828 		}
5829 		else if(EL(found)->handler == html_menu){
5830 		    strncpy(buf, " ->", sizeof(buf));
5831 		    buf[sizeof(buf)-1] = '\0';
5832 		}
5833 
5834 		html_indent(hd->html_data, -4, HTML_ID_INC);
5835 
5836 		/* So we don't munge whitespace */
5837 		wrapstate = HD(hd->html_data)->wrapstate;
5838 		HD(hd->html_data)->wrapstate = 0;
5839 
5840 		html_write_indent(hd->html_data, HD(hd->html_data)->indent_level);
5841 		for(p = buf; *p; p++)
5842 		  html_output(hd->html_data, (int) *p);
5843 		HD(hd->html_data)->wrapstate = wrapstate;
5844 		html_indent(hd->html_data, 4, HTML_ID_INC);
5845 	    }
5846 	    /* else BUG: should really bitch about this */
5847 	}
5848 
5849 	if(PASS_HTML(hd->html_data)){
5850 	    html_output_raw_tag(hd->html_data, "li");
5851 	    return(1);				/* get linked */
5852 	}
5853     }
5854     else if(cmd == GF_EOD){
5855 	if(PASS_HTML(hd->html_data)){
5856 	    html_output_string(hd->html_data, "</li>");
5857 	}
5858     }
5859 
5860     return(PASS_HTML(hd->html_data));	/* DON'T get linked */
5861 }
5862 
5863 
5864 /*
5865  * HTML <DL> (Definition List) element handler
5866  */
5867 int
html_dl(HANDLER_S * hd,int ch,int cmd)5868 html_dl(HANDLER_S *hd, int ch, int cmd)
5869 {
5870     if(cmd == GF_DATA){
5871 	html_handoff(hd, ch);
5872     }
5873     else if(cmd == GF_RESET){
5874 	if(PASS_HTML(hd->html_data)){
5875 	    html_output_raw_tag(hd->html_data, "dl");
5876 	}
5877 	else{
5878 	    /*
5879 	     * Set indention level for definition terms and definitions...
5880 	     */
5881 	    hd->x = html_indent(hd->html_data, 0, HTML_ID_GET);
5882 	    hd->y = hd->x + 2;
5883 	    hd->z = hd->y + 4;
5884 	}
5885     }
5886     else if(cmd == GF_EOD){
5887 	if(PASS_HTML(hd->html_data)){
5888 	    html_output_string(hd->html_data, "</dl>");
5889 	}
5890 	else{
5891 	    html_indent(hd->html_data, (int) hd->x, HTML_ID_SET);
5892 	    html_blank(hd->html_data, 1);
5893 	}
5894     }
5895 
5896     return(1);				/* get linked */
5897 }
5898 
5899 
5900 /*
5901  * HTML <DT> (Definition Term) element handler
5902  */
5903 int
html_dt(HANDLER_S * hd,int ch,int cmd)5904 html_dt(HANDLER_S *hd, int ch, int cmd)
5905 {
5906     if(PASS_HTML(hd->html_data)){
5907 	if(cmd == GF_DATA){
5908 	    html_handoff(hd, ch);
5909 	}
5910 	else if(cmd == GF_RESET){
5911 	    html_output_raw_tag(hd->html_data, "dt");
5912 	}
5913 	else if(cmd == GF_EOD){
5914 	    html_output_string(hd->html_data, "</dt>");
5915 	}
5916 
5917 	return(1);				/* get linked */
5918     }
5919 
5920     if(cmd == GF_RESET){
5921 	HANDLER_S *p;
5922 
5923 	/*
5924 	 * There better be a Definition Handler installed
5925 	 * or else we crap out...
5926 	 */
5927 	for(p = HANDLERS(hd->html_data); p && EL(p)->handler != html_dl; p = p->below)
5928 	  ;
5929 
5930 	if(p){				/* adjust indent level if needed */
5931 	    html_indent(hd->html_data, (int) p->y, HTML_ID_SET);
5932 	    html_blank(hd->html_data, 1);
5933 	}
5934 	/* BUG: else should really bitch about this */
5935     }
5936 
5937     return(0);				/* DON'T get linked */
5938 }
5939 
5940 
5941 /*
5942  * HTML <DD> (Definition Definition) element handler
5943  */
5944 int
html_dd(HANDLER_S * hd,int ch,int cmd)5945 html_dd(HANDLER_S *hd, int ch, int cmd)
5946 {
5947     if(PASS_HTML(hd->html_data)){
5948 	if(cmd == GF_DATA){
5949 	    html_handoff(hd, ch);
5950 	}
5951 	else if(cmd == GF_RESET){
5952 	    html_output_raw_tag(hd->html_data, "dd");
5953 	}
5954 	else if(cmd == GF_EOD){
5955 	    html_output_string(hd->html_data, "</dd>");
5956 	}
5957 
5958 	return(1);				/* get linked */
5959     }
5960 
5961     if(cmd == GF_RESET){
5962 	HANDLER_S *p;
5963 
5964 	/*
5965 	 * There better be a Definition Handler installed
5966 	 * or else we crap out...
5967 	 */
5968 	for(p = HANDLERS(hd->html_data); p && EL(p)->handler != html_dl; p = p->below)
5969 	  ;
5970 
5971 	if(p){				/* adjust indent level if needed */
5972 	    html_indent(hd->html_data, (int) p->z, HTML_ID_SET);
5973 	    html_blank(hd->html_data, 0);
5974 	}
5975 	/* BUG: should really bitch about this */
5976     }
5977 
5978     return(0);				/* DON'T get linked */
5979 }
5980 
5981 
5982 /*
5983  * HTML <H1> (Headings 1) element handler.
5984  *
5985  * Bold, very-large font, CENTERED. One or two blank lines
5986  * above and below.  For our silly character cell's that
5987  * means centered and ALL CAPS...
5988  */
5989 int
html_h1(HANDLER_S * hd,int ch,int cmd)5990 html_h1(HANDLER_S *hd, int ch, int cmd)
5991 {
5992     if(cmd == GF_DATA){
5993 	html_handoff(hd, ch);
5994     }
5995     else if(cmd == GF_RESET){
5996 	if(PASS_HTML(hd->html_data)){
5997 	    html_output_raw_tag(hd->html_data, "h1");
5998 	}
5999 	else{
6000 	    /* turn ON the centered bit */
6001 	    CENTER_BIT(hd->html_data) = 1;
6002 	}
6003     }
6004     else if(cmd == GF_EOD){
6005 	if(PASS_HTML(hd->html_data)){
6006 	    html_output_string(hd->html_data, "</h1>");
6007 	}
6008 	else{
6009 	    /* turn OFF the centered bit, add blank line */
6010 	    CENTER_BIT(hd->html_data) = 0;
6011 	    html_blank(hd->html_data, 1);
6012 	}
6013     }
6014 
6015     return(1);				/* get linked */
6016 }
6017 
6018 
6019 /*
6020  * HTML <H2> (Headings 2) element handler
6021  */
6022 int
html_h2(HANDLER_S * hd,int ch,int cmd)6023 html_h2(HANDLER_S *hd, int ch, int cmd)
6024 {
6025     if(cmd == GF_DATA){
6026 	if(PASS_HTML(hd->html_data)){
6027 	    html_handoff(hd, ch);
6028 	}
6029 	else{
6030 	    if((hd->x & HTML_HX_ULINE) && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
6031 		HTML_ULINE(hd->html_data, 1);
6032 		hd->x ^= HTML_HX_ULINE;	/* only once! */
6033 	    }
6034 
6035 	    html_handoff(hd, (ch < 128 && islower((unsigned char) ch))
6036 			 ? toupper((unsigned char) ch) : ch);
6037 	}
6038     }
6039     else if(cmd == GF_RESET){
6040 	if(PASS_HTML(hd->html_data)){
6041 	    html_output_raw_tag(hd->html_data, "h2");
6042 	}
6043 	else{
6044 	    /*
6045 	     * Bold, large font, flush-left. One or two blank lines
6046 	     * above and below.
6047 	     */
6048 	    if(CENTER_BIT(hd->html_data)) /* stop centering for now */
6049 	      hd->x = HTML_HX_CENTER;
6050 	    else
6051 	      hd->x = 0;
6052 
6053 	    hd->x |= HTML_HX_ULINE;
6054 
6055 	    CENTER_BIT(hd->html_data) = 0;
6056 	    hd->y = html_indent(hd->html_data, 0, HTML_ID_SET);
6057 	    hd->z = HD(hd->html_data)->wrapcol;
6058 	    HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6059 	    html_blank(hd->html_data, 1);
6060 	}
6061     }
6062     else if(cmd == GF_EOD){
6063 	if(PASS_HTML(hd->html_data)){
6064 	    html_output_string(hd->html_data, "</h2>");
6065 	}
6066 	else{
6067 	    /*
6068 	     * restore previous centering, and indent level
6069 	     */
6070 	    if(!(hd->x & HTML_HX_ULINE))
6071 	      HTML_ULINE(hd->html_data, 0);
6072 
6073 	    html_indent(hd->html_data, hd->y, HTML_ID_SET);
6074 	    html_blank(hd->html_data, 1);
6075 	    CENTER_BIT(hd->html_data)  = (hd->x & HTML_HX_CENTER) != 0;
6076 	    HD(hd->html_data)->wrapcol = hd->z;
6077 	}
6078     }
6079 
6080     return(1);				/* get linked */
6081 }
6082 
6083 
6084 /*
6085  * HTML <H3> (Headings 3) element handler
6086  */
6087 int
html_h3(HANDLER_S * hd,int ch,int cmd)6088 html_h3(HANDLER_S *hd, int ch, int cmd)
6089 {
6090     if(cmd == GF_DATA){
6091 	if(!PASS_HTML(hd->html_data)){
6092 	    if((hd->x & HTML_HX_ULINE) && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
6093 		HTML_ULINE(hd->html_data, 1);
6094 		hd->x ^= HTML_HX_ULINE;	/* only once! */
6095 	    }
6096 	}
6097 
6098 	html_handoff(hd, ch);
6099     }
6100     else if(cmd == GF_RESET){
6101 	if(PASS_HTML(hd->html_data)){
6102 	    html_output_raw_tag(hd->html_data, "h3");
6103 	}
6104 	else{
6105 	    /*
6106 	     * Italic, large font, slightly indented from the left
6107 	     * margin. One or two blank lines above and below.
6108 	     */
6109 	    if(CENTER_BIT(hd->html_data)) /* stop centering for now */
6110 	      hd->x = HTML_HX_CENTER;
6111 	    else
6112 	      hd->x = 0;
6113 
6114 	    hd->x |= HTML_HX_ULINE;
6115 	    CENTER_BIT(hd->html_data) = 0;
6116 	    hd->y = html_indent(hd->html_data, 2, HTML_ID_SET);
6117 	    hd->z = HD(hd->html_data)->wrapcol;
6118 	    HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6119 	    html_blank(hd->html_data, 1);
6120 	}
6121     }
6122     else if(cmd == GF_EOD){
6123 	if(PASS_HTML(hd->html_data)){
6124 	    html_output_string(hd->html_data, "</h3>");
6125 	}
6126 	else{
6127 	    /*
6128 	     * restore previous centering, and indent level
6129 	     */
6130 	    if(!(hd->x & HTML_HX_ULINE))
6131 	      HTML_ULINE(hd->html_data, 0);
6132 
6133 	    html_indent(hd->html_data, hd->y, HTML_ID_SET);
6134 	    html_blank(hd->html_data, 1);
6135 	    CENTER_BIT(hd->html_data)  = (hd->x & HTML_HX_CENTER) != 0;
6136 	    HD(hd->html_data)->wrapcol = hd->z;
6137 	}
6138     }
6139 
6140     return(1);				/* get linked */
6141 }
6142 
6143 
6144 /*
6145  * HTML <H4> (Headings 4) element handler
6146  */
6147 int
html_h4(HANDLER_S * hd,int ch,int cmd)6148 html_h4(HANDLER_S *hd, int ch, int cmd)
6149 {
6150     if(cmd == GF_DATA){
6151 	html_handoff(hd, ch);
6152     }
6153     else if(cmd == GF_RESET){
6154 	if(PASS_HTML(hd->html_data)){
6155 	    html_output_raw_tag(hd->html_data, "h4");
6156 	}
6157 	else{
6158 	    /*
6159 	     * Bold, normal font, indented more than H3. One blank line
6160 	     * above and below.
6161 	     */
6162 	    hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */
6163 	    CENTER_BIT(hd->html_data) = 0;
6164 	    hd->y = html_indent(hd->html_data, 4, HTML_ID_SET);
6165 	    hd->z = HD(hd->html_data)->wrapcol;
6166 	    HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6167 	    html_blank(hd->html_data, 1);
6168 	}
6169     }
6170     else if(cmd == GF_EOD){
6171 	if(PASS_HTML(hd->html_data)){
6172 	    html_output_string(hd->html_data, "</h4>");
6173 	}
6174 	else{
6175 	    /*
6176 	     * restore previous centering, and indent level
6177 	     */
6178 	    html_indent(hd->html_data, (int) hd->y, HTML_ID_SET);
6179 	    html_blank(hd->html_data, 1);
6180 	    CENTER_BIT(hd->html_data)  = hd->x;
6181 	    HD(hd->html_data)->wrapcol = hd->z;
6182 	}
6183     }
6184 
6185     return(1);				/* get linked */
6186 }
6187 
6188 
6189 /*
6190  * HTML <H5> (Headings 5) element handler
6191  */
6192 int
html_h5(HANDLER_S * hd,int ch,int cmd)6193 html_h5(HANDLER_S *hd, int ch, int cmd)
6194 {
6195     if(cmd == GF_DATA){
6196 	html_handoff(hd, ch);
6197     }
6198     else if(cmd == GF_RESET){
6199 	if(PASS_HTML(hd->html_data)){
6200 	    html_output_raw_tag(hd->html_data, "h5");
6201 	}
6202 	else{
6203 	    /*
6204 	     * Italic, normal font, indented as H4. One blank line
6205 	     * above.
6206 	     */
6207 	    hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */
6208 	    CENTER_BIT(hd->html_data) = 0;
6209 	    hd->y = html_indent(hd->html_data, 6, HTML_ID_SET);
6210 	    hd->z = HD(hd->html_data)->wrapcol;
6211 	    HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6212 	    html_blank(hd->html_data, 1);
6213 	}
6214     }
6215     else if(cmd == GF_EOD){
6216 	if(PASS_HTML(hd->html_data)){
6217 	    html_output_string(hd->html_data, "</h5>");
6218 	}
6219 	else{
6220 	    /*
6221 	     * restore previous centering, and indent level
6222 	     */
6223 	    html_indent(hd->html_data, (int) hd->y, HTML_ID_SET);
6224 	    html_blank(hd->html_data, 1);
6225 	    CENTER_BIT(hd->html_data)  = hd->x;
6226 	    HD(hd->html_data)->wrapcol = hd->z;
6227 	}
6228     }
6229 
6230     return(1);				/* get linked */
6231 }
6232 
6233 
6234 /*
6235  * HTML <H6> (Headings 6) element handler
6236  */
6237 int
html_h6(HANDLER_S * hd,int ch,int cmd)6238 html_h6(HANDLER_S *hd, int ch, int cmd)
6239 {
6240     if(cmd == GF_DATA){
6241 	html_handoff(hd, ch);
6242     }
6243     else if(cmd == GF_RESET){
6244 	if(PASS_HTML(hd->html_data)){
6245 	    html_output_raw_tag(hd->html_data, "h6");
6246 	}
6247 	else{
6248 	    /*
6249 	     * Bold, indented same as normal text, more than H5. One
6250 	     * blank line above.
6251 	     */
6252 	    hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */
6253 	    CENTER_BIT(hd->html_data) = 0;
6254 	    hd->y = html_indent(hd->html_data, 8, HTML_ID_SET);
6255 	    hd->z = HD(hd->html_data)->wrapcol;
6256 	    HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6257 	    html_blank(hd->html_data, 1);
6258 	}
6259     }
6260     else if(cmd == GF_EOD){
6261 	if(PASS_HTML(hd->html_data)){
6262 	    html_output_string(hd->html_data, "</h6>");
6263 	}
6264 	else{
6265 	    /*
6266 	     * restore previous centering, and indent level
6267 	     */
6268 	    html_indent(hd->html_data, (int) hd->y, HTML_ID_SET);
6269 	    html_blank(hd->html_data, 1);
6270 	    CENTER_BIT(hd->html_data)  = hd->x;
6271 	    HD(hd->html_data)->wrapcol = hd->z;
6272 	}
6273     }
6274 
6275     return(1);				/* get linked */
6276 }
6277 
6278 
6279 /*
6280  * HTML <BlockQuote> element handler
6281  */
6282 int
html_blockquote(HANDLER_S * hd,int ch,int cmd)6283 html_blockquote(HANDLER_S *hd, int ch, int cmd)
6284 {
6285     int	 j;
6286 #define	HTML_BQ_INDENT	6
6287 
6288     if(cmd == GF_DATA){
6289 	html_handoff(hd, ch);
6290     }
6291     else if(cmd == GF_RESET){
6292 	if(PASS_HTML(hd->html_data)){
6293 	    html_output_raw_tag(hd->html_data, "blockquote");
6294 	}
6295 	else{
6296 	    /*
6297 	     * A typical rendering might be a slight extra left and
6298 	     * right indent, and/or italic font. The Blockquote element
6299 	     * causes a paragraph break, and typically provides space
6300 	     * above and below the quote.
6301 	     */
6302 	    html_indent(hd->html_data, HTML_BQ_INDENT, HTML_ID_INC);
6303 	    j = HD(hd->html_data)->wrapstate;
6304 	    HD(hd->html_data)->wrapstate = 0;
6305 	    html_blank(hd->html_data, 1);
6306 	    HD(hd->html_data)->wrapstate = j;
6307 	    HD(hd->html_data)->wrapcol  -= HTML_BQ_INDENT;
6308 	}
6309     }
6310     else if(cmd == GF_EOD){
6311 	if(PASS_HTML(hd->html_data)){
6312 	    html_output_string(hd->html_data, "</blockquote>");
6313 	}
6314 	else{
6315 	    html_blank(hd->html_data, 1);
6316 
6317 	    j = HD(hd->html_data)->wrapstate;
6318 	    HD(hd->html_data)->wrapstate = 0;
6319 	    html_indent(hd->html_data, -(HTML_BQ_INDENT), HTML_ID_INC);
6320 	    HD(hd->html_data)->wrapstate = j;
6321 	    HD(hd->html_data)->wrapcol  += HTML_BQ_INDENT;
6322 	}
6323     }
6324 
6325     return(1);				/* get linked */
6326 }
6327 
6328 
6329 /*
6330  * HTML <Address> element handler
6331  */
6332 int
html_address(HANDLER_S * hd,int ch,int cmd)6333 html_address(HANDLER_S *hd, int ch, int cmd)
6334 {
6335     int	 j;
6336 #define	HTML_ADD_INDENT	2
6337 
6338     if(cmd == GF_DATA){
6339 	html_handoff(hd, ch);
6340     }
6341     else if(cmd == GF_RESET){
6342 	if(PASS_HTML(hd->html_data)){
6343 	    html_output_raw_tag(hd->html_data, "address");
6344 	}
6345 	else{
6346 	    /*
6347 	     * A typical rendering might be a slight extra left and
6348 	     * right indent, and/or italic font. The Blockquote element
6349 	     * causes a paragraph break, and typically provides space
6350 	     * above and below the quote.
6351 	     */
6352 	    html_indent(hd->html_data, HTML_ADD_INDENT, HTML_ID_INC);
6353 	    j = HD(hd->html_data)->wrapstate;
6354 	    HD(hd->html_data)->wrapstate = 0;
6355 	    html_blank(hd->html_data, 1);
6356 	    HD(hd->html_data)->wrapstate = j;
6357 	}
6358     }
6359     else if(cmd == GF_EOD){
6360 	if(PASS_HTML(hd->html_data)){
6361 	    html_output_string(hd->html_data, "</address>");
6362 	}
6363 	else{
6364 	    html_blank(hd->html_data, 1);
6365 
6366 	    j = HD(hd->html_data)->wrapstate;
6367 	    HD(hd->html_data)->wrapstate = 0;
6368 	    html_indent(hd->html_data, -(HTML_ADD_INDENT), HTML_ID_INC);
6369 	    HD(hd->html_data)->wrapstate = j;
6370 	}
6371     }
6372 
6373     return(1);				/* get linked */
6374 }
6375 
6376 
6377 /*
6378  * HTML <PRE> (Preformatted Text) element handler
6379  */
6380 int
html_pre(HANDLER_S * hd,int ch,int cmd)6381 html_pre(HANDLER_S *hd, int ch, int cmd)
6382 {
6383     if(cmd == GF_DATA){
6384 	/*
6385 	 * remove CRLF after '>' in element.
6386 	 * We see CRLF because wrapstate is off.
6387 	 */
6388 	switch(hd->y){
6389 	  case 2 :
6390 	    if(ch == '\012'){
6391 		hd->y = 3;
6392 		return(1);
6393 	    }
6394 	    else
6395 	      html_handoff(hd, '\015');
6396 
6397 	    break;
6398 
6399 	  case 1 :
6400 	    if(ch == '\015'){
6401 		hd->y = 2;
6402 		return(1);
6403 	    }
6404 
6405 	  case 3 :
6406 	    /* passing tags?  replace CRLF with <BR> to make
6407 	     * sure hard newline survives in the end...
6408 	     */
6409 	    if(PASS_HTML(hd->html_data))
6410 	      hd->y = 4;		/* keep looking for CRLF */
6411 	    else
6412 	      hd->y = 0;		/* stop looking */
6413 
6414 	    break;
6415 
6416 	  case 4 :
6417 	    if(ch == '\015'){
6418 		hd->y = 5;
6419 		return(1);
6420 	    }
6421 
6422 	    break;
6423 
6424 	  case 5 :
6425 	    hd->y = 4;
6426 	    if(ch == '\012'){
6427 		html_output_string(hd->html_data, "<br />");
6428 		return(1);
6429 	    }
6430 	    else
6431 	      html_handoff(hd, '\015');	/* not CRLF, pass raw CR */
6432 
6433 	    break;
6434 
6435 	  default :			/* zero case */
6436 	    break;
6437 	}
6438 
6439 	html_handoff(hd, ch);
6440     }
6441     else if(cmd == GF_RESET){
6442 	hd->y = 1;
6443 	if(PASS_HTML(hd->html_data)){
6444 	    html_output_raw_tag(hd->html_data, "pre");
6445 	}
6446 	else{
6447 	    if(hd->html_data)
6448 	      hd->html_data->f1 = DFL;				\
6449 
6450 	    html_blank(hd->html_data, 1);
6451 	    hd->x = HD(hd->html_data)->wrapstate;
6452 	    HD(hd->html_data)->wrapstate = 0;
6453 	}
6454     }
6455     else if(cmd == GF_EOD){
6456 	if(PASS_HTML(hd->html_data)){
6457 	    html_output_string(hd->html_data, "</pre>");
6458 	}
6459 	else{
6460 	    HD(hd->html_data)->wrapstate = (hd->x != 0);
6461 	    html_blank(hd->html_data, 0);
6462 	}
6463     }
6464 
6465     return(1);
6466 }
6467 
6468 
6469 /*
6470  * HTML <CENTER> (Centered Text) element handler
6471  */
6472 int
html_center(HANDLER_S * hd,int ch,int cmd)6473 html_center(HANDLER_S *hd, int ch, int cmd)
6474 {
6475     if(cmd == GF_DATA){
6476 	html_handoff(hd, ch);
6477     }
6478     else if(cmd == GF_RESET){
6479 	if(PASS_HTML(hd->html_data)){
6480 	    html_output_raw_tag(hd->html_data, "center");
6481 	}
6482 	else{
6483 	    /* turn ON the centered bit */
6484 	    CENTER_BIT(hd->html_data) = 1;
6485 	}
6486     }
6487     else if(cmd == GF_EOD){
6488 	if(PASS_HTML(hd->html_data)){
6489 	    html_output_string(hd->html_data, "</center>");
6490 	}
6491 	else{
6492 	    /* turn OFF the centered bit */
6493 	    CENTER_BIT(hd->html_data) = 0;
6494 	}
6495     }
6496 
6497     return(1);
6498 }
6499 
6500 
6501 /*
6502  * HTML <DIV> (Document Divisions) element handler
6503  */
6504 int
html_div(HANDLER_S * hd,int ch,int cmd)6505 html_div(HANDLER_S *hd, int ch, int cmd)
6506 {
6507     if(cmd == GF_DATA){
6508 	html_handoff(hd, ch);
6509     }
6510     else if(cmd == GF_RESET){
6511 	if(PASS_HTML(hd->html_data)){
6512 	    html_output_raw_tag(hd->html_data, "div");
6513 	}
6514 	else{
6515 	    PARAMETER *p;
6516 
6517 	    for(p = HD(hd->html_data)->el_data->attribs;
6518 		p && p->attribute;
6519 		p = p->next)
6520 	      if(!strucmp(p->attribute, "ALIGN")){
6521 		  if(p->value){
6522 		      /* remember previous values */
6523 		      hd->x = CENTER_BIT(hd->html_data);
6524 		      hd->y = html_indent(hd->html_data, 0, HTML_ID_GET);
6525 
6526 		      html_blank(hd->html_data, 0);
6527 		      CENTER_BIT(hd->html_data) = !strucmp(p->value, "CENTER");
6528 		      html_indent(hd->html_data, 0, HTML_ID_SET);
6529 		      /* NOTE: "RIGHT" not supported yet */
6530 		  }
6531 	      }
6532 	  }
6533     }
6534     else if(cmd == GF_EOD){
6535 	if(PASS_HTML(hd->html_data)){
6536 	    html_output_string(hd->html_data, "</div>");
6537 	}
6538 	else{
6539 	    /* restore centered bit and indentiousness */
6540 	    CENTER_BIT(hd->html_data) = hd->y;
6541 	    html_indent(hd->html_data, hd->y, HTML_ID_SET);
6542 	    html_blank(hd->html_data, 0);
6543 	}
6544     }
6545 
6546     return(1);
6547 }
6548 
6549 
6550 /*
6551  * HTML <SPAN> (Text Span) element handler
6552  */
6553 int
html_span(HANDLER_S * hd,int ch,int cmd)6554 html_span(HANDLER_S *hd, int ch, int cmd)
6555 {
6556     if(PASS_HTML(hd->html_data)){
6557 	if(cmd == GF_DATA){
6558 	    html_handoff(hd, ch);
6559 	}
6560 	else if(cmd == GF_RESET){
6561 	    html_output_raw_tag(hd->html_data, "span");
6562 	}
6563 	else if(cmd == GF_EOD){
6564 	    html_output_string(hd->html_data, "</span>");
6565 	}
6566 
6567 	return(1);
6568     }
6569 
6570     return(0);
6571 }
6572 
6573 
6574 /*
6575  * HTML <KBD> (Text Kbd) element handler
6576  */
6577 int
html_kbd(HANDLER_S * hd,int ch,int cmd)6578 html_kbd(HANDLER_S *hd, int ch, int cmd)
6579 {
6580     if(PASS_HTML(hd->html_data)){
6581 	if(cmd == GF_DATA){
6582 	    html_handoff(hd, ch);
6583 	}
6584 	else if(cmd == GF_RESET){
6585 	    html_output_raw_tag(hd->html_data, "kbd");
6586 	}
6587 	else if(cmd == GF_EOD){
6588 	    html_output_string(hd->html_data, "</kbd>");
6589 	}
6590 
6591 	return(1);
6592     }
6593 
6594     return(0);
6595 }
6596 
6597 
6598 /*
6599  * HTML <DFN> (Text Definition) element handler
6600  */
6601 int
html_dfn(HANDLER_S * hd,int ch,int cmd)6602 html_dfn(HANDLER_S *hd, int ch, int cmd)
6603 {
6604     if(PASS_HTML(hd->html_data)){
6605 	if(cmd == GF_DATA){
6606 	    html_handoff(hd, ch);
6607 	}
6608 	else if(cmd == GF_RESET){
6609 	    html_output_raw_tag(hd->html_data, "dfn");
6610 	}
6611 	else if(cmd == GF_EOD){
6612 	    html_output_string(hd->html_data, "</dfn>");
6613 	}
6614 
6615 	return(1);
6616     }
6617 
6618     return(0);
6619 }
6620 
6621 
6622 /*
6623  * HTML <TT> (Text Tt) element handler
6624  */
6625 int
html_tt(HANDLER_S * hd,int ch,int cmd)6626 html_tt(HANDLER_S *hd, int ch, int cmd)
6627 {
6628     if(PASS_HTML(hd->html_data)){
6629 	if(cmd == GF_DATA){
6630 	    html_handoff(hd, ch);
6631 	}
6632 	else if(cmd == GF_RESET){
6633 	    html_output_raw_tag(hd->html_data, "tt");
6634 	}
6635 	else if(cmd == GF_EOD){
6636 	    html_output_string(hd->html_data, "</tt>");
6637 	}
6638 
6639 	return(1);
6640     }
6641 
6642     return(0);
6643 }
6644 
6645 
6646 /*
6647  * HTML <VAR> (Text Var) element handler
6648  */
6649 int
html_var(HANDLER_S * hd,int ch,int cmd)6650 html_var(HANDLER_S *hd, int ch, int cmd)
6651 {
6652     if(PASS_HTML(hd->html_data)){
6653 	if(cmd == GF_DATA){
6654 	    html_handoff(hd, ch);
6655 	}
6656 	else if(cmd == GF_RESET){
6657 	    html_output_raw_tag(hd->html_data, "var");
6658 	}
6659 	else if(cmd == GF_EOD){
6660 	    html_output_string(hd->html_data, "</var>");
6661 	}
6662 
6663 	return(1);
6664     }
6665 
6666     return(0);
6667 }
6668 
6669 
6670 /*
6671  * HTML <SAMP> (Text Samp) element handler
6672  */
6673 int
html_samp(HANDLER_S * hd,int ch,int cmd)6674 html_samp(HANDLER_S *hd, int ch, int cmd)
6675 {
6676     if(PASS_HTML(hd->html_data)){
6677 	if(cmd == GF_DATA){
6678 	    html_handoff(hd, ch);
6679 	}
6680 	else if(cmd == GF_RESET){
6681 	    html_output_raw_tag(hd->html_data, "samp");
6682 	}
6683 	else if(cmd == GF_EOD){
6684 	    html_output_string(hd->html_data, "</samp>");
6685 	}
6686 
6687 	return(1);
6688     }
6689 
6690     return(0);
6691 }
6692 
6693 
6694 /*
6695  * HTML <SUP> (Text Superscript) element handler
6696  */
6697 int
html_sup(HANDLER_S * hd,int ch,int cmd)6698 html_sup(HANDLER_S *hd, int ch, int cmd)
6699 {
6700     if(PASS_HTML(hd->html_data)){
6701 	if(cmd == GF_DATA){
6702 	    html_handoff(hd, ch);
6703 	}
6704 	else if(cmd == GF_RESET){
6705 	    html_output_raw_tag(hd->html_data, "sup");
6706 	}
6707 	else if(cmd == GF_EOD){
6708 	    html_output_string(hd->html_data, "</sup>");
6709 	}
6710 
6711 	return(1);
6712     }
6713 
6714     return(0);
6715 }
6716 
6717 
6718 /*
6719  * HTML <SUB> (Text Subscript) element handler
6720  */
6721 int
html_sub(HANDLER_S * hd,int ch,int cmd)6722 html_sub(HANDLER_S *hd, int ch, int cmd)
6723 {
6724     if(PASS_HTML(hd->html_data)){
6725 	if(cmd == GF_DATA){
6726 	    html_handoff(hd, ch);
6727 	}
6728 	else if(cmd == GF_RESET){
6729 	    html_output_raw_tag(hd->html_data, "sub");
6730 	}
6731 	else if(cmd == GF_EOD){
6732 	    html_output_string(hd->html_data, "</sub>");
6733 	}
6734 
6735 	return(1);
6736     }
6737 
6738     return(0);
6739 }
6740 
6741 
6742 /*
6743  * HTML <CITE> (Text Citation) element handler
6744  */
6745 int
html_cite(HANDLER_S * hd,int ch,int cmd)6746 html_cite(HANDLER_S *hd, int ch, int cmd)
6747 {
6748     if(PASS_HTML(hd->html_data)){
6749 	if(cmd == GF_DATA){
6750 	    html_handoff(hd, ch);
6751 	}
6752 	else if(cmd == GF_RESET){
6753 	    html_output_raw_tag(hd->html_data, "cite");
6754 	}
6755 	else if(cmd == GF_EOD){
6756 	    html_output_string(hd->html_data, "</cite>");
6757 	}
6758 
6759 	return(1);
6760     }
6761 
6762     return(0);
6763 }
6764 
6765 
6766 /*
6767  * HTML <CODE> (Text Code) element handler
6768  */
6769 int
html_code(HANDLER_S * hd,int ch,int cmd)6770 html_code(HANDLER_S *hd, int ch, int cmd)
6771 {
6772     if(PASS_HTML(hd->html_data)){
6773 	if(cmd == GF_DATA){
6774 	    html_handoff(hd, ch);
6775 	}
6776 	else if(cmd == GF_RESET){
6777 	    html_output_raw_tag(hd->html_data, "code");
6778 	}
6779 	else if(cmd == GF_EOD){
6780 	    html_output_string(hd->html_data, "</code>");
6781 	}
6782 
6783 	return(1);
6784     }
6785 
6786     return(0);
6787 }
6788 
6789 
6790 /*
6791  * HTML <INS> (Text Inserted) element handler
6792  */
6793 int
html_ins(HANDLER_S * hd,int ch,int cmd)6794 html_ins(HANDLER_S *hd, int ch, int cmd)
6795 {
6796     if(PASS_HTML(hd->html_data)){
6797 	if(cmd == GF_DATA){
6798 	    html_handoff(hd, ch);
6799 	}
6800 	else if(cmd == GF_RESET){
6801 	    html_output_raw_tag(hd->html_data, "ins");
6802 	}
6803 	else if(cmd == GF_EOD){
6804 	    html_output_string(hd->html_data, "</ins>");
6805 	}
6806 
6807 	return(1);
6808     }
6809 
6810     return(0);
6811 }
6812 
6813 
6814 /*
6815  * HTML <DEL> (Text Deleted) element handler
6816  */
6817 int
html_del(HANDLER_S * hd,int ch,int cmd)6818 html_del(HANDLER_S *hd, int ch, int cmd)
6819 {
6820     if(PASS_HTML(hd->html_data)){
6821 	if(cmd == GF_DATA){
6822 	    html_handoff(hd, ch);
6823 	}
6824 	else if(cmd == GF_RESET){
6825 	    html_output_raw_tag(hd->html_data, "del");
6826 	}
6827 	else if(cmd == GF_EOD){
6828 	    html_output_string(hd->html_data, "</del>");
6829 	}
6830 
6831 	return(1);
6832     }
6833 
6834     return(0);
6835 }
6836 
6837 
6838 /*
6839  * HTML <ABBR> (Text Abbreviation) element handler
6840  */
6841 int
html_abbr(HANDLER_S * hd,int ch,int cmd)6842 html_abbr(HANDLER_S *hd, int ch, int cmd)
6843 {
6844     if(PASS_HTML(hd->html_data)){
6845 	if(cmd == GF_DATA){
6846 	    html_handoff(hd, ch);
6847 	}
6848 	else if(cmd == GF_RESET){
6849 	    html_output_raw_tag(hd->html_data, "abbr");
6850 	}
6851 	else if(cmd == GF_EOD){
6852 	    html_output_string(hd->html_data, "</abbr>");
6853 	}
6854 
6855 	return(1);
6856     }
6857 
6858     return(0);
6859 }
6860 
6861 
6862 /*
6863  * HTML <SCRIPT> element handler
6864  */
6865 int
html_script(HANDLER_S * hd,int ch,int cmd)6866 html_script(HANDLER_S *hd, int ch, int cmd)
6867 {
6868     /* Link in and drop everything within on the floor */
6869     return(1);
6870 }
6871 
6872 
6873 /*
6874  * HTML <APPLET> element handler
6875  */
6876 int
html_applet(HANDLER_S * hd,int ch,int cmd)6877 html_applet(HANDLER_S *hd, int ch, int cmd)
6878 {
6879     /* Link in and drop everything within on the floor */
6880     return(1);
6881 }
6882 
6883 
6884 /*
6885  * HTML <STYLE> CSS element handler
6886  */
6887 int
html_style(HANDLER_S * hd,int ch,int cmd)6888 html_style(HANDLER_S *hd, int ch, int cmd)
6889 {
6890     static STORE_S  *css_stuff ;
6891 
6892     if(PASS_HTML(hd->html_data)){
6893 	if(cmd == GF_DATA){
6894 	    /* collect style settings */
6895 	    so_writec(ch, css_stuff);
6896 	}
6897 	else if(cmd == GF_RESET){
6898 	    if(css_stuff)
6899 	      so_give(&css_stuff);
6900 
6901 	    css_stuff = so_get(CharStar, NULL, EDIT_ACCESS);
6902 	}
6903 	else if(cmd == GF_EOD){
6904 	    /*
6905 	     * TODO: strip anything mischievous and pass on
6906 	     */
6907 
6908 	    so_give(&css_stuff);
6909 	}
6910     }
6911 
6912     return(1);
6913 }
6914 
6915 /*
6916  *  RSS 2.0 <RSS> version
6917  */
6918 int
rss_rss(HANDLER_S * hd,int ch,int cmd)6919 rss_rss(HANDLER_S *hd, int ch, int cmd)
6920 {
6921     if(cmd == GF_RESET){
6922 	PARAMETER *p;
6923 
6924 	for(p = HD(hd->html_data)->el_data->attribs;
6925 	    p && p->attribute;
6926 	    p = p->next)
6927 	  if(!strucmp(p->attribute, "VERSION")){
6928 	      if(p->value && !strucmp(p->value,"2.0"))
6929 		return(0);	/* do not link in */
6930 	  }
6931 
6932 	gf_error("Incompatible RSS version");
6933 	/* NO RETURN */
6934     }
6935 
6936     return(0);	/* not linked or error means we never get here */
6937 }
6938 
6939 /*
6940  *  RSS 2.0 <CHANNEL>
6941  */
6942 int
rss_channel(HANDLER_S * hd,int ch,int cmd)6943 rss_channel(HANDLER_S *hd, int ch, int cmd)
6944 {
6945     if(cmd == GF_DATA){
6946 	html_handoff(hd, ch);
6947     }
6948     else if(cmd == GF_RESET){
6949 	RSS_FEED_S *feed;
6950 
6951 	feed = RSS_FEED(hd->html_data) = fs_get(sizeof(RSS_FEED_S));
6952 	memset(feed, 0, sizeof(RSS_FEED_S));
6953     }
6954 
6955     return(1);			/* link in */
6956 }
6957 
6958 /*
6959  *  RSS 2.0 <TITLE>
6960  */
6961 int
rss_title(HANDLER_S * hd,int ch,int cmd)6962 rss_title(HANDLER_S *hd, int ch, int cmd)
6963 {
6964     static STORE_S *title_so;
6965 
6966     if(cmd == GF_DATA){
6967 	/* collect data */
6968 	if(title_so){
6969 	    so_writec(ch, title_so);
6970 	}
6971     }
6972     else if(cmd == GF_RESET){
6973 	if(RSS_FEED(hd->html_data)){
6974 	    /* prepare for data */
6975 	    if(title_so)
6976 	      so_give(&title_so);
6977 
6978 	    title_so = so_get(CharStar, NULL, EDIT_ACCESS);
6979 	}
6980     }
6981     else if(cmd == GF_EOD){
6982 	if(title_so){
6983 	    RSS_FEED_S *feed = RSS_FEED(hd->html_data);
6984 	    RSS_ITEM_S *rip;
6985 
6986 	    if(feed){
6987 		if((rip = feed->items) != NULL){
6988 		    for(; rip->next; rip = rip->next)
6989 		      ;
6990 
6991 		    if(rip->title)
6992 		      fs_give((void **) &rip->title);
6993 
6994 		    rip->title = cpystr(rss_skip_whitespace(so_text(title_so)));
6995 		}
6996 		else{
6997 		    if(feed->title)
6998 		      fs_give((void **) &feed->title);
6999 
7000 		    feed->title = cpystr(rss_skip_whitespace(so_text(title_so)));
7001 		}
7002 	    }
7003 
7004 	    so_give(&title_so);
7005 	}
7006     }
7007 
7008     return(1);			/* link in */
7009 }
7010 
7011 /*
7012  *  RSS 2.0 <IMAGE>
7013  */
7014 int
rss_image(HANDLER_S * hd,int ch,int cmd)7015 rss_image(HANDLER_S *hd, int ch, int cmd)
7016 {
7017     static STORE_S *img_so;
7018 
7019     if(cmd == GF_DATA){
7020 	/* collect data */
7021 	if(img_so){
7022 	    so_writec(ch, img_so);
7023 	}
7024     }
7025     else if(cmd == GF_RESET){
7026 	if(RSS_FEED(hd->html_data)){
7027 	    /* prepare to collect data */
7028 	    if(img_so)
7029 	      so_give(&img_so);
7030 
7031 	    img_so = so_get(CharStar, NULL, EDIT_ACCESS);
7032 	}
7033     }
7034     else if(cmd == GF_EOD){
7035 	if(img_so){
7036 	    RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7037 
7038 	    if(feed){
7039 		if(feed->image)
7040 		  fs_give((void **) &feed->image);
7041 
7042 		feed->image = cpystr(rss_skip_whitespace(so_text(img_so)));
7043 	    }
7044 
7045 	    so_give(&img_so);
7046 	}
7047     }
7048 
7049     return(1);			/* link in */
7050 }
7051 
7052 /*
7053  *  RSS 2.0 <LINK>
7054  */
7055 int
rss_link(HANDLER_S * hd,int ch,int cmd)7056 rss_link(HANDLER_S *hd, int ch, int cmd)
7057 {
7058     static STORE_S *link_so;
7059 
7060     if(cmd == GF_DATA){
7061 	/* collect data */
7062 	if(link_so){
7063 	    so_writec(ch, link_so);
7064 	}
7065     }
7066     else if(cmd == GF_RESET){
7067 	if(RSS_FEED(hd->html_data)){
7068 	    /* prepare to collect data */
7069 	    if(link_so)
7070 	      so_give(&link_so);
7071 
7072 	    link_so = so_get(CharStar, NULL, EDIT_ACCESS);
7073 	}
7074     }
7075     else if(cmd == GF_EOD){
7076 	if(link_so){
7077 	    RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7078 	    RSS_ITEM_S *rip;
7079 
7080 	    if(feed){
7081 		if((rip = feed->items) != NULL){
7082 		    for(; rip->next; rip = rip->next)
7083 		      ;
7084 
7085 		    if(rip->link)
7086 		      fs_give((void **) &rip->link);
7087 
7088 		    rip->link = cpystr(rss_skip_whitespace(so_text(link_so)));
7089 		}
7090 		else{
7091 		    if(feed->link)
7092 		      fs_give((void **) &feed->link);
7093 
7094 		    feed->link = cpystr(rss_skip_whitespace(so_text(link_so)));
7095 		}
7096 	    }
7097 
7098 	    so_give(&link_so);
7099 	}
7100     }
7101 
7102     return(1);			/* link in */
7103 }
7104 
7105 /*
7106  *  RSS 2.0 <DESCRIPTION>
7107  */
7108 int
rss_description(HANDLER_S * hd,int ch,int cmd)7109 rss_description(HANDLER_S *hd, int ch, int cmd)
7110 {
7111     static STORE_S *desc_so;
7112 
7113     if(cmd == GF_DATA){
7114 	/* collect data */
7115 	if(desc_so){
7116 	    so_writec(ch, desc_so);
7117 	}
7118     }
7119     else if(cmd == GF_RESET){
7120 	if(RSS_FEED(hd->html_data)){
7121 	    /* prepare to collect data */
7122 	    if(desc_so)
7123 	      so_give(&desc_so);
7124 
7125 	    desc_so = so_get(CharStar, NULL, EDIT_ACCESS);
7126 	}
7127     }
7128     else if(cmd == GF_EOD){
7129 	if(desc_so){
7130 	    RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7131 	    RSS_ITEM_S *rip;
7132 
7133 	    if(feed){
7134 		if((rip = feed->items) != NULL){
7135 		    for(; rip->next; rip = rip->next)
7136 		      ;
7137 
7138 		    if(rip->description)
7139 		      fs_give((void **) &rip->description);
7140 
7141 		    rip->description = cpystr(rss_skip_whitespace(so_text(desc_so)));
7142 		}
7143 		else{
7144 		    if(feed->description)
7145 		      fs_give((void **) &feed->description);
7146 
7147 		    feed->description = cpystr(rss_skip_whitespace(so_text(desc_so)));
7148 		}
7149 	    }
7150 
7151 	    so_give(&desc_so);
7152 	}
7153     }
7154 
7155     return(1);			/* link in */
7156 }
7157 
7158 /*
7159  *  RSS 2.0 <TTL> (in minutes)
7160  */
7161 int
rss_ttl(HANDLER_S * hd,int ch,int cmd)7162 rss_ttl(HANDLER_S *hd, int ch, int cmd)
7163 {
7164     RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7165 
7166     if(cmd == GF_DATA){
7167 	if(isdigit((unsigned char) ch))
7168 	  feed->ttl = ((feed->ttl * 10) + (ch - '0'));
7169     }
7170     else if(cmd == GF_RESET){
7171 	/* prepare to collect data */
7172 	feed->ttl = 0;
7173     }
7174     else if(cmd == GF_EOD){
7175     }
7176 
7177     return(1);			/* link in */
7178 }
7179 
7180 /*
7181  *  RSS 2.0 <ITEM>
7182  */
7183 int
rss_item(HANDLER_S * hd,int ch,int cmd)7184 rss_item(HANDLER_S *hd, int ch, int cmd)
7185 {
7186     /* BUG: verify no ITEM nesting? */
7187     if(cmd == GF_RESET){
7188 	RSS_FEED_S *feed;
7189 
7190 	if((feed = RSS_FEED(hd->html_data)) != NULL){
7191 	    RSS_ITEM_S **rip;
7192 	    int		 n = 0;
7193 
7194 	    for(rip = &feed->items; *rip; rip = &(*rip)->next)
7195 	      if(++n > RSS_ITEM_LIMIT)
7196 		return(0);
7197 
7198 	    *rip = fs_get(sizeof(RSS_ITEM_S));
7199 	    memset(*rip, 0, sizeof(RSS_ITEM_S));
7200 	}
7201     }
7202 
7203     return(0);			/* don't link in */
7204 }
7205 
7206 
/*
 * Return a pointer to the first character of s that is not
 * whitespace as judged by isspace(), or to the terminating NUL when
 * s is empty or all whitespace.  The result points into s itself;
 * nothing is copied or modified.  A NULL s yields NULL.
 */
char *
rss_skip_whitespace(char *s)
{
    if(s == NULL)
      return(NULL);

    while(*s && isspace((unsigned char) *s))
      s++;

    return(s);
}
7215 
7216 
7217 /*
7218  * return the function associated with the given element name
7219  */
7220 ELPROP_S *
element_properties(FILTER_S * fd,char * el_name)7221 element_properties(FILTER_S *fd, char *el_name)
7222 {
7223     register ELPROP_S *el_table = ELEMENTS(fd);
7224     size_t len_name = strlen(el_name);
7225 
7226     for(; el_table->element; el_table++)
7227       if(!strucmp(el_name, el_table->element)
7228 	|| (el_table->alternate
7229 	&& len_name == el_table->len + 1
7230 	&& el_name[el_table->len] == '/'
7231 	&& !struncmp(el_name, el_table->element, el_table->len)))
7232 	return(el_table);
7233 
7234     return(NULL);
7235 }
7236 
7237 
/*
 * html_element_collector - collect an element's name and any
 * attribute/value pairs, one character per call, then dispatch to the
 * appropriate handler once the closing '>' is seen.
 *
 * The collection state lives in the structure reached through ED(fd);
 * this routine also recognizes markup declarations ("<!...", which is
 * where comments are collected) and processing instructions ("<?...").
 *
 * Args: fd -- filter whose element collector is being fed
 *	 ch -- the next character following the '<'
 *
 * Returns 1 : got what we wanted
 *	   0 : we need more data
 *	  -1 : bogus input
 */
int
html_element_collector(FILTER_S *fd, int ch)
{
    if(ch == '>'){
	if(ED(fd)->overrun){
	    /*
	     * If problem processing, don't bother doing anything
	     * internally, just return such that none of what we've
	     * digested is displayed.
	     */
	    HTML_DEBUG_EL("too long", ED(fd));
	    return(1);			/* Let it go, Jim */
	}
	else if(ED(fd)->mkup_decl){
	    if(ED(fd)->badform){
		dprint((2, "-- html error: bad form: %.*s\n",
			   ED(fd)->len, ED(fd)->buf ? ED(fd)->buf : "?"));
		/*
		 * Invalid comment -- make some guesses as
		 * to whether we should stop with this greater-than...
		 * We stop unless the text starts with a single '-', is at
		 * least 4 chars long and does not both start and end
		 * with "--"; otherwise we fall thru and keep collecting.
		 */
		if(ED(fd)->buf[0] != '-'
		   || ED(fd)->len < 4
		   || (ED(fd)->buf[1] == '-'
		       && ED(fd)->buf[ED(fd)->len - 1] == '-'
		       && ED(fd)->buf[ED(fd)->len - 2] == '-'))
		  return(1);
	    }
	    else{
		dprint((5, "-- html: OK: %.*s\n",
			   ED(fd)->len, ED(fd)->buf ? ED(fd)->buf : "?"));
		/* flags equal: no comment is left open at this '>' */
		if(ED(fd)->start_comment == ED(fd)->end_comment){
		    if(ED(fd)->len > 10){
			/* chop the trailing "--", skip the leading "--" */
			ED(fd)->buf[ED(fd)->len - 2] = '\0';
			html_element_comment(fd, ED(fd)->buf + 2);
		    }

		    return(1);
		}
		/* else keep collecting comment below */
	    }
	}
	else if(ED(fd)->proc_inst){
	    return(1);			/* return without display... */
	}
	else if(!ED(fd)->quoted || ED(fd)->badform){
	    ELPROP_S *ep;

	    /*
	     * We either have the whole thing or all that we could
	     * salvage from it.  Try our best...
	     */

	    if(HD(fd)->bitbucket)
	      return(1);		/* element inside chtml clause! */

	    /* integrate whatever is still sitting in the buffer */
	    if(!ED(fd)->badform && html_element_flush(ED(fd)))
	      return(1);		/* return without display... */

	    /*
	     * If we ran into an empty tag or we don't know how to deal
	     * with it, just go on, ignoring it...
	     */
	    if(ED(fd)->element && (ep = element_properties(fd, ED(fd)->element))){
		if(ep->handler){
		    /* dispatch the element's handler */
		    HTML_DEBUG_EL(ED(fd)->end_tag ? "POP" : "PUSH", ED(fd));
		    if(ED(fd)->end_tag){
			html_pop(fd, ep);	/* remove its handler */
		    }
		    else{
			/* if a block element, pop any open <p>'s */
			if(ep->blocklevel){
			    HANDLER_S *tp;

			    /*
			     * NOTE: the unconditional break means only the
			     * handler at the top of the stack is examined
			     */
			    for(tp = HANDLERS(fd); tp && EL(tp)->handler == html_p; tp = tp->below){
				HTML_DEBUG_EL("Unclosed <P>", ED(fd));
				html_pop(fd, EL(tp));
				break;
			    }
			}

			/* enforce table nesting */
			if(!strucmp(ep->element, "tr")){
			    /* <TR> wants TABLE, TBODY or THEAD on top of the stack */
			    if(!HANDLERS(fd) || (strucmp(EL(HANDLERS(fd))->element, "table") && strucmp(EL(HANDLERS(fd))->element, "tbody") && strucmp(EL(HANDLERS(fd))->element, "thead"))){
				dprint((2, "-- html error: bad nesting for <TR>, GOT %s\n", (HANDLERS(fd)) ? EL(HANDLERS(fd))->element : "NO-HANDLERS"));
				if(HANDLERS(fd) && !strucmp(EL(HANDLERS(fd))->element,"tr")){
				    dprint((2, "-- html error: bad nesting popping previous <TR>"));
				    html_pop(fd, EL(HANDLERS(fd)));
				}
				else{
				    dprint((2, "-- html error: bad nesting pusing <TABLE>"));
				    html_push(fd, element_properties(fd, "table"));
				}
			    }
			}
			else if(!strucmp(ep->element, "td") || !strucmp(ep->element, "th")){
			    /* <TD>/<TH> wants a TR on top of the stack */
			    if(!HANDLERS(fd)){
				dprint((2, "-- html error: bad nesting: NO HANDLERS before <TD>"));
				html_push(fd, element_properties(fd, "table"));
				html_push(fd, element_properties(fd, "tr"));
			    }
			    else if(strucmp(EL(HANDLERS(fd))->element, "tr")){
				dprint((2, "-- html error: bad nesting for <TD>, GOT %s\n", EL(HANDLERS(fd))->element));
				html_push(fd, element_properties(fd, "tr"));
			    }
			    /*
			     * NOTE(review): this branch is only reached when
			     * the top element compared equal to "tr" above, so
			     * it looks like it can never match "td" -- confirm
			     * whether it was meant to be tested first.
			     */
			    else if(!strucmp(EL(HANDLERS(fd))->element, "td")){
				dprint((2, "-- html error: bad nesting popping <TD>"));
				html_pop(fd, EL(HANDLERS(fd)));
			    }
			}

			/* add its handler */
			if(html_push(fd, ep)){
			    if(ED(fd)->empty){
				/* remove empty element */
				html_pop(fd, ep);
			    }
			}
		    }
		}
		else {
		    HTML_DEBUG_EL("IGNORED", ED(fd));
		}
	    }
	    else{			/* else, empty or unrecognized */
		HTML_DEBUG_EL("?", ED(fd));
	    }

	    return(1);			/* all done! see, that didn't hurt */
	}
	/* else the '>' is inside a quoted value, collect it below */
    }
    else if(ch == '/' && ED(fd)->element && ED(fd)->len){
	ED(fd)->empty = 1;
    }
    else
      ED(fd)->empty = 0;

    if(ED(fd)->mkup_decl){
	/* track "--" pairs: the first opens a comment, the next closes it */
	if((ch &= 0xff) == '-'){
	    if(ED(fd)->hyphen){
		ED(fd)->hyphen = 0;
		if(ED(fd)->start_comment)
		  ED(fd)->end_comment = 1;
		else
		  ED(fd)->start_comment = 1;
	    }
	    else
	      ED(fd)->hyphen = 1;
	}
	else{
	    if(ED(fd)->end_comment)
	      ED(fd)->start_comment = ED(fd)->end_comment = 0;

	    /*
	     * no "--" after ! or non-whitespace between comments - bad
	     */
	    if(ED(fd)->len < 2 || (!ED(fd)->start_comment
				   && !ASCII_ISSPACE((unsigned char) ch)))
	      ED(fd)->badform = 1;	/* non-comment! */

	    ED(fd)->hyphen = 0;
	}

	/*
	 * Remember the comment for possible later processing, if
	 * it gets too long, remember first and last few chars
	 * so we know when to terminate (and throw some garbage
	 * in between when we toss out what's between).
	 */
	if(ED(fd)->len == HTML_BUF_LEN){
	    ED(fd)->buf[2] = ED(fd)->buf[3] = 'X';
	    ED(fd)->buf[4] = ED(fd)->buf[ED(fd)->len - 2];
	    ED(fd)->buf[5] = ED(fd)->buf[ED(fd)->len - 1];
	    ED(fd)->len    = 6;
	}

	ED(fd)->buf[(ED(fd)->len)++] = ch;
	return(0);			/* comments go in the bit bucket */
    }
    else if(ED(fd)->overrun || ED(fd)->badform){
	return(0);			/* swallow char's until next '>' */
    }
    else if(!ED(fd)->element && !ED(fd)->len){
	/* nothing collected yet: this is the first char after the '<' */
	if(ch == '/'){		/* validate leading chars */
	    ED(fd)->end_tag = 1;
	    return(0);
	}
	else if(ch == '!'){
	    ED(fd)->mkup_decl = 1;
	    return(0);
	}
	else if(ch == '?'){
	    ED(fd)->proc_inst = 1;
	    return(0);
	}
	else if(!isalpha((unsigned char) ch))
	  return(-1);			/* can't be a tag! */
    }
    else if(ch == '\"' || ch == '\''){
	if(!ED(fd)->hit_equal){
	    ED(fd)->badform = 1;	/* quote in element name?!? */
	    return(0);
	}

	if(ED(fd)->quoted){
	    if(ED(fd)->quoted == (char) ch){
		/* end of a quoted value */
		ED(fd)->quoted = 0;
		if(ED(fd)->len && html_element_flush(ED(fd)))
		  ED(fd)->badform = 1;

		return(0);		/* continue collecting chars */
	    }
	    /* ELSE fall thru writing other quoting char */
	}
	else{
	    /* opening quote: remember which quote char must close it */
	    ED(fd)->quoted = (char) ch;
	    ED(fd)->was_quoted = 1;
	    return(0);			/* need more data */
	}
    }
    else if (ASCII_ISSPACE((unsigned char) ch))
	ED(fd)->unquoted_data = 0;
    else if (ED(fd)->hit_equal)
	ED(fd)->unquoted_data = 1;	/* in an unquoted attribute value */

    ch &= 0xff;			/* strip any "literal" high bits */
    if(ED(fd)->quoted
       || ED(fd)->unquoted_data
       || isalnum(ch)
       || strchr("#-.!", ch)){
	/*
	 * NOTE(review): len is allowed to reach the limit itself, which
	 * looks like it leaves no room for a terminating NUL if buf holds
	 * HTML_BUF_LEN bytes -- confirm against the CLCTR_S declaration.
	 */
	if(ED(fd)->len < ((ED(fd)->element || !ED(fd)->hit_equal)
			       ? HTML_BUF_LEN:MAX_ELEMENT)){
	    ED(fd)->buf[(ED(fd)->len)++] = ch;
	}
	else
	  ED(fd)->overrun = 1;		/* flag it broken */
    }
    else if(ASCII_ISSPACE((unsigned char) ch) || ch == '='){
	/* a delimiter: integrate the name or value collected so far */
	if((ED(fd)->len || ED(fd)->was_quoted) && html_element_flush(ED(fd))){
	    ED(fd)->badform = 1;
	    return(0);		/* else, we ain't done yet */
	}

	if(!ED(fd)->hit_equal)
	  ED(fd)->hit_equal = (ch == '=');
    }
    else if(ch == '/' && ED(fd)->len && !ED(fd)->element){
	/*
	 * a '/' inside what is still the element name is only kept
	 * when the name collected so far has an "alternate" property
	 */
	ELPROP_S *ep;
	ep = element_properties(fd, ED(fd)->buf);
	if(ep){
	  if(!ep->alternate)
	    ED(fd)->badform = 1;
	  else{
	    if(ED(fd)->len < ((ED(fd)->element || !ED(fd)->hit_equal)
			       ? HTML_BUF_LEN:MAX_ELEMENT)){
	      ED(fd)->buf[(ED(fd)->len)++] = ch;	/* add this exception */
	    }
	    else
	      ED(fd)->overrun = 1;
	  }
       }
       else
	 ED(fd)->badform = 1;
    }
    else
      ED(fd)->badform = 1;		/* unrecognized data?? */

    return(0);				/* keep collecting */
}
7517 
7518 
7519 /*
7520  * Element collector found complete string, integrate it and reset
7521  * internal collection buffer.
7522  *
7523  * Returns zero if element collection buffer flushed, error flag otherwise
7524  */
7525 int
html_element_flush(CLCTR_S * el_data)7526 html_element_flush(CLCTR_S *el_data)
7527 {
7528     int rv = 0;
7529 
7530     if(el_data->hit_equal){		/* adding a value */
7531 	el_data->hit_equal = 0;
7532 	if(el_data->cur_attrib){
7533 	    if(!el_data->cur_attrib->value){
7534 		el_data->cur_attrib->value = cpystr(el_data->len
7535 						    ? el_data->buf : "");
7536 	    }
7537 	    else{
7538 		dprint((2, "** element: unexpected value: %.10s...\n",
7539 			(el_data->len && el_data->buf) ? el_data->buf : "\"\""));
7540 		rv = 1;
7541 	    }
7542 	}
7543 	else{
7544 	    dprint((2, "** element: missing attribute name: %.10s...\n",
7545 		    (el_data->len && el_data->buf) ? el_data->buf : "\"\""));
7546 	    rv = 2;
7547 	}
7548     }
7549     else if(el_data->len){
7550 	if(!el_data->element){
7551 	    el_data->element = cpystr(el_data->buf);
7552 	}
7553 	else{
7554 	    PARAMETER *p = (PARAMETER *)fs_get(sizeof(PARAMETER));
7555 	    memset(p, 0, sizeof(PARAMETER));
7556 	    if(el_data->attribs){
7557 		el_data->cur_attrib->next = p;
7558 		el_data->cur_attrib = p;
7559 	    }
7560 	    else
7561 	      el_data->attribs = el_data->cur_attrib = p;
7562 
7563 	    p->attribute = cpystr(el_data->buf);
7564 	}
7565 
7566     }
7567 
7568     el_data->was_quoted = 0;	/* reset collector buf and state */
7569     el_data->len = 0;
7570     memset(el_data->buf, 0, HTML_BUF_LEN);
7571     return(rv);			/* report whatever happened above */
7572 }
7573 
7574 
7575 /*
7576  * html_element_comment - "Special" comment handling here
7577  */
7578 void
html_element_comment(FILTER_S * f,char * s)7579 html_element_comment(FILTER_S *f, char *s)
7580 {
7581     char *p;
7582 
7583     while(*s && ASCII_ISSPACE((unsigned char) *s))
7584       s++;
7585 
7586     /*
7587      * WARNING: "!--chtml" denotes "Conditional HTML", a UW-ism.
7588      */
7589     if(!struncmp(s, "chtml ", 6)){
7590 	s += 6;
7591 	if(!struncmp(s, "if ", 3)){
7592 	    HD(f)->bitbucket = 1;	/* default is failure! */
7593 	    switch(*(s += 3)){
7594 	      case 'P' :
7595 	      case 'p' :
7596 		if(!struncmp(s + 1, "inemode=", 8)){
7597 		    if(!strucmp(s = removing_quotes(s + 9), "function_key")
7598 		       && F_ON(F_USE_FK, ps_global))
7599 		      HD(f)->bitbucket = 0;
7600 		    else if(!strucmp(s, "running"))
7601 		      HD(f)->bitbucket = 0;
7602 #ifdef	_WINDOWS
7603 		    else if(!strucmp(s, "os_windows"))
7604 		      HD(f)->bitbucket = 0;
7605 #endif
7606 		}
7607 
7608 		break;
7609 
7610 	      case '[' :	/* test */
7611 		if((p = strindex(++s, ']')) != NULL){
7612 		    *p = '\0';		/* tie off test string */
7613 		    removing_leading_white_space(s);
7614 		    removing_trailing_white_space(s);
7615 		    if(*s == '-' && *(s+1) == 'r'){ /* readable file? */
7616 			for(s += 2; *s && ASCII_ISSPACE((unsigned char) *s); s++)
7617 			  ;
7618 
7619 
7620 			HD(f)->bitbucket = (can_access(CHTML_VAR_EXPAND(removing_quotes(s)),
7621 						       READ_ACCESS) != 0);
7622 		    }
7623 		}
7624 
7625 		break;
7626 
7627 	      default :
7628 		break;
7629 	    }
7630 	}
7631 	else if(!strucmp(s, "else")){
7632 	    HD(f)->bitbucket = !HD(f)->bitbucket;
7633 	}
7634 	else if(!strucmp(s, "endif")){
7635 	    /* Clean up after chtml here */
7636 	    HD(f)->bitbucket = 0;
7637 	}
7638     }
7639     else if(!HD(f)->bitbucket){
7640 	if(!struncmp(s, "#include ", 9)){
7641 	    char  buf[MAILTMPLEN], *bufp;
7642 	    int   len, end_of_line;
7643 	    FILE *fp;
7644 
7645 	    /* Include the named file */
7646 	    if(!struncmp(s += 9, "file=", 5)
7647 	       && (fp = our_fopen(CHTML_VAR_EXPAND(removing_quotes(s+5)), "r"))){
7648 		html_element_output(f, HTML_NEWLINE);
7649 
7650 		while(fgets(buf, sizeof(buf), fp)){
7651 		    if((len = strlen(buf)) && buf[len-1] == '\n'){
7652 			end_of_line = 1;
7653 			buf[--len]  = '\0';
7654 		    }
7655 		    else
7656 		      end_of_line = 0;
7657 
7658 		    for(bufp = buf; len; bufp++, len--)
7659 		      html_element_output(f, (int) *bufp);
7660 
7661 		    if(end_of_line)
7662 		      html_element_output(f, HTML_NEWLINE);
7663 		}
7664 
7665 		fclose(fp);
7666 		html_element_output(f, HTML_NEWLINE);
7667 		HD(f)->blanks = 0;
7668 		if(f->f1 == WSPACE)
7669 		  f->f1 = DFL;
7670 	    }
7671 	}
7672 	else if(!struncmp(s, "#echo ", 6)){
7673 	    if(!struncmp(s += 6, "var=", 4)){
7674 		char	*p, buf[MAILTMPLEN];
7675 		ADDRESS *adr;
7676 		extern char datestamp[];
7677 
7678 		if(!strcmp(s = removing_quotes(s + 4), "ALPINE_VERSION")){
7679 		    p = ALPINE_VERSION;
7680 		}
7681 		else if(!strcmp(s, "ALPINE_REVISION")){
7682 		    p = get_alpine_revision_string(buf, sizeof(buf));
7683 		}
7684 		else if(!strcmp(s, "C_CLIENT_VERSION")){
7685 		    p = CCLIENTVERSION;
7686 		}
7687 		else if(!strcmp(s, "ALPINE_COMPILE_DATE")){
7688 		    p = datestamp;
7689 		}
7690 		else if(!strcmp(s, "ALPINE_TODAYS_DATE")){
7691 		    rfc822_date(p = buf);
7692 		}
7693 		else if(!strcmp(s, "_LOCAL_FULLNAME_")){
7694 		    p = (ps_global->VAR_LOCAL_FULLNAME
7695 			 && ps_global->VAR_LOCAL_FULLNAME[0])
7696 			    ? ps_global->VAR_LOCAL_FULLNAME
7697 			    : "Local Support";
7698 		}
7699 		else if(!strcmp(s, "_LOCAL_ADDRESS_")){
7700 		    p = (ps_global->VAR_LOCAL_ADDRESS
7701 			 && ps_global->VAR_LOCAL_ADDRESS[0])
7702 			   ? ps_global->VAR_LOCAL_ADDRESS
7703 			   : "postmaster";
7704 		    adr = rfc822_parse_mailbox(&p, ps_global->maildomain);
7705 		    snprintf(p = buf, sizeof(buf), "%s@%s", adr->mailbox, adr->host);
7706 		    mail_free_address(&adr);
7707 		}
7708 		else if(!strcmp(s, "_BUGS_FULLNAME_")){
7709 		    p = (ps_global->VAR_BUGS_FULLNAME
7710 			 && ps_global->VAR_BUGS_FULLNAME[0])
7711 			    ? ps_global->VAR_BUGS_FULLNAME
7712 			    : "Place to report Alpine Bugs";
7713 		}
7714 		else if(!strcmp(s, "_BUGS_ADDRESS_")){
7715 		    p = (ps_global->VAR_BUGS_ADDRESS
7716 			 && ps_global->VAR_BUGS_ADDRESS[0])
7717 			    ? ps_global->VAR_BUGS_ADDRESS : "postmaster";
7718 		    adr = rfc822_parse_mailbox(&p, ps_global->maildomain);
7719 		    snprintf(p = buf, sizeof(buf), "%s@%s", adr->mailbox, adr->host);
7720 		    mail_free_address(&adr);
7721 		}
7722 		else if(!strcmp(s, "CURRENT_DIR")){
7723 		    getcwd(p = buf, sizeof(buf));
7724 		}
7725 		else if(!strcmp(s, "HOME_DIR")){
7726 		    p = ps_global->home_dir;
7727 		}
7728 		else if(!strcmp(s, "PINE_CONF_PATH")){
7729 #if defined(_WINDOWS) || !defined(SYSTEM_PINERC)
7730 		    p = "/usr/local/lib/pine.conf";
7731 #else
7732 		    p = SYSTEM_PINERC;
7733 #endif
7734 		}
7735 		else if(!strcmp(s, "PINE_CONF_FIXED_PATH")){
7736 #ifdef SYSTEM_PINERC_FIXED
7737 		    p = SYSTEM_PINERC_FIXED;
7738 #else
7739 		    p = "/usr/local/lib/pine.conf.fixed";
7740 #endif
7741 		}
7742 		else if(!strcmp(s, "PINE_INFO_PATH")){
7743 		    p = SYSTEM_PINE_INFO_PATH;
7744 		}
7745 		else if(!strcmp(s, "MAIL_SPOOL_PATH")){
7746 		    p = sysinbox();
7747 		}
7748 		else if(!strcmp(s, "MAIL_SPOOL_LOCK_PATH")){
7749 		    /* Don't put the leading /tmp/. */
7750 		    int i, j;
7751 
7752 		    p = sysinbox();
7753 		    if(p){
7754 			for(j = 0, i = 0; p[i] && j < MAILTMPLEN - 1; i++){
7755 			    if(p[i] == '/')
7756 				buf[j++] = '\\';
7757 			    else
7758 			      buf[j++] = p[i];
7759 			}
7760 			buf[j++] = '\0';
7761 			p = buf;
7762 		    }
7763 		}
7764 		else if(!struncmp(s, "VAR_", 4)){
7765 		    p = s+4;
7766 		    if(pith_opt_pretty_var_name)
7767 		      p = (*pith_opt_pretty_var_name)(p);
7768 		}
7769 		else if(!struncmp(s, "FEAT_", 5)){
7770 		    p = s+5;
7771 		    if(pith_opt_pretty_feature_name)
7772 		      p = (*pith_opt_pretty_feature_name)(p, -1);
7773 		}
7774 		else
7775 		  p = NULL;
7776 
7777 		if(p){
7778 		    if(f->f1 == WSPACE){
7779 			html_element_output(f, ' ');
7780 			f->f1 = DFL;			/* clear it */
7781 		    }
7782 
7783 		    while(*p)
7784 		      html_element_output(f, (int) *p++);
7785 		}
7786 	    }
7787 	}
7788     }
7789 }
7790 
7791 
7792 void
html_element_output(FILTER_S * f,int ch)7793 html_element_output(FILTER_S *f, int ch)
7794 {
7795     if(HANDLERS(f))
7796       (*EL(HANDLERS(f))->handler)(HANDLERS(f), ch, GF_DATA);
7797     else
7798       html_output(f, ch);
7799 }
7800 
7801 /*
7802  * collect html entity and return its UCS value when done.
7803  *
7804  * Returns HTML_MOREDATA : we need more data
7805  *	   HTML_ENTITY	 : entity collected
7806  *	   HTML_BADVALUE : good data, but no named match or out of range
7807  *	   HTML_BADDATA  : invalid input
7808  *
7809  * NOTES:
7810  *  - entity format is "'&' tag ';'" and represents a literal char
7811  *  - named entities are CASE SENSITIVE.
7812  *  - numeric char references (where the tag is prefixed with a '#')
7813  *    are a char with that numbers value
7814  *  - numeric vals are 0-255 except for the ranges: 0-8, 11-31, 127-159.
7815  */
7816 int
html_entity_collector(FILTER_S * f,int ch,UCS * ucs,char ** alt)7817 html_entity_collector(FILTER_S *f, int ch, UCS *ucs, char **alt)
7818 {
7819     static int  len = 0;
7820     static char buf[MAX_ENTITY+2];
7821     int		rv, i;
7822 
7823     if(len == MAX_ENTITY){
7824 	rv = HTML_BADDATA;
7825     }
7826     else if((len == 0)
7827 	      ? (isalpha((unsigned char) ch) || ch == '#')
7828 	      : ((isdigit((unsigned char) ch)
7829 		  || (len == 1 && (unsigned char) ch == 'x')
7830 		  || (len == 1 &&(unsigned char) ch == 'X')
7831 		  || (len > 1 && isxdigit((unsigned char) ch))
7832 		  || (isalpha((unsigned char) ch) && buf[0] != '#')))){
7833 	buf[len++] = ch;
7834 	return(HTML_MOREDATA);
7835     }
7836     else if(ch == ';' || ASCII_ISSPACE((unsigned char) ch)){
7837 	buf[len] = '\0';		/* got something! */
7838 	if(buf[0] == '#'){
7839 	    if(buf[1] == 'x' || buf[1] == 'X')
7840 	       *ucs = (UCS) strtoul(&buf[2], NULL, 16);
7841 	    else
7842 	       *ucs = (UCS) strtoul(&buf[1], NULL, 10);
7843 
7844 	    if(alt){
7845 		*alt = NULL;
7846 		for(i = 0; i < sizeof(entity_tab)/sizeof(struct html_entities); i++)
7847 		   if(entity_tab[i].value == *ucs){
7848 		     *alt = entity_tab[i].plain;
7849 		      break;
7850 		   }
7851 	    }
7852 
7853 	    len = 0;
7854 	    return(HTML_ENTITY);
7855 	}
7856 	else{
7857 	    rv = HTML_BADVALUE;		/* in case of no match */
7858 	    for(i = 0; i < sizeof(entity_tab)/sizeof(struct html_entities); i++)
7859 	      if(strcmp(entity_tab[i].name, buf) == 0){
7860 		  *ucs = entity_tab[i].value;
7861 		  if(alt)
7862 		    *alt = entity_tab[i].plain;
7863 
7864 		  len = 0;
7865 		  return(HTML_ENTITY);
7866 	      }
7867 	}
7868     }
7869     else
7870       rv = HTML_BADDATA;		/* bogus input! */
7871 
7872     if(alt){
7873 	buf[len]   = '\0';
7874 	*alt	   = buf;
7875     }
7876 
7877     len = 0;
7878     return(rv);
7879 }
7880 
7881 
/*----------------------------------------------------------------------
  HTML text to plain text filter

  This basically tries to do the best it can with HTML 2.0 (RFC1866)
  with bits of RFC 1942 (plus some HTML 3.2 thrown in as well) text
  formatting.

  Args: f   -- this filter's state
	flg -- GF_DATA  : process the characters waiting in our input
	       GF_EOD   : end of data; close open elements, flush, free
			  what GF_RESET allocated and pass GF_EOD along
	       GF_RESET : allocate and initialize our private state

  While an entity is being collected f->t is non-zero.

 ----*/
void
gf_html2plain(FILTER_S *f, int flg)
{
/* BUG: quote incoming \255 values (see "yuml" above!) */
    if(flg == GF_DATA){
	register int c;
	GF_INIT(f, f->next);

	/* first output: write the leading indent */
	if(!HTML_WROTE(f)){
	    int ii;

	    for(ii = HTML_INDENT(f); ii > 0; ii--)
	      html_putc(f, ' ');

	    HTML_WROTE(f) = 1;
	}

	while(GF_GETC(f, c)){
	    /*
	     * First we have to collect any literal entities...
	     * that is, IF we're not already collecting one
	     * AND we're not in element's text or, if we are, we're
	     * not in quoted text.  Whew.
	     */
	    if(f->t){
		char *alt = NULL;
		UCS   ucs;

		switch(html_entity_collector(f, c, &ucs, &alt)){
		  case HTML_MOREDATA:	/* more data required? */
		    continue;		/* go get another char */

		  case HTML_BADVALUE :
		  case HTML_BADDATA :
		    /*
		     * not an entity after all: process the '&' and the
		     * text the collector gave back as ordinary data
		     */
		    HTML_PROC(f, '&');
		    for(; *alt; alt++){
			unsigned int uic = *alt;
			HTML_PROC(f, uic);
		    }

		    /* the char that ended it may itself start an entity */
		    if(c == '&' && !HD(f)->quoted){
			f->t = '&';
			continue;
		    }
		    else
		      f->t = 0;		/* don't come back next time */

		    break;		/* c gets processed below */

		  default :		/* thing to process */
		    f->t = 0;		/* don't come back */

		    /*
		     * do something with UCS codepoint.  If it's
		     * not displayable then use the alt version
		     * otherwise
		     * cvt UCS to UTF-8 and toss into next filter.
		     */
		    if(ucs > 127 && wcellwidth(ucs) < 0){
			if(alt){
			    for(; *alt; alt++){
				c = MAKE_LITERAL(*alt);
				HTML_PROC(f, c);
			    }

			    continue;
			}
			else
			  c = MAKE_LITERAL('?');	/* processed below */
		    }
		    else{
			unsigned char utf8buf[8], *p1, *p2;

			p2 = utf8_put(p1 = (unsigned char *) utf8buf, (unsigned long) ucs);
			for(; p1 < p2; p1++){
			    c = MAKE_LITERAL(*p1);
			    HTML_PROC(f, c);
			}

			continue;
		    }

		    break;
		}
	    }
	    else if(!PASS_HTML(f) && c == '&' && !HD(f)->quoted){
		f->t = '&';		/* start collecting an entity */
		continue;
	    }

	    /*
	     * then we process whatever we got...
	     */

	    HTML_PROC(f, c);
	}

	GF_OP_END(f);			/* clean up our input pointers */
    }
    else if(flg == GF_EOD){
	/* pop whatever elements the document left open */
	while(HANDLERS(f)){
	    dprint((2, "-- html error: no closing tag for %s",EL(HANDLERS(f))->element));
	    html_pop(f, EL(HANDLERS(f)));
	}

	html_output(f, HTML_NEWLINE);
	/* don't leave underline or bold switched on */
	if(ULINE_BIT(f))
	  HTML_ULINE(f, ULINE_BIT(f) = 0);

	if(BOLD_BIT(f))
	  HTML_BOLD(f, BOLD_BIT(f) = 0);

	HTML_FLUSH(f);
	/* give back everything GF_RESET (and later processing) allocated */
	fs_give((void **)&f->line);
	if(HD(f)->color)
	  free_color_pair(&HD(f)->color);

	fs_give(&f->data);
	if(f->opt){
	    if(((HTML_OPT_S *)f->opt)->base)
	      fs_give((void **) &((HTML_OPT_S *)f->opt)->base);

	    fs_give(&f->opt);
	}

	/* let the next filter see what we wrote, then tell it we're done */
	(*f->next->f)(f->next, GF_DATA);
	(*f->next->f)(f->next, GF_EOD);
    }
    else if(flg == GF_RESET){
	dprint((9, "-- gf_reset html2plain\n"));
	f->data  = (HTML_DATA_S *) fs_get(sizeof(HTML_DATA_S));
	memset(f->data, 0, sizeof(HTML_DATA_S));
	/* start with flowing text */
	HD(f)->wrapstate = !PASS_HTML(f);
	HD(f)->wrapcol   = WRAP_COLS(f);
	f->f1    = DFL;			/* state */
	f->f2    = 0;			/* chars in wrap buffer */
	f->n     = 0L;			/* chars on line so far */
	f->linep = f->line = (char *)fs_get(HTML_BUF_LEN * sizeof(char));
	HD(f)->line_bufsize = HTML_BUF_LEN; /* initial bufsize of line */
	/* set unless the display charmap is known to be iso-8859-1 */
	HD(f)->alt_entity =  (!ps_global->display_charmap
			      || strucmp(ps_global->display_charmap, "iso-8859-1"));
	/* empty the buffer html_output collects utf-8 characters in */
	HD(f)->cb.cbufp = HD(f)->cb.cbufend = HD(f)->cb.cbuf;
    }
}
8036 
8037 
8038 
8039 /*
8040  * html_indent - do the requested indent level function with appropriate
8041  *		 flushing and such.
8042  *
8043  *   Returns: indent level prior to set/increment
8044  */
8045 int
html_indent(FILTER_S * f,int val,int func)8046 html_indent(FILTER_S *f, int val, int func)
8047 {
8048     int old = HD(f)->indent_level;
8049 
8050     /* flush pending data at old indent level */
8051     switch(func){
8052       case HTML_ID_INC :
8053 	html_output_flush(f);
8054 	if((HD(f)->indent_level += val) < 0)
8055 	  HD(f)->indent_level = 0;
8056 
8057 	break;
8058 
8059       case HTML_ID_SET :
8060 	html_output_flush(f);
8061 	HD(f)->indent_level = val;
8062 	break;
8063 
8064       default :
8065 	break;
8066     }
8067 
8068     return(old);
8069 }
8070 
8071 
8072 
8073 /*
8074  * html_blanks - Insert n blank lines into output
8075  */
8076 void
html_blank(FILTER_S * f,int n)8077 html_blank(FILTER_S *f, int n)
8078 {
8079     /* Cap off any flowing text, and then write blank lines */
8080     if(f->f2 || f->n || CENTER_BIT(f) || HD(f)->centered || WRAPPED_LEN(f))
8081       html_output(f, HTML_NEWLINE);
8082 
8083     if(HD(f)->wrapstate)
8084       while(HD(f)->blanks < n)	/* blanks inc'd by HTML_NEWLINE */
8085 	html_output(f, HTML_NEWLINE);
8086 }
8087 
8088 
8089 
8090 /*
8091  *  html_newline -- insert a newline mindful of embedded tags
8092  */
8093 void
html_newline(FILTER_S * f)8094 html_newline(FILTER_S *f)
8095 {
8096     html_write_newline(f);		/* commit an actual newline */
8097 
8098     if(f->n){				/* and keep track of blank lines */
8099 	HD(f)->blanks = 0;
8100 	f->n = 0L;
8101     }
8102     else
8103       HD(f)->blanks++;
8104 }
8105 
8106 
8107 /*
8108  * output the given char, handling any requested wrapping.
8109  * It's understood that all whitespace handed us is written.  In other
8110  * words, junk whitespace is weeded out before it's given to us here.
8111  *
8112  */
8113 void
html_output(FILTER_S * f,int ch)8114 html_output(FILTER_S *f, int ch)
8115 {
8116     UCS uc;
8117     int width;
8118     void (*o_f)(FILTER_S *, int, int, int) = CENTER_BIT(f) ? html_output_centered : html_output_normal;
8119 
8120     /*
8121      * if ch is a control token, just pass it on, else, collect
8122      * utf8-encoded characters to determine width,then feed into
8123      * output routines
8124      */
8125     if(ch == TAG_EMBED || HD(f)->embedded.state || (ch > 0xff && IS_LITERAL(ch) == 0)){
8126 	(*o_f)(f, ch, 1, 0);
8127     }
8128     else if(utf8_to_ucs4_oneatatime(ch & 0xff, &(HD(f)->cb), &uc, &width)){
8129 	unsigned char *cp;
8130 
8131 	for(cp = HD(f)->cb.cbuf; cp <= HD(f)->cb.cbufend; cp++){
8132 	    (*o_f)(f, *cp, width, HD(f)->cb.cbufend - cp);
8133 	    width = 0;		/* only count it once */
8134 	}
8135 
8136 	HD(f)->cb.cbufp = HD(f)->cb.cbufend = HD(f)->cb.cbuf;
8137     }
8138     else
8139       HD(f)->cb.cbufend = HD(f)->cb.cbufp;
8140     /* else do nothing until we have a full character */
8141 }
8142 
8143 
8144 void
html_output_string(FILTER_S * f,char * s)8145 html_output_string(FILTER_S *f, char *s)
8146 {
8147     for(; *s; s++)
8148       html_output(f, *s);
8149 }
8150 
8151 
8152 void
html_output_raw_tag(FILTER_S * f,char * tag)8153 html_output_raw_tag(FILTER_S *f, char *tag)
8154 {
8155     PARAMETER *p;
8156     char      *vp;
8157     int	       i;
8158 
8159     html_output(f, '<');
8160     html_output_string(f, tag);
8161     for(p = HD(f)->el_data->attribs;
8162 	p && p->attribute;
8163 	p = p->next){
8164 	/* SECURITY: no javascript */
8165 	/* PRIVACY: no img src without permission */
8166 	/* BUGS: no class collisions since <head> ignored */
8167 	if(html_event_attribute(p->attribute)
8168 	   || !strucmp(p->attribute, "class")
8169 	   || (!PASS_IMAGES(f) && !strucmp(tag, "img") && !strucmp(p->attribute, "src")))
8170 	  continue;
8171 
8172 	/* PRIVACY: sniff out background images */
8173 	if(p->value && !PASS_IMAGES(f)){
8174 	    if(!strucmp(p->attribute, "style")){
8175 		if((vp = srchstr(p->value, "background-image")) != NULL){
8176 		    /* neuter in place */
8177 		    vp[11] = vp[12] = vp[13] = vp[14] = vp[15] = 'X';
8178 		}
8179 		else{
8180 		    for(vp = p->value; (vp = srchstr(vp, "background")) != NULL; vp++)
8181 		      if(vp[10] == ' ' || vp[10] == ':')
8182 			for(i = 11; vp[i] && vp[i] != ';'; i++)
8183 			  if((vp[i] == 'u' && vp[i+1] == 'r' && vp[i+2] == 'l' && vp[i+3] == '(')
8184 			     || vp[i] == ':' || vp[i] == '/' || vp[i] == '.')
8185 			    vp[0] = 'X';
8186 		}
8187 	    }
8188 	    else if(!strucmp(p->attribute, "background")){
8189 		char *ip;
8190 
8191 		for(ip = p->value; *ip && !(*ip == ':' || *ip == '/' || *ip == '.'); ip++)
8192 		  ;
8193 
8194 		if(ip)
8195 		  continue;
8196 	    }
8197 	}
8198 
8199 	html_output(f, ' ');
8200 	html_output_string(f, p->attribute);
8201 	if(p->value){
8202 	    html_output(f, '=');
8203 	    html_output(f, '\"');
8204 	    html_output_string(f, p->value);
8205 	    html_output(f, '\"');
8206 	}
8207     }
8208 
8209     /* append warning to form submission */
8210     if(!strucmp(tag, "form")){
8211 	html_output_string(f, " onsubmit=\"return window.confirm('This form is submitting information to an outside server.\\nAre you sure?');\"");
8212     }
8213 
8214     if(ED(f)->end_tag){
8215 	html_output(f, ' ');
8216 	html_output(f, '/');
8217     }
8218 
8219     html_output(f, '>');
8220 }
8221 
8222 
/*
 * html_event_attribute - is the named attribute an event handler?
 *
 * Args: attr -- attribute name, any case
 *
 * Event handler attributes are all named "on" followed by the name of
 * the event, so anything that starts with "on" (in either case) and has
 * at least one more character is taken to be one.  This is deliberately
 * broader than a list of known names: a list can have typos in it and
 * can't keep up with new events, and a handler we fail to recognize is
 * script we'd pass along.
 *
 * Returns: non-zero if attr names an event handler, zero if not
 *	    (or if attr is NULL)
 */
int
html_event_attribute(char *attr)
{
    if(!attr)
      return(0);

    /* && stops at the first mismatch so we never read past the NUL */
    return((attr[0] == 'o' || attr[0] == 'O')
	   && (attr[1] == 'n' || attr[1] == 'N')
	   && attr[2] != '\0');
}
8241 
8242 
8243 void
html_output_normal(FILTER_S * f,int ch,int width,int remaining)8244 html_output_normal(FILTER_S *f, int ch, int width, int remaining)
8245 {
8246     static int written = 0;
8247     static int cwidth;
8248 
8249     if(HD(f)->centered){
8250 	html_centered_flush(f);
8251 	fs_give((void **) &HD(f)->centered->line.buf);
8252 	fs_give((void **) &HD(f)->centered->word.buf);
8253 	fs_give((void **) &HD(f)->centered);
8254     }
8255 
8256     if(HD(f)->wrapstate){
8257 	if(ch == HTML_NEWLINE){		/* hard newline */
8258 	    html_output_flush(f);
8259 	    html_newline(f);
8260 	}
8261 	else
8262 	  HD(f)->blanks = 0;		/* reset blank line counter */
8263 
8264 	if(ch == TAG_EMBED){	/* takes up no space */
8265 	    HD(f)->embedded.state = -5;
8266 	    HTML_LINEP_PUTC(f, TAG_EMBED);
8267 	}
8268 	else if(HD(f)->embedded.state){	/* ditto */
8269 	    if(HD(f)->embedded.state == -5){
8270 		/* looking for specially handled tags following TAG_EMBED */
8271 		if(ch == TAG_HANDLE)
8272 		  HD(f)->embedded.state = -1;	/* next ch is length */
8273 		else if(ch == TAG_FGCOLOR || ch == TAG_BGCOLOR){
8274 		    if(!HD(f)->color)
8275 		      HD(f)->color = new_color_pair(NULL, NULL);
8276 
8277 		    if(ch == TAG_FGCOLOR)
8278 		      HD(f)->embedded.color = HD(f)->color->fg;
8279 		    else
8280 		      HD(f)->embedded.color = HD(f)->color->bg;
8281 
8282 		    HD(f)->embedded.state = RGBLEN;
8283 		}
8284 		else
8285 		  HD(f)->embedded.state = 0;	/* non-special */
8286 	    }
8287 	    else if(HD(f)->embedded.state > 0){
8288 		/* collecting up an RGBLEN color or length, ignore tags */
8289 		(HD(f)->embedded.state)--;
8290 		if(HD(f)->embedded.color)
8291 		  *HD(f)->embedded.color++ = ch;
8292 
8293 		if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8294 		    *HD(f)->embedded.color = '\0';
8295 		    HD(f)->embedded.color = NULL;
8296 		}
8297 	    }
8298 	    else if(HD(f)->embedded.state < 0){
8299 		HD(f)->embedded.state = ch;	/* number of embedded chars */
8300 	    }
8301 	    else{
8302 		(HD(f)->embedded.state)--;
8303 		if(HD(f)->embedded.color)
8304 		  *HD(f)->embedded.color++ = ch;
8305 
8306 		if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8307 		    *HD(f)->embedded.color = '\0';
8308 		    HD(f)->embedded.color = NULL;
8309 		}
8310 	    }
8311 
8312 	    HTML_LINEP_PUTC(f, ch);
8313 	}
8314 	else if(HTML_ISSPACE(ch)){
8315 	    html_output_flush(f);
8316 	}
8317 	else{
8318 	    if(HD(f)->prefix)
8319 	      html_a_prefix(f);
8320 
8321 	    if(written == 0)
8322 	      cwidth = width;
8323 
8324 	    if(f->f2 + cwidth + 1 >= WRAP_COLS(f)){
8325 		HTML_LINEP_PUTC(f, ch & 0xff);
8326 		written++;
8327 		if(remaining == 0){
8328 		  HTML_FLUSH(f);
8329 		  html_newline(f);
8330 		}
8331 		if(HD(f)->in_anchor)
8332 		  html_write_anchor(f, HD(f)->in_anchor);
8333 	    }
8334 	    else{
8335 	      HTML_LINEP_PUTC(f, ch & 0xff);
8336 	      written++;
8337 	    }
8338 
8339 	    if(remaining == 0){
8340 	      written = 0;
8341 	      f->f2 += cwidth;
8342 	    }
8343 	}
8344     }
8345     else{
8346 	if(HD(f)->prefix)
8347 	  html_a_prefix(f);
8348 
8349 	html_output_flush(f);
8350 
8351 	switch(HD(f)->embedded.state){
8352 	  case 0 :
8353 	    switch(ch){
8354 	      default :
8355 		/*
8356 		 * It's difficult to both preserve whitespace and wrap at the
8357 		 * same time so we'll do a dumb wrap at the edge of the screen.
8358 		 * Since this shouldn't come up much in real life we'll hope
8359 		 * it is good enough.
8360 		 */
8361 		if(!PASS_HTML(f) && (f->n + width) > WRAP_COLS(f))
8362 		  html_newline(f);
8363 
8364 		f->n += width;			/* inc displayed char count */
8365 		HD(f)->blanks = 0;		/* reset blank line counter */
8366 		html_putc(f, ch & 0xff);
8367 		break;
8368 
8369 	      case TAG_EMBED :	/* takes up no space */
8370 		html_putc(f, TAG_EMBED);
8371 		HD(f)->embedded.state = -2;
8372 		break;
8373 
8374 	      case HTML_NEWLINE :		/* newline handling */
8375 		if(!f->n)
8376 		  break;
8377 
8378 	      case '\n' :
8379 		html_newline(f);
8380 
8381 	      case '\r' :
8382 		break;
8383 	    }
8384 
8385 	    break;
8386 
8387 	  case -2 :
8388 	    HD(f)->embedded.state = 0;
8389 	    switch(ch){
8390 	      case TAG_HANDLE :
8391 		HD(f)->embedded.state = -1;	/* next ch is length */
8392 		break;
8393 
8394 	      case TAG_BOLDON :
8395 		BOLD_BIT(f) = 1;
8396 		break;
8397 
8398 	      case TAG_BOLDOFF :
8399 		BOLD_BIT(f) = 0;
8400 		break;
8401 
8402 	      case TAG_ULINEON :
8403 		ULINE_BIT(f) = 1;
8404 		break;
8405 
8406 	      case TAG_ULINEOFF :
8407 		ULINE_BIT(f) = 0;
8408 		break;
8409 
8410 	      case TAG_FGCOLOR :
8411 		if(!HD(f)->color)
8412 		  HD(f)->color = new_color_pair(NULL, NULL);
8413 
8414 		HD(f)->embedded.color = HD(f)->color->fg;
8415 		HD(f)->embedded.state = 11;
8416 		break;
8417 
8418 	      case TAG_BGCOLOR :
8419 		if(!HD(f)->color)
8420 		  HD(f)->color = new_color_pair(NULL, NULL);
8421 
8422 		HD(f)->embedded.color = HD(f)->color->bg;
8423 		HD(f)->embedded.state = 11;
8424 		break;
8425 
8426 	      case TAG_HANDLEOFF :
8427 		ch = TAG_INVOFF;
8428 		HD(f)->in_anchor = 0;
8429 		break;
8430 
8431 	      default :
8432 		break;
8433 	    }
8434 
8435 	    html_putc(f, ch);
8436 	    break;
8437 
8438 	  case -1 :
8439 	    HD(f)->embedded.state = ch;	/* number of embedded chars */
8440 	    html_putc(f, ch);
8441 	    break;
8442 
8443 	  default :
8444 	    HD(f)->embedded.state--;
8445 	    if(HD(f)->embedded.color)
8446 	      *HD(f)->embedded.color++ = ch;
8447 
8448 	    if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8449 		*HD(f)->embedded.color = '\0';
8450 		HD(f)->embedded.color = NULL;
8451 	    }
8452 
8453 	    html_putc(f, ch);
8454 	    break;
8455 	}
8456     }
8457 }
8458 
8459 
8460 /*
8461  * flush any buffered chars waiting for wrapping.
8462  */
8463 void
html_output_flush(FILTER_S * f)8464 html_output_flush(FILTER_S *f)
8465 {
8466     if(f->f2){
8467 	if(f->n && ((int) f->n) + 1 + f->f2 > HD(f)->wrapcol)
8468 	  html_newline(f);		/* wrap? */
8469 
8470 	if(f->n){			/* text already on the line? */
8471 	    html_putc(f, ' ');
8472 	    f->n++;			/* increment count */
8473 	}
8474 	else{
8475 	    /* write at start of new line */
8476 	    html_write_indent(f, HD(f)->indent_level);
8477 
8478 	    if(HD(f)->in_anchor)
8479 	      html_write_anchor(f, HD(f)->in_anchor);
8480 	}
8481 
8482 	f->n += f->f2;
8483 	HTML_FLUSH(f);
8484     }
8485 }
8486 
8487 
8488 
8489 /*
8490  * html_output_centered - managed writing centered text
8491  */
8492 void
html_output_centered(FILTER_S * f,int ch,int width,int remaining)8493 html_output_centered(FILTER_S *f, int ch, int width, int remaining)
8494 {
8495     static int written;
8496     static int cwidth;
8497 
8498     if(!HD(f)->centered){		/* new text? */
8499 	html_output_flush(f);
8500 	if(f->n)			/* start on blank line */
8501 	  html_newline(f);
8502 
8503 	HD(f)->centered = (CENTER_S *) fs_get(sizeof(CENTER_S));
8504 	memset(HD(f)->centered, 0, sizeof(CENTER_S));
8505 	/* and grab a buf to start collecting centered text */
8506 	HD(f)->centered->line.len  = WRAP_COLS(f);
8507 	HD(f)->centered->line.buf  = (char *) fs_get(HD(f)->centered->line.len
8508 							      * sizeof(char));
8509 	HD(f)->centered->line.used = HD(f)->centered->line.width = 0;
8510 	HD(f)->centered->word.len  = 32;
8511 	HD(f)->centered->word.buf  = (char *) fs_get(HD(f)->centered->word.len
8512 							       * sizeof(char));
8513 	HD(f)->centered->word.used = HD(f)->centered->word.width = 0;
8514     }
8515 
8516     if(ch == HTML_NEWLINE){		/* hard newline */
8517 	html_centered_flush(f);
8518     }
8519     else if(ch == TAG_EMBED){		/* takes up no space */
8520 	HD(f)->embedded.state = -5;
8521 	html_centered_putc(&HD(f)->centered->word, TAG_EMBED);
8522     }
8523     else if(HD(f)->embedded.state){
8524 	if(HD(f)->embedded.state == -5){
8525 	    /* looking for specially handled tags following TAG_EMBED */
8526 	    if(ch == TAG_HANDLE)
8527 	      HD(f)->embedded.state = -1;	/* next ch is length */
8528 	    else if(ch == TAG_FGCOLOR || ch == TAG_BGCOLOR){
8529 		if(!HD(f)->color)
8530 		  HD(f)->color = new_color_pair(NULL, NULL);
8531 
8532 		if(ch == TAG_FGCOLOR)
8533 		  HD(f)->embedded.color = HD(f)->color->fg;
8534 		else
8535 		  HD(f)->embedded.color = HD(f)->color->bg;
8536 
8537 		HD(f)->embedded.state = RGBLEN;
8538 	    }
8539 	    else
8540 		  HD(f)->embedded.state = 0;	/* non-special */
8541 	}
8542 	else if(HD(f)->embedded.state > 0){
8543 	    /* collecting up an RGBLEN color or length, ignore tags */
8544 	    (HD(f)->embedded.state)--;
8545 	    if(HD(f)->embedded.color)
8546 	      *HD(f)->embedded.color++ = ch;
8547 
8548 	    if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8549 		*HD(f)->embedded.color = '\0';
8550 		HD(f)->embedded.color = NULL;
8551 	    }
8552 	}
8553 	else if(HD(f)->embedded.state < 0){
8554 	    HD(f)->embedded.state = ch;	/* number of embedded chars */
8555 	}
8556 	else{
8557 	    (HD(f)->embedded.state)--;
8558 	    if(HD(f)->embedded.color)
8559 	      *HD(f)->embedded.color++ = ch;
8560 
8561 	    if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8562 		*HD(f)->embedded.color = '\0';
8563 		HD(f)->embedded.color = NULL;
8564 	    }
8565 	}
8566 
8567 	html_centered_putc(&HD(f)->centered->word, ch);
8568     }
8569     else if(ASCII_ISSPACE((unsigned char) ch)){
8570 	if(!HD(f)->centered->space++){	/* end of a word? flush! */
8571 	    int i;
8572 
8573 	    if(WRAPPED_LEN(f) > HD(f)->wrapcol){
8574 		html_centered_flush_line(f);
8575 		/* fall thru to put current "word" on blank "line" */
8576 	    }
8577 	    else if(HD(f)->centered->line.width){
8578 		/* put space char between line and appended word */
8579 		html_centered_putc(&HD(f)->centered->line, ' ');
8580 		HD(f)->centered->line.width++;
8581 	    }
8582 
8583 	    for(i = 0; i < HD(f)->centered->word.used; i++)
8584 	      html_centered_putc(&HD(f)->centered->line,
8585 				 HD(f)->centered->word.buf[i]);
8586 
8587 	    HD(f)->centered->line.width += HD(f)->centered->word.width;
8588 	    HD(f)->centered->word.used  = 0;
8589 	    HD(f)->centered->word.width = 0;
8590 	}
8591     }
8592     else{
8593 	if(HD(f)->prefix)
8594 	  html_a_prefix(f);
8595 
8596 	/* ch is start of next word */
8597 	HD(f)->centered->space = 0;
8598 	if(HD(f)->centered->word.width >= WRAP_COLS(f))
8599 	  html_centered_flush(f);
8600 
8601 	html_centered_putc(&HD(f)->centered->word, ch);
8602 
8603 	if(written == 0)
8604 	  cwidth = width;
8605 
8606 	written++;
8607 
8608 	if(remaining == 0){
8609 	   written = 0;
8610 	   HD(f)->centered->word.width += cwidth;
8611 	}
8612     }
8613 }
8614 
8615 
8616 /*
8617  * html_centered_putc -- add given char to given WRAPLINE_S
8618  */
8619 void
html_centered_putc(WRAPLINE_S * wp,int ch)8620 html_centered_putc(WRAPLINE_S *wp, int ch)
8621 {
8622     if(wp->used + 1 >= wp->len){
8623 	wp->len += 64;
8624 	fs_resize((void **) &wp->buf, wp->len * sizeof(char));
8625     }
8626 
8627     wp->buf[wp->used++] = ch;
8628 }
8629 
8630 
8631 
8632 /*
8633  * html_centered_flush - finish writing any pending centered output
8634  */
8635 void
html_centered_flush(FILTER_S * f)8636 html_centered_flush(FILTER_S *f)
8637 {
8638     int i;
8639 
8640     /*
8641      * If word present (what about line?) we need to deal with
8642      * appending it...
8643      */
8644     if(HD(f)->centered->word.width && WRAPPED_LEN(f) > HD(f)->wrapcol)
8645       html_centered_flush_line(f);
8646 
8647     if(WRAPPED_LEN(f)){
8648 	/* figure out how much to indent */
8649 	if((i = (WRAP_COLS(f) - WRAPPED_LEN(f))/2) > 0)
8650 	  html_write_indent(f, i);
8651 
8652 	if(HD(f)->centered->anchor)
8653 	  html_write_anchor(f, HD(f)->centered->anchor);
8654 
8655 	html_centered_handle(&HD(f)->centered->anchor,
8656 			     HD(f)->centered->line.buf,
8657 			     HD(f)->centered->line.used);
8658 	html_write(f, HD(f)->centered->line.buf, HD(f)->centered->line.used);
8659 
8660 	if(HD(f)->centered->word.used){
8661 	    if(HD(f)->centered->line.width)
8662 	      html_putc(f, ' ');
8663 
8664 	    html_centered_handle(&HD(f)->centered->anchor,
8665 				 HD(f)->centered->word.buf,
8666 				 HD(f)->centered->word.used);
8667 	    html_write(f, HD(f)->centered->word.buf,
8668 		       HD(f)->centered->word.used);
8669 	}
8670 
8671 	HD(f)->centered->line.used  = HD(f)->centered->word.used  = 0;
8672 	HD(f)->centered->line.width = HD(f)->centered->word.width = 0;
8673     }
8674     else{
8675       if(HD(f)->centered->word.used){
8676 	html_write(f, HD(f)->centered->word.buf,
8677 		   HD(f)->centered->word.used);
8678 	HD(f)->centered->line.used  = HD(f)->centered->word.used  = 0;
8679 	HD(f)->centered->line.width = HD(f)->centered->word.width = 0;
8680       }
8681       HD(f)->blanks++;			/* advance the blank line counter */
8682     }
8683 
8684     html_newline(f);			/* finish the line */
8685 }
8686 
8687 
8688 /*
8689  * html_centered_handle - scan the line for embedded handles
8690  */
8691 void
html_centered_handle(int * h,char * line,int len)8692 html_centered_handle(int *h, char *line, int len)
8693 {
8694     int n;
8695 
8696     while(len-- > 0)
8697       if(*line++ == TAG_EMBED && len-- > 0)
8698 	switch(*line++){
8699 	  case TAG_HANDLE :
8700 	    if((n = *line++) >= --len){
8701 		*h = 0;
8702 		len -= n;
8703 		while(n--)
8704 		  *h = (*h * 10) + (*line++ - '0');
8705 	    }
8706 	    break;
8707 
8708 	  case TAG_HANDLEOFF :
8709 	  case TAG_INVOFF :
8710 	    *h = 0;		/* assumption 23,342: inverse off ends tags */
8711 	    break;
8712 
8713 	  default :
8714 	    break;
8715 	}
8716 }
8717 
8718 
8719 
8720 /*
8721  * html_centered_flush_line - flush the centered "line" only
8722  */
8723 void
html_centered_flush_line(FILTER_S * f)8724 html_centered_flush_line(FILTER_S *f)
8725 {
8726     if(HD(f)->centered->line.used){
8727 	int i, j;
8728 
8729 	/* hide "word" from flush */
8730 	i = HD(f)->centered->word.used;
8731 	j = HD(f)->centered->word.width;
8732 	HD(f)->centered->word.used  = 0;
8733 	HD(f)->centered->word.width = 0;
8734 	html_centered_flush(f);
8735 
8736 	HD(f)->centered->word.used  = i;
8737 	HD(f)->centered->word.width = j;
8738     }
8739 }
8740 
8741 
8742 /*
8743  * html_write_indent - write indention mindful of display attributes
8744  */
8745 void
html_write_indent(FILTER_S * f,int indent)8746 html_write_indent(FILTER_S *f, int indent)
8747 {
8748     if(! STRIP(f)){
8749 	if(BOLD_BIT(f)){
8750 	    html_putc(f, TAG_EMBED);
8751 	    html_putc(f, TAG_BOLDOFF);
8752 	}
8753 
8754 	if(ULINE_BIT(f)){
8755 	    html_putc(f, TAG_EMBED);
8756 	    html_putc(f, TAG_ULINEOFF);
8757 	}
8758     }
8759 
8760     f->n = indent;
8761     while(indent-- > 0)
8762       html_putc(f, ' ');		/* indent as needed */
8763 
8764     /*
8765      * Resume any previous embedded state
8766      */
8767     if(! STRIP(f)){
8768 	if(BOLD_BIT(f)){
8769 	    html_putc(f, TAG_EMBED);
8770 	    html_putc(f, TAG_BOLDON);
8771 	}
8772 
8773 	if(ULINE_BIT(f)){
8774 	    html_putc(f, TAG_EMBED);
8775 	    html_putc(f, TAG_ULINEON);
8776 	}
8777     }
8778 }
8779 
8780 
8781 /*
8782  *
8783  */
8784 void
html_write_anchor(FILTER_S * f,int anchor)8785 html_write_anchor(FILTER_S *f, int anchor)
8786 {
8787     char buf[256];
8788     int  i;
8789 
8790     html_putc(f, TAG_EMBED);
8791     html_putc(f, TAG_HANDLE);
8792     snprintf(buf, sizeof(buf), "%d", anchor);
8793     html_putc(f, (int) strlen(buf));
8794 
8795     for(i = 0; buf[i]; i++)
8796       html_putc(f, buf[i]);
8797 }
8798 
8799 
8800 /*
8801  * html_write_newline - write a newline mindful of display attributes
8802  */
8803 void
html_write_newline(FILTER_S * f)8804 html_write_newline(FILTER_S *f)
8805 {
8806     int i;
8807 
8808     if(! STRIP(f)){			/* First tie, off any embedded state */
8809 	if(HD(f)->in_anchor){
8810 	    html_putc(f, TAG_EMBED);
8811 	    html_putc(f, TAG_INVOFF);
8812 	}
8813 
8814 	if(BOLD_BIT(f)){
8815 	    html_putc(f, TAG_EMBED);
8816 	    html_putc(f, TAG_BOLDOFF);
8817 	}
8818 
8819 	if(ULINE_BIT(f)){
8820 	    html_putc(f, TAG_EMBED);
8821 	    html_putc(f, TAG_ULINEOFF);
8822 	}
8823 
8824 	if(HD(f)->color && (HD(f)->color->fg[0] || HD(f)->color->bg[0])){
8825 	    char        *p;
8826 	    int          i;
8827 
8828 	    p = color_embed(ps_global->VAR_NORM_FORE_COLOR,
8829 			    ps_global->VAR_NORM_BACK_COLOR);
8830 	    for(i = 0; i < 2 * (RGBLEN + 2); i++)
8831 	      html_putc(f, p[i]);
8832 	}
8833     }
8834 
8835     html_write(f, "\015\012", 2);
8836     for(i = HTML_INDENT(f); i > 0; i--)
8837       html_putc(f, ' ');
8838 
8839     if(! STRIP(f)){			/* First tie, off any embedded state */
8840 	if(BOLD_BIT(f)){
8841 	    html_putc(f, TAG_EMBED);
8842 	    html_putc(f, TAG_BOLDON);
8843 	}
8844 
8845 	if(ULINE_BIT(f)){
8846 	    html_putc(f, TAG_EMBED);
8847 	    html_putc(f, TAG_ULINEON);
8848 	}
8849 
8850 	if(HD(f)->color && (HD(f)->color->fg[0] || HD(f)->color->bg[0])){
8851 	    char        *p, *tfg, *tbg;
8852 	    int          i;
8853 	    COLOR_PAIR  *tmp;
8854 
8855 	    tfg = HD(f)->color->fg;
8856 	    tbg = HD(f)->color->bg;
8857 	    tmp = new_color_pair(tfg[0] ? tfg
8858 	      : color_to_asciirgb(ps_global->VAR_NORM_FORE_COLOR),
8859 	      tbg[0] ? tbg
8860 	      : color_to_asciirgb(ps_global->VAR_NORM_BACK_COLOR));
8861 	    if(pico_is_good_colorpair(tmp)){
8862 		p = color_embed(tfg[0] ? tfg
8863 				: ps_global->VAR_NORM_FORE_COLOR,
8864 				tbg[0] ? tbg
8865 				: ps_global->VAR_NORM_BACK_COLOR);
8866 		for(i = 0; i < 2 * (RGBLEN + 2); i++)
8867 		  html_putc(f, p[i]);
8868 	    }
8869 
8870 	    if(tmp)
8871 	      free_color_pair(&tmp);
8872 	}
8873     }
8874 }
8875 
8876 
8877 /*
8878  * html_write - write given n-length string to next filter
8879  */
8880 void
html_write(FILTER_S * f,char * s,int n)8881 html_write(FILTER_S *f, char *s, int n)
8882 {
8883     GF_INIT(f, f->next);
8884 
8885     while(n-- > 0){
8886 	/* keep track of attribute state?  Not if last char! */
8887 	if(!STRIP(f) && *s == TAG_EMBED && n-- > 0){
8888 	    GF_PUTC(f->next, TAG_EMBED);
8889 	    switch(*++s){
8890 	      case TAG_BOLDON :
8891 		BOLD_BIT(f) = 1;
8892 		break;
8893 	      case TAG_BOLDOFF :
8894 		BOLD_BIT(f) = 0;
8895 		break;
8896 	      case TAG_ULINEON :
8897 		ULINE_BIT(f) = 1;
8898 		break;
8899 	      case TAG_ULINEOFF :
8900 		ULINE_BIT(f) = 0;
8901 		break;
8902 	      case TAG_HANDLEOFF :
8903 		HD(f)->in_anchor = 0;
8904 		GF_PUTC(f->next, TAG_INVOFF);
8905 		s++;
8906 		continue;
8907 	      case TAG_HANDLE :
8908 		if(n-- > 0){
8909 		    int i = *++s;
8910 
8911 		    GF_PUTC(f->next, TAG_HANDLE);
8912 		    if(i <= n){
8913 			int	  anum = 0;
8914 			HANDLE_S *h;
8915 
8916 			n -= i;
8917 			GF_PUTC(f->next, i);
8918 			while(1){
8919 			    anum = (anum * 10) + (*++s - '0');
8920 			    if(--i)
8921 			      GF_PUTC(f->next, *s);
8922 			    else
8923 			      break;
8924 			}
8925 
8926 			if(DO_HANDLES(f)
8927 			   && (h = get_handle(*HANDLESP(f), anum)) != NULL
8928 			   && (h->type == URL || h->type == Attach)){
8929 			    HD(f)->in_anchor = anum;
8930 			}
8931 		    }
8932 		}
8933 
8934 		break;
8935 	      default:
8936 		break;
8937 	    }
8938 	}
8939 
8940 	GF_PUTC(f->next, (*s++) & 0xff);
8941     }
8942 
8943     GF_IP_END(f->next);			/* clean up next's input pointers */
8944 }
8945 
8946 
/*
 * html_putc -- actual work of writing to next filter.
 *		NOTE: Small opt not using full GF_END since our input
 *		      pointers don't need adjusting.
 *
 *   f  - the html filter; the character is handed to f->next
 *   ch - character to write; only its low eight bits are passed on
 */
void
html_putc(FILTER_S *f, int ch)
{
    GF_INIT(f, f->next);
    GF_PUTC(f->next, ch & 0xff);
    GF_IP_END(f->next);			/* clean up next's input pointers */
}
8959 
8960 
8961 
8962 /*
8963  * Only current option is to turn on embedded data stripping for text
8964  * bound to a printer or composer.
8965  */
8966 void *
gf_html2plain_opt(char * base,int columns,int * margin,HANDLE_S ** handlesp,htmlrisk_t risk_f,int flags)8967 gf_html2plain_opt(char *base,
8968 		  int columns,
8969 		  int *margin,
8970 		  HANDLE_S **handlesp,
8971 		  htmlrisk_t risk_f,
8972 		  int flags)
8973 {
8974     HTML_OPT_S *op;
8975     int		margin_l, margin_r;
8976 
8977     op = (HTML_OPT_S *) fs_get(sizeof(HTML_OPT_S));
8978 
8979     op->base	    = cpystr(base);
8980     margin_l	    = (margin) ? margin[0] : 0;
8981     margin_r	    = (margin) ? margin[1] : 0;
8982     op->indent	    = margin_l;
8983     op->columns	    = columns - (margin_l + margin_r);
8984     op->strip	    = ((flags & GFHP_STRIPPED) == GFHP_STRIPPED);
8985     op->handlesp    = handlesp;
8986     op->handles_loc = ((flags & GFHP_LOCAL_HANDLES) == GFHP_LOCAL_HANDLES);
8987     op->showserver  = ((flags & GFHP_SHOW_SERVER) == GFHP_SHOW_SERVER);
8988     op->warnrisk_f  = risk_f;
8989     op->no_relative_links = ((flags & GFHP_NO_RELATIVE) == GFHP_NO_RELATIVE);
8990     op->related_content	  = ((flags & GFHP_RELATED_CONTENT) == GFHP_RELATED_CONTENT);
8991     op->html	    = ((flags & GFHP_HTML) == GFHP_HTML);
8992     op->html_imgs   = ((flags & GFHP_HTML_IMAGES) == GFHP_HTML_IMAGES);
8993     op->element_table = html_element_table;
8994     return((void *) op);
8995 }
8996 
8997 
8998 void *
gf_html2plain_rss_opt(RSS_FEED_S ** feedp,int flags)8999 gf_html2plain_rss_opt(RSS_FEED_S **feedp, int flags)
9000 {
9001     HTML_OPT_S *op;
9002 
9003     op = (HTML_OPT_S *) fs_get(sizeof(HTML_OPT_S));
9004     memset(op, 0, sizeof(HTML_OPT_S));
9005 
9006     op->base = cpystr("");
9007     op->element_table = rss_element_table;
9008     *(op->feedp = feedp) = NULL;
9009     return((void *) op);
9010 }
9011 
9012 void
gf_html2plain_rss_free(RSS_FEED_S ** feedp)9013 gf_html2plain_rss_free(RSS_FEED_S **feedp)
9014 {
9015     if(feedp && *feedp){
9016 	if((*feedp)->title)
9017 	  fs_give((void **) &(*feedp)->title);
9018 
9019 	if((*feedp)->link)
9020 	  fs_give((void **) &(*feedp)->link);
9021 
9022 	if((*feedp)->description)
9023 	  fs_give((void **) &(*feedp)->description);
9024 
9025 	if((*feedp)->source)
9026 	  fs_give((void **) &(*feedp)->source);
9027 
9028 	if((*feedp)->image)
9029 	  fs_give((void **) &(*feedp)->image);
9030 
9031 	gf_html2plain_rss_free_items(&((*feedp)->items));
9032 	fs_give((void **) feedp);
9033     }
9034 }
9035 
9036 void
gf_html2plain_rss_free_items(RSS_ITEM_S ** itemp)9037 gf_html2plain_rss_free_items(RSS_ITEM_S **itemp)
9038 {
9039     if(itemp && *itemp){
9040 	if((*itemp)->title)
9041 	  fs_give((void **) &(*itemp)->title);
9042 
9043 	if((*itemp)->link)
9044 	  fs_give((void **) &(*itemp)->link);
9045 
9046 	if((*itemp)->description)
9047 	  fs_give((void **) &(*itemp)->description);
9048 
9049 	if((*itemp)->source)
9050 	  fs_give((void **) &(*itemp)->source);
9051 
9052 	gf_html2plain_rss_free_items(&(*itemp)->next);
9053 	fs_give((void **) itemp);
9054     }
9055 }
9056 
9057 char *
cid_tempfile_name(char * line,long n,int * is_cidp)9058 cid_tempfile_name(char *line, long n, int *is_cidp)
9059 {
9060     int f2 = 0;
9061     int i, found;
9062     char *s, *t = NULL, *u, c;
9063     char imgfile[1024];
9064     char *extp = NULL;
9065 
9066     c = line[n];
9067     line[n] = '\0';
9068     s = NULL;
9069     *is_cidp = 0;
9070     if(n > 0){
9071 	if (line[0] == '\"')
9072 	  f2 = 1;
9073 	if (n - f2 > 3){
9074 	   if (!struncmp(line+f2, "cid:", 4)){
9075 	       *is_cidp = 1;
9076 	       f2 += 4;
9077 	       s = fs_get((n - f2 + 4)*sizeof(char));
9078 	       sprintf(s,  "<%s", line+f2);
9079 	       if (s[strlen(s)-1] == '\"')
9080 		  s[strlen(s)-1] = '>';
9081 	       else{
9082 		  i = strlen(s);
9083 		  s[i] = '>';
9084 		  s[i + 1] = '\0';
9085 	       }
9086 	    /* find the tmpdir where all these files will be saved to */
9087 	       if(t == NULL){
9088 		  for(i = 0; ps_global->atmts[i].tmpdir == NULL && ps_global->atmts[i].description != NULL; i++);
9089 		  t = ps_global->atmts[i].description ? ps_global->atmts[i].tmpdir : NULL;
9090 	       }
9091 
9092 	    /* now we need to look for s in the list of attachments */
9093 	       for (i = 0, found = 0; found == 0 && ps_global->atmts[i].description != NULL; i++)
9094 		    if (ps_global->atmts[i].body
9095 			&& ps_global->atmts[i].body->type == TYPEIMAGE
9096 			&& strcmp(ps_global->atmts[i].body->id, s) == 0){
9097 			found++;
9098 			break;
9099 		    }
9100 
9101 	       fs_give((void **) &s);
9102 	       if(found && ps_global->atmts[i].cid_tmpfile == NULL){
9103 		   PARAMETER *param;
9104 		   if (ps_global->atmts[i].cid_tmpfile == NULL){
9105 		      for(param = ps_global->atmts[i].body->parameter; param ; param = param->next){
9106 		          if (!strucmp(param->attribute, "NAME")){
9107 			     strncpy(imgfile, param->value, sizeof(imgfile));
9108 			     imgfile[sizeof(imgfile)-1] = '\0';
9109 			     extp = strrchr(imgfile, '.');
9110 			     if(extp) extp++;
9111 		          }
9112 		      }
9113 		      ps_global->atmts[i].cid_tmpfile = temp_nam_ext(t, "tmp-img-", extp);
9114 		   }
9115 	       }
9116 	       if(found && ps_global->atmts[i].cid_tmpfile != NULL)
9117 		  s = strstr(ps_global->atmts[i].cid_tmpfile, "tmp-img-");
9118 	   }
9119 	}
9120     }
9121     line[n] = c;
9122     return s;
9123 }
9124 
/*
 * COLLECT - append character C to filter X's line buffer, growing the
 * buffer in 1024 byte steps.  Relies on a variable named "buflen"
 * (current allocation size) being in scope at the point of use.
 *
 * The buffer is grown while there is still one byte to spare, so that
 * after the append (X)->n < buflen always holds and (X)->line[(X)->n]
 * stays addressable for code that temporarily NUL terminates the
 * collected text.
 */
#define COLLECT(X, C) {						\
	if((X)->n + 1 >= buflen){				\
	   fs_resize((void **) &((X)->line), buflen + 1024);	\
	   (X)->linep = (X)->line + (X)->n;			\
	   buflen += 1024;					\
	}							\
	*((X)->linep)++ = (C);					\
	(X)->n = (X)->linep - (X)->line;			\
}

/*
 * RESET_FILTER - forget everything collected in X's line buffer; the
 * allocation itself is kept.
 */
#define RESET_FILTER(X) { 					\
	(X)->linep = (X)->line;					\
	(X)->n = 0L;						\
}
9139 
9140 void
gf_html_cid2file(FILTER_S * f,int cmd)9141 gf_html_cid2file(FILTER_S *f, int cmd)
9142 {
9143     register char *p;
9144     register unsigned char c;
9145     static long buflen = 0L;
9146 
9147     GF_INIT(f, f->next);
9148 
9149     if(cmd == GF_DATA){
9150         register int state = f->f1;
9151 
9152 	while(GF_GETC(f, c)){
9153 
9154 	    if(state == 0){	/* look for "<img " */
9155 	       if (c == '<') f->f2 = 1;
9156 	       else if(f->f2 > 0){
9157 		   if (f->f2 == 1 && (c == 'i' || c == 'I')) f->f2 = 2;
9158 		   else if (f->f2 == 2 && (c == 'm' || c == 'M')) f->f2 = 3;
9159 		   else if (f->f2 == 3 && (c == 'g' || c == 'G')) f->f2 = 4;
9160 		   else if (f->f2 == 4 && ASCII_ISSPACE(c)){ f->f2 = 0; state = 1; }
9161 		   else f->f2 = 0;
9162 	       }
9163 	    }
9164 	    else if(state == 1){	/* look for "src=" */
9165 		    if (c == 's' || c == 'S') f->f2 = 1;
9166 		    else if (f->f2 == 1 && (c == 'r' || c == 'R')) f->f2 = 2;
9167 		    else if (f->f2 == 2 && (c == 'c' || c == 'C')) f->f2 = 3;
9168 		    else if (f->f2 == 3 && c == '='){ GF_PUTC(f->next, c);  state = 2; }
9169 		    else if (f->f2 == 3 && !ASCII_ISSPACE(c)) f->f2 = 0;
9170 		    else f->f2 = 0;
9171 	    }
9172 	    else if (state == 2){	/* collect all data */
9173 	        if(ASCII_ISSPACE(c) || c == '>'){
9174 		   long n;
9175 		   int is_cid;
9176 		   if(f->n > 0){
9177 		      char *s = cid_tempfile_name(f->line, f->n, &is_cid);
9178 		      if(is_cid){
9179 		        RESET_FILTER(f);
9180 		        if(s != NULL)
9181 			  for(; *s != '\0'; s++)
9182 			    COLLECT(f, *s);
9183 		     }
9184 		   }
9185 		   GF_PUTC(f->next, '\"');
9186 		   if(is_cid || f->t){
9187 		      for(p = f->line; f->n; f->n--, p++){
9188 			 if(*p == '\"') continue;
9189 		         GF_PUTC(f->next, *p);
9190 		      }
9191 		   }
9192 		   else f->n = 0;
9193 		   GF_PUTC(f->next, '\"');
9194 		   /* no need to write "c" right now to the stream. It will be written below */
9195 		   state = ASCII_ISSPACE(c) ? 1 : 0;
9196 		   RESET_FILTER(f);
9197 		}
9198 		else COLLECT(f, c);	/* collect this data */
9199 	    }
9200 
9201 	    p = f->line;
9202 	    if(state < 2)
9203 	       GF_PUTC(f->next, c);
9204 	}
9205 
9206 	f->f1 = state;
9207 	GF_END(f, f->next);
9208     }
9209     else if(cmd == GF_EOD){
9210 	if(f->f1 == 2){
9211 	   char *s = cid_tempfile_name(f->line, f->n, &f->f2);
9212 	   GF_PUTC(f->next, '\"');
9213 	   if (f->f2 || f->t){
9214 	      for(p = s; *p; p++){
9215 		 if(*p == '\"') continue;
9216 		 GF_PUTC(f->next, *p);
9217 	      }
9218 	   }
9219 	   GF_PUTC(f->next, '\"');
9220 	   GF_PUTC(f->next, '>');
9221 	}
9222 
9223 	buflen = 0;
9224 	fs_give((void **)&(f->line));	/* free temp line buffer */
9225 	(void) GF_FLUSH(f->next);
9226 	(*f->next->f)(f->next, GF_EOD);
9227     }
9228     else if(cmd == GF_RESET){
9229 	dprint((9, "-- gf_reset cid2file\n"));
9230 	f->n = 0L;		/* number of bytes in buffer */
9231 	f->f1 = 0;		/* state */
9232 	f->f2 = 0;		/* total number of bytes read that match pattern */
9233 	f->t  = *(char *)f->opt;
9234     }
9235 }
9236 
9237 /* END OF HTML-TO-PLAIN text filter */
9238 
9239 /*
9240  * ESCAPE CODE FILTER - remove unknown and possibly dangerous escape codes
9241  * from the text stream.
9242  */
9243 
9244 #define	MAX_ESC_LEN	5
9245 
9246 /*
9247  * the simple filter, removes unknown escape codes from the stream
9248  */
9249 void
gf_escape_filter(FILTER_S * f,int flg)9250 gf_escape_filter(FILTER_S *f, int flg)
9251 {
9252     register char *p;
9253     GF_INIT(f, f->next);
9254 
9255     if(flg == GF_DATA){
9256 	register unsigned char c;
9257 	register int state = f->f1;
9258 
9259 	while(GF_GETC(f, c)){
9260 
9261 	    if(state){
9262 		if(c == '\033' || f->n == MAX_ESC_LEN){
9263 		    f->line[f->n] = '\0';
9264 		    f->n = 0L;
9265 		    if(!match_escapes(f->line)){
9266 			GF_PUTC(f->next, '^');
9267 			GF_PUTC(f->next, '[');
9268 		    }
9269 		    else
9270 		      GF_PUTC(f->next, '\033');
9271 
9272 		    p = f->line;
9273 		    while(*p)
9274 		      GF_PUTC(f->next, *p++);
9275 
9276 		    if(c == '\033')
9277 		      continue;
9278 		    else
9279 		      state = 0;			/* fall thru */
9280 		}
9281 		else{
9282 		    f->line[f->n++] = c;		/* collect */
9283 		    continue;
9284 		}
9285 	    }
9286 
9287 	    if(c == '\033')
9288 	      state = 1;
9289 	    else
9290 	      GF_PUTC(f->next, c);
9291 	}
9292 
9293 	f->f1 = state;
9294 	GF_END(f, f->next);
9295     }
9296     else if(flg == GF_EOD){
9297 	if(f->f1){
9298 	    if(!match_escapes(f->line)){
9299 		GF_PUTC(f->next, '^');
9300 		GF_PUTC(f->next, '[');
9301 	    }
9302 	    else
9303 	      GF_PUTC(f->next, '\033');
9304 	}
9305 
9306 	for(p = f->line; f->n; f->n--, p++)
9307 	  GF_PUTC(f->next, *p);
9308 
9309 	fs_give((void **)&(f->line));	/* free temp line buffer */
9310 	(void) GF_FLUSH(f->next);
9311 	(*f->next->f)(f->next, GF_EOD);
9312     }
9313     else if(flg == GF_RESET){
9314 	dprint((9, "-- gf_reset escape\n"));
9315 	f->f1    = 0;
9316 	f->n     = 0L;
9317 	f->linep = f->line = (char *)fs_get((MAX_ESC_LEN + 1) * sizeof(char));
9318     }
9319 }
9320 
9321 
9322 
9323 /*
9324  * CONTROL CHARACTER FILTER - transmogrify control characters into their
9325  * corresponding string representations (you know, ^blah and such)...
9326  */
9327 
9328 /*
9329  * the simple filter transforms unknown control characters in the stream
9330  * into harmless strings.
9331  */
9332 void
gf_control_filter(FILTER_S * f,int flg)9333 gf_control_filter(FILTER_S *f, int flg)
9334 {
9335     GF_INIT(f, f->next);
9336 
9337     if(flg == GF_DATA){
9338 	register unsigned char c;
9339 	register int filt_only_c0;
9340 
9341 	filt_only_c0 = f->opt ? (*(int *) f->opt) : 0;
9342 
9343 	while(GF_GETC(f, c)){
9344 
9345 	    if(((c < 0x20 || c == 0x7f)
9346 		|| (c >= 0x80 && c < 0xA0 && !filt_only_c0))
9347 	       && !(ASCII_ISSPACE((unsigned char) c)
9348 		    || c == '\016' || c == '\017' || c == '\033')){
9349 		GF_PUTC(f->next, c >= 0x80 ? '~' : '^');
9350 		GF_PUTC(f->next, (c == 0x7f) ? '?' : (c & 0x1f) + '@');
9351 	    }
9352 	    else
9353 	      GF_PUTC(f->next, c);
9354 	}
9355 
9356 	GF_END(f, f->next);
9357     }
9358     else if(flg == GF_EOD){
9359 	(void) GF_FLUSH(f->next);
9360 	(*f->next->f)(f->next, GF_EOD);
9361     }
9362 }
9363 
9364 
/*
 * Called from outside to produce the control filter's option: a
 * pointer to an int which, when non-zero, tells the filter to replace
 * C0 control characters but leave C1 control characters alone.  The
 * pointer is returned as is; nothing is copied.  Not called at all
 * when C0 characters shouldn't be filtered either.
 */
void *
gf_control_filter_opt(int *filt_only_c0)
{
    void *opt = (void *) filt_only_c0;

    return(opt);
}
9376 
9377 
9378 /*
9379  * TAG FILTER - quote all TAG_EMBED characters by doubling them.
9380  * This prevents the possibility of embedding other tags.
9381  * We assume that this filter should only be used for something
9382  * that is eventually writing to a display, which has the special
9383  * knowledge of quoted TAG_EMBEDs.
9384  */
9385 void
gf_tag_filter(FILTER_S * f,int flg)9386 gf_tag_filter(FILTER_S *f, int flg)
9387 {
9388     GF_INIT(f, f->next);
9389 
9390     if(flg == GF_DATA){
9391 	register unsigned char c;
9392 
9393 	while(GF_GETC(f, c)){
9394 
9395 	    if((c & 0xff) == (TAG_EMBED & 0xff)){
9396 		GF_PUTC(f->next, TAG_EMBED);
9397 		GF_PUTC(f->next, c);
9398 	    }
9399 	    else
9400 	      GF_PUTC(f->next, c);
9401 	}
9402 
9403 	GF_END(f, f->next);
9404     }
9405     else if(flg == GF_EOD){
9406 	(void) GF_FLUSH(f->next);
9407 	(*f->next->f)(f->next, GF_EOD);
9408     }
9409 }
9410 
9411 
9412 /*
9413  * LINEWRAP FILTER - insert CRLF's at end of nearest whitespace before
9414  * specified line width
9415  */
9416 
9417 
/*
 * WRAP_S - option/state block for the linewrap filter.  It is reached
 * through a filter's opt pointer, i.e. ((WRAP_S *)(F)->opt), which is
 * how the WRAP_* accessor macros get at the individual members.
 */
typedef struct wrap_col_s {
    /* one-bit flags */
    unsigned	bold:1;
    unsigned	uline:1;
    unsigned	inverse:1;
    unsigned	tags:1;
    unsigned	do_indent:1;
    unsigned	on_comma:1;
    unsigned	flowed:1;
    unsigned	delsp:1;
    unsigned	quoted:1;
    unsigned	allwsp:1;
    unsigned	hard_nl:1;
    unsigned	leave_flowed:1;	/* when set, WRAP_COL/WRAP_MAX_COL are one less */
    unsigned    use_color:1;
    unsigned    hdr_color:1;
    unsigned    for_compose:1;
    unsigned    handle_soft_hyphen:1;
    unsigned    saw_soft_hyphen:1;
    unsigned	trailing_space:1;
    unsigned char  utf8buf[7];	/* presumably a partially collected UTF-8 character -- confirm */
    unsigned char *utf8bufp;
    COLOR_PAIR *color;
    STORE_S    *spaces;
    short	embedded,
		space_len;
    char       *lineendp;
    int		anchor,
		prefbrk,
		prefbrkn,
		quote_depth,
		quote_count,
		sig,
		state,
		wrap_col,	/* WRAP_COL subtracts margin_r from this */
		wrap_max,	/* WRAP_MAX_COL subtracts margin_r from this */
		margin_l,
		margin_r,
		indent;
    char	special[256];
} WRAP_S;
9458 
/*
 * Accessors for the WRAP_S hanging off a filter's opt pointer.
 * WRAP_OPTS() performs the one cast everything else is built on;
 * every accessor below expands to an lvalue unless noted.
 */
#define	WRAP_OPTS(F)	((WRAP_S *)(F)->opt)

/* geometry */
#define	WRAP_MARG_L(F)	(WRAP_OPTS(F)->margin_l)
#define	WRAP_MARG_R(F)	(WRAP_OPTS(F)->margin_r)
/* effective columns (rvalues): right margin and leave_flowed column removed */
#define	WRAP_COL(F)	(WRAP_OPTS(F)->wrap_col - WRAP_MARG_R(F) - ((WRAP_OPTS(F)->leave_flowed) ? 1 : 0))
#define	WRAP_MAX_COL(F)	(WRAP_OPTS(F)->wrap_max - WRAP_MARG_R(F) - ((WRAP_OPTS(F)->leave_flowed) ? 1 : 0))
#define	WRAP_INDENT(F)	(WRAP_OPTS(F)->indent)
#define	WRAP_DO_IND(F)	(WRAP_OPTS(F)->do_indent)
#define	WRAP_COMMA(F)	(WRAP_OPTS(F)->on_comma)

/* flowed-text bookkeeping */
#define	WRAP_FLOW(F)	(WRAP_OPTS(F)->flowed)
#define	WRAP_DELSP(F)	(WRAP_OPTS(F)->delsp)
#define	WRAP_FL_QD(F)	(WRAP_OPTS(F)->quote_depth)
#define	WRAP_FL_QC(F)	(WRAP_OPTS(F)->quote_count)
#define	WRAP_FL_SIG(F)	(WRAP_OPTS(F)->sig)
#define	WRAP_HARD(F)	(WRAP_OPTS(F)->hard_nl)
#define	WRAP_LV_FLD(F)	(WRAP_OPTS(F)->leave_flowed)

/* option flags */
#define	WRAP_USE_CLR(F)	(WRAP_OPTS(F)->use_color)
#define	WRAP_HDR_CLR(F)	(WRAP_OPTS(F)->hdr_color)
#define	WRAP_FOR_CMPS(F) (WRAP_OPTS(F)->for_compose)
#define	WRAP_HANDLE_SOFT_HYPHEN(F) (WRAP_OPTS(F)->handle_soft_hyphen)
#define	WRAP_SAW_SOFT_HYPHEN(F) (WRAP_OPTS(F)->saw_soft_hyphen)

/* UTF-8 accumulator */
#define	WRAP_UTF8BUF(F, C) (WRAP_OPTS(F)->utf8buf[C])
#define	WRAP_UTF8BUFP(F)   (WRAP_OPTS(F)->utf8bufp)

/* parser state and attributes */
#define	WRAP_STATE(F)	(WRAP_OPTS(F)->state)
#define	WRAP_QUOTED(F)	(WRAP_OPTS(F)->quoted)
#define	WRAP_TAGS(F)	(WRAP_OPTS(F)->tags)
#define	WRAP_BOLD(F)	(WRAP_OPTS(F)->bold)
#define	WRAP_ULINE(F)	(WRAP_OPTS(F)->uline)
#define	WRAP_INVERSE(F)	(WRAP_OPTS(F)->inverse)
#define	WRAP_LASTC(F)	(WRAP_OPTS(F)->lineendp)
#define	WRAP_EMBED(F)	(WRAP_OPTS(F)->embedded)
#define	WRAP_ANCHOR(F)	(WRAP_OPTS(F)->anchor)

/* break-point and pending-whitespace bookkeeping */
#define	WRAP_PB_OFF(F)	(WRAP_OPTS(F)->prefbrk)
#define	WRAP_PB_LEN(F)	(WRAP_OPTS(F)->prefbrkn)
#define	WRAP_ALLWSP(F)	(WRAP_OPTS(F)->allwsp)
#define	WRAP_SPC_LEN(F)	(WRAP_OPTS(F)->space_len)
#define	WRAP_TRL_SPC(F)	(WRAP_OPTS(F)->trailing_space)
#define	WRAP_SPEC(F, C)	WRAP_OPTS(F)->special[C]

/* color; WRAP_COLOR_SET is true when a color pair exists with a non-empty fg */
#define	WRAP_COLOR(F)	(WRAP_OPTS(F)->color)
#define	WRAP_COLOR_SET(F)  ((WRAP_COLOR(F)) && (WRAP_COLOR(F)->fg[0]))
#define	WRAP_SPACES(F)	(WRAP_OPTS(F)->spaces)
/*
 * WRAP_PUTC - append byte C to the filter's word buffer and add W
 * screen cells to the buffered width (F->f2).  When the write pointer
 * has reached the last slot (WRAP_LASTC) the buffer is first resized
 * to twice the number of bytes currently used.
 * F is evaluated several times; pass a side-effect-free expression.
 */
#define	WRAP_PUTC(F,C,W) {						\
			    if((F)->linep == WRAP_LASTC(F)){		\
				size_t wp_used = (F)->linep - (F)->line; \
				size_t wp_size = 2 * wp_used;		\
				fs_resize((void **) &(F)->line,		\
					  wp_size * sizeof(char));	\
				(F)->linep    = &(F)->line[wp_used];	\
				WRAP_LASTC(F) = &(F)->line[wp_size-1];	\
			    }						\
			    *(F)->linep++ = (C);			\
			    (F)->f2 += (W);				\
			}
9509 
/*
 * WRAP_EMBED_PUTC - store one byte of an embedded tag at zero width.
 * With no word buffered (F->f2 == 0) the byte joins the pending
 * whitespace store; otherwise it is appended to the word buffer.
 */
#define	WRAP_EMBED_PUTC(F,C) {						\
			    if(!(F)->f2)				\
			      so_writec(C, WRAP_SPACES(F));		\
			    else{					\
			        WRAP_PUTC((F), C, 0);			\
			    }						\
}
9517 
/*
 * WRAP_COLOR_UNSET - forget the remembered color by emptying its
 * foreground string; a no-op when there is no color pair or the
 * foreground is already empty.
 */
#define	WRAP_COLOR_UNSET(F)	{					\
			    if(WRAP_COLOR(F) && WRAP_COLOR(F)->fg[0])	\
			      WRAP_COLOR(F)->fg[0] = '\0';		\
			}
9523 
/*
 * wrap_flush_embed flags
 *
 * Passed as the last argument of wrap_flush_s(): wrap_flush() passes
 * WFE_NONE, wrap_flush_embed() passes WFE_CNT_HANDLE.
 */
#define	WFE_NONE	0		/* Nothing special */
#define	WFE_CNT_HANDLE	1		/* account for/don't write handles */


/*
 * Helpers used by gf_wrap().  The four trailing unsigned char **
 * arguments are the addresses of the caller's ip/eib/op/eob buffer
 * cursors (gf_wrap passes &ip, &eib, &op, &eob).
 *
 * wrap_flush        writes the pending whitespace store, then the
 *                   buffered word, and resets both
 * wrap_flush_embed  writes only the pending whitespace store, with a
 *                   width of 0 and WFE_CNT_HANDLE
 * wrap_flush_s      writes n bytes of s, adding w to the committed width
 * wrap_eol/wrap_bol end and begin an output line; defined outside this
 *                   section -- NOTE(review): confirm the meaning of
 *                   their int arguments against the definitions
 * wrap_quote_insert defined outside this section
 */
int	wrap_flush(FILTER_S *, unsigned char **, unsigned char **, unsigned char **, unsigned char **);
int	wrap_flush_embed(FILTER_S *, unsigned char **, unsigned char **,
			 unsigned char **, unsigned char **);
int	wrap_flush_s(FILTER_S *,char *, int, int, unsigned char **, unsigned char **,
		     unsigned char **, unsigned char **, int);
int	wrap_eol(FILTER_S *, int, unsigned char **, unsigned char **,
		 unsigned char **, unsigned char **);
int	wrap_bol(FILTER_S *, int, int, unsigned char **,
		 unsigned char **, unsigned char **, unsigned char **);
int	wrap_quote_insert(FILTER_S *, unsigned char **, unsigned char **,
			  unsigned char **, unsigned char **);
9542 
9543 /*
9544  * the no longer simple filter, breaks lines at end of white space nearest
9545  * to global "gf_wrap_width" in length
9546  * It also supports margins, indents (inverse indenting, really) and
9547  * flowed text (ala RFC 3676)
9548  *
9549  */
9550 void
gf_wrap(FILTER_S * f,int flg)9551 gf_wrap(FILTER_S *f, int flg)
9552 {
9553     register long i;
9554     GF_INIT(f, f->next);
9555 
9556     /*
9557      * f->f1    state
9558      * f->line  buffer where next "word" being considered is stored
9559      * f->f2    width in screen cells of f->line stuff
9560      * f->n     width in screen cells of the part of this line committed to next
9561      *            filter so far
9562      */
9563 
9564     if(flg == GF_DATA){
9565 	register unsigned char c;
9566 	register int state = f->f1;
9567 	int width, full_character;
9568 
9569 	while(GF_GETC(f, c)){
9570 
9571 	    switch(state){
9572 	      case CCR :				/* CRLF or CR in text ? */
9573 		state = BOL;				/* either way, handle start */
9574 
9575 		if(WRAP_FLOW(f)){
9576 		    /* wrapped line? */
9577 		    if(f->f2 == 0 && WRAP_SPC_LEN(f) && WRAP_TRL_SPC(f)){
9578 			/*
9579 			 * whack trailing space char, but be aware
9580 			 * of embeds in space buffer.  grok them just
9581 			 * in case they contain a 0x20 value
9582 			 */
9583 			if(WRAP_DELSP(f)){
9584 			    char *sb, *sbp, *scp = NULL;
9585 			    int   x;
9586 
9587 			    for(sb = sbp = (char *)so_text(WRAP_SPACES(f)); *sbp; sbp++){
9588 				switch(*sbp){
9589 				  case ' ' :
9590 				    scp = sbp;
9591 				    break;
9592 
9593 				  case TAG_EMBED :
9594 				    sbp++;
9595 				    switch (*sbp++){
9596 				      case TAG_HANDLE :
9597 					x = (int) *sbp++;
9598 					if(strlen(sbp) >= x)
9599 					  sbp += (x - 1);
9600 
9601 					break;
9602 
9603 				      case TAG_FGCOLOR :
9604 				      case TAG_BGCOLOR :
9605 					if(strlen(sbp) >= RGBLEN)
9606 					  sbp += (RGBLEN - 1);
9607 
9608 					break;
9609 
9610 				      default :
9611 					break;
9612 				    }
9613 
9614 				    break;
9615 
9616 				  default :
9617 				    break;
9618 				}
9619 			    }
9620 
9621 			    /* replace space buf without trailing space char */
9622 			    if(scp){
9623 				STORE_S *ns = so_get(CharStar, NULL, EDIT_ACCESS);
9624 
9625 				*scp++ = '\0';
9626 				WRAP_SPC_LEN(f)--;
9627 				WRAP_TRL_SPC(f) = 0;
9628 
9629 				so_puts(ns, sb);
9630 				so_puts(ns, scp);
9631 
9632 				so_give(&WRAP_SPACES(f));
9633 				WRAP_SPACES(f) = ns;
9634 			    }
9635 			}
9636 		    }
9637 		    else{				/* fixed line */
9638 			WRAP_HARD(f) = 1;
9639 			wrap_flush(f, &ip, &eib, &op, &eob);
9640 			wrap_eol(f, 0, &ip, &eib, &op, &eob);
9641 
9642 			/*
9643 			 * When we get to a real end of line, we don't need to
9644 			 * remember what the special color was anymore because
9645 			 * we aren't going to be changing back to it. We unset it
9646 			 * so that we don't keep resetting the color to normal.
9647 			 */
9648 			WRAP_COLOR_UNSET(f);
9649 		    }
9650 
9651 		    if(c == '\012'){			/* get c following LF */
9652 		      break;
9653 		    }
9654 		    /* else c is first char of new line, fall thru */
9655 		}
9656 		else{
9657 		    wrap_flush(f, &ip, &eib, &op, &eob);
9658 		    wrap_eol(f, 0, &ip, &eib, &op, &eob);
9659 		    WRAP_COLOR_UNSET(f);		/* see note above */
9660 		    if(c == '\012'){
9661 			break;
9662 		    }
9663 		    /* else fall thru to deal with beginning of line */
9664 		}
9665 
9666 	      case BOL :
9667 		if(WRAP_FLOW(f)){
9668 		    if(c == '>'){
9669 			WRAP_FL_QC(f) = 1;		/* init it */
9670 			state = FL_QLEV;		/* go collect it */
9671 		    }
9672 		    else {
9673 			/* if EMBEDed, process it and return here */
9674 			if(c == (unsigned char) TAG_EMBED){
9675 			    WRAP_EMBED_PUTC(f, TAG_EMBED);
9676 			    WRAP_STATE(f) = state;
9677 			    state = TAG;
9678 			    continue;
9679 			}
9680 
9681 			/* quote level change implies new paragraph */
9682 			if(WRAP_FL_QD(f)){
9683 			    WRAP_FL_QD(f) = 0;
9684 			    if(WRAP_HARD(f) == 0){
9685 				WRAP_HARD(f) = 1;
9686 				wrap_flush(f, &ip, &eib, &op, &eob);
9687 				wrap_eol(f, 0, &ip, &eib, &op, &eob);
9688 				WRAP_COLOR_UNSET(f);	/* see note above */
9689 			    }
9690 			}
9691 
9692 			if(WRAP_HARD(f)){
9693 			    wrap_bol(f, 0, 1, &ip, &eib, &op,
9694 				     &eob);   /* write quoting prefix */
9695 			    WRAP_HARD(f) = 0;
9696 			}
9697 
9698 			switch (c) {
9699 			  case '\015' :			/* a blank line? */
9700 			    wrap_flush(f, &ip, &eib, &op, &eob);
9701 			    state = CCR;		/* go collect it */
9702 			    break;
9703 
9704 			  case ' ' :			/* space stuffed */
9705 			    state = FL_STF;		/* just eat it */
9706 			    break;
9707 
9708 			  case '-' :			/* possible sig-dash */
9709 			    WRAP_FL_SIG(f) = 1;	        /* init state */
9710 			    state = FL_SIG;		/* go collect it */
9711 			    break;
9712 
9713 			  default :
9714 			    state = DFL;		/* go back to normal */
9715 			    goto case_dfl;		/* handle c like DFL case */
9716 			}
9717 		    }
9718 		}
9719 		else{
9720 		    state = DFL;
9721 		    if(WRAP_COMMA(f) && c == TAB){
9722 			wrap_bol(f, 1, 0, &ip, &eib, &op,
9723 				 &eob);    /* convert to normal indent */
9724 			break;
9725 		    }
9726 
9727 		    wrap_bol(f,0,0, &ip, &eib, &op, &eob);
9728 		    goto case_dfl;			/* handle c like DFL case */
9729 		}
9730 
9731 		break;
9732 
9733 	      case  FL_QLEV :
9734 		if(c == '>'){				/* another level */
9735 		    WRAP_FL_QC(f)++;
9736 		}
9737 		else {
9738 		    /* if EMBEDed, process it and return here */
9739 		    if(c == (unsigned char) TAG_EMBED){
9740 			WRAP_EMBED_PUTC(f, TAG_EMBED);
9741 			WRAP_STATE(f) = state;
9742 			state = TAG;
9743 			continue;
9744 		    }
9745 
9746 		    /* quote level change signals new paragraph */
9747 		    if(WRAP_FL_QC(f) != WRAP_FL_QD(f)){
9748 			WRAP_FL_QD(f) = WRAP_FL_QC(f);
9749 			if(WRAP_HARD(f) == 0){		/* add hard newline */
9750 			    WRAP_HARD(f) = 1;		/* hard newline */
9751 			    wrap_flush(f, &ip, &eib, &op, &eob);
9752 			    wrap_eol(f, 0, &ip, &eib, &op, &eob);
9753 			    WRAP_COLOR_UNSET(f);	/* see note above */
9754 			}
9755 		    }
9756 
9757 		    if(WRAP_HARD(f)){
9758 			wrap_bol(f,0,1, &ip, &eib, &op, &eob);
9759 			WRAP_HARD(f) = 0;
9760 		    }
9761 
9762 		    switch (c) {
9763 		      case '\015' :			/* a blank line? */
9764 			wrap_flush(f, &ip, &eib, &op, &eob);
9765 			state = CCR;			/* go collect it */
9766 			break;
9767 
9768 		      case ' ' :			/* space-stuffed! */
9769 			state = FL_STF;			/* just eat it */
9770 			break;
9771 
9772 		      case '-' :			/* sig dash? */
9773 			WRAP_FL_SIG(f) = 1;
9774 			state = FL_SIG;
9775 			break;
9776 
9777 		      default :				/* something else */
9778 			state = DFL;
9779 			goto case_dfl;			/* handle c like DFL */
9780 		    }
9781 		}
9782 
9783 		break;
9784 
9785 	      case FL_STF :				/* space stuffed */
9786 		switch (c) {
9787 		  case '\015' :				/* a blank line? */
9788 		    wrap_flush(f, &ip, &eib, &op, &eob);
9789 		    state = CCR;			/* go collect it */
9790 		    break;
9791 
9792 		  case (unsigned char) TAG_EMBED :	/* process TAG data */
9793 		    WRAP_EMBED_PUTC(f, TAG_EMBED);
9794 		    WRAP_STATE(f) = state;		/* and return */
9795 		    state = TAG;
9796 		    continue;
9797 
9798 		  case '-' :				/* sig dash? */
9799 		    WRAP_FL_SIG(f) = 1;
9800 		    WRAP_ALLWSP(f) = 0;
9801 		    state = FL_SIG;
9802 		    break;
9803 
9804 		  default :				/* something else */
9805 		    state = DFL;
9806 		    goto case_dfl;			/* handle c like DFL */
9807 		}
9808 
9809 		break;
9810 
9811 	      case FL_SIG :				/* sig-dash collector */
9812 		switch (WRAP_FL_SIG(f)){		/* possible sig-dash? */
9813 		  case 1 :
9814 		    if(c != '-'){			/* not a sigdash */
9815 			if((f->n + WRAP_SPC_LEN(f) + 1) > WRAP_COL(f)){
9816 			    wrap_flush_embed(f, &ip, &eib, &op,
9817 					     &eob);      /* note any embedded*/
9818 			    wrap_eol(f, 1, &ip, &eib,
9819 				     &op, &eob);       /* plunk down newline */
9820 			    wrap_bol(f, 1, 1, &ip, &eib,
9821 				     &op, &eob);         /* write any prefix */
9822 			}
9823 
9824 			WRAP_PUTC(f,'-', 1);		/* write what we got */
9825 
9826 			WRAP_FL_SIG(f) = 0;
9827 			state = DFL;
9828 			goto case_dfl;
9829 		    }
9830 
9831 		    /* don't put anything yet until we know to wrap or not */
9832 		    WRAP_FL_SIG(f) = 2;
9833 		    break;
9834 
9835 		  case 2 :
9836 		    if(c != ' '){			    /* not a sigdash */
9837 			WRAP_PUTC(f, '-', 1);
9838 			if((f->n + WRAP_SPC_LEN(f) + 2) > WRAP_COL(f)){
9839 			    wrap_flush_embed(f, &ip, &eib, &op,
9840 					     &eob);      /* note any embedded*/
9841 			    wrap_eol(f, 1, &ip, &eib,
9842 				     &op, &eob);       /* plunk down newline */
9843 			    wrap_bol(f, 1, 1, &ip, &eib, &op,
9844 				     &eob);   	         /* write any prefix */
9845 			}
9846 
9847 			WRAP_PUTC(f,'-', 1);		/* write what we got */
9848 
9849 			WRAP_FL_SIG(f) = 0;
9850 			state = DFL;
9851 			goto case_dfl;
9852 		    }
9853 
9854 		    /* don't put anything yet until we know to wrap or not */
9855 		    WRAP_FL_SIG(f) = 3;
9856 		    break;
9857 
9858 		  case 3 :
9859 		    if(c == '\015'){			/* success! */
9860 			/* known sigdash, newline if soft nl */
9861 			if(WRAP_SPC_LEN(f)){
9862 			    wrap_flush(f, &ip, &eib, &op, &eob);
9863 			    wrap_eol(f, 0, &ip, &eib, &op, &eob);
9864 			    wrap_bol(f, 0, 1, &ip, &eib, &op, &eob);
9865 			}
9866 			WRAP_PUTC(f,'-',1);
9867 			WRAP_PUTC(f,'-',1);
9868 			WRAP_PUTC(f,' ',1);
9869 
9870 			state = CCR;
9871 			break;
9872 		    }
9873 		    else{
9874 			WRAP_FL_SIG(f) = 4;		/* possible success */
9875 		    }
9876 
9877 		  case 4 :
9878 		    switch(c){
9879 		      case (unsigned char) TAG_EMBED :
9880 			/*
9881 			 * At this point we're almost 100% sure that we've got
9882 			 * a sigdash.  Putc it (adding newline if previous
9883 			 * was a soft nl) so we get it the right color
9884 			 * before we store this new embedded stuff
9885 			 */
9886 			if(WRAP_SPC_LEN(f)){
9887 			    wrap_flush(f, &ip, &eib, &op, &eob);
9888 			    wrap_eol(f, 0, &ip, &eib, &op, &eob);
9889 			    wrap_bol(f, 0, 1, &ip, &eib, &op, &eob);
9890 			}
9891 			WRAP_PUTC(f,'-',1);
9892 			WRAP_PUTC(f,'-',1);
9893 			WRAP_PUTC(f,' ',1);
9894 
9895 			WRAP_FL_SIG(f) = 5;
9896 			break;
9897 
9898 		      case '\015' :			/* success! */
9899 			/*
9900 			 * We shouldn't get here, but in case we do, we have
9901 			 * not yet put the sigdash
9902 			 */
9903 			if(WRAP_SPC_LEN(f)){
9904 			    wrap_flush(f, &ip, &eib, &op, &eob);
9905 			    wrap_eol(f, 0, &ip, &eib, &op, &eob);
9906 			    wrap_bol(f, 0, 1, &ip, &eib, &op, &eob);
9907 			}
9908 			WRAP_PUTC(f,'-',1);
9909 			WRAP_PUTC(f,'-',1);
9910 			WRAP_PUTC(f,' ',1);
9911 
9912 			state = CCR;
9913 			break;
9914 
9915 		      default :				/* that's no sigdash! */
9916 			/* write what we got but didn't put yet */
9917 			WRAP_PUTC(f,'-', 1);
9918 			WRAP_PUTC(f,'-', 1);
9919 			WRAP_PUTC(f,' ', 1);
9920 
9921 			WRAP_FL_SIG(f) = 0;
9922 			wrap_flush(f, &ip, &eib, &op, &eob);
9923 			WRAP_SPC_LEN(f) = 1;
9924 			state = DFL;			/* set normal state */
9925 			goto case_dfl;			/* and go do "c" */
9926 		    }
9927 
9928 		    break;
9929 
9930 		  case 5 :
9931 		    WRAP_STATE(f) = FL_SIG;		/* come back here */
9932 		    WRAP_FL_SIG(f) = 6;			/* and seek EOL */
9933 		    WRAP_EMBED_PUTC(f, TAG_EMBED);
9934 		    state = TAG;			/* process embed */
9935 		    goto case_tag;
9936 
9937 		  case 6 :
9938 		    /*
9939 		     * at this point we've already putc the sigdash in case 4
9940 		     */
9941 		    switch(c){
9942 		      case (unsigned char) TAG_EMBED :
9943 			WRAP_FL_SIG(f) = 5;
9944 			break;
9945 
9946 		      case '\015' :			/* success! */
9947 			state = CCR;
9948 			break;
9949 
9950 		      default :				/* that's no sigdash! */
9951 			/*
9952 			 * probably never reached (fake sigdash with embedded
9953 			 * stuff) but if this did get reached, then we
9954 			 * might have accidentally disobeyed a soft nl
9955 			 */
9956 			WRAP_FL_SIG(f) = 0;
9957 			wrap_flush(f, &ip, &eib, &op, &eob);
9958 			WRAP_SPC_LEN(f) = 1;
9959 			state = DFL;			/* set normal state */
9960 			goto case_dfl;			/* and go do "c" */
9961 		    }
9962 
9963 		    break;
9964 
9965 
9966 		  default :
9967 		    dprint((2, "-- gf_wrap: BROKEN FLOW STATE: %d\n",
9968 			       WRAP_FL_SIG(f)));
9969 		    WRAP_FL_SIG(f) = 0;
9970 		    state = DFL;			/* set normal state */
9971 		    goto case_dfl;			/* and go process "c" */
9972 		}
9973 
9974 		break;
9975 
9976 	      case_dfl :
9977 	      case DFL :
9978     /*
9979      * This was just if(WRAP_SPEC(f, c)) before the change to add
9980      * the == 0 test. This isn't quite right, either. We should really
9981      * be looking for special characters in the UCS characters, not
9982      * in the incoming stream of UTF-8. It is not right to
9983      * call this on bytes that are in the middle of a UTF-8 character,
9984      * hence the == 0 test which restricts it to the first byte
9985      * of a character. This isn't right, either, but it's closer.
9986      * Also change the definition of WRAP_SPEC so that isspace only
9987      * matches ascii characters, which will never be in the middle
9988      * of a UTF-8 multi-byte character.
9989      */
9990 		if((WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0)) == 0 && WRAP_SPEC(f, c)){
9991 		    WRAP_SAW_SOFT_HYPHEN(f) = 0;
9992 		    switch(c){
9993 		      default :
9994 			if(WRAP_QUOTED(f))
9995 			  break;
9996 
9997 			if(f->f2){			/* any non-lwsp to flush? */
9998 			    if(WRAP_COMMA(f)){
9999 				/* remember our second best break point */
10000 				WRAP_PB_OFF(f) = f->linep - f->line;
10001 				WRAP_PB_LEN(f) = f->f2;
10002 				break;
10003 			    }
10004 			    else
10005 			      wrap_flush(f, &ip, &eib, &op, &eob);
10006 			}
10007 
10008 			switch(c){			/* remember separator */
10009 			  case ' ' :
10010 			    WRAP_SPC_LEN(f)++;
10011 			    WRAP_TRL_SPC(f) = 1;
10012 			    so_writec(' ',WRAP_SPACES(f));
10013 			    break;
10014 
10015 			  case TAB :
10016 			  {
10017 			      int i = (int) f->n + WRAP_SPC_LEN(f);
10018 
10019 			      do
10020 				WRAP_SPC_LEN(f)++;
10021 			      while(++i & 0x07);
10022 
10023 			      so_writec(TAB,WRAP_SPACES(f));
10024 			      WRAP_TRL_SPC(f) = 0;
10025 			  }
10026 
10027 			  break;
10028 
10029 			  default :			/* some control char? */
10030 			    WRAP_SPC_LEN(f) += 2;
10031 			    WRAP_TRL_SPC(f) = 0;
10032 			    break;
10033 			}
10034 
10035 			continue;
10036 
10037 		      case '\"' :
10038 			WRAP_QUOTED(f) = !WRAP_QUOTED(f);
10039 			break;
10040 
10041 		      case '\015' :			/* already has newline? */
10042 			state = CCR;
10043 			continue;
10044 
10045 		      case '\012' :			 /* bare LF in text? */
10046 			wrap_flush(f, &ip, &eib, &op, &eob); /* they must've */
10047 			wrap_eol(f, 0, &ip, &eib, &op, &eob);       /* meant */
10048 			wrap_bol(f,1,1, &ip, &eib, &op, &eob); /* newline... */
10049 			continue;
10050 
10051 		      case (unsigned char) TAG_EMBED :
10052 			WRAP_EMBED_PUTC(f, TAG_EMBED);
10053 			WRAP_STATE(f) = state;
10054 			state = TAG;
10055 			continue;
10056 
10057 		      case ',' :
10058 			if(!WRAP_QUOTED(f)){
10059 			    /* handle this special case in general code below */
10060 			    if(f->n + WRAP_SPC_LEN(f) + f->f2 + 1 > WRAP_MAX_COL(f)
10061 			       && WRAP_ALLWSP(f) && WRAP_PB_OFF(f))
10062 			      break;
10063 
10064 			    if(f->n + WRAP_SPC_LEN(f) + f->f2 + 1 > WRAP_COL(f)){
10065 				if(WRAP_ALLWSP(f))    /* if anything visible */
10066 				  wrap_flush(f, &ip, &eib, &op,
10067 					     &eob);  /* ... blat buf'd chars */
10068 
10069 				wrap_eol(f, 1, &ip, &eib, &op,
10070 					 &eob);  /* plunk down newline */
10071 				wrap_bol(f, 1, 1, &ip, &eib, &op,
10072 					 &eob);    /* write any prefix */
10073 			    }
10074 
10075 			    WRAP_PUTC(f, ',', 1);	/* put out comma */
10076 			    wrap_flush(f, &ip, &eib, &op,
10077 				       &eob);       /* write buf'd chars */
10078 			    continue;
10079 			}
10080 
10081 			break;
10082 		    }
10083 		}
10084 		else if(WRAP_HANDLE_SOFT_HYPHEN(f)
10085 			&& (WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0)) == 1
10086 			&& WRAP_UTF8BUF(f, 0) == 0xC2 && c == 0xAD){
10087 		    /*
10088 		     * This is a soft hyphen. If there is enough space for
10089 		     * a real hyphen to fit on the line here then we can
10090 		     * flush everything up to before the soft hyphen,
10091 		     * and simply remember that we saw a soft hyphen.
10092 		     * If it turns out that we can't fit the next piece in
10093 		     * then wrap_eol will append a real hyphen to the line.
10094 		     * If we can fit another piece in it will be because we've
10095 		     * reached the next break point. At that point we'll flush
10096 		     * everything but won't include the unneeded hyphen. We erase
10097 		     * the fact that we saw this soft hyphen because it have
10098 		     * become irrelevant.
10099 		     *
10100 		     * If the hyphen is the character that puts us over the edge
10101 		     * we go through the else case.
10102 		     */
10103 
10104 		    /* erase this soft hyphen character from buffer */
10105 		    WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10106 
10107 		    if((f->n + WRAP_SPC_LEN(f) + f->f2 + 1) <= WRAP_COL(f)){
10108 			if(f->f2)			/* any non-lwsp to flush? */
10109 			  wrap_flush(f, &ip, &eib, &op, &eob);
10110 
10111 			/* remember that we saw the soft hyphen */
10112 			WRAP_SAW_SOFT_HYPHEN(f) = 1;
10113 		    }
10114 		    else{
10115 			/*
10116 			 * Everything up to the hyphen fits, otherwise it
10117 			 * would have already been flushed the last time
10118 			 * through the loop. But the hyphen won't fit. So
10119 			 * we need to go back to the last line break and
10120 			 * break there instead. Then start a new line with
10121 			 * the buffered up characters and the soft hyphen.
10122 			 */
10123 			wrap_flush_embed(f, &ip, &eib, &op, &eob);
10124 			wrap_eol(f, 1, &ip, &eib, &op,
10125 				 &eob);	    /* plunk down newline */
10126 			wrap_bol(f,1,1, &ip, &eib, &op,
10127 				 &eob);	      /* write any prefix */
10128 
10129 			/*
10130 			 * Now we're in the same situation as we would have
10131 			 * been above except we're on a new line. Try to
10132 			 * flush out the characters seen up to the hyphen.
10133 			 */
10134 			if((f->n + WRAP_SPC_LEN(f) + f->f2 + 1) <= WRAP_COL(f)){
10135 			    if(f->f2)			/* any non-lwsp to flush? */
10136 			      wrap_flush(f, &ip, &eib, &op, &eob);
10137 
10138 			    /* remember that we saw the soft hyphen */
10139 			    WRAP_SAW_SOFT_HYPHEN(f) = 1;
10140 			}
10141 			else
10142 			  WRAP_SAW_SOFT_HYPHEN(f) = 0;
10143 		    }
10144 
10145 		    continue;
10146 		}
10147 
10148 		full_character = 0;
10149 
10150 		{
10151 		    unsigned char *inputp;
10152 		    unsigned long remaining_octets;
10153 		    UCS ucs;
10154 
10155 		    if(WRAP_UTF8BUFP(f) < &WRAP_UTF8BUF(f, 0) + 6){	/* always true */
10156 
10157 			*WRAP_UTF8BUFP(f)++ = c;
10158 			remaining_octets = WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0);
10159 			if(remaining_octets == 1 && isascii(WRAP_UTF8BUF(f, 0))){
10160 			    full_character++;
10161 			    if(c == TAB){
10162 				int i = (int) f->n;
10163 
10164 				while(i & 0x07)
10165 				  i++;
10166 
10167 				width = i - f->n;
10168 			    }
10169 			    else if(c < 0x80 && iscntrl((unsigned char) c))
10170 			      width = 2;
10171 			    else
10172 			      width = 1;
10173 			}
10174 			else{
10175 			    inputp = &WRAP_UTF8BUF(f, 0);
10176 			    ucs = (UCS) utf8_get(&inputp, &remaining_octets);
10177 			    switch(ucs){
10178 			      case U8G_ENDSTRG:	/* incomplete character, wait */
10179 			      case U8G_ENDSTRI:	/* incomplete character, wait */
10180 				width = 0;
10181 				break;
10182 
10183 			      default:
10184 			        if(ucs & U8G_ERROR || ucs == UBOGON){
10185 				    /*
10186 				     * None of these cases is supposed to happen. If it
10187 				     * does happen then the input stream isn't UTF-8
10188 				     * so something is wrong. Writechar will treat
10189 				     * each octet in the input buffer as a separate
10190 				     * error character and print a '?' for each,
10191 				     * so the width will be the number of octets.
10192 				     */
10193 				    width = WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0);
10194 				    full_character++;
10195 				}
10196 				else{
10197 				    /* got a character */
10198 				    width = wcellwidth(ucs);
10199 				    full_character++;
10200 
10201 				    if(width < 0){
10202 					/*
10203 					 * This happens when we have a UTF-8 character that
10204 					 * we aren't able to print in our locale. For example,
10205 					 * if the locale is setup with the terminal
10206 					 * expecting ISO-8859-1 characters then there are
10207 					 * lots of UTF-8 characters that can't be printed.
10208 					 * Print a '?' instead.
10209 					 */
10210 					width = 1;
10211 				    }
10212 				}
10213 
10214 				break;
10215 			    }
10216 			}
10217 		    }
10218 		    else{
10219 			/*
10220 			 * This cannot happen because an error would have
10221 			 * happened at least by character #6. So if we get
10222 			 * here there is a bug in utf8_get().
10223 			 */
10224 			if(WRAP_UTF8BUFP(f) == &WRAP_UTF8BUF(f, 0) + 6){
10225 			    *WRAP_UTF8BUFP(f)++ = c;
10226 			}
10227 
10228 			/*
10229 			 * We could possibly do some more sophisticated
10230 			 * resynchronization here, but we aren't doing
10231 			 * anything in Writechar so it wouldn't match up
10232 			 * with that anyway. Just figure each character will
10233 			 * end up being printed as a ? character.
10234 			 */
10235 			width = WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0);
10236 			full_character++;
10237 		    }
10238 		}
10239 
10240 		if(WRAP_ALLWSP(f)){
10241 		    /*
10242 		     * Nothing is visible yet but the first word may be too long
10243 		     * all by itself. We need to break early.
10244 		     */
10245 		    if(f->n + WRAP_SPC_LEN(f) + f->f2 + width > WRAP_MAX_COL(f)){
10246 			/*
10247 			 * A little reaching behind the curtain here.
10248 			 * if there's at least a preferable break point, use
10249 			 * it and stuff what's left back into the wrap buffer.
10250 			 * The "nwsp" latch is used to skip leading whitespace
10251 			 * The second half of the test prevents us from wrapping
10252 			 * at the preferred break point in the case that it
10253 			 * is so early in the line that it doesn't help.
10254 			 * That is, the width of the indent is even more than
10255 			 * the width of the first part before the preferred
10256 			 * break point. An example would be breaking after
10257 			 * "To:" when the indent is 4 which is > 3.
10258 			 */
10259 			if(WRAP_PB_OFF(f) && WRAP_PB_LEN(f) >= WRAP_INDENT(f)){
10260 			    char *p1 = f->line + WRAP_PB_OFF(f);
10261 			    char *p2 = f->linep;
10262 			    char  c2;
10263 			    int   nwsp = 0, left_after_wrap;
10264 
10265 			    left_after_wrap = f->f2 - WRAP_PB_LEN(f);
10266 
10267 			    f->f2 = WRAP_PB_LEN(f);
10268 			    f->linep = p1;
10269 
10270 			    wrap_flush(f, &ip, &eib, &op, &eob); /* flush shortened buf */
10271 
10272 			    /* put back rest of characters */
10273 			    while(p1 < p2){
10274 				c2 = *p1++;
10275 				if(!(c2 == ' ' || c2 == '\t') || nwsp){
10276 				    WRAP_PUTC(f, c2, 0);
10277 				    nwsp = 1;
10278 				}
10279 				else
10280 				  left_after_wrap--;	/* wrong if a tab! */
10281 			    }
10282 
10283 			    f->f2 = MAX(left_after_wrap, 0);
10284 
10285 			    wrap_eol(f, 1, &ip, &eib, &op,
10286 				     &eob);     /* plunk down newline */
10287 			    wrap_bol(f,1,1, &ip, &eib, &op,
10288 				     &eob);      /* write any prefix */
10289 
10290 			    /*
10291 			     * What's this for?
10292 			     * If we do the less preferable break point at
10293 			     * the space we don't want to lose the fact that
10294 			     * we might be able to break at this comma for
10295 			     * the next one.
10296 			     */
10297 			    if(full_character && c == ','){
10298 				WRAP_PUTC(f, c, 1);
10299 				wrap_flush(f, &ip, &eib, &op, &eob);
10300 				WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10301 			    }
10302 			}
10303 			else{
10304 			    wrap_flush(f, &ip, &eib, &op, &eob);
10305 
10306 			    wrap_eol(f, 1, &ip, &eib, &op,
10307 				     &eob);     /* plunk down newline */
10308 			    wrap_bol(f,1,1, &ip, &eib, &op,
10309 				     &eob);      /* write any prefix */
10310 			}
10311 		    }
10312 		}
10313 		else if((f->n + WRAP_SPC_LEN(f) + f->f2 + width) > WRAP_COL(f)){
10314 		    wrap_flush_embed(f, &ip, &eib, &op, &eob);
10315 		    wrap_eol(f, 1, &ip, &eib, &op,
10316 			     &eob);	    /* plunk down newline */
10317 		    wrap_bol(f,1,1, &ip, &eib, &op,
10318 			     &eob);	      /* write any prefix */
10319 		}
10320 
10321 		/*
10322 		 * Commit entire multibyte UTF-8 character at once
10323 		 * instead of writing partial characters into the
10324 		 * buffer.
10325 		 */
10326 		if(full_character){
10327 		    unsigned char *q;
10328 
10329 		    for(q = &WRAP_UTF8BUF(f, 0); q < WRAP_UTF8BUFP(f); q++){
10330 			WRAP_PUTC(f, *q, width);
10331 			width = 0;
10332 		    }
10333 
10334 		    WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10335 		}
10336 
10337 		break;
10338 
10339 	      case_tag :
10340 	      case TAG :
10341 		WRAP_EMBED_PUTC(f, c);
10342 		switch(c){
10343 		  case TAG_HANDLE :
10344 		    WRAP_EMBED(f) = -1;
10345 		    state = HANDLE;
10346 		    break;
10347 
10348 		  case TAG_FGCOLOR :
10349 		  case TAG_BGCOLOR :
10350 		    WRAP_EMBED(f) = RGBLEN;
10351 		    state = HDATA;
10352 		    break;
10353 
10354 		  default :
10355 		    state = WRAP_STATE(f);
10356 		    break;
10357 		}
10358 
10359 		break;
10360 
10361 	      case HANDLE :
10362 		WRAP_EMBED_PUTC(f, c);
10363 		WRAP_EMBED(f) = c;
10364 		state = HDATA;
10365 		break;
10366 
10367 	      case HDATA :
10368 		if(f->f2){
10369 		  WRAP_PUTC(f, c, 0);
10370 		}
10371 		else
10372 		  so_writec(c, WRAP_SPACES(f));
10373 
10374 		if(!(WRAP_EMBED(f) -= 1)){
10375 		    state = WRAP_STATE(f);
10376 		}
10377 
10378 		break;
10379 	    }
10380 	}
10381 
10382 	f->f1 = state;
10383 	GF_END(f, f->next);
10384     }
10385     else if(flg == GF_EOD){
10386 	wrap_flush(f, &ip, &eib, &op, &eob);
10387 	if(WRAP_COLOR(f))
10388 	  free_color_pair(&WRAP_COLOR(f));
10389 
10390 	fs_give((void **) &f->line);	/* free temp line buffer */
10391 	so_give(&WRAP_SPACES(f));
10392 	fs_give((void **) &f->opt);	/* free wrap widths struct */
10393 	(void) GF_FLUSH(f->next);
10394 	(*f->next->f)(f->next, GF_EOD);
10395     }
10396     else if(flg == GF_RESET){
10397 	dprint((9, "-- gf_reset wrap\n"));
10398 	f->f1    = BOL;
10399 	f->n     = 0L;		/* displayed length of line so far */
10400 	f->f2	 = 0;		/* displayed length of buffered chars */
10401 	WRAP_HARD(f) = 1;	/* starting at beginning of line */
10402 	if(! (WRAP_S *) f->opt)
10403 	  f->opt = gf_wrap_filter_opt(75, 80, NULL, 0, 0);
10404 
10405 	while(WRAP_INDENT(f) >= WRAP_MAX_COL(f))
10406 	  WRAP_INDENT(f) /= 2;
10407 
10408 	f->line  = (char *) fs_get(WRAP_MAX_COL(f) * sizeof(char));
10409 	f->linep = f->line;
10410 	WRAP_LASTC(f) = &f->line[WRAP_MAX_COL(f) - 1];
10411 
10412 	for(i = 0; i < 256; i++)
10413 	  ((WRAP_S *) f->opt)->special[i] = ((i == '\"' && WRAP_COMMA(f))
10414 					     || i == '\015'
10415 					     || i == '\012'
10416 					     || (i == (unsigned char) TAG_EMBED
10417 						 && WRAP_TAGS(f))
10418 					     || (i == ',' && WRAP_COMMA(f)
10419 						 && !WRAP_QUOTED(f))
10420 					     || ASCII_ISSPACE(i));
10421 	WRAP_SPACES(f) = so_get(CharStar, NULL, EDIT_ACCESS);
10422 	WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10423     }
10424 }
10425 
10426 int
wrap_flush(FILTER_S * f,unsigned char ** ipp,unsigned char ** eibp,unsigned char ** opp,unsigned char ** eobp)10427 wrap_flush(FILTER_S *f, unsigned char **ipp, unsigned char **eibp,
10428 	   unsigned char **opp, unsigned char **eobp)
10429 {
10430     register char *s;
10431     register int   n;
10432 
10433     s = (char *)so_text(WRAP_SPACES(f));
10434     n = so_tell(WRAP_SPACES(f));
10435     so_seek(WRAP_SPACES(f), 0L, 0);
10436     wrap_flush_s(f, s, n, WRAP_SPC_LEN(f), ipp, eibp, opp, eobp, WFE_NONE);
10437     so_truncate(WRAP_SPACES(f), 0L);
10438     WRAP_SPC_LEN(f) = 0;
10439     WRAP_TRL_SPC(f) = 0;
10440     s = f->line;
10441     n = f->linep - f->line;
10442     wrap_flush_s(f, s, n, f->f2, ipp, eibp, opp, eobp, WFE_NONE);
10443     f->f2    = 0;
10444     f->linep = f->line;
10445     WRAP_PB_OFF(f) = 0;
10446     WRAP_PB_LEN(f) = 0;
10447 
10448     return 0;
10449 }
10450 
10451 int
wrap_flush_embed(FILTER_S * f,unsigned char ** ipp,unsigned char ** eibp,unsigned char ** opp,unsigned char ** eobp)10452 wrap_flush_embed(FILTER_S *f, unsigned char **ipp, unsigned char **eibp, unsigned char **opp, unsigned char **eobp)
10453 {
10454   register char *s;
10455   register int   n;
10456   s = (char *)so_text(WRAP_SPACES(f));
10457   n = so_tell(WRAP_SPACES(f));
10458   so_seek(WRAP_SPACES(f), 0L, 0);
10459   wrap_flush_s(f, s, n, 0, ipp, eibp, opp, eobp, WFE_CNT_HANDLE);
10460   so_truncate(WRAP_SPACES(f), 0L);
10461   WRAP_SPC_LEN(f) = 0;
10462   WRAP_TRL_SPC(f) = 0;
10463 
10464   return 0;
10465 }
10466 
/*
 * wrap_flush_s - write n bytes starting at s to the next filter,
 *                interpreting TAG_EMBED sequences on the way.
 *
 *   f      wrap filter whose state is updated
 *   s, n   buffer to write and its length in bytes
 *   w      width added to f->n up front; when it is zero, bytes that
 *          are not part of a tag sequence are not written at all
 *   flags  WFE_NONE, or WFE_CNT_HANDLE to track TAG_HANDLE sequences
 *          (WRAP_ANCHOR is still updated) without forwarding them
 *   ipp, eibp, opp, eobp
 *          not referenced by name here; NOTE(review): presumably
 *          consumed by the GF_PUTC_GLO macro -- confirm against its
 *          definition
 *
 * Bold/underline/inverse-off/handle/color tags update the matching
 * WRAP_* state so it can be re-established on a later line.
 * Always returns 0.
 */
int
wrap_flush_s(FILTER_S *f, char *s, int n, int w, unsigned char **ipp,
	     unsigned char **eibp, unsigned char **opp, unsigned char **eobp, int flags)
{
    f->n += w;			/* account for the whole buffer's width */

    for(; n > 0; n--,s++){
	if(*s == TAG_EMBED){
	    /*
	     * This decrement pays for the TAG_EMBED byte; the loop's own
	     * n--,s++ pays for the last byte consumed below.
	     */
	    if(n-- > 0){
		switch(*++s){
		  case TAG_BOLDON :
		    GF_PUTC_GLO(f->next,TAG_EMBED);
		    GF_PUTC_GLO(f->next,TAG_BOLDON);
		    WRAP_BOLD(f) = 1;
		    break;
		  case TAG_BOLDOFF :
		    GF_PUTC_GLO(f->next,TAG_EMBED);
		    GF_PUTC_GLO(f->next,TAG_BOLDOFF);
		    WRAP_BOLD(f) = 0;
		    break;
		  case TAG_ULINEON :
		    GF_PUTC_GLO(f->next,TAG_EMBED);
		    GF_PUTC_GLO(f->next,TAG_ULINEON);
		    WRAP_ULINE(f) = 1;
		    break;
		  case TAG_ULINEOFF :
		    GF_PUTC_GLO(f->next,TAG_EMBED);
		    GF_PUTC_GLO(f->next,TAG_ULINEOFF);
		    WRAP_ULINE(f) = 0;
		    break;
		  case TAG_INVOFF :
		    GF_PUTC_GLO(f->next,TAG_EMBED);
		    GF_PUTC_GLO(f->next,TAG_INVOFF);
		    WRAP_ANCHOR(f) = 0;
		    break;
		  case TAG_HANDLE :
		    /* handle: a length byte followed by that many digits */
		    if((flags & WFE_CNT_HANDLE) == 0)
		      GF_PUTC_GLO(f->next,TAG_EMBED);

		    if(n-- > 0){
			int i = *++s;	/* number of digits that follow */

			if((flags & WFE_CNT_HANDLE) == 0)
			  GF_PUTC_GLO(f->next, TAG_HANDLE);

			if(i <= n){
			    n -= i;

			    if((flags & WFE_CNT_HANDLE) == 0)
			      GF_PUTC_GLO(f->next, i);

			    /* rebuild the decimal handle number */
			    WRAP_ANCHOR(f) = 0;
			    while(i-- > 0){
				WRAP_ANCHOR(f) = (WRAP_ANCHOR(f) * 10) + (*++s-'0');

				if((flags & WFE_CNT_HANDLE) == 0)
				  GF_PUTC_GLO(f->next,*s);
			    }

			}
		    }
		    break;
		  case TAG_FGCOLOR :
		    /* forwarded only when color is in use and RGBLEN bytes remain */
		    if(pico_usingcolor() && n >= RGBLEN){
			int i;
			GF_PUTC_GLO(f->next,TAG_EMBED);
			GF_PUTC_GLO(f->next,TAG_FGCOLOR);
			if(!WRAP_COLOR(f))
			  WRAP_COLOR(f)=new_color_pair(NULL,NULL);
			/* remember the foreground so it can be restored later */
			strncpy(WRAP_COLOR(f)->fg, s+1, RGBLEN);
			WRAP_COLOR(f)->fg[RGBLEN]='\0';
			i = RGBLEN;
			n -= i;
			while(i-- > 0)
			  GF_PUTC_GLO(f->next,
				  (*++s) & 0xff);
		    }
		    break;
		  case TAG_BGCOLOR :
		    /* same as TAG_FGCOLOR, for the background */
		    if(pico_usingcolor() && n >= RGBLEN){
			int i;
			GF_PUTC_GLO(f->next,TAG_EMBED);
			GF_PUTC_GLO(f->next,TAG_BGCOLOR);
			if(!WRAP_COLOR(f))
			  WRAP_COLOR(f)=new_color_pair(NULL,NULL);
			strncpy(WRAP_COLOR(f)->bg, s+1, RGBLEN);
			WRAP_COLOR(f)->bg[RGBLEN]='\0';
			i = RGBLEN;
			n -= i;
			while(i-- > 0)
			  GF_PUTC_GLO(f->next,
				  (*++s) & 0xff);
		    }
		    break;
		  default :
		    /* unrecognized tag byte: nothing is forwarded */
		    break;
		}
	    }
	}
	else if(w){
	    /* ordinary byte: written only while the line fits in WRAP_MAX_COL */

	    if(f->n <= WRAP_MAX_COL(f)){
		GF_PUTC_GLO(f->next, (*s) & 0xff);
	    }
	    else{
		dprint((2, "-- gf_wrap: OVERRUN: %c\n", (*s) & 0xff));
	    }

	    WRAP_ALLWSP(f) = 0;
	}
    }

    return 0;
}
10581 
10582 int
wrap_eol(FILTER_S * f,int c,unsigned char ** ipp,unsigned char ** eibp,unsigned char ** opp,unsigned char ** eobp)10583 wrap_eol(FILTER_S *f, int c, unsigned char **ipp, unsigned char **eibp,
10584 	 unsigned char **opp, unsigned char **eobp)
10585 {
10586     if(WRAP_SAW_SOFT_HYPHEN(f)){
10587 	WRAP_SAW_SOFT_HYPHEN(f) = 0;
10588 	GF_PUTC_GLO(f->next, '-');	/* real hyphen */
10589     }
10590 
10591     if(c && WRAP_LV_FLD(f))
10592       GF_PUTC_GLO(f->next, ' ');
10593 
10594     if(WRAP_BOLD(f)){
10595 	GF_PUTC_GLO(f->next, TAG_EMBED);
10596 	GF_PUTC_GLO(f->next, TAG_BOLDOFF);
10597     }
10598 
10599     if(WRAP_ULINE(f)){
10600 	GF_PUTC_GLO(f->next, TAG_EMBED);
10601 	GF_PUTC_GLO(f->next, TAG_ULINEOFF);
10602     }
10603 
10604     if(WRAP_INVERSE(f) || WRAP_ANCHOR(f)){
10605 	GF_PUTC_GLO(f->next, TAG_EMBED);
10606 	GF_PUTC_GLO(f->next, TAG_INVOFF);
10607     }
10608 
10609     if(WRAP_COLOR_SET(f)){
10610 	char *p;
10611 	char  cb[RGBLEN+1];
10612 	GF_PUTC_GLO(f->next, TAG_EMBED);
10613 	GF_PUTC_GLO(f->next, TAG_FGCOLOR);
10614 	strncpy(cb, color_to_asciirgb(ps_global->VAR_NORM_FORE_COLOR), sizeof(cb));
10615 	cb[sizeof(cb)-1] = '\0';
10616 	p = cb;
10617 	for(; *p; p++)
10618 	  GF_PUTC_GLO(f->next, *p);
10619 	GF_PUTC_GLO(f->next, TAG_EMBED);
10620 	GF_PUTC_GLO(f->next, TAG_BGCOLOR);
10621 	strncpy(cb, color_to_asciirgb(ps_global->VAR_NORM_BACK_COLOR), sizeof(cb));
10622 	cb[sizeof(cb)-1] = '\0';
10623 	p = cb;
10624 	for(; *p; p++)
10625 	  GF_PUTC_GLO(f->next, *p);
10626     }
10627 
10628     GF_PUTC_GLO(f->next, '\015');
10629     GF_PUTC_GLO(f->next, '\012');
10630     f->n = 0L;
10631     so_truncate(WRAP_SPACES(f), 0L);
10632     WRAP_SPC_LEN(f) = 0;
10633     WRAP_TRL_SPC(f) = 0;
10634 
10635     return 0;
10636 }
10637 
10638 int
wrap_bol(FILTER_S * f,int ivar,int q,unsigned char ** ipp,unsigned char ** eibp,unsigned char ** opp,unsigned char ** eobp)10639 wrap_bol(FILTER_S *f, int ivar, int q, unsigned char **ipp, unsigned char **eibp,
10640 	 unsigned char **opp, unsigned char **eobp)
10641 {
10642     int n = WRAP_MARG_L(f) + (ivar ? WRAP_INDENT(f) : 0);
10643 
10644     if(WRAP_HDR_CLR(f)){
10645 	char *p;
10646 	char cbuf[RGBLEN+1];
10647 	int k;
10648 
10649 	if((k = WRAP_MARG_L(f)) > 0)
10650 	  while(k-- > 0){
10651 	      n--;
10652 	      f->n++;
10653 	      GF_PUTC_GLO(f->next, ' ');
10654 	  }
10655 
10656 	GF_PUTC_GLO(f->next, TAG_EMBED);
10657 	GF_PUTC_GLO(f->next, TAG_FGCOLOR);
10658 	strncpy(cbuf,
10659 		color_to_asciirgb(ps_global->VAR_HEADER_GENERAL_FORE_COLOR),
10660 		sizeof(cbuf));
10661 	cbuf[sizeof(cbuf)-1] = '\0';
10662 	p = cbuf;
10663 	for(; *p; p++)
10664 	  GF_PUTC_GLO(f->next, *p);
10665 	GF_PUTC_GLO(f->next, TAG_EMBED);
10666 	GF_PUTC_GLO(f->next, TAG_BGCOLOR);
10667 	strncpy(cbuf,
10668 		color_to_asciirgb(ps_global->VAR_HEADER_GENERAL_BACK_COLOR),
10669 		sizeof(cbuf));
10670 	cbuf[sizeof(cbuf)-1] = '\0';
10671 	p = cbuf;
10672 	for(; *p; p++)
10673 	  GF_PUTC_GLO(f->next, *p);
10674     }
10675 
10676     while(n-- > 0){
10677 	f->n++;
10678 	GF_PUTC_GLO(f->next, ' ');
10679     }
10680 
10681     WRAP_ALLWSP(f) = 1;
10682 
10683     if(q)
10684       wrap_quote_insert(f, ipp, eibp, opp, eobp);
10685 
10686     if(WRAP_BOLD(f)){
10687 	GF_PUTC_GLO(f->next, TAG_EMBED);
10688 	GF_PUTC_GLO(f->next, TAG_BOLDON);
10689     }
10690     if(WRAP_ULINE(f)){
10691 	GF_PUTC_GLO(f->next, TAG_EMBED);
10692 	GF_PUTC_GLO(f->next, TAG_ULINEON);
10693     }
10694     if(WRAP_INVERSE(f)){
10695 	GF_PUTC_GLO(f->next, TAG_EMBED);
10696 	GF_PUTC_GLO(f->next, TAG_INVON);
10697     }
10698     if(WRAP_COLOR_SET(f)){
10699 	char *p;
10700 	if(WRAP_COLOR(f)->fg[0]){
10701 	    char cb[RGBLEN+1];
10702 	    GF_PUTC_GLO(f->next, TAG_EMBED);
10703 	    GF_PUTC_GLO(f->next, TAG_FGCOLOR);
10704 	    strncpy(cb, color_to_asciirgb(WRAP_COLOR(f)->fg), sizeof(cb));
10705 	    cb[sizeof(cb)-1] = '\0';
10706 	    p = cb;
10707 	    for(; *p; p++)
10708 	      GF_PUTC_GLO(f->next, *p);
10709 	}
10710 	if(WRAP_COLOR(f)->bg[0]){
10711 	    char cb[RGBLEN+1];
10712 	    GF_PUTC_GLO(f->next, TAG_EMBED);
10713 	    GF_PUTC_GLO(f->next, TAG_BGCOLOR);
10714 	    strncpy(cb, color_to_asciirgb(WRAP_COLOR(f)->bg), sizeof(cb));
10715 	    cb[sizeof(cb)-1] = '\0';
10716 	    p = cb;
10717 	    for(; *p; p++)
10718 	      GF_PUTC_GLO(f->next, *p);
10719 	}
10720     }
10721     if(WRAP_ANCHOR(f)){
10722 	char buf[64]; int i;
10723 	GF_PUTC_GLO(f->next, TAG_EMBED);
10724 	GF_PUTC_GLO(f->next, TAG_HANDLE);
10725 	snprintf(buf, sizeof(buf), "%d", WRAP_ANCHOR(f));
10726 	GF_PUTC_GLO(f->next, (int) strlen(buf));
10727 	for(i = 0; buf[i]; i++)
10728 	  GF_PUTC_GLO(f->next, buf[i]);
10729     }
10730 
10731     return 0;
10732 }
10733 
10734 int
wrap_quote_insert(FILTER_S * f,unsigned char ** ipp,unsigned char ** eibp,unsigned char ** opp,unsigned char ** eobp)10735 wrap_quote_insert(FILTER_S *f, unsigned char **ipp, unsigned char **eibp,
10736 		  unsigned char **opp, unsigned char **eobp)
10737 {
10738     int j, i;
10739     COLOR_PAIR *col = NULL;
10740     char *prefix = NULL, *last_prefix = NULL;
10741 
10742     if(ps_global->VAR_QUOTE_REPLACE_STRING){
10743 	get_pair(ps_global->VAR_QUOTE_REPLACE_STRING, &prefix, &last_prefix, 0, 0);
10744 	if(!prefix && last_prefix){
10745 	    prefix = last_prefix;
10746 	    last_prefix = NULL;
10747 	}
10748     }
10749 
10750     for(j = 0; j < WRAP_FL_QD(f); j++){
10751 	if(WRAP_USE_CLR(f)){
10752 	    if((j % 3) == 0
10753 	       && ps_global->VAR_QUOTE1_FORE_COLOR
10754 	       && ps_global->VAR_QUOTE1_BACK_COLOR
10755 	       && (col = new_color_pair(ps_global->VAR_QUOTE1_FORE_COLOR,
10756 					ps_global->VAR_QUOTE1_BACK_COLOR))
10757 	       && pico_is_good_colorpair(col)){
10758                 GF_COLOR_PUTC(f, col);
10759             }
10760 	    else if((j % 3) == 1
10761 		    && ps_global->VAR_QUOTE2_FORE_COLOR
10762 		    && ps_global->VAR_QUOTE2_BACK_COLOR
10763 		    && (col = new_color_pair(ps_global->VAR_QUOTE2_FORE_COLOR,
10764 					     ps_global->VAR_QUOTE2_BACK_COLOR))
10765 		    && pico_is_good_colorpair(col)){
10766 	        GF_COLOR_PUTC(f, col);
10767             }
10768 	    else if((j % 3) == 2
10769 		    && ps_global->VAR_QUOTE3_FORE_COLOR
10770 		    && ps_global->VAR_QUOTE3_BACK_COLOR
10771 		    && (col = new_color_pair(ps_global->VAR_QUOTE3_FORE_COLOR,
10772 					     ps_global->VAR_QUOTE3_BACK_COLOR))
10773 		    && pico_is_good_colorpair(col)){
10774 	        GF_COLOR_PUTC(f, col);
10775             }
10776 	    if(col){
10777 		free_color_pair(&col);
10778 		col = NULL;
10779 	    }
10780 	}
10781 
10782 	if(!WRAP_LV_FLD(f)){
10783 	    if(!WRAP_FOR_CMPS(f) && ps_global->VAR_QUOTE_REPLACE_STRING && prefix){
10784 		for(i = 0; prefix[i]; i++)
10785 		  GF_PUTC_GLO(f->next, prefix[i]);
10786 		f->n += utf8_width(prefix);
10787 	    }
10788 	    else if(ps_global->VAR_REPLY_STRING
10789 		    && (!strcmp(ps_global->VAR_REPLY_STRING, ">")
10790 			|| !strcmp(ps_global->VAR_REPLY_STRING, "\">\""))){
10791 		GF_PUTC_GLO(f->next, '>');
10792 		f->n += 1;
10793 	    }
10794 	    else{
10795 		GF_PUTC_GLO(f->next, '>');
10796 		GF_PUTC_GLO(f->next, ' ');
10797 		f->n += 2;
10798 	    }
10799 	}
10800 	else{
10801 	    GF_PUTC_GLO(f->next, '>');
10802 	    f->n += 1;
10803 	}
10804     }
10805     if(j && WRAP_LV_FLD(f)){
10806 	GF_PUTC_GLO(f->next, ' ');
10807 	f->n++;
10808     }
10809     else if(j && last_prefix){
10810 	for(i = 0; last_prefix[i]; i++)
10811 	  GF_PUTC_GLO(f->next, last_prefix[i]);
10812 	f->n += utf8_width(last_prefix);
10813     }
10814 
10815     if(prefix)
10816       fs_give((void **)&prefix);
10817     if(last_prefix)
10818       fs_give((void **)&last_prefix);
10819 
10820     return 0;
10821 }
10822 
10823 
10824 /*
10825  * function called from the outside to set
10826  * wrap filter's width option
10827  */
10828 void *
gf_wrap_filter_opt(int width,int width_max,int * margin,int indent,int flags)10829 gf_wrap_filter_opt(int width, int width_max, int *margin, int indent, int flags)
10830 {
10831     WRAP_S *wrap;
10832 
10833     /* NOTE: variables MUST be sanity checked before they get here */
10834     wrap = (WRAP_S *) fs_get(sizeof(WRAP_S));
10835     memset(wrap, 0, sizeof(WRAP_S));
10836     wrap->wrap_col     = width;
10837     wrap->wrap_max     = width_max;
10838     wrap->indent       = indent;
10839     wrap->margin_l     = (margin) ? margin[0] : 0;
10840     wrap->margin_r     = (margin) ? margin[1] : 0;
10841     wrap->tags	       = (GFW_HANDLES & flags) == GFW_HANDLES;
10842     wrap->on_comma     = (GFW_ONCOMMA & flags) == GFW_ONCOMMA;
10843     wrap->flowed       = (GFW_FLOWED & flags) == GFW_FLOWED;
10844     wrap->leave_flowed = (GFW_FLOW_RESULT & flags) == GFW_FLOW_RESULT;
10845     wrap->delsp	       = (GFW_DELSP & flags) == GFW_DELSP;
10846     wrap->use_color    = (GFW_USECOLOR & flags) == GFW_USECOLOR;
10847     wrap->hdr_color    = (GFW_HDRCOLOR & flags) == GFW_HDRCOLOR;
10848     wrap->for_compose  = (GFW_FORCOMPOSE & flags) == GFW_FORCOMPOSE;
10849     wrap->handle_soft_hyphen = (GFW_SOFTHYPHEN & flags) == GFW_SOFTHYPHEN;
10850 
10851     return((void *) wrap);
10852 }
10853 
10854 
10855 void *
gf_url_hilite_opt(URL_HILITE_S * uh,HANDLE_S ** handlesp,int flags)10856 gf_url_hilite_opt(URL_HILITE_S *uh, HANDLE_S **handlesp, int flags)
10857 {
10858     if(uh){
10859 	memset(uh, 0, sizeof(URL_HILITE_S));
10860 	uh->handlesp  = handlesp;
10861 	uh->hdr_color = (URH_HDRCOLOR & flags) == URH_HDRCOLOR;
10862     }
10863 
10864     return((void *) uh);
10865 }
10866 
10867 
/*
 * Accessors for the PREFLOW_S hung off a filter's opt pointer.
 * Only valid on a filter whose opt was set up by gf_preflow's
 * GF_RESET handling.
 */
#define	PF_QD(F)	(((PREFLOW_S *)(F)->opt)->quote_depth)
#define	PF_QC(F)	(((PREFLOW_S *)(F)->opt)->quote_count)
#define	PF_SIG(F)	(((PREFLOW_S *)(F)->opt)->sig)

/*
 * Per-filter state kept by gf_preflow between calls.
 */
typedef struct preflow_s {
    int		quote_depth,	/* quote count recorded for the previous line */
		quote_count,	/* '>' characters counted on the current line */
		sig;		/* progress matching "-- " CR at line start (0-4) */
} PREFLOW_S;
10877 
/*
 * This would normally be handled in gf_wrap. If there is a possibility
 * that a url we want to recognize is cut in half by a soft newline we
 * want to fix that up by putting the halves back together. We do that
 * by deleting the soft newline and putting it all in one line. It will
 * still get wrapped later in gf_wrap. It isn't pretty with all the
 * goto's, but whatta ya gonna do?
 *
 * State carried between calls:
 *   f->f1    current state (DFL, CCR, WSPACE, SPACECR, BOL, FL_QLEV,
 *            FL_STF or FL_SIG)
 *   f->f2    pending line break: 0 = none, 1 = CRLF seen, 2 = space
 *            followed by CRLF seen (a soft newline)
 *   f->opt   PREFLOW_S allocated on GF_RESET and freed on GF_EOD
 *
 * A pending line break is only resolved once the start of the next
 * line (quote marks, stuffed space, possible "-- " signature) has
 * been examined in the BOL/FL_* states.
 */
void
gf_preflow(FILTER_S *f, int flg)
{
    GF_INIT(f, f->next);

    if(flg == GF_DATA){
	register unsigned char c;
	register int state  = f->f1;
	register int pending = f->f2;

	while(GF_GETC(f, c)){
	    switch(state){
	      case DFL:
default_case:
		/* hold back spaces and CRs, pass everything else along */
		switch(c){
		  case ' ':
		    state = WSPACE;
		    break;

		  case '\015':
		    state = CCR;
		    break;

		  default:
		    GF_PUTC(f->next, c);
		    break;
		}

	        break;

	      case CCR:			/* saw CR, no space before it */
		switch(c){
		  case '\012':
		    pending = 1;
		    state = BOL;
		    break;

		  default:
		    /*
		     * NOTE(review): the held CR is forwarded as '\012'
		     * here, not '\015' -- confirm this is intended.
		     */
		    GF_PUTC(f->next, '\012');
		    state = DFL;
		    goto default_case;
		    break;
		}

	        break;

	      case WSPACE:		/* holding one space */
		switch(c){
		  case '\015':
		    state = SPACECR;
		    break;

		  default:
		    /* not at end of line after all, release the space */
		    GF_PUTC(f->next, ' ');
		    state = DFL;
		    goto default_case;
		    break;
		}

	        break;

	      case SPACECR:		/* holding a space and a CR */
		switch(c){
		  case '\012':
		    pending = 2;
		    state = BOL;
		    break;

		  default:
		    /* NOTE(review): CR forwarded as '\012' here too -- confirm */
		    GF_PUTC(f->next, ' ');
		    GF_PUTC(f->next, '\012');
		    state = DFL;
		    goto default_case;
		    break;
		}

	        break;

	      case BOL:
		PF_QC(f) = 0;
		if(c == '>'){		/* count quote level */
		    PF_QC(f)++;
		    state = FL_QLEV;
		}
		else{
done_counting_quotes:
		    if(c == ' '){	/* eat stuffed space */
			state = FL_STF;
			break;
		    }

done_with_stuffed_space:
		    if(c == '-'){	/* look for signature */
			PF_SIG(f) = 1;
			state = FL_SIG;
			break;
		    }

done_with_sig:
		    /* now decide what becomes of the pending line break */
		    if(pending == 2){
			if(PF_QD(f) == PF_QC(f) && PF_SIG(f) < 4){
			    /* delete pending */

			    PF_QD(f) = PF_QC(f);

			    /* suppress quotes, too */
			    PF_QC(f) = 0;
			}
			else{
			    /*
			     * This should have been a hard new line
			     * instead so leave out the trailing space.
			     */
			    GF_PUTC(f->next, '\015');
			    GF_PUTC(f->next, '\012');

			    PF_QD(f) = PF_QC(f);
			}
		    }
		    else if(pending == 1){
			/* CRLF without a preceding space: keep the line break */
			GF_PUTC(f->next, '\015');
			GF_PUTC(f->next, '\012');
			PF_QD(f) = PF_QC(f);
		    }
		    else{
			PF_QD(f) = PF_QC(f);
		    }

		    pending = 0;
		    state = DFL;

		    /* write out the quote marks that were counted */
		    while(PF_QC(f)-- > 0)
		      GF_PUTC(f->next, '>');

		    /* and whatever part of "-- " was swallowed looking for a sig */
		    switch(PF_SIG(f)){
		      case 0:
		      default:
		        break;

		      case 1:
			GF_PUTC(f->next, '-');
		        break;

		      case 2:
			GF_PUTC(f->next, '-');
			GF_PUTC(f->next, '-');
		        break;

		      case 3:
		      case 4:
			GF_PUTC(f->next, '-');
			GF_PUTC(f->next, '-');
			GF_PUTC(f->next, ' ');
		        break;
		    }

		    PF_SIG(f) = 0;
		    goto default_case;		/* to handle c */
		}

		break;

	      case FL_QLEV:		/* count quote level */
		if(c == '>')
		  PF_QC(f)++;
		else
		  goto done_counting_quotes;

		break;

	      case FL_STF:		/* eat stuffed space */
		/* the space itself was dropped; c is the character after it */
		goto done_with_stuffed_space;
	        break;

	      case FL_SIG:		/* deal with sig indicator */
		switch(PF_SIG(f)){
		  case 1:		/* saw '-' */
		    if(c == '-')
		      PF_SIG(f) = 2;
		    else
		      goto done_with_sig;

		    break;

		  case 2:		/* saw '--' */
		    if(c == ' ')
		      PF_SIG(f) = 3;
		    else
		      goto done_with_sig;

		    break;

		  case 3:		/* saw '-- ' */
		    if(c == '\015')
		      PF_SIG(f) = 4;	/* it really is a sig line */

		    goto done_with_sig;
		    break;
		}

	        break;
	    }
	}

	/* remember where we were for the next chunk of data */
	f->f1 = state;
	f->f2 = pending;
	GF_END(f, f->next);
    }
    else if(flg == GF_EOD){
	/* release our state and pass end-of-data down the pipe */
	fs_give((void **) &f->opt);
	(void) GF_FLUSH(f->next);
	(*f->next->f)(f->next, GF_EOD);
    }
    else if(flg == GF_RESET){
	PREFLOW_S *pf;

	pf = (PREFLOW_S *) fs_get(sizeof(*pf));
	memset(pf, 0, sizeof(*pf));
	f->opt = (void *) pf;

	f->f1     = BOL;	/* state */
	f->f2     = 0;		/* pending */
	PF_QD(f)  = 0;		/* quote depth */
	PF_QC(f)  = 0;		/* quote count */
	PF_SIG(f) = 0;		/* sig level */
    }
}
11112 
11113 
11114 
11115 
11116 /*
11117  * LINE PREFIX FILTER - insert given text at beginning of each
11118  * line
11119  */
11120 
11121 
/*
 * GF_PREFIX_WRITE - write the NUL-terminated string s, if it is not
 * NULL, to the next filter one character at a time.
 *
 * Refers to a variable named f from the scope it is expanded in, and
 * expands to a brace-enclosed block rather than an expression.
 */
#define	GF_PREFIX_WRITE(s)	{ \
				    register char *p; \
				    if((p = (s)) != NULL) \
				      while(*p) \
					GF_PUTC(f->next, *p++); \
				}
11128 
11129 
11130 /*
11131  * the simple filter, prepends each line with the requested prefix.
11132  * if prefix is null, does nothing, and as with all filters, assumes
11133  * NVT end of lines.
11134  */
11135 void
gf_prefix(FILTER_S * f,int flg)11136 gf_prefix(FILTER_S *f, int flg)
11137 {
11138     GF_INIT(f, f->next);
11139 
11140     if(flg == GF_DATA){
11141 	register unsigned char c;
11142 	register int state = f->f1;
11143 	register int first = f->f2;
11144 
11145 	while(GF_GETC(f, c)){
11146 
11147 	    if(first){				/* write initial prefix!! */
11148 		first = 0;			/* but just once */
11149 		GF_PREFIX_WRITE((char *) f->opt);
11150 	    }
11151 
11152 	    /*
11153 	     * State == 0 is the starting state and the usual state.
11154 	     * State == 1 means we saw a CR and haven't acted on it yet.
11155 	     * We are looking for a LF to get the CRLF end of line.
11156 	     * However, we also treat bare CR and bare LF as if they
11157 	     * were CRLF sequences. What else could it mean in text?
11158 	     * This filter is only used for text so that is probably
11159 	     * a reasonable interpretation of the bad input.
11160 	     */
11161 	    if(c == '\015'){		/* CR */
11162 		if(state){			/* Treat pending CR as endofline, */
11163 		    GF_PUTC(f->next, '\015');	/* and remain in saw-a-CR state.  */
11164 		    GF_PUTC(f->next, '\012');
11165 		    GF_PREFIX_WRITE((char *) f->opt);
11166 		}
11167 		else{
11168 		    state = 1;
11169 		}
11170 	    }
11171 	    else if(c == '\012'){	/* LF */
11172 		GF_PUTC(f->next, '\015');	/* Got either a CRLF or a bare LF, */
11173 		GF_PUTC(f->next, '\012');	/* treat both as if a CRLF.    */
11174 		GF_PREFIX_WRITE((char *) f->opt);
11175 		state = 0;
11176 	    }
11177 	    else{			/* any other character */
11178 		if(state){
11179 		    GF_PUTC(f->next, '\015');	/* Treat pending CR as endofline. */
11180 		    GF_PUTC(f->next, '\012');
11181 		    GF_PREFIX_WRITE((char *) f->opt);
11182 		    state = 0;
11183 		}
11184 
11185 		GF_PUTC(f->next, c);
11186 	    }
11187 	}
11188 
11189 	f->f1 = state;			/* save state for next chunk of data */
11190 	f->f2 = first;
11191 	GF_END(f, f->next);
11192     }
11193     else if(flg == GF_EOD){
11194 	(void) GF_FLUSH(f->next);
11195 	(*f->next->f)(f->next, GF_EOD);
11196     }
11197     else if(flg == GF_RESET){
11198 	dprint((9, "-- gf_reset prefix\n"));
11199 	f->f1   = 0;
11200 	f->f2   = 1;			/* nothing written yet */
11201     }
11202 }
11203 
11204 
/*
 * gf_prefix_opt - package the prefix string for the prefix filter.
 *
 * The string is neither copied nor examined; the very pointer passed
 * in is what comes back, as a void pointer.
 */
void *
gf_prefix_opt(char *prefix)
{
    void *opt = (void *) prefix;

    return(opt);
}
11214 
11215 
11216 /*
11217  * LINE TEST FILTER - accumulate lines and offer each to the provided
11218  * test function.
11219  */
11220 
/*
 * Option block for gf_line_test, built by gf_line_test_opt.
 */
typedef struct _linetest_s {
    linetest_t	f;		/* test function offered each line */
    void       *local;		/* passed untouched to f as its last argument */
} LINETEST_S;
11225 
11226 
/* accumulator growth increment */
#define	LINE_TEST_BLOCK	1024

/*
 * Address of the last byte of the line accumulator, whose allocated
 * size is kept in f->f2.
 */
#define	GF_LINE_TEST_EOB(f) \
			((f)->line + ((f)->f2 - 1))

/*
 * Append c to the line accumulator, growing it by LINE_TEST_BLOCK
 * bytes when the write position has reached the last byte.
 *
 * Relies on the variables p (write position) and eobuf (last byte of
 * the buffer) from the scope it is expanded in, and re-bases both
 * after the buffer has been resized.
 */
#define	GF_LINE_TEST_ADD(f, c) \
			{ \
				    if(p >= eobuf){ \
					f->f2 += LINE_TEST_BLOCK; \
					fs_resize((void **)&f->line, \
					      (size_t) f->f2 * sizeof(char)); \
					eobuf = GF_LINE_TEST_EOB(f); \
					p = eobuf - LINE_TEST_BLOCK; \
				    } \
				    *p++ = c; \
				}

/*
 * Terminate the accumulated line at p, offer it (along with the line
 * number, which is then incremented) to the test function in F's
 * LINETEST_S, and leave the function's result in D.
 *
 *   D < 0    the line buffer and option block are freed and gf_error
 *            is called; per the note below it does not return
 *   D < 2    the line is written to the next filter.  Each entry in
 *            the insertion list the test function handed back is
 *            applied where the read position equals its "where":
 *            a positive len writes that many bytes of its text, a
 *            negative len skips that many bytes of the line.  Entries
 *            left over once the line is exhausted are written after
 *            it, and the list is then freed.
 *   D >= 2   nothing is written
 *
 * The line's CRLF is not written here.  Relies on the variable p from
 * the scope it is expanded in.
 */
#define	GF_LINE_TEST_TEST(F, D) \
			{ \
			    unsigned char  c; \
			    register char *cp; \
			    register int   l; \
			    LT_INS_S	  *ins = NULL, *insp; \
			    *p = '\0'; \
			    (D) = (*((LINETEST_S *) (F)->opt)->f)((F)->n++, \
					   (F)->line, &ins, \
					   ((LINETEST_S *) (F)->opt)->local); \
			    if((D) < 2){ \
				if((D) < 0){ \
				    if((F)->line) \
				      fs_give((void **) &(F)->line); \
				    if((F)->opt) \
				      fs_give((void **) &(F)->opt); \
				    gf_error(_("translation error")); \
				    /* NO RETURN */ \
				} \
				for(insp = ins, cp = (F)->line; cp < p; ){ \
				  if(insp && cp == insp->where){ \
				    if(insp->len > 0){ \
					for(l = 0; l < insp->len; l++){ \
					  c = (unsigned char) insp->text[l]; \
					  GF_PUTC((F)->next, c);  \
					}  \
					insp = insp->next; \
					continue; \
				    } else if(insp->len < 0){ \
					cp -= insp->len; \
					insp = insp->next; \
					continue; \
				    } \
				  } \
				  GF_PUTC((F)->next, *cp); \
				  cp++; \
				} \
				while(insp){ \
				    for(l = 0; l < insp->len; l++){ \
					c = (unsigned char) insp->text[l]; \
					GF_PUTC((F)->next, c); \
				    } \
				    insp = insp->next; \
				} \
				gf_line_test_free_ins(&ins); \
			    } \
			}
11292 
11293 
11294 
11295 /*
11296  * this simple filter accumulates characters until a newline, offers it
11297  * to the provided test function, and then passes it on.  It assumes
11298  * NVT EOLs.
11299  */
11300 void
gf_line_test(FILTER_S * f,int flg)11301 gf_line_test(FILTER_S *f, int flg)
11302 {
11303     register char *p = f->linep;
11304     register char *eobuf = GF_LINE_TEST_EOB(f);
11305     GF_INIT(f, f->next);
11306 
11307     if(flg == GF_DATA){
11308 	register unsigned char c;
11309 	register int state = f->f1;
11310 
11311 	while(GF_GETC(f, c)){
11312 
11313 	    if(state){
11314 		state = 0;
11315 		if(c == '\012'){
11316 		    int done;
11317 
11318 		    GF_LINE_TEST_TEST(f, done);
11319 
11320 		    p = (f)->line;
11321 
11322 		    if(done == 2)	/* skip this line! */
11323 		      continue;
11324 
11325 		    GF_PUTC(f->next, '\015');
11326 		    GF_PUTC(f->next, '\012');
11327 		    /*
11328 		     * if the line tester returns TRUE, it's
11329 		     * telling us its seen enough and doesn't
11330 		     * want to see any more.  Remove ourself
11331 		     * from the pipeline...
11332 		     */
11333 		    if(done){
11334 			if(gf_master == f){
11335 			    gf_master = f->next;
11336 			}
11337 			else{
11338 			    FILTER_S *fprev;
11339 
11340 			    for(fprev = gf_master;
11341 				fprev && fprev->next != f;
11342 				fprev = fprev->next)
11343 			      ;
11344 
11345 			    if(fprev)		/* wha??? */
11346 			      fprev->next = f->next;
11347 			    else
11348 			      continue;
11349 			}
11350 
11351 			while(GF_GETC(f, c))	/* pass input */
11352 			  GF_PUTC(f->next, c);
11353 
11354 			(void) GF_FLUSH(f->next);	/* and drain queue */
11355 			fs_give((void **)&f->line);
11356 			fs_give((void **)&f);	/* wax our data */
11357 			return;
11358 		    }
11359 		    else
11360 		      continue;
11361 		}
11362 		else			/* add CR to buffer */
11363 		  GF_LINE_TEST_ADD(f, '\015');
11364 	    } /* fall thru to handle 'c' */
11365 
11366 	    if(c == '\015')		/* newline? */
11367 	      state = 1;
11368 	    else
11369 	      GF_LINE_TEST_ADD(f, c);
11370 	}
11371 
11372 	f->f1 = state;
11373 	GF_END(f, f->next);
11374     }
11375     else if(flg == GF_EOD){
11376 	int i;
11377 
11378 	GF_LINE_TEST_TEST(f, i);	/* examine remaining data */
11379 	fs_give((void **) &f->line);	/* free line buffer */
11380 	fs_give((void **) &f->opt);	/* free test struct */
11381 	(void) GF_FLUSH(f->next);
11382 	(*f->next->f)(f->next, GF_EOD);
11383     }
11384     else if(flg == GF_RESET){
11385 	dprint((9, "-- gf_reset line_test\n"));
11386 	f->f1 = 0;			/* state */
11387 	f->n  = 0L;			/* line number */
11388 	f->f2 = LINE_TEST_BLOCK;	/* size of alloc'd line */
11389 	f->line = p = (char *) fs_get(f->f2 * sizeof(char));
11390     }
11391 
11392     f->linep = p;
11393 }
11394 
11395 
11396 /*
11397  * function called from the outside to operate on accumulated line.
11398  */
11399 void *
gf_line_test_opt(linetest_t test_f,void * local)11400 gf_line_test_opt(linetest_t test_f, void *local)
11401 {
11402     LINETEST_S *ltp;
11403 
11404     ltp = (LINETEST_S *) fs_get(sizeof(LINETEST_S));
11405     memset(ltp, 0, sizeof(LINETEST_S));
11406     ltp->f     = test_f;
11407     ltp->local = local;
11408     return((void *) ltp);
11409 }
11410 
11411 
11412 
11413 LT_INS_S **
gf_line_test_new_ins(LT_INS_S ** ins,char * p,char * s,int n)11414 gf_line_test_new_ins(LT_INS_S **ins, char *p, char *s, int n)
11415 {
11416     *ins = (LT_INS_S *) fs_get(sizeof(LT_INS_S));
11417     if(((*ins)->len = n) > 0)
11418       strncpy((*ins)->text = (char *) fs_get(n * sizeof(char)), s, n);
11419     else
11420       (*ins)->text = NULL;
11421 
11422     (*ins)->where = p;
11423     (*ins)->next  = NULL;
11424     return(&(*ins)->next);
11425 }
11426 
11427 
11428 void
gf_line_test_free_ins(LT_INS_S ** ins)11429 gf_line_test_free_ins(LT_INS_S **ins)
11430 {
11431     if(ins && *ins){
11432 	if((*ins)->next)
11433 	  gf_line_test_free_ins(&(*ins)->next);
11434 
11435 	if((*ins)->text)
11436 	  fs_give((void **) &(*ins)->text);
11437 
11438 	fs_give((void **) ins);
11439     }
11440 }
11441 
11442 
11443 /*
11444  * PREPEND EDITORIAL FILTER - conditionally prepend output text
11445  *                            with editorial comment
11446  */
11447 
/*
 * Option block for gf_prepend_editorial, built by
 * gf_prepend_editorial_opt.
 */
typedef struct _preped_s {
    prepedtest_t  f;		/* if set, text is written only when this returns non-zero */
    char	 *text;		/* text written ahead of the accumulated data */
} PREPED_S;
11452 
11453 
11454 /*
11455  * gf_prepend_editorial - accumulate filtered text and prepend its
11456  *                        output with given text
11457  *
11458  *
11459  */
11460 void
gf_prepend_editorial(FILTER_S * f,int flg)11461 gf_prepend_editorial(FILTER_S *f, int flg)
11462 {
11463     GF_INIT(f, f->next);
11464 
11465     if(flg == GF_DATA){
11466 	register unsigned char c;
11467 
11468 	while(GF_GETC(f, c)){
11469 	    so_writec(c, (STORE_S *) f->data);
11470 	}
11471 
11472 	GF_END(f, f->next);
11473     }
11474     else if(flg == GF_EOD){
11475 	unsigned char c;
11476 
11477 	if(!((PREPED_S *)(f)->opt)->f || (*((PREPED_S *)(f)->opt)->f)()){
11478 	    char *p = ((PREPED_S *)(f)->opt)->text;
11479 
11480 	    for( ; p && *p; p++)
11481 	      GF_PUTC(f->next, *p);
11482 	}
11483 
11484 	so_seek((STORE_S *) f->data, 0L, 0);
11485 	while(so_readc(&c, (STORE_S *) f->data)){
11486 	    GF_PUTC(f->next, c);
11487 	}
11488 
11489 	so_give((STORE_S **) &f->data);
11490 	fs_give((void **) &f->opt);
11491 	(void) GF_FLUSH(f->next);
11492 	(*f->next->f)(f->next, GF_EOD);
11493     }
11494     else if(flg == GF_RESET){
11495 	dprint((9, "-- gf_reset line_test\n"));
11496 	f->data = (void *) so_get(CharStar, NULL, EDIT_ACCESS);
11497     }
11498 }
11499 
11500 
11501 /*
11502  * function called from the outside to setup prepending editorial
11503  * to output text
11504  */
11505 void *
gf_prepend_editorial_opt(prepedtest_t test_f,char * text)11506 gf_prepend_editorial_opt(prepedtest_t test_f, char *text)
11507 {
11508     PREPED_S *pep;
11509 
11510     pep = (PREPED_S *) fs_get(sizeof(PREPED_S));
11511     memset(pep, 0, sizeof(PREPED_S));
11512     pep->f    = test_f;
11513     pep->text = text;
11514     return((void *) pep);
11515 }
11516 
11517 
11518 /*
11519  * Network virtual terminal to local newline convention filter
11520  */
11521 void
gf_nvtnl_local(FILTER_S * f,int flg)11522 gf_nvtnl_local(FILTER_S *f, int flg)
11523 {
11524     GF_INIT(f, f->next);
11525 
11526     if(flg == GF_DATA){
11527 	register unsigned char c;
11528 	register int state = f->f1;
11529 
11530 	while(GF_GETC(f, c)){
11531 	    if(state){
11532 		state = 0;
11533 		if(c == '\012'){
11534 		    GF_PUTC(f->next, '\012');
11535 		    continue;
11536 		}
11537 		else
11538 		  GF_PUTC(f->next, '\015');
11539 		/* fall thru to deal with 'c' */
11540 	    }
11541 
11542 	    if(c == '\015')
11543 	      state = 1;
11544 	    else
11545 	      GF_PUTC(f->next, c);
11546 	}
11547 
11548 	f->f1 = state;
11549 	GF_END(f, f->next);
11550     }
11551     else if(flg == GF_EOD){
11552 	(void) GF_FLUSH(f->next);
11553 	(*f->next->f)(f->next, GF_EOD);
11554     }
11555     else if(flg == GF_RESET){
11556 	dprint((9, "-- gf_reset nvtnl_local\n"));
11557 	f->f1 = 0;
11558     }
11559 }
11560 
11561 
11562 /*
11563  * local to network newline convention filter
11564  */
11565 void
gf_local_nvtnl(FILTER_S * f,int flg)11566 gf_local_nvtnl(FILTER_S *f, int flg)
11567 {
11568     GF_INIT(f, f->next);
11569 
11570     if(flg == GF_DATA){
11571 	register unsigned char c;
11572 
11573 	while(GF_GETC(f, c)){
11574 	    if(c == '\012'){
11575 		GF_PUTC(f->next, '\015');
11576 		GF_PUTC(f->next, '\012');
11577 	    }
11578 	    else if(c != '\015')   /* do not copy isolated \015 into source */
11579 	      GF_PUTC(f->next, c);
11580 	}
11581 
11582 	GF_END(f, f->next);
11583     }
11584     else if(flg == GF_EOD){
11585 	(void) GF_FLUSH(f->next);
11586 	(*f->next->f)(f->next, GF_EOD);
11587     }
11588     else if(GF_RESET){
11589 	dprint((9, "-- gf_reset local_nvtnl\n"));
11590 	/* no op */
11591     }
11592 
11593 }
11594 
11595 void
free_filter_module_globals(void)11596 free_filter_module_globals(void)
11597 {
11598     FILTER_S *flt, *fltn = gf_master;
11599 
11600     while((flt = fltn) != NULL){	/* free list of old filters */
11601 	fltn = flt->next;
11602 	fs_give((void **)&flt);
11603     }
11604 }
11605