1 #if !defined(lint) && !defined(DOS)
2 static char rcsid[] = "$Id: filter.c 1266 2009-07-14 18:39:12Z hubert@u.washington.edu $";
3 #endif
4
5 /*
6 * ========================================================================
7 * Copyright 2013-2021 Eduardo Chappa
8 * Copyright 2006-2008 University of Washington
9 *
10 * Licensed under the Apache License, Version 2.0 (the "License");
11 * you may not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
13 *
14 * http://www.apache.org/licenses/LICENSE-2.0
15 *
16 * ========================================================================
17 */
18
19 /*======================================================================
20 filter.c
21
22 This code provides a generalized, flexible way to allow
23 piping of data thru filters. Each filter is passed a structure
24 that it will use to hold its static data while it operates on
25 the stream of characters that are passed to it. After processing
26 it will either return or call the next filter in
27 the pipe with any character (or characters) it has ready to go. This
28 means some terminal type of filter has to be the last in the
29 chain (i.e., one that writes the passed char someplace, but doesn't
30 call another filter).
31
32 See below for more details.
33
34 The motivation is to handle MIME decoding, richtext conversion,
35 iso_code stripping and anything else that may come down the
36 pike (e.g., PEM) in an elegant fashion. mikes (920811)
37
38 TODO:
39 reasonable error handling
40
41 ====*/
42
43
44 #include "../pith/headers.h"
45 #include "../pith/filter.h"
46 #include "../pith/conf.h"
47 #include "../pith/store.h"
48 #include "../pith/color.h"
49 #include "../pith/escapes.h"
50 #include "../pith/pipe.h"
51 #include "../pith/status.h"
52 #include "../pith/string.h"
53 #include "../pith/util.h"
54 #include "../pith/url.h"
55 #include "../pith/init.h"
56 #include "../pith/help.h"
57 #include "../pico/keydefs.h"
58
59 #ifdef _WINDOWS
60 #include "../pico/osdep/mswin.h"
61 #endif
62
63
64 /*
65 * Internal prototypes
66 */
67 int gf_so_writec(int);
68 int gf_so_readc(unsigned char *);
69 int gf_freadc(unsigned char *);
70 int gf_freadc_locale(unsigned char *);
71 int gf_freadc_getchar(unsigned char *, void *);
72 int gf_fwritec(int);
73 int gf_fwritec_locale(int);
74 #ifdef _WINDOWS
75 int gf_freadc_windows(unsigned char *);
76 #endif /* _WINDOWS */
77 int gf_preadc(unsigned char *);
78 int gf_preadc_locale(unsigned char *);
79 int gf_preadc_getchar(unsigned char *, void *);
80 int gf_pwritec(int);
81 int gf_pwritec_locale(int);
82 int gf_sreadc(unsigned char *);
83 int gf_sreadc_locale(unsigned char *);
84 int gf_sreadc_getchar(unsigned char *, void *);
85 int gf_swritec(int);
86 int gf_swritec_locale(int);
87 void gf_terminal(FILTER_S *, int);
88 void gf_error(char *);
89 char *gf_filter_puts(char *);
90 void gf_filter_eod(void);
91
92 void gf_8bit_put(FILTER_S *, int);
93
94
95
96 /*
97 * System specific options
98 */
99 #ifdef _WINDOWS
100 #define CRLF_NEWLINES
101 #endif
102
103
104 /*
105 * Hooks for callers to adjust behavior
106 */
107 char *(*pith_opt_pretty_var_name)(char *);
108 char *(*pith_opt_pretty_feature_name)(char *, int);
109
110
111 /*
112 * pointer to first function in a pipe, and pointer to last filter
113 */
114 FILTER_S *gf_master = NULL;
115 static gf_io_t last_filter;
116 static char *gf_error_string;
117 static long gf_byte_count;
118 static jmp_buf gf_error_state;
119
120
/*
 * Command flags handed to a filter function as its second argument.
 */
#define	GF_NOOP		0x01
#define	GF_EOD		0x02		/* end of data: flush what's buffered */
#define	GF_DATA		0x04		/* data is waiting in the input queue */
#define	GF_ERROR	0x08
#define	GF_RESET	0x10		/* (re)initialize the filter's state  */


/*
 * State names used by the various filters.  The same numeric values
 * are reused by many different filters.
 */
#define	DFL		0
#define	EQUAL		1
#define	HEX		2
#define	WSPACE		3
#define	CCR		4
#define	CLF		5
#define	TOKEN		6
#define	TAG		7
#define	HANDLE		8
#define	HDATA		9
#define	ESC		10
#define	ESCDOL		11
#define	ESCPAR		12
#define	EUC		13
#define	BOL		14
#define	FL_QLEV		15
#define	FL_STF		16
#define	FL_SIG		17
#define	STOP_DECODING	18
#define	SPACECR		19
151
152
153
/*
 * Queue access macros.
 *
 * These exist to avoid a function call per filtered byte and to keep
 * filter structure dereferences to a minimum.  They operate on local
 * pointer variables that GF_INIT declares in the calling function:
 *
 *	op, eob   -- read position / end of data in the input filter's queue
 *	ip, eib   -- write position / flush limit in the output filter's queue
 *
 * NOTE: "queuein" is the offset at which chars are put INTO a filter's
 * queue, so writing at the next filter's queuein offset is how a filter
 * passes processed data out of itself; "queueout" is the offset at
 * which a filter takes data off its own queue.  That is why GF_INIT
 * initializes op from FI's queueout and ip from FO's queuein.
 *
 * The *_GLO variants do the same work through the pointers-to-pointers
 * ipp and eibp instead of the locals ip and eib.
 */

/* first element and last element of a filter's queue */
#define	GF_QUE_START(F)	(&(F)->queue[0])
#define	GF_QUE_END(F)	(&(F)->queue[GF_MAXBUF - 1])

/* load the working pointers from a filter (NULL when there is no filter) */
#define	GF_IP_INIT(F)	   ip  = (F) ? &(F)->queue[(F)->queuein] : NULL
#define	GF_IP_INIT_GLO(F)  (*ipp)  = (F) ? &(F)->queue[(F)->queuein] : NULL
#define	GF_EIB_INIT(F)	   eib = (F) ? GF_QUE_END(F) : NULL
#define	GF_EIB_INIT_GLO(F) (*eibp) = (F) ? GF_QUE_END(F) : NULL
#define	GF_OP_INIT(F)	   op  = (F) ? &(F)->queue[(F)->queueout] : NULL
#define	GF_EOB_INIT(F)	   eob = (F) ? &(F)->queue[(F)->queuein] : NULL

/* store the working pointers back into the filter as offsets */
#define	GF_IP_END(F)	   (F)->queuein  = ip - GF_QUE_START(F)
#define	GF_IP_END_GLO(F)   (F)->queuein  = (unsigned char *)(*ipp) - (unsigned char *)GF_QUE_START(F)
#define	GF_OP_END(F)	   (F)->queueout = op - GF_QUE_START(F)

/* declare and initialize op/eob from FI and ip/eib from FO */
#define	GF_INIT(FI, FO)	unsigned char *GF_OP_INIT(FI);	 \
			unsigned char *GF_EOB_INIT(FI);	 \
			unsigned char *GF_IP_INIT(FO);	 \
			unsigned char *GF_EIB_INIT(FO);

/* mark F's queue empty; the expression's value is 0 */
#define	GF_CH_RESET(F)	(op = eob = GF_QUE_START(F), \
					    (F)->queueout = (F)->queuein = 0)

/* sync both filters' offsets with the working pointers */
#define	GF_END(FI, FO)	(GF_OP_END(FI), GF_IP_END(FO))

/* hand F what has been queued for it, then reload the write pointers */
#define	GF_FLUSH(F)	((GF_IP_END(F), (*(F)->f)((F), GF_DATA), \
			   GF_IP_INIT(F), GF_EIB_INIT(F)) ? 1 : 0)
#define	GF_FLUSH_GLO(F)	((GF_IP_END_GLO(F), (*(F)->f)((F), GF_DATA), \
			   GF_IP_INIT_GLO(F), GF_EIB_INIT_GLO(F)) ? 1 : 0)

/* append C to F's queue, flushing once the write pointer reaches the limit */
#define	GF_PUTC(F, C)	  ((int)(*ip++ = (C), (ip >= eib) ? GF_FLUSH(F) : 1))
#define	GF_PUTC_GLO(F, C) ((int)(*(*ipp)++ = (C), ((*ipp) >= (*eibp)) ? GF_FLUSH_GLO(F) : 1))
193
/*
 * About the *_GLO macros: they are for use when the big macros are
 * split out into functions (wrap_flush, wrap_eol).  A separate set of
 * macros is needed because the variables ip, eib, op, and eob are set
 * up locally by GF_INIT in the calling function.  To preserve them
 * across the call, pointers to those four variables are passed in, and
 * each such function expects pointer variables named ipp, eibp, opp,
 * and eobp to be present.
 */

/*
 * GF_GETC - take the next char off F's queue into C.  Evaluates to 1
 *           when a char was delivered; when the queue is drained it
 *           resets the queue (GF_CH_RESET) and evaluates to 0.
 */
#define	GF_GETC(F, C)	((op < eob) ? (((C) = *op++), 1) : GF_CH_RESET(F))
205
/*
 * GF_COLOR_PUTC - write an embedded foreground-color tag followed by an
 *		   embedded background-color tag to the filter after F.
 *
 * C points at something with "fg" and "bg" members that
 * color_to_asciirgb() accepts.  Relies on ipp and eibp being in scope
 * (see the *_GLO macros).  Expands to a brace-enclosed block, not an
 * expression.
 */
#define	GF_COLOR_PUTC(F, C) {						  \
			    char *p;					  \
			    char  cb[RGBLEN+1];				  \
			    /* foreground */				  \
			    GF_PUTC_GLO((F)->next, TAG_EMBED);		  \
			    GF_PUTC_GLO((F)->next, TAG_FGCOLOR);	  \
			    strncpy(cb, color_to_asciirgb((C)->fg), sizeof(cb)); \
			    cb[sizeof(cb)-1] = '\0';  /* strncpy may not terminate */ \
			    for(p = cb; *p; p++)			  \
			      GF_PUTC_GLO((F)->next, *p);		  \
			    /* background */				  \
			    GF_PUTC_GLO((F)->next, TAG_EMBED);		  \
			    GF_PUTC_GLO((F)->next, TAG_BGCOLOR);	  \
			    strncpy(cb, color_to_asciirgb((C)->bg), sizeof(cb)); \
			    cb[sizeof(cb)-1] = '\0';			  \
			    for(p = cb; *p; p++)			  \
			      GF_PUTC_GLO((F)->next, *p);		  \
			  }
224
225 /*
226 * Generalized getc and putc routines. provided here so they don't
227 * need to be re-done elsewhere to
228 */
229
230 /*
231 * pointers to objects to be used by the generic getc and putc
232 * functions
233 */
234 static struct gf_io_struct {
235 FILE *file;
236 PIPE_S *pipe;
237 char *txtp;
238 unsigned long n;
239 int flags;
240 CBUF_S cb;
241 } gf_in, gf_out;
242
243 #define GF_SO_STACK struct gf_so_stack
244 static GF_SO_STACK {
245 STORE_S *so;
246 GF_SO_STACK *next;
247 } *gf_so_in, *gf_so_out;
248
249
250
251 /*
252 * Returns 1 if pc will write into a PicoText object, 0 otherwise.
253 *
254 * The purpose of this routine is so that we can avoid setting SIGALARM
255 * when writing into a PicoText object, because that type of object uses
256 * unprotected malloc/free/realloc, which can't be interrupted.
257 */
258 int
pc_is_picotext(gf_io_t pc)259 pc_is_picotext(gf_io_t pc)
260 {
261 return(pc == gf_so_writec && gf_so_out && gf_so_out->so &&
262 gf_so_out->so->src == ExternalText);
263 }
264
265
266
267 /*
268 * setup to use and return a pointer to the generic
269 * getc function
270 */
271 void
gf_set_readc(gf_io_t * gc,void * txt,long unsigned int len,SourceType src,int flags)272 gf_set_readc(gf_io_t *gc, void *txt, long unsigned int len, SourceType src, int flags)
273 {
274 gf_in.n = len;
275 gf_in.flags = flags;
276 gf_in.cb.cbuf[0] = '\0';
277 gf_in.cb.cbufp = gf_in.cb.cbuf;
278 gf_in.cb.cbufend = gf_in.cb.cbuf;
279
280 if(src == FileStar){
281 gf_in.file = (FILE *)txt;
282 fseek(gf_in.file, 0L, 0);
283 #ifdef _WINDOWS
284 *gc = (flags & READ_FROM_LOCALE) ? gf_freadc_windows
285 : gf_freadc;
286 #else /* UNIX */
287 *gc = (flags & READ_FROM_LOCALE) ? gf_freadc_locale
288 : gf_freadc;
289 #endif /* UNIX */
290 }
291 else if(src == PipeStar){
292 gf_in.pipe = (PIPE_S *)txt;
293 *gc = gf_preadc;
294 *gc = (flags & READ_FROM_LOCALE) ? gf_preadc_locale
295 : gf_preadc;
296 }
297 else{
298 gf_in.txtp = (char *)txt;
299 *gc = (flags & READ_FROM_LOCALE) ? gf_sreadc_locale
300 : gf_sreadc;
301 }
302 }
303
304
305 /*
306 * setup to use and return a pointer to the generic
307 * putc function
308 */
309 void
gf_set_writec(gf_io_t * pc,void * txt,long unsigned int len,SourceType src,int flags)310 gf_set_writec(gf_io_t *pc, void *txt, long unsigned int len, SourceType src, int flags)
311 {
312 gf_out.n = len;
313 gf_out.flags = flags;
314 gf_out.cb.cbuf[0] = '\0';
315 gf_out.cb.cbufp = gf_out.cb.cbuf;
316 gf_out.cb.cbufend = gf_out.cb.cbuf;
317
318 if(src == FileStar){
319 gf_out.file = (FILE *)txt;
320 #ifdef _WINDOWS
321 *pc = gf_fwritec;
322 #else /* UNIX */
323 *pc = (flags & WRITE_TO_LOCALE) ? gf_fwritec_locale
324 : gf_fwritec;
325 #endif /* UNIX */
326 }
327 else if(src == PipeStar){
328 gf_out.pipe = (PIPE_S *)txt;
329 *pc = (flags & WRITE_TO_LOCALE) ? gf_pwritec_locale
330 : gf_pwritec;
331 }
332 else{
333 gf_out.txtp = (char *)txt;
334 *pc = (flags & WRITE_TO_LOCALE) ? gf_swritec_locale
335 : gf_swritec;
336 }
337 }
338
339
340 /*
341 * setup to use and return a pointer to the generic
342 * getc function
343 */
344 void
gf_set_so_readc(gf_io_t * gc,STORE_S * so)345 gf_set_so_readc(gf_io_t *gc, STORE_S *so)
346 {
347 GF_SO_STACK *sp = (GF_SO_STACK *) fs_get(sizeof(GF_SO_STACK));
348
349 sp->so = so;
350 sp->next = gf_so_in;
351 gf_so_in = sp;
352 *gc = gf_so_readc;
353 }
354
355
356 void
gf_clear_so_readc(STORE_S * so)357 gf_clear_so_readc(STORE_S *so)
358 {
359 GF_SO_STACK *sp;
360
361 if((sp = gf_so_in) != NULL){
362 if(so == sp->so){
363 gf_so_in = gf_so_in->next;
364 fs_give((void **) &sp);
365 }
366 else
367 alpine_panic("Programmer botch: Can't unstack store readc");
368 }
369 else
370 alpine_panic("Programmer botch: NULL store clearing store readc");
371 }
372
373
374 /*
375 * setup to use and return a pointer to the generic
376 * putc function
377 */
378 void
gf_set_so_writec(gf_io_t * pc,STORE_S * so)379 gf_set_so_writec(gf_io_t *pc, STORE_S *so)
380 {
381 GF_SO_STACK *sp = (GF_SO_STACK *) fs_get(sizeof(GF_SO_STACK));
382
383 sp->so = so;
384 sp->next = gf_so_out;
385 gf_so_out = sp;
386 *pc = gf_so_writec;
387 }
388
389
390 void
gf_clear_so_writec(STORE_S * so)391 gf_clear_so_writec(STORE_S *so)
392 {
393 GF_SO_STACK *sp;
394
395 if((sp = gf_so_out) != NULL){
396 if(so == sp->so){
397 gf_so_out = gf_so_out->next;
398 fs_give((void **) &sp);
399 }
400 else
401 alpine_panic("Programmer botch: Can't unstack store writec");
402 }
403 else
404 alpine_panic("Programmer botch: NULL store clearing store writec");
405 }
406
407
408 /*
409 * put the character to the object previously defined
410 */
411 int
gf_so_writec(int c)412 gf_so_writec(int c)
413 {
414 return(so_writec(c, gf_so_out->so));
415 }
416
417
418 /*
419 * get a character from an object previously defined
420 */
421 int
gf_so_readc(unsigned char * c)422 gf_so_readc(unsigned char *c)
423 {
424 return(so_readc(c, gf_so_in->so));
425 }
426
427
428 /* get a character from a file */
429 /* assumes gf_out struct is filled in */
430 int
gf_freadc(unsigned char * c)431 gf_freadc(unsigned char *c)
432 {
433 int rv = 0;
434
435 do {
436 errno = 0;
437 clearerr(gf_in.file);
438 rv = fread(c, sizeof(unsigned char), (size_t)1, gf_in.file);
439 } while(!rv && ferror(gf_in.file) && errno == EINTR);
440
441 return(rv);
442 }
443
444
445 int
gf_freadc_locale(unsigned char * c)446 gf_freadc_locale(unsigned char *c)
447 {
448 return(generic_readc_locale(c, gf_freadc_getchar, (void *) gf_in.file, &gf_in.cb));
449 }
450
451
/*
 * gf_freadc_getchar - read one octet from the FILE passed in extraarg.
 *
 * Same as gf_freadc except that the stream comes from the argument,
 * which is what makes it usable as generic_readc_locale's callback.
 * Returns 1 on success, 0 at end of file or on error.
 */
int
gf_freadc_getchar(unsigned char *c, void *extraarg)
{
    FILE *stream = (FILE *) extraarg;
    int	  got;

    for(;;){
	errno = 0;
	clearerr(stream);
	got = fread(c, sizeof(unsigned char), (size_t)1, stream);

	/* only a read interrupted by a signal is worth retrying */
	if(got || !ferror(stream) || errno != EINTR)
	  break;
    }

    return(got);
}
471
472
473 /*
474 * Put a character to a file.
475 * Assumes gf_out struct is filled in.
476 * Returns 1 on success, <= 0 on failure.
477 */
478 int
gf_fwritec(int c)479 gf_fwritec(int c)
480 {
481 unsigned char ch = (unsigned char)c;
482 int rv = 0;
483
484 do
485 rv = fwrite(&ch, sizeof(unsigned char), (size_t)1, gf_out.file);
486 while(!rv && ferror(gf_out.file) && errno == EINTR);
487
488 return(rv);
489 }
490
491
492 /*
493 * The locale version converts from UTF-8 to user's locale charset
494 * before writing the characters.
495 */
496 int
gf_fwritec_locale(int c)497 gf_fwritec_locale(int c)
498 {
499 int rv = 1;
500 int i, outchars;
501 unsigned char obuf[MAX(MB_LEN_MAX,32)];
502
503 if((outchars = utf8_to_locale(c, &gf_out.cb, obuf, sizeof(obuf))) != 0){
504 for(i = 0; i < outchars; i++)
505 if(gf_fwritec(obuf[i]) != 1){
506 rv = 0;
507 break;
508 }
509 }
510
511 return(rv);
512 }
513
514
#ifdef _WINDOWS
/*
 * gf_freadc_windows - read unicode characters from the windows
 *		       filesystem and return them as a stream of UTF-8
 *		       octets, one per call.
 *
 * The stream is assumed to have been opened so that it knows how to
 * put together the unicode.  Octets of a character that are not
 * delivered right away are parked in gf_in.cb for the following calls.
 *
 * Returns 1 when *c was set, 0 at end of input or with no input file.
 *
 * (This is totally untested, copied loosely from so_file_readc_windows
 * which may or may not be appropriate.)
 */
int
gf_freadc_windows(unsigned char *c)
{
    UCS wide;

    /* octets left over from the previous character? */
    if(gf_in.cb.cbufend > gf_in.cb.cbuf){
	*c = *gf_in.cb.cbufp;
	gf_in.cb.cbufp++;
	if(gf_in.cb.cbufp >= gf_in.cb.cbufend){
	    gf_in.cb.cbufend = gf_in.cb.cbuf;
	    gf_in.cb.cbufp   = gf_in.cb.cbuf;
	}

	return(1);
    }

    if(!gf_in.file)
      return(0);

    /* windows only so second arg is ignored */
    wide = read_a_wide_char(gf_in.file, NULL);
    if(wide == CCONV_EOF)
      return(0);

    /*
     * Convert the UCS character to UTF-8, deliver its first octet
     * and leave the rest in the buffer.
     */
    gf_in.cb.cbufend = utf8_put(gf_in.cb.cbuf, (unsigned long) wide);
    gf_in.cb.cbufp   = gf_in.cb.cbuf;
    if(gf_in.cb.cbufend > gf_in.cb.cbuf){
	*c = *gf_in.cb.cbufp;
	gf_in.cb.cbufp++;
	if(gf_in.cb.cbufp >= gf_in.cb.cbufend){
	    gf_in.cb.cbufend = gf_in.cb.cbuf;
	    gf_in.cb.cbufp   = gf_in.cb.cbuf;
	}
    }
    else{
	*c = '?';			/* conversion produced nothing */
    }

    return(1);
}
#endif /* _WINDOWS */
571
572
573 int
gf_preadc(unsigned char * c)574 gf_preadc(unsigned char *c)
575 {
576 return(pipe_readc(c, gf_in.pipe));
577 }
578
579
580 int
gf_preadc_locale(unsigned char * c)581 gf_preadc_locale(unsigned char *c)
582 {
583 return(generic_readc_locale(c, gf_preadc_getchar, (void *) gf_in.pipe, &gf_in.cb));
584 }
585
586
587 /*
588 * This is just to make it work with generic_readc_locale.
589 */
590 int
gf_preadc_getchar(unsigned char * c,void * extraarg)591 gf_preadc_getchar(unsigned char *c, void *extraarg)
592 {
593 PIPE_S *pipe;
594
595 pipe = (PIPE_S *) extraarg;
596
597 return(pipe_readc(c, pipe));
598 }
599
600
601 /*
602 * Put a character to a pipe.
603 * Assumes gf_out struct is filled in.
604 * Returns 1 on success, <= 0 on failure.
605 */
606 int
gf_pwritec(int c)607 gf_pwritec(int c)
608 {
609 return(pipe_writec(c, gf_out.pipe));
610 }
611
612
613 /*
614 * The locale version converts from UTF-8 to user's locale charset
615 * before writing the characters.
616 */
617 int
gf_pwritec_locale(int c)618 gf_pwritec_locale(int c)
619 {
620 int rv = 1;
621 int i, outchars;
622 unsigned char obuf[MAX(MB_LEN_MAX,32)];
623
624 if((outchars = utf8_to_locale(c, &gf_out.cb, obuf, sizeof(obuf))) != 0){
625 for(i = 0; i < outchars; i++)
626 if(gf_pwritec(obuf[i]) != 1){
627 rv = 0;
628 break;
629 }
630 }
631
632 return(rv);
633 }
634
635
636 /* get a character from a string, return nonzero if things OK */
637 /* assumes gf_out struct is filled in */
638 int
gf_sreadc(unsigned char * c)639 gf_sreadc(unsigned char *c)
640 {
641 return((gf_in.n) ? *c = *(gf_in.txtp)++, gf_in.n-- : 0);
642 }
643
644
645 int
gf_sreadc_locale(unsigned char * c)646 gf_sreadc_locale(unsigned char *c)
647 {
648 return(generic_readc_locale(c, gf_sreadc_getchar, NULL, &gf_in.cb));
649 }
650
651
/*
 * gf_sreadc_getchar - callback-shaped wrapper around gf_sreadc for use
 *		       with generic_readc_locale.
 *
 * extraarg is ignored; gf_sreadc just uses the gf_in globals instead.
 * That's ok as long as it isn't called more than once at a time.
 */
int
gf_sreadc_getchar(unsigned char *c, void *extraarg)
{
    int got = gf_sreadc(c);

    return(got);
}
661
662
663 /*
664 * Put a character to a string.
665 * Assumes gf_out struct is filled in.
666 * Returns 1 on success, <= 0 on failure.
667 */
668 int
gf_swritec(int c)669 gf_swritec(int c)
670 {
671 return((gf_out.n) ? *(gf_out.txtp)++ = c, gf_out.n-- : 0);
672 }
673
674
675 /*
676 * The locale version converts from UTF-8 to user's locale charset
677 * before writing the characters.
678 */
679 int
gf_swritec_locale(int c)680 gf_swritec_locale(int c)
681 {
682 int rv = 1;
683 int i, outchars;
684 unsigned char obuf[MAX(MB_LEN_MAX,32)];
685
686 if((outchars = utf8_to_locale(c, &gf_out.cb, obuf, sizeof(obuf))) != 0){
687 for(i = 0; i < outchars; i++)
688 if(gf_swritec(obuf[i]) != 1){
689 rv = 0;
690 break;
691 }
692 }
693
694 return(rv);
695 }
696
697
698 /*
699 * output the given string with the given function
700 */
701 int
gf_puts(register char * s,gf_io_t pc)702 gf_puts(register char *s, gf_io_t pc)
703 {
704 while(*s != '\0')
705 if(!(*pc)((unsigned char)*s++))
706 return(0); /* ERROR putting char ! */
707
708 return(1);
709 }
710
711
712 /*
713 * output the given string with the given function
714 */
715 int
gf_nputs(register char * s,long int n,gf_io_t pc)716 gf_nputs(register char *s, long int n, gf_io_t pc)
717 {
718 while(n--)
719 if(!(*pc)((unsigned char)*s++))
720 return(0); /* ERROR putting char ! */
721
722 return(1);
723 }
724
725
726 /*
727 * Read a stream of multi-byte characters from the
728 * user's locale charset and return a stream of
729 * UTF-8 characters, one at a time. The input characters
730 * are obtained by using the get_a_char function.
731 *
732 * Args c -- the returned octet
733 * get_a_char -- function to get a single octet of the multibyte
734 * character. The first arg of that function is the
735 * returned value and the second arg is for the
736 * functions use. The second arg is replaced with
737 * extraarg when it is called.
738 * extraarg -- The second arg to get_a_char.
739 * cb -- Storage area for state between calls to this func.
740 */
741 int
generic_readc_locale(unsigned char * c,int (* get_a_char)(unsigned char *,void *),void * extraarg,CBUF_S * cb)742 generic_readc_locale(unsigned char *c,
743 int (*get_a_char)(unsigned char *, void *),
744 void *extraarg,
745 CBUF_S *cb)
746 {
747 unsigned long octets_so_far = 0, remaining_octets;
748 unsigned char *inputp;
749 unsigned char ch;
750 UCS ucs;
751 unsigned char inputbuf[20];
752 int rv = 0;
753 int got_one = 0;
754
755 /* already got some from previous call? */
756 if(cb->cbufend > cb->cbuf){
757 *c = *cb->cbufp;
758 cb->cbufp++;
759 rv++;
760 if(cb->cbufp >= cb->cbufend){
761 cb->cbufend = cb->cbuf;
762 cb->cbufp = cb->cbuf;
763 }
764
765 return(rv);
766 }
767
768 memset(inputbuf, 0, sizeof(inputbuf));
769 if((*get_a_char)(&ch, extraarg) == 0)
770 return(0);
771
772 inputbuf[octets_so_far++] = ch;
773
774 while(!got_one){
775 remaining_octets = octets_so_far;
776 inputp = inputbuf;
777 ucs = mbtow(ps_global->input_cs, &inputp, &remaining_octets);
778 switch(ucs){
779 case CCONV_BADCHAR:
780 return(rv);
781
782 case CCONV_NEEDMORE:
783 /*
784 * Do we need to do something with the characters we've
785 * collected that don't form a valid UCS character?
786 * Probably need to try discarding them one at a time
787 * from the front instead of just throwing them all out.
788 */
789 if(octets_so_far >= sizeof(inputbuf))
790 return(rv);
791
792 if((*get_a_char)(&ch, extraarg) == 0)
793 return(rv);
794
795 inputbuf[octets_so_far++] = ch;
796 break;
797
798 default:
799 /* got a good UCS-4 character */
800 got_one++;
801 break;
802 }
803 }
804
805 /*
806 * Now we need to convert the UCS character to UTF-8
807 * and dole out the UTF-8 one char at a time.
808 */
809 rv++;
810 cb->cbufend = utf8_put(cb->cbuf, (unsigned long) ucs);
811 cb->cbufp = cb->cbuf;
812 if(cb->cbufend > cb->cbuf){
813 *c = *cb->cbufp;
814 cb->cbufp++;
815 if(cb->cbufp >= cb->cbufend){
816 cb->cbufend = cb->cbuf;
817 cb->cbufp = cb->cbuf;
818 }
819 }
820 else
821 *c = '?';
822
823 return(rv);
824 }
825
826
827 /*
828 * Start of generalized filter routines
829 */
830
831 /*
832 * initializing function to make sure list of filters is empty.
833 */
834 void
gf_filter_init(void)835 gf_filter_init(void)
836 {
837 FILTER_S *flt, *fltn = gf_master;
838
839 while((flt = fltn) != NULL){ /* free list of old filters */
840 fltn = flt->next;
841 fs_give((void **)&flt);
842 }
843
844 gf_master = NULL;
845 gf_error_string = NULL; /* clear previous errors */
846 gf_byte_count = 0L; /* reset counter */
847 }
848
849
850
851 /*
852 * link the given filter into the filter chain
853 */
854 void
gf_link_filter(filter_t f,void * data)855 gf_link_filter(filter_t f, void *data)
856 {
857 FILTER_S *new, *tail;
858
859 #ifdef CRLF_NEWLINES
860 /*
861 * If the system's native EOL convention is CRLF, then there's no
862 * point in passing data thru a filter that's not doing anything
863 */
864 if(f == gf_nvtnl_local || f == gf_local_nvtnl)
865 return;
866 #endif
867
868 new = (FILTER_S *)fs_get(sizeof(FILTER_S));
869 memset(new, 0, sizeof(FILTER_S));
870
871 new->f = f; /* set the function pointer */
872 new->opt = data; /* set any optional parameter data */
873 (*f)(new, GF_RESET); /* have it setup initial state */
874
875 if((tail = gf_master) != NULL){ /* or add it to end of existing */
876 while(tail->next) /* list */
877 tail = tail->next;
878
879 tail->next = new;
880 }
881 else /* attach new struct to list */
882 gf_master = new; /* start a new list */
883 }
884
885
886 /*
887 * terminal filter, doesn't call any other filters, typically just does
888 * something with the output
889 */
890 void
gf_terminal(FILTER_S * f,int flg)891 gf_terminal(FILTER_S *f, int flg)
892 {
893 if(flg == GF_DATA){
894 GF_INIT(f, f);
895
896 while(op < eob)
897 if((*last_filter)(*op++) <= 0) /* generic terminal filter */
898 gf_error(errno ? error_description(errno) : "Error writing pipe");
899
900 GF_CH_RESET(f);
901 }
902 else if(flg == GF_RESET)
903 errno = 0; /* prepare for problems */
904 }
905
906
907 /*
908 * set some outside gf_io_t function to the terminal function
909 * for example: a function to write a char to a file or into a buffer
910 */
911 void
gf_set_terminal(gf_io_t f)912 gf_set_terminal(gf_io_t f) /* function to set generic filter */
913
914 {
915 last_filter = f;
916 }
917
918
919 /*
920 * common function for filter's to make it known that an error
921 * has occurred. Jumps back to gf_pipe with error message.
922 */
923 void
gf_error(char * s)924 gf_error(char *s)
925 {
926 /* let the user know the error passed in s */
927 gf_error_string = s;
928 longjmp(gf_error_state, 1);
929 }
930
931
932 /*
933 * The routine that shoves each byte through the chain of
934 * filters. It sets up error handling, and the terminal function.
935 * Then loops getting bytes with the given function, and passing
936 * it on to the first filter in the chain.
937 */
938 char *
gf_pipe(gf_io_t gc,gf_io_t pc)939 gf_pipe(gf_io_t gc, gf_io_t pc)
940 /* how to get a character */
941 {
942 unsigned char c;
943
944 dprint((4, "-- gf_pipe: "));
945
946 /*
947 * set up for any errors a filter may encounter
948 */
949 if(setjmp(gf_error_state)){
950 dprint((4, "ERROR: %s\n",
951 gf_error_string ? gf_error_string : "NULL"));
952 return(gf_error_string); /* */
953 }
954
955 /*
956 * set and link in the terminal filter
957 */
958 gf_set_terminal(pc);
959 gf_link_filter(gf_terminal, NULL);
960
961 /*
962 * while there are chars to process, send them thru the pipe.
963 * NOTE: it's necessary to enclose the loop below in a block
964 * as the GF_INIT macro calls some automatic var's into
965 * existence. It can't be placed at the start of gf_pipe
966 * because its useful for us to be called without filters loaded
967 * when we're just being used to copy bytes between storage
968 * objects.
969 */
970 {
971 GF_INIT(gf_master, gf_master);
972
973 while((*gc)(&c)){
974 gf_byte_count++;
975
976 #ifdef _WINDOWS
977 if(!(gf_byte_count & 0x3ff))
978 /* Under windows we yield to allow event processing.
979 * Progress display is handled through the alarm()
980 * mechanism.
981 */
982 mswin_yield ();
983 #endif
984
985 GF_PUTC(gf_master, c & 0xff);
986 }
987
988 /*
989 * toss an end-of-data marker down the pipe to give filters
990 * that have any buffered data the opportunity to dump it
991 */
992 (void) GF_FLUSH(gf_master);
993 (*gf_master->f)(gf_master, GF_EOD);
994 }
995
996 dprint((4, "done.\n"));
997 return(NULL); /* everything went OK */
998 }
999
1000
1001 /*
1002 * return the number of bytes piped so far
1003 */
1004 long
gf_bytes_piped(void)1005 gf_bytes_piped(void)
1006 {
1007 return(gf_byte_count);
1008 }
1009
1010
1011 /*
1012 * filter the given input with the given command
1013 *
1014 * Args: cmd -- command string to execute
1015 * prepend -- string to prepend to filtered input
1016 * source_so -- storage object containing data to be filtered
1017 * pc -- function to write filtered output with
1018 * aux_filters -- additional filters to pass data thru after "cmd"
1019 *
1020 * Returns: NULL on success, reason for failure (not alloc'd!) on error
1021 */
1022 char *
gf_filter(char * cmd,char * prepend,STORE_S * source_so,gf_io_t pc,FILTLIST_S * aux_filters,int silent,int disable_reset,void (* pipecb_f)(PIPE_S *,int,void *))1023 gf_filter(char *cmd, char *prepend, STORE_S *source_so, gf_io_t pc,
1024 FILTLIST_S *aux_filters, int silent, int disable_reset,
1025 void (*pipecb_f)(PIPE_S *, int, void *))
1026 {
1027 unsigned char c, obuf[MAX(MB_LEN_MAX,32)];
1028 int flags, outchars, i;
1029 char *errstr = NULL, buf[MAILTMPLEN];
1030 PIPE_S *fpipe;
1031 CBUF_S cb;
1032 #ifdef NON_BLOCKING_IO
1033 int n;
1034 #endif
1035
1036 dprint((4, "so_filter: \"%s\"\n", cmd ? cmd : "?"));
1037
1038 gf_filter_init();
1039
1040 /*
1041 * After coming back from user's pipe command we need to convert
1042 * the output from the pipe back to UTF-8.
1043 */
1044 if(ps_global->keyboard_charmap && strucmp("UTF-8", ps_global->keyboard_charmap))
1045 gf_link_filter(gf_utf8, gf_utf8_opt(ps_global->keyboard_charmap));
1046
1047 for( ; aux_filters && aux_filters->filter; aux_filters++)
1048 gf_link_filter(aux_filters->filter, aux_filters->data);
1049
1050 gf_set_terminal(pc);
1051 gf_link_filter(gf_terminal, NULL);
1052
1053 cb.cbuf[0] = '\0';
1054 cb.cbufp = cb.cbuf;
1055 cb.cbufend = cb.cbuf;
1056
1057 /*
1058 * Spawn filter feeding it data, and reading what it writes.
1059 */
1060 so_seek(source_so, 0L, 0);
1061 flags = PIPE_WRITE | PIPE_READ | PIPE_NOSHELL
1062 | (silent ? PIPE_SILENT : 0)
1063 | (!disable_reset ? PIPE_RESET : 0);
1064
1065 if((fpipe = open_system_pipe(cmd, NULL, NULL, flags, 0, pipecb_f, pipe_report_error)) != NULL){
1066
1067 #ifdef NON_BLOCKING_IO
1068
1069 if(fcntl(fileno(fpipe->in.f), F_SETFL, NON_BLOCKING_IO) == -1)
1070 errstr = "Can't set up non-blocking IO";
1071
1072 if(prepend && (fputs(prepend, fpipe->out.f) == EOF
1073 || fputc('\n', fpipe->out.f) == EOF))
1074 errstr = error_description(errno);
1075
1076 while(!errstr){
1077 /* if the pipe can't hold a K we're sunk (too bad PIPE_MAX
1078 * isn't ubiquitous ;).
1079 */
1080 for(n = 0; !errstr && fpipe->out.f && n < 1024; n++)
1081 if(!so_readc(&c, source_so)){
1082 fclose(fpipe->out.f);
1083 fpipe->out.f = NULL;
1084 }
1085 else{
1086 /*
1087 * Got a UTF-8 character from source_so.
1088 * We need to convert it to the user's locale charset
1089 * and then send the result to the pipe.
1090 */
1091 if((outchars = utf8_to_locale((int) c, &cb, obuf, sizeof(obuf))) != 0)
1092 for(i = 0; i < outchars && !errstr; i++)
1093 if(fputc(obuf[i], fpipe->out.f) == EOF)
1094 errstr = error_description(errno);
1095 }
1096
1097 /*
1098 * Note: We clear errno here and test below, before ferror,
1099 * because *some* stdio implementations consider
1100 * EAGAIN and EWOULDBLOCK equivalent to EOF...
1101 */
1102 errno = 0;
1103 clearerr(fpipe->in.f); /* fix from <cananian@cananian.mit.edu> */
1104
1105 while(!errstr && fgets(buf, sizeof(buf), fpipe->in.f))
1106 errstr = gf_filter_puts(buf);
1107
1108 /* then fgets failed! */
1109 if(!errstr && !(errno == EAGAIN || errno == EWOULDBLOCK)){
1110 if(feof(fpipe->in.f)) /* nothing else interesting! */
1111 break;
1112 else if(ferror(fpipe->in.f)) /* bummer. */
1113 errstr = error_description(errno);
1114 }
1115 else if(errno == EAGAIN || errno == EWOULDBLOCK)
1116 clearerr(fpipe->in.f);
1117 }
1118
1119 #else /* !NON_BLOCKING_IO */
1120
1121 if(prepend && (pipe_puts(prepend, fpipe) == EOF
1122 || pipe_putc('\n', fpipe) == EOF))
1123 errstr = error_description(errno);
1124
1125 /*
1126 * Well, do the best we can, and hope the pipe we're writing
1127 * doesn't fill up before we start reading...
1128 */
1129 while(!errstr && so_readc(&c, source_so))
1130 if((outchars = utf8_to_locale((int) c, &cb, obuf, sizeof(obuf))) != 0)
1131 for(i = 0; i < outchars && !errstr; i++)
1132 if(pipe_putc(obuf[i], fpipe) == EOF)
1133 errstr = error_description(errno);
1134
1135 if(pipe_close_write(fpipe))
1136 errstr = _("Pipe command returned error.");
1137
1138 while(!errstr && pipe_gets(buf, sizeof(buf), fpipe))
1139 errstr = gf_filter_puts(buf);
1140
1141 #endif /* !NON_BLOCKING_IO */
1142
1143 if(close_system_pipe(&fpipe, NULL, pipecb_f) && !errstr)
1144 errstr = _("Pipe command returned error.");
1145
1146 gf_filter_eod();
1147 }
1148 else
1149 errstr = _("Error setting up pipe command.");
1150
1151 return(errstr);
1152 }
1153
1154
1155 /*
1156 * gf_filter_puts - write the given string down the filter's pipe
1157 */
1158 char *
gf_filter_puts(register char * s)1159 gf_filter_puts(register char *s)
1160 {
1161 GF_INIT(gf_master, gf_master);
1162
1163 /*
1164 * set up for any errors a filter may encounter
1165 */
1166 if(setjmp(gf_error_state)){
1167 dprint((4, "ERROR: gf_filter_puts: %s\n",
1168 gf_error_string ? gf_error_string : "NULL"));
1169 return(gf_error_string);
1170 }
1171
1172 while(*s)
1173 GF_PUTC(gf_master, (*s++) & 0xff);
1174
1175 GF_END(gf_master, gf_master);
1176 return(NULL);
1177 }
1178
1179
1180 /*
1181 * gf_filter_eod - flush pending data filter's input queue and deliver
1182 * the GF_EOD marker.
1183 */
1184 void
gf_filter_eod(void)1185 gf_filter_eod(void)
1186 {
1187 GF_INIT(gf_master, gf_master);
1188 (void) GF_FLUSH(gf_master);
1189 (*gf_master->f)(gf_master, GF_EOD);
1190 }
1191
1192
1193 /*
1194 * END OF PIPE SUPPORT ROUTINES, BEGINNING OF FILTERS
1195 *
1196 * Filters MUST use the specified interface (pointer to filter
1197 * structure, the unsigned character buffer in that struct, and a
1198 * cmd flag), and pass each resulting octet to the next filter in the
1199 * chain. Only the terminal filter need not call another filter.
1200 * As a result, filters share a pretty general structure.
1201 * Typically three main conditionals separate initialization from
1202 * data from end-of-data command processing.
1203 *
1204 * Lastly, being character-at-a-time, they're a little more complex
1205 * to write than filters operating on buffers because some state
1206 * must typically be kept between characters. However, for a
1207 * little bit of complexity here, much convenience is gained later
1208 * as they can be arbitrarily chained together at run time and
1209 * consume few resources (especially memory or disk) as they work.
1210 * (NOTE 951005: even less cpu now that data between filters is passed
1211 * via a vector.)
1212 *
1213 * A few notes about implementing filters:
1214 *
1215 * - A generic filter template looks like:
1216 *
1217 * void
1218 * gf_xxx_filter(f, flg)
1219 * FILTER_S *f;
1220 * int flg;
1221 * {
1222 * GF_INIT(f, f->next); // def's var's to speed queue drain
1223 *
1224 * if(flg == GF_DATA){
1225 * register unsigned char c;
1226 *
1227 * while(GF_GETC(f, c)){ // macro taking data off input queue
1228 * // operate on c and pass it on here
1229 * GF_PUTC(f->next, c); // macro writing output queue
1230 * }
1231 *
1232 * GF_END(f, f->next); // macro to sync pointers/offsets
1233 * //WARNING: DO NOT RETURN BEFORE ALL INCOMING DATA'S PROCESSED
1234 * }
1235 * else if(flg == GF_EOD){
1236 * // process any buffered data here and pass it on
1237 * GF_FLUSH(f->next); // flush pending data to next filter
1238 * (*f->next->f)(f->next, GF_EOD);
1239 * }
1240 * else if(flg == GF_RESET){
1241 * // initialize any data in the struct here
1242 * }
1243 * }
1244 *
1245 * - Any free storage allocated during initialization (typically tied
1246 * to the "line" pointer in FILTER_S) is the filter's responsibility
1247 * to clean up when the GF_EOD command comes through.
1248 *
 *   - Filters must pass any GF_EOD they receive on to the next
 *     filter in the chain so it has the opportunity to flush
 *     any buffered data.
1252 *
 *   - All filters expect NVT end-of-lines.  The idea is to prepend
 *     or append either the gf_local_nvtnl or gf_nvtnl_local
 *     OS-dependent filters to the data on the appropriate end of the
 *     pipe for the task at hand.
1257 *
1258 * - NOTE: As of 951004, filters no longer take their input as a single
1259 * char argument, but rather get data to operate on via a vector
1260 * representing the input queue in the FILTER_S structure.
1261 *
1262 */
1263
1264
1265
1266 /*
1267 * BASE64 TO BINARY encoding and decoding routines below
1268 */
1269
1270
1271 /*
1272 * BINARY to BASE64 filter (encoding described in rfc1341)
1273 */
1274 void
gf_binary_b64(FILTER_S * f,int flg)1275 gf_binary_b64(FILTER_S *f, int flg)
1276 {
1277 static char *v =
1278 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1279 GF_INIT(f, f->next);
1280
1281 if(flg == GF_DATA){
1282 register unsigned char c;
1283 register unsigned char t = f->t;
1284 register long n = f->n;
1285
1286 while(GF_GETC(f, c)){
1287
1288 switch(n++){
1289 case 0 : case 3 : case 6 : case 9 : case 12: case 15: case 18:
1290 case 21: case 24: case 27: case 30: case 33: case 36: case 39:
1291 case 42: case 45:
1292 GF_PUTC(f->next, v[c >> 2]);
1293 /* byte 1: high 6 bits (1) */
1294 t = c << 4; /* remember high 2 bits for next */
1295 break;
1296
1297 case 1 : case 4 : case 7 : case 10: case 13: case 16: case 19:
1298 case 22: case 25: case 28: case 31: case 34: case 37: case 40:
1299 case 43:
1300 GF_PUTC(f->next, v[(t|(c>>4)) & 0x3f]);
1301 t = c << 2;
1302 break;
1303
1304 case 2 : case 5 : case 8 : case 11: case 14: case 17: case 20:
1305 case 23: case 26: case 29: case 32: case 35: case 38: case 41:
1306 case 44:
1307 GF_PUTC(f->next, v[(t|(c >> 6)) & 0x3f]);
1308 GF_PUTC(f->next, v[c & 0x3f]);
1309 break;
1310 }
1311
1312 if(n == 45){ /* start a new line? */
1313 GF_PUTC(f->next, '\015');
1314 GF_PUTC(f->next, '\012');
1315 n = 0L;
1316 }
1317 }
1318
1319 f->n = n;
1320 f->t = t;
1321 GF_END(f, f->next);
1322 }
1323 else if(flg == GF_EOD){ /* no more data */
1324 switch (f->n % 3) { /* handle trailing bytes */
1325 case 0: /* no trailing bytes */
1326 break;
1327
1328 case 1:
1329 GF_PUTC(f->next, v[(f->t) & 0x3f]);
1330 GF_PUTC(f->next, '='); /* byte 3 */
1331 GF_PUTC(f->next, '='); /* byte 4 */
1332 break;
1333
1334 case 2:
1335 GF_PUTC(f->next, v[(f->t) & 0x3f]);
1336 GF_PUTC(f->next, '='); /* byte 4 */
1337 break;
1338 }
1339
1340 /* end with CRLF */
1341 if(f->n){
1342 GF_PUTC(f->next, '\015');
1343 GF_PUTC(f->next, '\012');
1344 }
1345
1346 (void) GF_FLUSH(f->next);
1347 (*f->next->f)(f->next, GF_EOD);
1348 }
1349 else if(flg == GF_RESET){
1350 dprint((9, "-- gf_reset binary_b64\n"));
1351 f->n = 0L;
1352 }
1353 }
1354
1355
1356
1357 /*
1358 * BASE64 to BINARY filter (encoding described in rfc1341)
1359 */
1360 void
gf_b64_binary(FILTER_S * f,int flg)1361 gf_b64_binary(FILTER_S *f, int flg)
1362 {
1363 static char v[] = {65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,
1364 65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,
1365 65,65,65,65,65,65,65,65,65,65,65,62,65,65,65,63,
1366 52,53,54,55,56,57,58,59,60,61,65,65,65,64,65,65,
1367 65, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,
1368 15,16,17,18,19,20,21,22,23,24,25,65,65,65,65,65,
1369 65,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
1370 41,42,43,44,45,46,47,48,49,50,51,65,65,65,65,65};
1371 GF_INIT(f, f->next);
1372
1373 if(flg == GF_DATA){
1374 register unsigned char c;
1375 register unsigned char t = f->t;
1376 register int n = (int) f->n;
1377 register int state = f->f1;
1378
1379 while(GF_GETC(f, c)){
1380
1381 if(state){
1382 state = 0;
1383 if (c != '=') {
1384 gf_error("Illegal '=' in base64 text");
1385 /* NO RETURN */
1386 }
1387 }
1388
1389 /* in range, and a valid value? */
1390 if((c & ~0x7f) || (c = v[c]) > 63){
1391 if(c == 64){
1392 switch (n++) { /* check quantum position */
1393 case 2:
1394 state++; /* expect an equal as next char */
1395 break;
1396
1397 case 3:
1398 n = 0L; /* restart quantum */
1399 break;
1400
1401 default: /* impossible quantum position */
1402 gf_error("Internal base64 decoder error");
1403 /* NO RETURN */
1404 }
1405 }
1406 }
1407 else{
1408 switch (n++) { /* install based on quantum position */
1409 case 0: /* byte 1: high 6 bits */
1410 t = c << 2;
1411 break;
1412
1413 case 1: /* byte 1: low 2 bits */
1414 GF_PUTC(f->next, (t|(c >> 4)));
1415 t = c << 4; /* byte 2: high 4 bits */
1416 break;
1417
1418 case 2: /* byte 2: low 4 bits */
1419 GF_PUTC(f->next, (t|(c >> 2)));
1420 t = c << 6; /* byte 3: high 2 bits */
1421 break;
1422
1423 case 3:
1424 GF_PUTC(f->next, t | c);
1425 n = 0L; /* reinitialize mechanism */
1426 break;
1427 }
1428 }
1429 }
1430
1431 f->f1 = state;
1432 f->t = t;
1433 f->n = n;
1434 GF_END(f, f->next);
1435 }
1436 else if(flg == GF_EOD){
1437 (void) GF_FLUSH(f->next);
1438 (*f->next->f)(f->next, GF_EOD);
1439 }
1440 else if(flg == GF_RESET){
1441 dprint((9, "-- gf_reset b64_binary\n"));
1442 f->n = 0L; /* quantum position */
1443 f->f1 = 0; /* state holder: equal seen? */
1444 }
1445 }
1446
1447
1448
1449
/*
 * QUOTED-PRINTABLE ENCODING AND DECODING filters below.
 * encoding described in rfc1341
 */

/* size of the buffer gf_qp_8bit allocates for held-back spaces */
#define GF_MAXLINE      80              /* good buffer size */

/*
 * default action for QUOTED-PRINTABLE to 8BIT decoder
 *
 * NOTE: expands to code that assigns a variable named "state", which
 * must be in scope where the macro is used.
 *
 *   ' '   -> restart the held-space buffer with this one space and
 *            switch to WSPACE
 *   '='   -> switch to EQUAL
 *   other -> pass the character on to the next filter
 */
#define GF_QP_DEFAULT(f, c)     { \
                                    if((c) == ' '){ \
                                        state = WSPACE; \
                                                /* reset white space! */ \
                                        (f)->linep = (f)->line; \
                                        *((f)->linep)++ = ' '; \
                                    } \
                                    else if((c) == '='){ \
                                        state = EQUAL; \
                                    } \
                                    else \
                                      GF_PUTC((f)->next, (c)); \
                                }
1473
1474
/*
 * QUOTED-PRINTABLE to 8BIT filter
 *
 * Per-filter storage used here:
 *   f->f1           decoder state, saved across calls
 *   f->t            value of the first hex digit following an '='
 *   f->f2           that first hex digit as received, kept so the raw
 *                   text can be written out if the second one is bad
 *   f->line/linep   buffer (GF_MAXLINE bytes) of spaces held back until
 *                   we know whether they end the line
 *   f->n            scratch count while writing the held spaces
 */
void
gf_qp_8bit(FILTER_S *f, int flg)
{

    GF_INIT(f, f->next);

    if(flg == GF_DATA){
        register unsigned char c;
        register int state = f->f1;

        while(GF_GETC(f, c)){

            switch(state){
              case DFL :                /* default case */
              default:
                GF_QP_DEFAULT(f, c);
                break;

              case CCR    :             /* saw "=\015", the CR of a soft EOL */
                state = DFL;
                if(c == '\012')
                  continue;             /* LF completes it; go on to next char */

                GF_QP_DEFAULT(f, c);
                break;

              case EQUAL  :
                if(c == '\015'){        /* "=\015" is a soft EOL */
                    state = CCR;
                    break;
                }

                if(c == '='){           /* compatibility clause for old guys */
                    GF_PUTC(f->next, '=');
                    state = DFL;
                    break;
                }

                if(!isxdigit((unsigned char)c)){        /* must be hex! */
                    /*
                     * First character after '=' not a hex digit.
                     * This ain't right, but we're going to treat it as
                     * plain old text instead of an '=' followed by hex.
                     * In other words, they forgot to encode the '='.
                     * Before 4.60 we just bailed with an error here, but now
                     * we keep going as long as we are just displaying
                     * the result (and not saving it or something).
                     *
                     * Wait! The users don't like that. They want to be able
                     * to use it even if it might be wrong. So just plow
                     * ahead even if displaying.
                     *
                     * Better have this be a constant string so that if we
                     * get multiple instances of it in a single message we
                     * can avoid the too many error messages problem. It
                     * better be the same message as the one a few lines
                     * below, as well.
                     *
                     * Turn off decoding after encountering such an error and
                     * just dump the rest of the text as is.
                     */
                    state = STOP_DECODING;
                    GF_PUTC(f->next, '=');
                    GF_PUTC(f->next, c);
                    q_status_message(SM_ORDER,3,3,
                        _("Warning: Non-hexadecimal character in QP encoding!"));

                    dprint((2, "gf_qp_8bit: warning: non-hex char in QP encoding: char \"%c\" (%d) follows =\n", c, c));
                    break;
                }

                /* convert the first hex digit to its numeric value */
                if (isdigit ((unsigned char)c))
                  f->t = c - '0';
                else
                  f->t = c - (isupper((unsigned char)c) ? 'A' - 10 : 'a' - 10);

                f->f2 = c;      /* store character in case we have to
                                   back out in !isxdigit below */

                state = HEX;
                break;

              case HEX :                /* expecting the second hex digit */
                state = DFL;
                if(!isxdigit((unsigned char)c)){        /* must be hex! */
                    /* write "=", the saved first digit and c literally */
                    state = STOP_DECODING;
                    GF_PUTC(f->next, '=');
                    GF_PUTC(f->next, f->f2);
                    GF_PUTC(f->next, c);
                    q_status_message(SM_ORDER,3,3,
                        _("Warning: Non-hexadecimal character in QP encoding!"));

                    dprint((2, "gf_qp_8bit: warning: non-hex char in QP encoding: char \"%c\" (%d) follows =%c\n", c, c, f->f2));
                    break;
                }

                /* convert the second hex digit to its numeric value */
                if (isdigit((unsigned char)c))
                  c -= '0';
                else
                  c -= (isupper((unsigned char)c) ? 'A' - 10 : 'a' - 10);

                /* combine both digits into the decoded octet */
                GF_PUTC(f->next, c + (f->t << 4));
                break;

              case WSPACE :
                if(c == ' '){           /* toss it in with other spaces */
                    /* spaces beyond the buffer's capacity are not stored */
                    if(f->linep - f->line < GF_MAXLINE)
                      *(f->linep)++ = ' ';
                    break;
                }

                state = DFL;
                if(c == '\015'){        /* not our white space! */
                    f->linep = f->line; /* reset buffer */
                    GF_PUTC(f->next, '\015');
                    break;
                }

                /* the spaces are ours, write 'em */
                f->n = f->linep - f->line;
                while((f->n)--)
                  GF_PUTC(f->next, ' ');

                GF_QP_DEFAULT(f, c);    /* take care of 'c' in default way */
                break;

              case STOP_DECODING :      /* after an error: copy input as is */
                GF_PUTC(f->next, c);
                break;
            }
        }

        f->f1 = state;
        GF_END(f, f->next);
    }
    else if(flg == GF_EOD){
        /* release the space buffer allocated at GF_RESET */
        fs_give((void **)&(f->line));
        (void) GF_FLUSH(f->next);
        (*f->next->f)(f->next, GF_EOD);
    }
    else if(flg == GF_RESET){
        dprint((9, "-- gf_reset qp_8bit\n"));
        f->f1 = DFL;
        f->linep = f->line = (char *)fs_get(GF_MAXLINE * sizeof(char));
    }
}
1624
1625
1626
/*
 * USEFUL MACROS TO HELP WITH QP ENCODING
 *
 * In all of these "f" is the FILTER_S pointer of the encoder and
 * (f)->n is the number of characters written so far on the current
 * output line.  Every macro parameter is parenthesized where it is
 * used so that any expression may safely be passed.
 */

#define QP_MAXL 75                      /* 76th place only for continuation */

/*
 * Macro to write a soft line break ("=" CR LF) to the next filter.
 * It does not touch (f)->n; the caller resets the line count.
 */
#define GF_8BIT_WRAP(f)         { \
                                    GF_PUTC((f)->next, '='); \
                                    GF_PUTC((f)->next, '\015'); \
                                    GF_PUTC((f)->next, '\012'); \
                                }

/*
 * write a quoted octet in QUOTED-PRINTABLE encoding, adding soft
 * line break if needed.
 */
#define GF_8BIT_PUT_QUOTE(f, c) { \
                                    if(((f)->n += 3) > QP_MAXL){ \
                                        GF_8BIT_WRAP(f); \
                                        (f)->n = 3;     /* set line count */ \
                                    } \
                                    GF_PUTC((f)->next, '='); \
                                    GF_PUTC((f)->next, HEX_CHAR1(c)); \
                                    GF_PUTC((f)->next, HEX_CHAR2(c)); \
                                }

/*
 * just write an ordinary octet in QUOTED-PRINTABLE, wrapping line
 * if needed.  A '.' that would be the first character on a line is
 * written quoted instead.
 */
#define GF_8BIT_PUT(f, c)       { \
                                    if((++((f)->n)) > QP_MAXL){ \
                                        GF_8BIT_WRAP(f); \
                                        (f)->n = 1L; \
                                    } \
                                    if((f)->n == 1L && (c) == '.'){ \
                                        GF_8BIT_PUT_QUOTE(f, c); \
                                        (f)->n = 3; \
                                    } \
                                    else \
                                      GF_PUTC((f)->next, (c)); \
                                }


/*
 * default action for 8bit to quoted printable encoder
 *
 * NOTE: assigns a variable named "state" that must be in scope at the
 * point of use, and expands to a bare if/else chain (no enclosing
 * braces), so only use it as a complete statement.
 *
 *   ' '                       -> hold it, switch to WSPACE
 *   CR                        -> hold it, switch to CCR
 *   control, DEL, 8-bit, '='  -> write quoted
 *   anything else             -> write as is
 */
#define GF_8BIT_DEFAULT(f, c)   if((c) == ' '){ \
                                    state = WSPACE; \
                                } \
                                else if((c) == '\015'){ \
                                    state = CCR; \
                                } \
                                else if(iscntrl((c) & 0x7f) || ((c) == 0x7f) \
                                        || ((c) & 0x80) || ((c) == '=')){ \
                                    GF_8BIT_PUT_QUOTE(f, c); \
                                } \
                                else{ \
                                    GF_8BIT_PUT(f, c); \
                                }
1690
1691
/*
 * 8BIT to QUOTED-PRINTABLE filter
 *
 * Per-filter storage used here:
 *   f->f1   encoder state (DFL, CCR, WSPACE), saved across calls
 *   f->n    number of chars on the current output line
 *   f->t    "From " flag maintained by Find_Froms; when lit, the held
 *           space is written quoted
 *   f->f2   bitmap state handed to Find_Froms
 */
void
gf_8bit_qp(FILTER_S *f, int flg)
{
    /* placeholders for Find_Froms arguments this filter has no use for */
    short dummy_dots = 0, dummy_dmap = 1;
    GF_INIT(f, f->next);

    if(flg == GF_DATA){
        register unsigned char c;
        register int state = f->f1;

        while(GF_GETC(f, c)){

            /* keep track of "^JFrom " */
            Find_Froms(f->t, dummy_dots, f->f2, dummy_dmap, c);

            switch(state){
              case DFL :                /* handle ordinary case */
                GF_8BIT_DEFAULT(f, c);
                break;

              case CCR :                /* true line break? */
                state = DFL;
                if(c == '\012'){
                    /* CRLF: write it raw and start a new output line */
                    GF_PUTC(f->next, '\015');
                    GF_PUTC(f->next, '\012');
                    f->n = 0L;
                }
                else{                   /* nope, quote the CR */
                    GF_8BIT_PUT_QUOTE(f, '\015');
                    GF_8BIT_DEFAULT(f, c); /* and don't forget about c! */
                }
                break;

              case WSPACE:              /* a space is being held back */
                state = DFL;
                /* quote the space if it ends the line or follows "From" */
                if(c == '\015' || f->t){ /* handle the space */
                    GF_8BIT_PUT_QUOTE(f, ' ');
                    f->t = 0;           /* reset From flag */
                }
                else
                  GF_8BIT_PUT(f, ' ');

                GF_8BIT_DEFAULT(f, c);  /* handle 'c' in the default way */
                break;
            }
        }

        f->f1 = state;
        GF_END(f, f->next);
    }
    else if(flg == GF_EOD){
        /* a CR or space still being held is written out quoted */
        switch(f->f1){
          case CCR :
            GF_8BIT_PUT_QUOTE(f, '\015'); /* write the last cr */
            break;

          case WSPACE :
            GF_8BIT_PUT_QUOTE(f, ' ');  /* write the last space */
            break;
        }

        (void) GF_FLUSH(f->next);
        (*f->next->f)(f->next, GF_EOD);
    }
    else if(flg == GF_RESET){
        dprint((9, "-- gf_reset 8bit_qp\n"));
        f->f1 = DFL;                    /* state from last character        */
        f->f2 = 1;                      /* state of "^NFrom " bitmap        */
        f->t  = 0;
        f->n  = 0L;                     /* number of chars in current line  */
    }
}
1767
1768 /*
1769 * This filter converts characters in one character set (the character
1770 * set of a message, for example) to another (the user's character set).
1771 */
1772 void
gf_convert_8bit_charset(FILTER_S * f,int flg)1773 gf_convert_8bit_charset(FILTER_S *f, int flg)
1774 {
1775 static unsigned char *conv_table = NULL;
1776 GF_INIT(f, f->next);
1777
1778 if(flg == GF_DATA){
1779 register unsigned char c;
1780
1781 while(GF_GETC(f, c)){
1782 GF_PUTC(f->next, conv_table ? conv_table[c] : c);
1783 }
1784
1785 GF_END(f, f->next);
1786 }
1787 else if(flg == GF_EOD){
1788 (void) GF_FLUSH(f->next);
1789 (*f->next->f)(f->next, GF_EOD);
1790 }
1791 else if(flg == GF_RESET){
1792 dprint((9, "-- gf_reset convert_8bit_charset\n"));
1793 conv_table = (f->opt) ? (unsigned char *) (f->opt) : NULL;
1794
1795 }
1796 }
1797
1798
/*
 * Options handed to gf_convert_utf8_charset via f->opt; built by
 * gf_convert_utf8_charset_opt.
 */
typedef struct _utf8c_s {
    void *conv_table;           /* table indexed by Unicode value */
    int   report_err;           /* non-zero: unmappable char is an error */
} UTF8C_S;
1803
1804
1805 /*
1806 * This filter converts characters in UTF-8 to an 8-bit or 16-bit charset.
1807 * Characters missing from the destination set, and invalid UTF-8 sequences,
1808 * will be converted to "?".
1809 */
1810 void
gf_convert_utf8_charset(FILTER_S * f,int flg)1811 gf_convert_utf8_charset(FILTER_S *f, int flg)
1812 {
1813 static unsigned short *conv_table = NULL;
1814 static int report_err = 0;
1815 register int more = f->f2;
1816 register long u = f->n;
1817
1818 /*
1819 * "more" is the number of subsequent octets needed to complete a character,
1820 * it is stored in f->f2.
1821 * "u" is the accumulated Unicode character, it is stored in f->n
1822 */
1823
1824 GF_INIT(f, f->next);
1825
1826 if(flg == GF_DATA){
1827 register unsigned char c;
1828
1829 while(GF_GETC(f, c)){
1830 if(!conv_table){ /* can't do much if no conversion table */
1831 GF_PUTC(f->next, c);
1832 }
1833 /* UTF-8 continuation? */
1834 else if((c > 0x7f) && (c < 0xc0)){
1835 if(more){
1836 u <<= 6; /* shift current value by 6 bits */
1837 u |= c & 0x3f;
1838 if (!--more){ /* last octet? */
1839 if(u >= 0xffff || (u = conv_table[u]) == NOCHAR){
1840 /*
1841 * non-BMP character or a UTF-8 character
1842 * which is not representable in the
1843 * charset we're converting to.
1844 */
1845 c = '?';
1846 if(report_err){
1847 if(f->opt)
1848 fs_give((void **) &f->opt);
1849
1850 /* TRANSLATORS: error while translating from one
1851 character set to another, for example from UTF-8
1852 to ISO-2022-JP or something like that. */
1853 gf_error(_("translation error"));
1854 /* NO RETURN */
1855 }
1856 }
1857 else{
1858 if(u > 0xff){
1859 c = (unsigned char) (u >> 8);
1860 GF_PUTC(f->next, c);
1861 }
1862
1863 c = (unsigned char) u & 0xff;
1864 }
1865
1866 GF_PUTC(f->next, c);
1867 }
1868 }
1869 else{ /* continuation when not in progress */
1870 GF_PUTC(f->next, '?');
1871 }
1872 }
1873 else{
1874 if(more){ /* incomplete UTF-8 character */
1875 GF_PUTC(f->next, '?');
1876 more = 0;
1877 }
1878 if(c < 0x80){ /* U+0000 - U+007f */
1879 GF_PUTC(f->next, c);
1880 }
1881 else if(c < 0xe0){ /* U+0080 - U+07ff */
1882 u = c & 0x1f; /* first 5 bits of 12 */
1883 more = 1;
1884 }
1885 else if(c < 0xf0){ /* U+1000 - U+ffff */
1886 u = c & 0x0f; /* first 4 bits of 16 */
1887 more = 2;
1888 }
1889 /* in case we ever support non-BMP Unicode */
1890 else if (c < 0xf8){ /* U+10000 - U+10ffff */
1891 u = c & 0x07; /* first 3 bits of 20.5 */
1892 more = 3;
1893 }
1894 #if 0 /* ISO 10646 not in Unicode */
1895 else if (c < 0xfc){ /* ISO 10646 20000 - 3ffffff */
1896 u = c & 0x03; /* first 2 bits of 26 */
1897 more = 4;
1898 }
1899 else if (c < 0xfe){ /* ISO 10646 4000000 - 7fffffff */
1900 u = c & 0x03; /* first 2 bits of 26 */
1901 more = 5;
1902 }
1903 #endif
1904 else{ /* not in Unicode */
1905 GF_PUTC(f->next, '?');
1906 }
1907 }
1908 }
1909
1910 f->f2 = more;
1911 f->n = u;
1912 GF_END(f, f->next);
1913 }
1914 else if(flg == GF_EOD){
1915 (void) GF_FLUSH(f->next);
1916 if(f->opt)
1917 fs_give((void **) &f->opt);
1918 (*f->next->f)(f->next, GF_EOD);
1919 }
1920 else if(flg == GF_RESET){
1921 dprint((9, "-- gf_reset convert_utf8_charset\n"));
1922 conv_table = ((UTF8C_S *) f->opt)->conv_table;
1923 report_err = ((UTF8C_S *) f->opt)->report_err;
1924 f->f2 = 0;
1925 f->n = 0L;
1926 }
1927 }
1928
1929
1930 void *
gf_convert_utf8_charset_opt(void * table,int report_err)1931 gf_convert_utf8_charset_opt(void *table, int report_err)
1932 {
1933 UTF8C_S *utf8c;
1934
1935 utf8c = (UTF8C_S *) fs_get(sizeof(UTF8C_S));
1936 utf8c->conv_table = table;
1937 utf8c->report_err = report_err;
1938 return((void *) utf8c);
1939 }
1940
1941
1942 /*
1943 * ISO-2022-JP to EUC (on Unix) or Shift-JIS (on PC) filter
1944 *
1945 * The routine is call ..._to_euc but it is really to either euc (unix Pine)
1946 * or to Shift-JIS (if PC-Pine).
1947 */
1948 void
gf_2022_jp_to_euc(FILTER_S * f,int flg)1949 gf_2022_jp_to_euc(FILTER_S *f, int flg)
1950 {
1951 register unsigned char c;
1952 register int state = f->f1;
1953
1954 /*
1955 * f->t lit means we're in middle of decoding a sequence of characters.
1956 * f->f2 keeps track of first character of pair for Shift-JIS.
1957 * f->f1 is the state.
1958 */
1959
1960 GF_INIT(f, f->next);
1961
1962 if(flg == GF_DATA){
1963 while(GF_GETC(f, c)){
1964 switch(state){
1965 case ESC: /* saw ESC */
1966 if(!f->t && c == '$')
1967 state = ESCDOL;
1968 else if(f->t && c == '(')
1969 state = ESCPAR;
1970 else{
1971 GF_PUTC(f->next, '\033');
1972 GF_PUTC(f->next, c);
1973 state = DFL;
1974 }
1975
1976 break;
1977
1978 case ESCDOL: /* saw ESC $ */
1979 if(c == 'B' || c == '@'){
1980 state = EUC;
1981 f->t = 1; /* filtering into euc */
1982 f->f2 = -1; /* first character of pair */
1983 }
1984 else{
1985 GF_PUTC(f->next, '\033');
1986 GF_PUTC(f->next, '$');
1987 GF_PUTC(f->next, c);
1988 state = DFL;
1989 }
1990
1991 break;
1992
1993 case ESCPAR: /* saw ESC ( */
1994 if(c == 'B' || c == 'J' || c == 'H'){
1995 state = DFL;
1996 f->t = 0; /* done filtering */
1997 }
1998 else{
1999 GF_PUTC(f->next, '\033'); /* Don't set hibit for */
2000 GF_PUTC(f->next, '('); /* escape sequences, which */
2001 GF_PUTC(f->next, c); /* this appears to be. */
2002 }
2003
2004 break;
2005
2006 case EUC: /* filtering into euc */
2007 if(c == '\033')
2008 state = ESC;
2009 else{
2010 #ifdef _WINDOWS /* Shift-JIS */
2011 c &= 0x7f; /* 8-bit can't win */
2012 if (f->f2 >= 0){ /* second of a pair? */
2013 int rowOffset = (f->f2 < 95) ? 112 : 176;
2014 int cellOffset = (f->f2 % 2) ? ((c > 95) ? 32 : 31)
2015 : 126;
2016
2017 GF_PUTC(f->next, ((f->f2 + 1) >> 1) + rowOffset);
2018 GF_PUTC(f->next, c + cellOffset);
2019 f->f2 = -1; /* restart */
2020 }
2021 else if(c > 0x20 && c < 0x7f)
2022 f->f2 = c; /* first of pair */
2023 else{
2024 GF_PUTC(f->next, c); /* write CTL as itself */
2025 f->f2 = -1;
2026 }
2027 #else /* EUC */
2028 GF_PUTC(f->next, (c > 0x20 && c < 0x7f) ? c | 0x80 : c);
2029 #endif
2030 }
2031
2032 break;
2033
2034 case DFL:
2035 default:
2036 if(c == '\033')
2037 state = ESC;
2038 else
2039 GF_PUTC(f->next, c);
2040
2041 break;
2042 }
2043 }
2044
2045 f->f1 = state;
2046 GF_END(f, f->next);
2047 }
2048 else if(flg == GF_EOD){
2049 switch(state){
2050 case ESC:
2051 GF_PUTC(f->next, '\033');
2052 break;
2053
2054 case ESCDOL:
2055 GF_PUTC(f->next, '\033');
2056 GF_PUTC(f->next, '$');
2057 break;
2058
2059 case ESCPAR:
2060 GF_PUTC(f->next, '\033'); /* Don't set hibit for */
2061 GF_PUTC(f->next, '('); /* escape sequences. */
2062 break;
2063 }
2064
2065 (void) GF_FLUSH(f->next);
2066 (*f->next->f)(f->next, GF_EOD);
2067 }
2068 else if(flg == GF_RESET){
2069 dprint((9, "-- gf_reset jp_to_euc\n"));
2070 f->f1 = DFL; /* state */
2071 f->t = 0; /* not translating to euc */
2072 }
2073 }
2074
2075
/*
 * EUC (on Unix) or Shift-JIS (on PC) to ISO-2022-JP filter
 *
 * Simply hands f and flg to gf_sjis_to_2022_jp when built with
 * _WINDOWS defined and to gf_euc_to_2022_jp otherwise.
 */
void
gf_native8bitjapanese_to_2022_jp(FILTER_S *f, int flg)
{
#ifdef _WINDOWS
    gf_sjis_to_2022_jp(f, flg);
#else
    gf_euc_to_2022_jp(f, flg);
#endif
}
2088
2089
2090 void
gf_euc_to_2022_jp(FILTER_S * f,int flg)2091 gf_euc_to_2022_jp(FILTER_S *f, int flg)
2092 {
2093 register unsigned char c;
2094
2095 /*
2096 * f->t lit means we've sent the start esc seq but not the end seq.
2097 * f->f2 keeps track of first character of pair for Shift-JIS.
2098 */
2099
2100 GF_INIT(f, f->next);
2101
2102 if(flg == GF_DATA){
2103 while(GF_GETC(f, c)){
2104 if(f->t){
2105 if(c & 0x80){
2106 GF_PUTC(f->next, c & 0x7f);
2107 }
2108 else{
2109 GF_PUTC(f->next, '\033');
2110 GF_PUTC(f->next, '(');
2111 GF_PUTC(f->next, 'B');
2112 GF_PUTC(f->next, c);
2113 f->f2 = -1;
2114 f->t = 0;
2115 }
2116 }
2117 else{
2118 if(c & 0x80){
2119 GF_PUTC(f->next, '\033');
2120 GF_PUTC(f->next, '$');
2121 GF_PUTC(f->next, 'B');
2122 GF_PUTC(f->next, c & 0x7f);
2123 f->t = 1;
2124 }
2125 else{
2126 GF_PUTC(f->next, c);
2127 }
2128 }
2129 }
2130
2131 GF_END(f, f->next);
2132 }
2133 else if(flg == GF_EOD){
2134 if(f->t){
2135 GF_PUTC(f->next, '\033');
2136 GF_PUTC(f->next, '(');
2137 GF_PUTC(f->next, 'B');
2138 f->t = 0;
2139 f->f2 = -1;
2140 }
2141
2142 (void) GF_FLUSH(f->next);
2143 (*f->next->f)(f->next, GF_EOD);
2144 }
2145 else if(flg == GF_RESET){
2146 dprint((9, "-- gf_reset euc_to_jp\n"));
2147 f->t = 0;
2148 f->f2 = -1;
2149 }
2150 }
2151
/*
 * gf_sjis_to_2022_jp - Shift-JIS to ISO-2022-JP filter
 *
 * An octet with the high bit set opens a run ("ESC $ B" is written) and
 * is remembered as the first of a pair; the pair is converted and
 * written once its second octet arrives.  A 7-bit octet seen while a
 * run is open and no pair is pending closes the run ("ESC ( B").
 */
void
gf_sjis_to_2022_jp(FILTER_S *f, int flg)
{
    register unsigned char c;

    /*
     * f->t lit means we've sent the start esc seq but not the end seq.
     * f->f2 keeps track of first character of pair for Shift-JIS.
     */

    GF_INIT(f, f->next);

    if(flg == GF_DATA){
        while(GF_GETC(f, c)){
            if(f->t){
                if(f->f2 >= 0){                 /* second of a pair? */
                    /* offsets that turn the pair into two 7-bit octets */
                    int adjust = c < 159;
                    int rowOffset = f->f2 < 160 ? 112 : 176;
                    int cellOffset = adjust ? (c > 127 ? 32 : 31) : 126;

                    GF_PUTC(f->next, ((f->f2 - rowOffset) << 1) - adjust);
                    GF_PUTC(f->next, c - cellOffset);
                    f->f2 = -1;
                }
                else if(c & 0x80){
                    f->f2 = c;                  /* remember first of pair */
                }
                else{
                    /* 7-bit octet: close the run, then write it */
                    GF_PUTC(f->next, '\033');
                    GF_PUTC(f->next, '(');
                    GF_PUTC(f->next, 'B');
                    GF_PUTC(f->next, c);
                    f->f2 = -1;
                    f->t = 0;
                }
            }
            else{
                if(c & 0x80){
                    /* open a run; c is held as the first of a pair */
                    GF_PUTC(f->next, '\033');
                    GF_PUTC(f->next, '$');
                    GF_PUTC(f->next, 'B');
                    f->f2 = c;
                    f->t = 1;
                }
                else{
                    GF_PUTC(f->next, c);
                }
            }
        }

        GF_END(f, f->next);
    }
    else if(flg == GF_EOD){
        if(f->t){
            /* close a run that is still open; a pending first octet is dropped */
            GF_PUTC(f->next, '\033');
            GF_PUTC(f->next, '(');
            GF_PUTC(f->next, 'B');
            f->t = 0;
            f->f2 = -1;
        }

        (void) GF_FLUSH(f->next);
        (*f->next->f)(f->next, GF_EOD);
    }
    else if(flg == GF_RESET){
        dprint((9, "-- gf_reset sjis_to_jp\n"));
        f->t = 0;
        f->f2 = -1;
    }
}
2222
2223
2224
/*
 * Various charset to UTF-8 Translation filter
 */

/*
 * utf8 conversion options, handed to gf_utf8 via f->opt and
 * allocated by gf_utf8_opt
 */
typedef struct _utf8_s {
    CHARSET *charset;           /* source charset; NULL: treat as ASCII */
    unsigned long ucsc;         /* not referenced by the code in view */
} UTF8_S;
2236
/* the line buffer grows in steps of this many bytes */
#define UTF8_BLOCK      1024

/* address of the last byte of the line buffer (f->f2 is its size) */
#define UTF8_EOB(f)     ((f)->line + (f)->f2 - 1)

/*
 * Append c to the line buffer, first growing the buffer by UTF8_BLOCK
 * when it is full.  Uses, and updates, the variables "p" (next free
 * byte) and "eobuf" (end of buffer) of the function it expands in.
 */
#define UTF8_ADD(f, c) \
                        { \
                            if(p >= eobuf){ \
                                f->f2 += UTF8_BLOCK; \
                                fs_resize((void **)&f->line, \
                                          (size_t) f->f2 * sizeof(char)); \
                                eobuf = UTF8_EOB(f); \
                                p = eobuf - UTF8_BLOCK; \
                            } \
                            *p++ = c; \
                        }

/*
 * Convert the buffered line (f->line up to "p") and write the result
 * to the next filter:
 *   - with no charset in the options, bytes with the high bit set are
 *     written as '?' and the rest as is;
 *   - otherwise utf8_text_cs does the conversion and any buffer it
 *     returned that isn't the input buffer is freed;
 *   - if utf8_text_cs fails, one '?' is written per input byte.
 * "p" is not reset here.
 */
#define GF_UTF8_FLUSH(f) { \
                            register long n; \
                            SIZEDTEXT intext, outtext; \
                            intext.data = (unsigned char *) f->line; \
                            intext.size = p - f->line; \
                            memset(&outtext, 0, sizeof(SIZEDTEXT)); \
                            if(!((UTF8_S *) f->opt)->charset){ \
                                for(n = 0; n < intext.size; n++) \
                                  GF_PUTC(f->next, (intext.data[n] & 0x80) ? '?' : intext.data[n]); \
                            } \
                            else if(utf8_text_cs(&intext, ((UTF8_S *) f->opt)->charset, &outtext, NULL, NULL)){ \
                                for(n = 0; n < outtext.size; n++) \
                                  GF_PUTC(f->next, outtext.data[n]); \
                                if(outtext.data && intext.data != outtext.data) \
                                  fs_give((void **) &outtext.data); \
                            } \
                            else{ \
                                for(n = 0; n < intext.size; n++) \
                                  GF_PUTC(f->next, '?'); \
                            } \
                        }
2271
2272
2273 /*
2274 * gf_utf8 - text in specified charset to to UTF-8 filter
2275 * Process line-at-a-time rather than character
2276 * because ISO-2022-JP. Call utf8_text_cs by hand
2277 * rather than utf8_text to reduce the cost of
2278 * utf8_charset() for each line.
2279 */
2280 void
gf_utf8(FILTER_S * f,int flg)2281 gf_utf8(FILTER_S *f, int flg)
2282 {
2283 register char *p = f->linep;
2284 register char *eobuf = UTF8_EOB(f);
2285 GF_INIT(f, f->next);
2286
2287 if(flg == GF_DATA){
2288 register int state = f->f1;
2289 register unsigned char c;
2290
2291 while(GF_GETC(f, c)){
2292
2293 switch(state){
2294 case CCR :
2295 state = DFL;
2296 if(c == '\012'){
2297 GF_UTF8_FLUSH(f);
2298 p = f->line;
2299 GF_PUTC(f->next, '\015');
2300 GF_PUTC(f->next, '\012');
2301 }
2302 else{
2303 UTF8_ADD(f, '\015');
2304 UTF8_ADD(f, c);
2305 }
2306
2307 break;
2308
2309 default :
2310 if(c == '\015'){
2311 state = CCR;
2312 }
2313 else
2314 UTF8_ADD(f, c);
2315 }
2316 }
2317
2318 f->f1 = state;
2319 GF_END(f, f->next);
2320 }
2321 else if(flg == GF_EOD){
2322
2323 if(p != f->line)
2324 GF_UTF8_FLUSH(f);
2325
2326 fs_give((void **) &f->line);
2327 fs_give((void **) &f->opt);
2328 (void) GF_FLUSH(f->next);
2329 (*f->next->f)(f->next, GF_EOD);
2330 }
2331 else if(GF_RESET){
2332 dprint((9, "-- gf_reset utf8\n"));
2333 f->f1 = DFL;
2334 f->f2 = UTF8_BLOCK; /* input buffer length */
2335 f->line = p = (char *) fs_get(f->f2 * sizeof(char));
2336 }
2337
2338 f->linep = p;
2339 }
2340
2341
2342 void *
gf_utf8_opt(char * charset)2343 gf_utf8_opt(char *charset)
2344 {
2345 UTF8_S *utf8;
2346
2347 utf8 = (UTF8_S *) fs_get(sizeof(UTF8_S));
2348
2349 utf8->charset = (CHARSET *) utf8_charset(charset);
2350
2351 /*
2352 * When we get 8-bit non-ascii characters but it is supposed to
2353 * be ascii we want it to turn into question marks, not
2354 * just behave as if it is UTF-8 which is what happens
2355 * with ascii because there is no translation table.
2356 * So we need to catch the ascii special case here.
2357 */
2358 if(utf8->charset && utf8->charset->type == CT_ASCII)
2359 utf8->charset = NULL;
2360
2361 return((void *) utf8);
2362 }
2363
2364
2365 /*
2366 * RICHTEXT-TO-PLAINTEXT filter
2367 */
2368
2369 /*
2370 * option to be used by rich2plain (NOTE: if this filter is ever
2371 * used more than once in a pipe, all instances will have the same
2372 * option value)
2373 */
2374
2375
2376 /*----------------------------------------------------------------------
2377 richtext to plaintext filter
2378
2379 Args: f --
2380 flg --
2381
2382 This basically removes all richtext formatting. A cute hack is used
2383 to get bold and underlining to work.
2384 Further work could be done to handle things like centering and right
2385 and left flush, but then it could no longer be done in place. This
2386 operates on text *with* CRLF's.
2387
2388 WARNING: does not wrap lines!
2389 ----*/
2390 void
gf_rich2plain(FILTER_S * f,int flg)2391 gf_rich2plain(FILTER_S *f, int flg)
2392 {
2393 static int rich_bold_on = 0, rich_uline_on = 0;
2394
2395 /* BUG: quote incoming \255 values */
2396 GF_INIT(f, f->next);
2397
2398 if(flg == GF_DATA){
2399 register unsigned char c;
2400 register int state = f->f1;
2401 register int plain;
2402
2403 plain = f->opt ? (*(int *) f->opt) : 0;
2404
2405 while(GF_GETC(f, c)){
2406
2407 switch(state){
2408 case TOKEN : /* collect a richtext token */
2409 if(c == '>'){ /* what should we do with it? */
2410 state = DFL; /* return to default next time */
2411 *(f->linep) = '\0'; /* cap off token */
2412 if(f->line[0] == 'l' && f->line[1] == 't'){
2413 GF_PUTC(f->next, '<'); /* literal '<' */
2414 }
2415 else if(f->line[0] == 'n' && f->line[1] == 'l'){
2416 GF_PUTC(f->next, '\015');/* newline! */
2417 GF_PUTC(f->next, '\012');
2418 }
2419 else if(!strcmp("comment", f->line)){
2420 (f->f2)++;
2421 }
2422 else if(!strcmp("/comment", f->line)){
2423 f->f2 = 0;
2424 }
2425 else if(!strcmp("/paragraph", f->line)) {
2426 GF_PUTC(f->next, '\r');
2427 GF_PUTC(f->next, '\n');
2428 GF_PUTC(f->next, '\r');
2429 GF_PUTC(f->next, '\n');
2430 }
2431 else if(!plain /* gf_rich_plain */){
2432 if(!strcmp(f->line, "bold")) {
2433 GF_PUTC(f->next, TAG_EMBED);
2434 GF_PUTC(f->next, TAG_BOLDON);
2435 rich_bold_on = 1;
2436 } else if(!strcmp(f->line, "/bold")) {
2437 GF_PUTC(f->next, TAG_EMBED);
2438 GF_PUTC(f->next, TAG_BOLDOFF);
2439 rich_bold_on = 0;
2440 } else if(!strcmp(f->line, "italic")) {
2441 GF_PUTC(f->next, TAG_EMBED);
2442 GF_PUTC(f->next, TAG_ULINEON);
2443 rich_uline_on = 1;
2444 } else if(!strcmp(f->line, "/italic")) {
2445 GF_PUTC(f->next, TAG_EMBED);
2446 GF_PUTC(f->next, TAG_ULINEOFF);
2447 rich_uline_on = 0;
2448 } else if(!strcmp(f->line, "underline")) {
2449 GF_PUTC(f->next, TAG_EMBED);
2450 GF_PUTC(f->next, TAG_ULINEON);
2451 rich_uline_on = 1;
2452 } else if(!strcmp(f->line, "/underline")) {
2453 GF_PUTC(f->next, TAG_EMBED);
2454 GF_PUTC(f->next, TAG_ULINEOFF);
2455 rich_uline_on = 0;
2456 }
2457 }
2458 /* else we just ignore the token! */
2459
2460 f->linep = f->line; /* reset token buffer */
2461 }
2462 else{ /* add char to token */
2463 if(f->linep - f->line > 40){
2464 /* What? rfc1341 says 40 char tokens MAX! */
2465 fs_give((void **)&(f->line));
2466 gf_error("Richtext token over 40 characters");
2467 /* NO RETURN */
2468 }
2469
2470 *(f->linep)++ = isupper((unsigned char)c) ? c-'A'+'a' : c;
2471 }
2472 break;
2473
2474 case CCR :
2475 state = DFL; /* back to default next time */
2476 if(c == '\012'){ /* treat as single space? */
2477 GF_PUTC(f->next, ' ');
2478 break;
2479 }
2480 /* fall thru to process c */
2481
2482 case DFL :
2483 default:
2484 if(c == '<')
2485 state = TOKEN;
2486 else if(c == '\015')
2487 state = CCR;
2488 else if(!f->f2) /* not in comment! */
2489 GF_PUTC(f->next, c);
2490
2491 break;
2492 }
2493 }
2494
2495 f->f1 = state;
2496 GF_END(f, f->next);
2497 }
2498 else if(flg == GF_EOD){
2499 if((f->f1 = (f->linep != f->line)) != 0){
2500 /* incomplete token!! */
2501 gf_error("Incomplete token in richtext");
2502 /* NO RETURN */
2503 }
2504
2505 if(rich_uline_on){
2506 GF_PUTC(f->next, TAG_EMBED);
2507 GF_PUTC(f->next, TAG_ULINEOFF);
2508 rich_uline_on = 0;
2509 }
2510 if(rich_bold_on){
2511 GF_PUTC(f->next, TAG_EMBED);
2512 GF_PUTC(f->next, TAG_BOLDOFF);
2513 rich_bold_on = 0;
2514 }
2515
2516 fs_give((void **)&(f->line));
2517 (void) GF_FLUSH(f->next);
2518 (*f->next->f)(f->next, GF_EOD);
2519 }
2520 else if(flg == GF_RESET){
2521 dprint((9, "-- gf_reset rich2plain\n"));
2522 f->f1 = DFL; /* state */
2523 f->f2 = 0; /* set means we're in a comment */
2524 f->linep = f->line = (char *)fs_get(45 * sizeof(char));
2525 }
2526 }
2527
2528
/*
 * Package the richtext filter's option for the caller: the address of
 * the "plain" flag is handed back untouched as a generic pointer
 * (gf_rich2plain reads it through f->opt as an int *).
 */
void *
gf_rich2plain_opt(int *plain)
{
    void *opt = plain;

    return(opt);
}
2538
2539
2540
2541 /*
2542 * ENRICHED-TO-PLAIN text filter
2543 */
2544
/*
 * Bits kept in f->f2 by gf_enriched2plain:
 *   TEF_QUELL  -- while set, ordinary text (and the space that stands
 *                 in for a soft line break) is not written
 *   TEF_NOFILL -- while set, a CR is passed through as ordinary text
 *                 instead of starting CR/LF folding
 */
#define TEF_QUELL       0x01
#define TEF_NOFILL      0x02
2547
2548
2549
2550 /*----------------------------------------------------------------------
2551 enriched text to plain text filter (ala rfc1523)
2552
2553 Args: f -- state and input data
2554 flg --
2555
2556 This basically removes all enriched formatting. A cute hack is used
2557 to get bold and underlining to work.
2558
2559 Further work could be done to handle things like centering and right
2560 and left flush, but then it could no longer be done in place. This
2561 operates on text *with* CRLF's.
2562
2563 WARNING: does not wrap lines!
2564 ----*/
2565 void
gf_enriched2plain(FILTER_S * f,int flg)2566 gf_enriched2plain(FILTER_S *f, int flg)
2567 {
2568 static int enr_uline_on = 0, enr_bold_on = 0;
2569
2570 /* BUG: quote incoming \255 values */
2571 GF_INIT(f, f->next);
2572
2573 if(flg == GF_DATA){
2574 register unsigned char c;
2575 register int state = f->f1;
2576 register int plain;
2577
2578 plain = f->opt ? (*(int *) f->opt) : 0;
2579
2580 while(GF_GETC(f, c)){
2581
2582 switch(state){
2583 case TOKEN : /* collect a richtext token */
2584 if(c == '>'){ /* what should we do with it? */
2585 int off = *f->line == '/';
2586 char *token = f->line + (off ? 1 : 0);
2587 state = DFL;
2588 *f->linep = '\0';
2589 if(!strcmp("param", token)){
2590 if(off)
2591 f->f2 &= ~TEF_QUELL;
2592 else
2593 f->f2 |= TEF_QUELL;
2594 }
2595 else if(!strcmp("nofill", token)){
2596 if(off)
2597 f->f2 &= ~TEF_NOFILL;
2598 else
2599 f->f2 |= TEF_NOFILL;
2600 }
2601 else if(!plain /* gf_enriched_plain */){
2602 /* Following is a cute hack or two to get
2603 bold and underline on the screen.
2604 See Putline0n() where these codes are
2605 interpreted */
2606 if(!strcmp("bold", token)) {
2607 GF_PUTC(f->next, TAG_EMBED);
2608 GF_PUTC(f->next, off ? TAG_BOLDOFF : TAG_BOLDON);
2609 enr_bold_on = off ? 0 : 1;
2610 } else if(!strcmp("italic", token)) {
2611 GF_PUTC(f->next, TAG_EMBED);
2612 GF_PUTC(f->next, off ? TAG_ULINEOFF : TAG_ULINEON);
2613 enr_uline_on = off ? 0 : 1;
2614 } else if(!strcmp("underline", token)) {
2615 GF_PUTC(f->next, TAG_EMBED);
2616 GF_PUTC(f->next, off ? TAG_ULINEOFF : TAG_ULINEON);
2617 enr_uline_on = off ? 0 : 1;
2618 }
2619 }
2620 /* else we just ignore the token! */
2621
2622 f->linep = f->line; /* reset token buffer */
2623 }
2624 else if(c == '<'){ /* literal '<'? */
2625 if(f->linep == f->line){
2626 GF_PUTC(f->next, '<');
2627 state = DFL;
2628 }
2629 else{
2630 fs_give((void **)&(f->line));
2631 gf_error("Malformed Enriched text: unexpected '<'");
2632 /* NO RETURN */
2633 }
2634 }
2635 else{ /* add char to token */
2636 if(f->linep - f->line > 60){ /* rfc1523 says 60 MAX! */
2637 fs_give((void **)&(f->line));
2638 gf_error("Malformed Enriched text: token too long");
2639 /* NO RETURN */
2640 }
2641
2642 *(f->linep)++ = isupper((unsigned char)c) ? c-'A'+'a' : c;
2643 }
2644 break;
2645
2646 case CCR :
2647 if(c != '\012'){ /* treat as single space? */
2648 state = DFL; /* lone cr? */
2649 f->f2 &= ~TEF_QUELL;
2650 GF_PUTC(f->next, '\015');
2651 goto df;
2652 }
2653
2654 state = CLF;
2655 break;
2656
2657 case CLF :
2658 if(c == '\015'){ /* treat as single space? */
2659 state = CCR; /* repeat crlf's mean real newlines */
2660 f->f2 |= TEF_QUELL;
2661 GF_PUTC(f->next, '\r');
2662 GF_PUTC(f->next, '\n');
2663 break;
2664 }
2665 else{
2666 state = DFL;
2667 if(!((f->f2) & TEF_QUELL))
2668 GF_PUTC(f->next, ' ');
2669
2670 f->f2 &= ~TEF_QUELL;
2671 }
2672
2673 /* fall thru to take care of 'c' */
2674
2675 case DFL :
2676 default :
2677 df :
2678 if(c == '<')
2679 state = TOKEN;
2680 else if(c == '\015' && (!((f->f2) & TEF_NOFILL)))
2681 state = CCR;
2682 else if(!((f->f2) & TEF_QUELL))
2683 GF_PUTC(f->next, c);
2684
2685 break;
2686 }
2687 }
2688
2689 f->f1 = state;
2690 GF_END(f, f->next);
2691 }
2692 else if(flg == GF_EOD){
2693 if((f->f1 = (f->linep != f->line)) != 0){
2694 /* incomplete token!! */
2695 gf_error("Incomplete token in richtext");
2696 /* NO RETURN */
2697 }
2698 if(enr_uline_on){
2699 GF_PUTC(f->next, TAG_EMBED);
2700 GF_PUTC(f->next, TAG_ULINEOFF);
2701 enr_uline_on = 0;
2702 }
2703 if(enr_bold_on){
2704 GF_PUTC(f->next, TAG_EMBED);
2705 GF_PUTC(f->next, TAG_BOLDOFF);
2706 enr_bold_on = 0;
2707 }
2708
2709 /* Make sure we end with a newline so everything gets flushed */
2710 GF_PUTC(f->next, '\015');
2711 GF_PUTC(f->next, '\012');
2712
2713 fs_give((void **)&(f->line));
2714
2715 (void) GF_FLUSH(f->next);
2716 (*f->next->f)(f->next, GF_EOD);
2717 }
2718 else if(flg == GF_RESET){
2719 dprint((9, "-- gf_reset enriched2plain\n"));
2720 f->f1 = DFL; /* state */
2721 f->f2 = 0; /* set means we're in a comment */
2722 f->linep = f->line = (char *)fs_get(65 * sizeof(char));
2723 }
2724 }
2725
2726
/*
 * Package the enriched-text filter's option for the caller: the
 * address of the "plain" flag is handed back untouched as a generic
 * pointer (gf_enriched2plain reads it through f->opt as an int *).
 */
void *
gf_enriched2plain_opt(int *plain)
{
    void *opt = plain;

    return(opt);
}
2736
2737
2738
2739 /*
2740 * HTML-TO-PLAIN text filter
2741 */
2742
2743
2744 /* OK, here's the plan:
2745
2746 * a universal output function handles writing chars and worries
2747 * about wrapping.
2748
 * a universal element collector reads chars and collects params
2750 * and dispatches the appropriate element handler.
2751
2752 * element handlers are stacked. The most recently dispatched gets
2753 * first crack at the incoming character stream. It passes bytes it's
2754 * done with or not interested in to the next
2755
2756 * installs that handler as the current one collecting data...
2757
2758 * stacked handlers take their params from the element collector and
2759 * accept chars or do whatever they need to do. Sort of a vertical
2760 * piping? recursion-like? hmmm.
2761
2762 * at least I think this is how it'll work. tres simple, non?
2763
2764 */
2765
2766
/*
 * Some important constants
 *
 * NOTE(review): HTML_DOBOLD and HTML_LITERAL are both 0x0400 -- confirm
 * the two are never tested against the same value.
 */
#define HTML_BUF_LEN    2048    /* max scratch buffer length */
#define MAX_ENTITY      20      /* maximum length of an entity */
#define MAX_ELEMENT     72      /* maximum length of an element */
#define HTML_MOREDATA   0       /* expect more entity data */
#define HTML_ENTITY     1       /* valid entity collected */
#define HTML_BADVALUE   0x0100  /* good data, but bad entity value */
#define HTML_BADDATA    0x0200  /* bad data found looking for entity */
#define HTML_LITERAL    0x0400  /* Literal character value */
#define HTML_NEWLINE    0x010A  /* hard newline */
#define HTML_DOBOLD     0x0400  /* Start Bold display */
#define HTML_ID_GET     0       /* indent func: return current val */
#define HTML_ID_SET     1       /* indent func: set to absolute val */
#define HTML_ID_INC     2       /* indent func: increment by val */
#define HTML_HX_CENTER  0x0001  /* presumably: center <Hn> heading text -- confirm */
#define HTML_HX_ULINE   0x0002  /* presumably: underline <Hn> heading text -- confirm */
#define RSS_ITEM_LIMIT  20      /* RSS 2.0 ITEM depth limit */


/* types of lists that we will support (values are of type long) */
#define LIST_DECIMAL    (long) 0
#define LIST_ALPHALO    (long) 1
#define LIST_ALPHAUP    (long) 2
#define LIST_ROMANLO    (long) 3
#define LIST_ROMANUP    (long) 4
#define LIST_UNKNOWN    (long) 10
2795
/*
 * Handler data, state information including function that uses it
 *
 * One of these exists per stacked element handler; HANDLERS() yields
 * the one the HTML_TEXT_OUT macro hands characters to.
 */
typedef struct handler_s {
    FILTER_S         *html_data;    /* filter this handler belongs to */
    void             *element;      /* read as an ELPROP_S * by EL() */
    long              x, y, z;      /* presumably per-handler scratch values -- confirm */
    void             *dp;           /* presumably per-handler private data -- confirm */
    unsigned char    *s;            /* presumably per-handler string storage -- confirm */
    struct handler_s *below;        /* presumably next handler down the stack -- confirm */
} HANDLER_S;
2807
/*
 * Element Property structure
 *
 * Describes one markup element: its name and the handler that is
 * invoked for it.  HTML_TEXT_OUT calls the handler with the handler
 * record, a character and a command (GF_DATA there).
 */
typedef struct _element_properties {
    char       *element;            /* element name */
    size_t      len;                /* presumably strlen(element) -- confirm */
    int       (*handler)(HANDLER_S *, int, int);
    unsigned    blocklevel:1;       /* presumably a block-level element -- confirm */
    unsigned    alternate:1;        /* NOTE(review): meaning not visible here */
} ELPROP_S;
2818
/*
 * Types used to manage HTML parsing
 *
 * html_handoff is file-local (static); this is only its forward
 * declaration, the definition appears later in this file.
 */
static void     html_handoff(HANDLER_S *, int);
2823
2824
/*
 * to help manage line wrapping.
 *
 * A growable text buffer that tracks both how many chars it holds and
 * how wide that text is when displayed.
 */
typedef struct _wrap_line {
    char *buf;                          /* buf to collect wrapped text */
    int   used,                         /* number of chars in buf */
          width,                        /* text's width as displayed */
          len;                          /* length of allocated buf */
} WRAPLINE_S;
2834
2835
/*
 * to help manage centered text
 *
 * WRAPPED_LEN() adds line.width and word.width (plus one when both
 * are non-zero) to get the pending width.
 */
typedef struct _center_s {
    WRAPLINE_S line;                    /* buf to assemble centered text */
    WRAPLINE_S word;                    /* word being appended to line */
    int        anchor;                  /* NOTE(review): meaning not visible here */
    short      space;                   /* NOTE(review): meaning not visible here */
} CENTER_S;
2845
2846
/*
 * Collector data and state information
 *
 * Allocated and zeroed by NEW_CLCTR when a '<' is seen, released
 * (together with the element name and attribute list) by FREE_CLCTR.
 */
typedef struct collector_s {
    char        buf[HTML_BUF_LEN];      /* buffer to collect data */
    int         len;                    /* number of chars collected in buf
                                           (HTML_PROC dumps buf[0..len-1]) */
    unsigned    unquoted_data:1;        /* parameter is not quoted... */
    unsigned    end_tag:1;              /* collecting a closing tag */
    unsigned    hit_equal:1;            /* collecting right half of attrib */
    unsigned    mkup_decl:1;            /* markup declaration */
    unsigned    start_comment:1;        /* markup declaration comment */
    unsigned    end_comment:1;          /* legit comment format */
    unsigned    hyphen:1;               /* markup hyphen read */
    unsigned    badform:1;              /* malformed markup element */
    unsigned    overrun:1;              /* Overran buf above */
    unsigned    proc_inst:1;            /* XML processing instructions */
    unsigned    empty:1;                /* empty element */
    unsigned    was_quoted:1;           /* basically to catch null string */
    char        quoted;                 /* quoted element param value */
    char       *element;                /* element's collected name */
    PARAMETER  *attribs;                /* element's collected attributes */
    PARAMETER  *cur_attrib;             /* attribute now being collected */
} CLCTR_S;
2870
2871
/*
 * State information for all element handlers
 *
 * Reached from a filter through the HD() macro, i.e. it is what the
 * filter's data pointer refers to.
 */
typedef struct html_data {
    HANDLER_S  *h_stack;                /* handler list */
    CLCTR_S    *el_data;                /* element collector data */
    CENTER_S   *centered;               /* struct to manage centered text */
    int       (*token)(FILTER_S *, int);/* active collector; set to
                                           html_element_collector by NEW_CLCTR,
                                           cleared by FREE_CLCTR */
    char        quoted;                 /* quoted, by either ' or ", text */
    short       indent_level;           /* levels of indention */
    int         in_anchor;              /* text now being written to anchor */
    int         blanks;                 /* Consecutive blank line count */
    int         wrapcol;                /* column to wrap lines on */
    int        *prefix;                 /* buffer containing Anchor prefix */
    int         prefix_used;            /* presumably entries used in prefix -- confirm */
    long        line_bufsize;           /* current size of the line buffer */
    COLOR_PAIR *color;
    struct {
        int     state;                  /* embedded data state */
        char   *color;                  /* embedded color pointer */
    } embedded;
    CBUF_S      cb;                     /* utf8->ucs4 conversion state */
    unsigned    wrapstate:1;            /* whether or not to wrap output */
    unsigned    li_pending:1;           /* <LI> next token expected */
    unsigned    de_pending:1;           /* <DT> or <DD> next token expected */
    unsigned    bold_on:1;              /* currently bolding text */
    unsigned    uline_on:1;             /* currently underlining text */
    unsigned    center:1;               /* center output text */
    unsigned    bitbucket:1;            /* Ignore input */
    unsigned    head:1;                 /* In doc's HEAD */
    unsigned    body:1;                 /* In doc's BODY */
    unsigned    alt_entity:1;           /* use alternative entity values */
    unsigned    wrote:1;                /* anything written yet? */
} HTML_DATA_S;
2906
2907
/*
 * HTML filter options
 *
 * Reached from a filter through its opt pointer; most accessor macros
 * fall back to a default when that pointer is NULL.
 */
typedef struct _html_opts {
    char        *base;                  /* Base URL for this html file */
    int          columns,               /* Display columns (excluding margins) */
                 indent;                /* Left margin */
    HANDLE_S   **handlesp;              /* Head of handles */
    htmlrisk_t   warnrisk_f;            /* Nasty link warning call */
    ELPROP_S    *element_table;         /* markup element table */
    RSS_FEED_S **feedp;                 /* hook for RSS feed response */
    unsigned     strip:1;               /* Hilite TAGs allowed */
                                        /* NOTE(review): HTML_BOLD and friends emit
                                           tags only while this bit is CLEAR */
    unsigned     handles_loc:1;         /* Local handles requested? */
    unsigned     showserver:1;          /* Display server after anchors */
    unsigned     outputted:1;           /* any */
    unsigned     no_relative_links:1;   /* Disable embedded relative links */
    unsigned     related_content:1;     /* Embedded related content */
    unsigned     html:1;                /* Output content in HTML */
    unsigned     html_imgs:1;           /* Output IMG tags in HTML content */
} HTML_OPT_S;
2928
2929
2930
/*
 * Some macros to make life a little easier
 *
 * X is a FILTER_S pointer unless noted.  Macros that read an option
 * test (X)->opt first and yield a default (80 columns, 0, NULL or
 * false) when no options were supplied -- except HANDLESP, ELEMENTS
 * and RSS_FEED, which dereference (X)->opt unconditionally.
 */
#define WRAP_COLS(X)    ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->columns : 80)
#define HTML_INDENT(X)  ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->indent : 0)
#define HTML_WROTE(X)   (HD(X)->wrote)
#define HTML_BASE(X)    ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->base : NULL)
#define STRIP(X)        ((X)->opt && ((HTML_OPT_S *)(X)->opt)->strip)
#define PASS_HTML(X)    ((X)->opt && ((HTML_OPT_S *)(X)->opt)->html)
#define PASS_IMAGES(X)  ((X)->opt && ((HTML_OPT_S *)(X)->opt)->html_imgs)
#define HANDLESP(X)     (((HTML_OPT_S *)(X)->opt)->handlesp)
#define DO_HANDLES(X)   ((X)->opt && HANDLESP(X))
#define HANDLES_LOC(X)  ((X)->opt && ((HTML_OPT_S *)(X)->opt)->handles_loc)
#define SHOWSERVER(X)   ((X)->opt && ((HTML_OPT_S *)(X)->opt)->showserver)
#define NO_RELATIVE(X)  ((X)->opt && ((HTML_OPT_S *)(X)->opt)->no_relative_links)
#define RELATED_OK(X)   ((X)->opt && ((HTML_OPT_S *)(X)->opt)->related_content)
#define ELEMENTS(X)     (((HTML_OPT_S *)(X)->opt)->element_table)
#define RSS_FEED(X)     (*(((HTML_OPT_S *)(X)->opt)->feedp))
/* tag the low 8 bits of C as a literal character / test for that tag */
#define MAKE_LITERAL(C) (HTML_LITERAL | ((C) & 0xff))
#define IS_LITERAL(C)   (HTML_LITERAL & (C))
/* filter's HTML state, its element collector, a handler's element props */
#define HD(X)           ((HTML_DATA_S *)(X)->data)
#define ED(X)           (HD(X)->el_data)
#define EL(X)           ((ELPROP_S *) (X)->element)
/* isspace() restricted to values below 0x80 */
#define ASCII_ISSPACE(C) ((C) < 0x80 && isspace((unsigned char) (C)))
/* white space for wrapping purposes: never a literal, but HTML_NEWLINE counts */
#define HTML_ISSPACE(C) (IS_LITERAL(C) == 0 && ((C) == HTML_NEWLINE || ASCII_ISSPACE(C)))
/*
 * NEW_CLCTR: allocate a zeroed element collector for filter X and
 * install html_element_collector as the filter's token function.
 */
#define NEW_CLCTR(X)    {                                               \
                           ED(X) = (CLCTR_S *)fs_get(sizeof(CLCTR_S));  \
                           memset(ED(X), 0, sizeof(CLCTR_S));           \
                           HD(X)->token = html_element_collector;       \
                         }

/*
 * FREE_CLCTR: release filter X's element collector -- every attribute
 * (name, value and list node), the element name, then the collector
 * itself -- and clear the token function.  ED(X) must be non-NULL.
 */
#define FREE_CLCTR(X)   {                                               \
                           if(ED(X)->attribs){                          \
                               PARAMETER *p;                            \
                               while((p = ED(X)->attribs) != NULL){     \
                                   ED(X)->attribs = ED(X)->attribs->next; \
                                   if(p->attribute)                     \
                                     fs_give((void **)&p->attribute);   \
                                   if(p->value)                         \
                                     fs_give((void **)&p->value);       \
                                   fs_give((void **)&p);                \
                               }                                        \
                           }                                            \
                           if(ED(X)->element)                           \
                             fs_give((void **) &ED(X)->element);        \
                           fs_give((void **) &ED(X));                   \
                           HD(X)->token = NULL;                         \
                         }
/* shorthands for fields of the filter's HTML_DATA_S */
#define HANDLERS(X)     (HD(X)->h_stack)
#define BOLD_BIT(X)     (HD(X)->bold_on)
#define ULINE_BIT(X)    (HD(X)->uline_on)
#define CENTER_BIT(X)   (HD(X)->center)
/*
 * HTML_FLUSH: pass what has accumulated in X's line buffer to
 * html_write(), then mark the buffer empty and zero (X)->f2.
 */
#define HTML_FLUSH(X)   {                                               \
                           html_write(X, (X)->line, (X)->linep - (X)->line); \
                           (X)->linep = (X)->line;                      \
                           (X)->f2 = 0L;                                \
                         }
/*
 * Highlighting macros.  Unless STRIP(X) is true, each one sends
 * TAG_EMBED to html_output() followed by the attribute's ON tag when
 * S is non-zero, or its OFF tag when S is zero.
 *
 * S is evaluated exactly once.  X is evaluated several times, so it
 * must not have side effects.  Each macro expands to a complete
 * if-statement: do not use one as the unbraced body of an if that
 * has an else.
 */
#define HTML_BOLD(X, S) if(! STRIP(X)){                                 \
                           if((S)){                                     \
                               html_output((X), TAG_EMBED);             \
                               html_output((X), TAG_BOLDON);            \
                           }                                            \
                           else{                                        \
                               html_output((X), TAG_EMBED);             \
                               html_output((X), TAG_BOLDOFF);           \
                           }                                            \
                         }
#define HTML_ULINE(X, S)                                                \
                         if(! STRIP(X)){                                \
                           if((S)){                                     \
                               html_output((X), TAG_EMBED);             \
                               html_output((X), TAG_ULINEON);           \
                           }                                            \
                           else{                                        \
                               html_output((X), TAG_EMBED);             \
                               html_output((X), TAG_ULINEOFF);          \
                           }                                            \
                         }
#define HTML_ITALIC(X, S)                                               \
                         if(! STRIP(X)){                                \
                           if((S)){                                     \
                               html_output((X), TAG_EMBED);             \
                               html_output((X), TAG_ITALICON);          \
                           }                                            \
                           else{                                        \
                               html_output((X), TAG_EMBED);             \
                               html_output((X), TAG_ITALICOFF);         \
                           }                                            \
                         }
#define HTML_STRIKE(X, S)                                               \
                         if(! STRIP(X)){                                \
                           if((S)){                                     \
                               html_output((X), TAG_EMBED);             \
                               html_output((X), TAG_STRIKEON);          \
                           }                                            \
                           else{                                        \
                               html_output((X), TAG_EMBED);             \
                               html_output((X), TAG_STRIKEOFF);         \
                           }                                            \
                         }
#define HTML_BIG(X, S)                                                  \
                         if(! STRIP(X)){                                \
                           if((S)){                                     \
                               html_output((X), TAG_EMBED);             \
                               html_output((X), TAG_BIGON);             \
                           }                                            \
                           else{                                        \
                               html_output((X), TAG_EMBED);             \
                               html_output((X), TAG_BIGOFF);            \
                           }                                            \
                         }
#define HTML_SMALL(X, S)                                                \
                         if(! STRIP(X)){                                \
                           if((S)){                                     \
                               html_output((X), TAG_EMBED);             \
                               html_output((X), TAG_SMALLON);           \
                           }                                            \
                           else{                                        \
                               html_output((X), TAG_EMBED);             \
                               html_output((X), TAG_SMALLOFF);          \
                           }                                            \
                         }
/*
 * WRAPPED_LEN: display width of the centered text pending for filter
 * X -- the line's width plus the current word's width, plus one for
 * the separating space when both are non-empty.  Yields 0 when X has
 * no centering state.  The macro argument is used throughout; it no
 * longer depends on a variable named "f" being in scope.
 */
#define WRAPPED_LEN(X)  ((HD(X)->centered)                              \
                            ? (HD(X)->centered->line.width              \
                                + HD(X)->centered->word.width           \
                                + ((HD(X)->centered->line.width         \
                                    && HD(X)->centered->word.width)     \
                                   ? 1 : 0))                            \
                            : 0)
/*
 * HTML_DUMP_LIT: feed the L bytes at S to HTML_TEXT for filter F.
 * ASCII white space goes through unchanged, everything else is marked
 * with MAKE_LITERAL first.  Declares its own "i" and "c", so S and L
 * must not refer to caller variables with those names.
 */
#define HTML_DUMP_LIT(F, S, L)  {                                       \
                           int i, c;                                    \
                           for(i = 0; i < (L); i++){                    \
                               c = ASCII_ISSPACE((unsigned char)(S)[i]) \
                                     ? (S)[i]                           \
                                     : MAKE_LITERAL((S)[i]);            \
                               HTML_TEXT(F, c);                         \
                           }                                            \
                         }
/*
 * HTML_PROC: process one input character C for filter F.
 *  - While a token collector is installed, C goes to it.  A non-zero
 *    return ends collection and frees the collector; a negative
 *    return additionally writes "<", the element name and the
 *    collected buffer back out as literal text, followed by C.
 *  - Otherwise '<' starts a new collector and any other character is
 *    ordinary text.
 */
#define HTML_PROC(F, C) {                                               \
                           if(HD(F)->token){                            \
                               int i;                                   \
                               if((i = (*(HD(F)->token))(F, C)) != 0){  \
                                   if(i < 0){                           \
                                       HTML_DUMP_LIT(F, "<", 1);        \
                                       if(HD(F)->el_data->element){     \
                                           HTML_DUMP_LIT(F,             \
                                            HD(F)->el_data->element,    \
                                            strlen(HD(F)->el_data->element));\
                                       }                                \
                                       if(HD(F)->el_data->len){         \
                                           HTML_DUMP_LIT(F,             \
                                                    HD(F)->el_data->buf, \
                                                    HD(F)->el_data->len); \
                                       }                                \
                                       HTML_TEXT(F, C);                 \
                                   }                                    \
                                   FREE_CLCTR(F);                       \
                               }                                        \
                           }                                            \
                           else if((C) == '<'){                         \
                               NEW_CLCTR(F);                            \
                           }                                            \
                           else                                         \
                             HTML_TEXT(F, C);                           \
                         }
/*
 * HTML_LINEP_PUTC: append C to F's line buffer, first doubling the
 * buffer (and line_bufsize) when fewer than two free bytes remain.
 * linep is re-derived from its offset because fs_resize() is handed
 * the address of the buffer pointer and may change it.
 */
#define HTML_LINEP_PUTC(F, C) {                                         \
                   if((F)->linep - (F)->line >= (HD(F)->line_bufsize - 1)){ \
                       size_t offset = (F)->linep - (F)->line;          \
                       fs_resize((void **) &(F)->line,                  \
                                 (HD(F)->line_bufsize * 2) * sizeof(char)); \
                       HD(F)->line_bufsize *= 2;                        \
                       (F)->linep = &(F)->line[offset];                 \
                   }                                                    \
                   *(F)->linep++ = (C);                                 \
               }
/*
 * HTML_TEXT: white-space coalescing for text character C, driven by
 * (F)->f1.  In WSPACE further white space is dropped and the first
 * other character is preceded by a single ' '.  In DFL input is
 * discarded while bitbucket is set; white space switches to WSPACE
 * only while wrapstate is set.  States other than WSPACE and DFL
 * match no case and do nothing.
 */
#define HTML_TEXT(F, C) switch((F)->f1){                                \
                             case WSPACE :                              \
                               if(HTML_ISSPACE(C)) /* ignore repeated WS */ \
                                 break;                                 \
                               HTML_TEXT_OUT(F, ' ');                   \
                               (F)->f1 = DFL;/* stop sending chars here */ \
                               /* fall thru to process 'c' */           \
                             case DFL:                                  \
                               if(HD(F)->bitbucket)                     \
                                 (F)->f1 = DFL; /* no op */             \
                               else if(HTML_ISSPACE(C) && HD(F)->wrapstate) \
                                 (F)->f1 = WSPACE;/* coalesce white space */ \
                               else HTML_TEXT_OUT(F, C);                \
                               break;                                   \
                         }
/*
 * HTML_TEXT_OUT: give C to the top handler on F's stack if there is
 * one, otherwise straight to html_output().  Expands to an unbraced
 * if/else that already ends in ';'.
 */
#define HTML_TEXT_OUT(F, C) if(HANDLERS(F)) /* let handlers see C */    \
                             (*EL(HANDLERS(F))->handler)(HANDLERS(F),(C),GF_DATA); \
                           else                                         \
                             html_output(F, C);
/*
 * HTML_DEBUG_EL: with DEBUG defined, log label S and the element name
 * of collector D at debug level 5 and, when "debug" exceeds 5, each
 * attribute (with "=value" if it has one) at level 6.  Without DEBUG
 * it expands to nothing.
 */
#ifdef  DEBUG
#define HTML_DEBUG_EL(S, D)   {                                         \
                                 dprint((5, "-- html %s: %s\n",         \
                                         S ? S : "?",                   \
                                         (D)->element                   \
                                           ? (D)->element : "NULL"));   \
                                 if(debug > 5){                         \
                                     PARAMETER *p;                      \
                                     for(p = (D)->attribs;              \
                                         p && p->attribute;             \
                                         p = p->next)                   \
                                       dprint((6,                       \
                                               " PARM: %s%s%s\n",       \
                                               p->attribute             \
                                                 ? p->attribute : "NULL",\
                                               p->value ? "=" : "",     \
                                               p->value ? p->value : ""));\
                                 }                                      \
                               }
#else
#define HTML_DEBUG_EL(S, D)
#endif

/* default location of pine.info, unless the build supplies one */
#ifndef SYSTEM_PINE_INFO_PATH
#define SYSTEM_PINE_INFO_PATH   "/usr/local/lib/pine.info"
#endif
/* map the name "PINE_INFO_PATH" to that path; any other S is returned as is */
#define CHTML_VAR_EXPAND(S)     (!strcmp(S, "PINE_INFO_PATH")           \
                                     ? SYSTEM_PINE_INFO_PATH : S)
3153
/*
 * Protos for Tag handlers
 *
 * All handlers share the signature stored in ELPROP_S.handler; the
 * HTML_TEXT_OUT macro calls one as (handler record, character,
 * GF_DATA).
 */
int     html_head(HANDLER_S *, int, int);
int     html_base(HANDLER_S *, int, int);
int     html_title(HANDLER_S *, int, int);
int     html_body(HANDLER_S *, int, int);
int     html_a(HANDLER_S *, int, int);
int     html_br(HANDLER_S *, int, int);
int     html_hr(HANDLER_S *, int, int);
int     html_p(HANDLER_S *, int, int);
int     html_table(HANDLER_S *, int, int);
int     html_caption(HANDLER_S *, int, int);
int     html_tr(HANDLER_S *, int, int);
int     html_td(HANDLER_S *, int, int);
int     html_th(HANDLER_S *, int, int);
int     html_thead(HANDLER_S *, int, int);
int     html_tbody(HANDLER_S *, int, int);
int     html_tfoot(HANDLER_S *, int, int);
int     html_col(HANDLER_S *, int, int);
int     html_colgroup(HANDLER_S *, int, int);
int     html_b(HANDLER_S *, int, int);
int     html_u(HANDLER_S *, int, int);
int     html_i(HANDLER_S *, int, int);
int     html_em(HANDLER_S *, int, int);
int     html_strong(HANDLER_S *, int, int);
int     html_s(HANDLER_S *, int, int);
int     html_big(HANDLER_S *, int, int);
int     html_small(HANDLER_S *, int, int);
int     html_font(HANDLER_S *, int, int);
int     html_img(HANDLER_S *, int, int);
int     html_map(HANDLER_S *, int, int);
int     html_area(HANDLER_S *, int, int);
int     html_form(HANDLER_S *, int, int);
int     html_input(HANDLER_S *, int, int);
int     html_option(HANDLER_S *, int, int);
int     html_optgroup(HANDLER_S *, int, int);
int     html_button(HANDLER_S *, int, int);
int     html_select(HANDLER_S *, int, int);
int     html_textarea(HANDLER_S *, int, int);
int     html_label(HANDLER_S *, int, int);
int     html_fieldset(HANDLER_S *, int, int);
int     html_ul(HANDLER_S *, int, int);
int     html_ol(HANDLER_S *, int, int);
int     html_menu(HANDLER_S *, int, int);
int     html_dir(HANDLER_S *, int, int);
int     html_li(HANDLER_S *, int, int);
int     html_h1(HANDLER_S *, int, int);
int     html_h2(HANDLER_S *, int, int);
int     html_h3(HANDLER_S *, int, int);
int     html_h4(HANDLER_S *, int, int);
int     html_h5(HANDLER_S *, int, int);
int     html_h6(HANDLER_S *, int, int);
int     html_blockquote(HANDLER_S *, int, int);
int     html_address(HANDLER_S *, int, int);
int     html_pre(HANDLER_S *, int, int);
int     html_center(HANDLER_S *, int, int);
int     html_div(HANDLER_S *, int, int);
int     html_span(HANDLER_S *, int, int);
int     html_dl(HANDLER_S *, int, int);
int     html_dt(HANDLER_S *, int, int);
int     html_dd(HANDLER_S *, int, int);
int     html_script(HANDLER_S *, int, int);
int     html_applet(HANDLER_S *, int, int);
int     html_style(HANDLER_S *, int, int);
int     html_kbd(HANDLER_S *, int, int);
int     html_dfn(HANDLER_S *, int, int);
int     html_var(HANDLER_S *, int, int);
int     html_tt(HANDLER_S *, int, int);
int     html_samp(HANDLER_S *, int, int);
int     html_sub(HANDLER_S *, int, int);
int     html_sup(HANDLER_S *, int, int);
int     html_cite(HANDLER_S *, int, int);
int     html_code(HANDLER_S *, int, int);
int     html_ins(HANDLER_S *, int, int);
int     html_del(HANDLER_S *, int, int);
int     html_abbr(HANDLER_S *, int, int);
/* not a tag handler: NOTE(review) purpose not visible from this declaration */
char   *cid_tempfile_name(char *, long, int *);
3232
/*
 * Protos for RSS 2.0 Tag handlers
 *
 * Same signature as the HTML tag handlers.
 */
int     rss_rss(HANDLER_S *, int, int);
int     rss_channel(HANDLER_S *, int, int);
int     rss_title(HANDLER_S *, int, int);
int     rss_image(HANDLER_S *, int, int);
int     rss_link(HANDLER_S *, int, int);
int     rss_description(HANDLER_S *, int, int);
int     rss_ttl(HANDLER_S *, int, int);
int     rss_item(HANDLER_S *, int, int);
3244
/*
 * Proto's for support routines
 *
 * Of these, html_element_collector is installed as the token function
 * by NEW_CLCTR, html_output receives the highlighting tags and plain
 * text, and html_write is what HTML_FLUSH hands the line buffer to.
 */
void      html_pop(FILTER_S *, ELPROP_S *);
int       html_push(FILTER_S *, ELPROP_S *);
int       html_element_collector(FILTER_S *, int);
int       html_element_flush(CLCTR_S *);
void      html_element_comment(FILTER_S *, char *);
void      html_element_output(FILTER_S *, int);
int       html_entity_collector(FILTER_S *, int, UCS *, char **);
void      html_a_prefix(FILTER_S *);
void      html_a_finish(HANDLER_S *);
void      html_a_output_prefix(FILTER_S *, int);
void      html_a_output_info(HANDLER_S *);
void      html_a_relative(char *, char *, HANDLE_S *);
int       html_href_relative(char *);
int       html_indent(FILTER_S *, int, int);
void      html_blank(FILTER_S *, int);
void      html_newline(FILTER_S *);
void      html_output(FILTER_S *, int);
void      html_output_string(FILTER_S *, char *);
void      html_output_raw_tag(FILTER_S *, char *);
void      html_output_normal(FILTER_S *, int, int, int);
void      html_output_flush(FILTER_S *);
void      html_output_centered(FILTER_S *, int, int, int);
void      html_centered_handle(int *, char *, int);
void      html_centered_putc(WRAPLINE_S *, int);
void      html_centered_flush(FILTER_S *);
void      html_centered_flush_line(FILTER_S *);
void      html_write_anchor(FILTER_S *, int);
void      html_write_newline(FILTER_S *);
void      html_write_indent(FILTER_S *, int);
void      html_write(FILTER_S *, char *, int);
void      html_putc(FILTER_S *, int);
int       html_event_attribute(char *);
char     *rss_skip_whitespace(char *s);
ELPROP_S *element_properties(FILTER_S *, char *);
3283
3284 /*
3285 * Named entity table -- most from HTML 2.0 (rfc1866) plus some from
3286 * W3C doc "Additional named entities for HTML"
3287 */
3288 static struct html_entities {
3289 char *name; /* entity name */
3290 UCS value; /* UCS entity value */
3291 char *plain; /* US-ASCII representation */
3292 } entity_tab[] = {
3293 {"quot", 0x0022}, /* 34 - quotation mark */
3294 {"amp", 0x0026}, /* 38 - ampersand */
3295 {"apos", 0x0027}, /* 39 - apostrophe */
3296 {"lt", 0x003C}, /* 60 - less-than sign */
3297 {"gt", 0x003E}, /* 62 - greater-than sign */
3298 {"nbsp", 0x00A0, " "}, /* 160 - no-break space */
3299 {"iexcl", 0x00A1}, /* 161 - inverted exclamation mark */
3300 {"cent", 0x00A2}, /* 162 - cent sign */
3301 {"pound", 0x00A3}, /* 163 - pound sign */
3302 {"curren", 0x00A4, "CUR"}, /* 164 - currency sign */
3303 {"yen", 0x00A5}, /* 165 - yen sign */
3304 {"brvbar", 0x00A6, "|"}, /* 166 - broken bar */
3305 {"sect", 0x00A7}, /* 167 - section sign */
3306 {"uml", 0x00A8, "\""}, /* 168 - diaeresis */
3307 {"copy", 0x00A9, "(C)"}, /* 169 - copyright sign */
3308 {"ordf", 0x00AA, "a"}, /* 170 - feminine ordinal indicator */
3309 {"laquo", 0x00AB, "<<"}, /* 171 - left-pointing double angle quotation mark */
3310 {"not", 0x00AC, "NOT"}, /* 172 - not sign */
3311 {"shy", 0x00AD, "-"}, /* 173 - soft hyphen */
3312 {"reg", 0x00AE, "(R)"}, /* 174 - registered sign */
3313 {"macr", 0x00AF}, /* 175 - macron */
3314 {"deg", 0x00B0, "DEG"}, /* 176 - degree sign */
3315 {"plusmn", 0x00B1, "+/-"}, /* 177 - plus-minus sign */
3316 {"sup2", 0x00B2}, /* 178 - superscript two */
3317 {"sup3", 0x00B3}, /* 179 - superscript three */
3318 {"acute", 0x00B4, "'"}, /* 180 - acute accent */
3319 {"micro", 0x00B5}, /* 181 - micro sign */
3320 {"para", 0x00B6}, /* 182 - pilcrow sign */
3321 {"middot", 0x00B7}, /* 183 - middle dot */
3322 {"cedil", 0x00B8}, /* 184 - cedilla */
3323 {"sup1", 0x00B9}, /* 185 - superscript one */
3324 {"ordm", 0x00BA, "o"}, /* 186 - masculine ordinal indicator */
3325 {"raquo", 0x00BB, ">>"}, /* 187 - right-pointing double angle quotation mark */
3326 {"frac14", 0x00BC, " 1/4"}, /* 188 - vulgar fraction one quarter */
3327 {"frac12", 0x00BD, " 1/2"}, /* 189 - vulgar fraction one half */
3328 {"frac34", 0x00BE, " 3/4"}, /* 190 - vulgar fraction three quarters */
3329 {"iquest", 0x00BF}, /* 191 - inverted question mark */
3330 {"Agrave", 0x00C0, "A"}, /* 192 - latin capital letter a with grave */
3331 {"Aacute", 0x00C1, "A"}, /* 193 - latin capital letter a with acute */
3332 {"Acirc", 0x00C2, "A"}, /* 194 - latin capital letter a with circumflex */
3333 {"Atilde", 0x00C3, "A"}, /* 195 - latin capital letter a with tilde */
3334 {"Auml", 0x00C4, "AE"}, /* 196 - latin capital letter a with diaeresis */
3335 {"Aring", 0x00C5, "A"}, /* 197 - latin capital letter a with ring above */
3336 {"AElig", 0x00C6, "AE"}, /* 198 - latin capital letter ae */
3337 {"Ccedil", 0x00C7, "C"}, /* 199 - latin capital letter c with cedilla */
3338 {"Egrave", 0x00C8, "E"}, /* 200 - latin capital letter e with grave */
3339 {"Eacute", 0x00C9, "E"}, /* 201 - latin capital letter e with acute */
3340 {"Ecirc", 0x00CA, "E"}, /* 202 - latin capital letter e with circumflex */
3341 {"Euml", 0x00CB, "E"}, /* 203 - latin capital letter e with diaeresis */
3342 {"Igrave", 0x00CC, "I"}, /* 204 - latin capital letter i with grave */
3343 {"Iacute", 0x00CD, "I"}, /* 205 - latin capital letter i with acute */
3344 {"Icirc", 0x00CE, "I"}, /* 206 - latin capital letter i with circumflex */
3345 {"Iuml", 0x00CF, "I"}, /* 207 - latin capital letter i with diaeresis */
3346 {"ETH", 0x00D0, "DH"}, /* 208 - latin capital letter eth */
3347 {"Ntilde", 0x00D1, "N"}, /* 209 - latin capital letter n with tilde */
3348 {"Ograve", 0x00D2, "O"}, /* 210 - latin capital letter o with grave */
3349 {"Oacute", 0x00D3, "O"}, /* 211 - latin capital letter o with acute */
3350 {"Ocirc", 0x00D4, "O"}, /* 212 - latin capital letter o with circumflex */
3351 {"Otilde", 0x00D5, "O"}, /* 213 - latin capital letter o with tilde */
3352 {"Ouml", 0x00D6, "O"}, /* 214 - latin capital letter o with diaeresis */
3353 {"times", 0x00D7, "x"}, /* 215 - multiplication sign */
3354 {"Oslash", 0x00D8, "O"}, /* 216 - latin capital letter o with stroke */
3355 {"Ugrave", 0x00D9, "U"}, /* 217 - latin capital letter u with grave */
3356 {"Uacute", 0x00DA, "U"}, /* 218 - latin capital letter u with acute */
3357 {"Ucirc", 0x00DB, "U"}, /* 219 - latin capital letter u with circumflex */
3358 {"Uuml", 0x00DC, "UE"}, /* 220 - latin capital letter u with diaeresis */
3359 {"Yacute", 0x00DD, "Y"}, /* 221 - latin capital letter y with acute */
3360 {"THORN", 0x00DE, "P"}, /* 222 - latin capital letter thorn */
3361 {"szlig", 0x00DF, "ss"}, /* 223 - latin small letter sharp s (German <a href="/wiki/Eszett" title="Eszett">Eszett</a>) */
3362 {"agrave", 0x00E0, "a"}, /* 224 - latin small letter a with grave */
3363 {"aacute", 0x00E1, "a"}, /* 225 - latin small letter a with acute */
3364 {"acirc", 0x00E2, "a"}, /* 226 - latin small letter a with circumflex */
3365 {"atilde", 0x00E3, "a"}, /* 227 - latin small letter a with tilde */
3366 {"auml", 0x00E4, "ae"}, /* 228 - latin small letter a with diaeresis */
3367 {"aring", 0x00E5, "a"}, /* 229 - latin small letter a with ring above */
3368 {"aelig", 0x00E6, "ae"}, /* 230 - latin lowercase ligature ae */
3369 {"ccedil", 0x00E7, "c"}, /* 231 - latin small letter c with cedilla */
3370 {"egrave", 0x00E8, "e"}, /* 232 - latin small letter e with grave */
3371 {"eacute", 0x00E9, "e"}, /* 233 - latin small letter e with acute */
3372 {"ecirc", 0x00EA, "e"}, /* 234 - latin small letter e with circumflex */
3373 {"euml", 0x00EB, "e"}, /* 235 - latin small letter e with diaeresis */
3374 {"igrave", 0x00EC, "i"}, /* 236 - latin small letter i with grave */
3375 {"iacute", 0x00ED, "i"}, /* 237 - latin small letter i with acute */
3376 {"icirc", 0x00EE, "i"}, /* 238 - latin small letter i with circumflex */
3377 {"iuml", 0x00EF, "i"}, /* 239 - latin small letter i with diaeresis */
3378 {"eth", 0x00F0, "dh"}, /* 240 - latin small letter eth */
3379 {"ntilde", 0x00F1, "n"}, /* 241 - latin small letter n with tilde */
3380 {"ograve", 0x00F2, "o"}, /* 242 - latin small letter o with grave */
3381 {"oacute", 0x00F3, "o"}, /* 243 - latin small letter o with acute */
3382 {"ocirc", 0x00F4, "o"}, /* 244 - latin small letter o with circumflex */
3383 {"otilde", 0x00F5, "o"}, /* 245 - latin small letter o with tilde */
3384 {"ouml", 0x00F6, "oe"}, /* 246 - latin small letter o with diaeresis */
3385 {"divide", 0x00F7, "/"}, /* 247 - division sign */
3386 {"oslash", 0x00F8, "o"}, /* 248 - latin small letter o with stroke */
3387 {"ugrave", 0x00F9, "u"}, /* 249 - latin small letter u with grave */
3388 {"uacute", 0x00FA, "u"}, /* 250 - latin small letter u with acute */
3389 {"ucirc", 0x00FB, "u"}, /* 251 - latin small letter u with circumflex */
3390 {"uuml", 0x00FC, "ue"}, /* 252 - latin small letter u with diaeresis */
3391 {"yacute", 0x00FD, "y"}, /* 253 - latin small letter y with acute */
3392 {"thorn", 0x00FE, "p"}, /* 254 - latin small letter thorn */
3393 {"yuml", 0x00FF, "y"}, /* 255 - latin small letter y with diaeresis */
3394 {"OElig", 0x0152, "OE"}, /* 338 - latin capital ligature oe */
3395 {"oelig", 0x0153, "oe"}, /* 339 - latin small ligature oe */
3396 {"Scaron", 0x0160, "S"}, /* 352 - latin capital letter s with caron */
3397 {"scaron", 0x0161, "s"}, /* 353 - latin small letter s with caron */
3398 {"Yuml", 0x0178, "Y"}, /* 376 - latin capital letter y with diaeresis */
3399 {"fnof", 0x0192, "f"}, /* 402 - latin small letter f with hook */
3400 {"circ", 0x02C6}, /* 710 - modifier letter circumflex accent */
3401 {"tilde", 0x02DC, "~"}, /* 732 - small tilde */
3402 {"Alpha", 0x0391}, /* 913 - greek capital letter alpha */
3403 {"Beta", 0x0392}, /* 914 - greek capital letter beta */
3404 {"Gamma", 0x0393}, /* 915 - greek capital letter gamma */
3405 {"Delta", 0x0394}, /* 916 - greek capital letter delta */
3406 {"Epsilon", 0x0395}, /* 917 - greek capital letter epsilon */
3407 {"Zeta", 0x0396}, /* 918 - greek capital letter zeta */
3408 {"Eta", 0x0397}, /* 919 - greek capital letter eta */
3409 {"Theta", 0x0398}, /* 920 - greek capital letter theta */
3410 {"Iota", 0x0399}, /* 921 - greek capital letter iota */
3411 {"Kappa", 0x039A}, /* 922 - greek capital letter kappa */
3412 {"Lambda", 0x039B}, /* 923 - greek capital letter lamda */
3413 {"Mu", 0x039C}, /* 924 - greek capital letter mu */
3414 {"Nu", 0x039D}, /* 925 - greek capital letter nu */
3415 {"Xi", 0x039E}, /* 926 - greek capital letter xi */
3416 {"Omicron", 0x039F}, /* 927 - greek capital letter omicron */
3417 {"Pi", 0x03A0}, /* 928 - greek capital letter pi */
3418 {"Rho", 0x03A1}, /* 929 - greek capital letter rho */
3419 {"Sigma", 0x03A3}, /* 931 - greek capital letter sigma */
3420 {"Tau", 0x03A4}, /* 932 - greek capital letter tau */
3421 {"Upsilon", 0x03A5}, /* 933 - greek capital letter upsilon */
3422 {"Phi", 0x03A6}, /* 934 - greek capital letter phi */
3423 {"Chi", 0x03A7}, /* 935 - greek capital letter chi */
3424 {"Psi", 0x03A8}, /* 936 - greek capital letter psi */
3425 {"Omega", 0x03A9}, /* 937 - greek capital letter omega */
3426 {"alpha", 0x03B1}, /* 945 - greek small letter alpha */
3427 {"beta", 0x03B2}, /* 946 - greek small letter beta */
3428 {"gamma", 0x03B3}, /* 947 - greek small letter gamma */
3429 {"delta", 0x03B4}, /* 948 - greek small letter delta */
3430 {"epsilon", 0x03B5}, /* 949 - greek small letter epsilon */
3431 {"zeta", 0x03B6}, /* 950 - greek small letter zeta */
3432 {"eta", 0x03B7}, /* 951 - greek small letter eta */
3433 {"theta", 0x03B8}, /* 952 - greek small letter theta */
3434 {"iota", 0x03B9}, /* 953 - greek small letter iota */
3435 {"kappa", 0x03BA}, /* 954 - greek small letter kappa */
3436 {"lambda", 0x03BB}, /* 955 - greek small letter lamda */
3437 {"mu", 0x03BC}, /* 956 - greek small letter mu */
3438 {"nu", 0x03BD}, /* 957 - greek small letter nu */
3439 {"xi", 0x03BE}, /* 958 - greek small letter xi */
3440 {"omicron", 0x03BF}, /* 959 - greek small letter omicron */
3441 {"pi", 0x03C0}, /* 960 - greek small letter pi */
3442 {"rho", 0x03C1}, /* 961 - greek small letter rho */
3443 {"sigmaf", 0x03C2}, /* 962 - greek small letter final sigma */
3444 {"sigma", 0x03C3}, /* 963 - greek small letter sigma */
3445 {"tau", 0x03C4}, /* 964 - greek small letter tau */
3446 {"upsilon", 0x03C5}, /* 965 - greek small letter upsilon */
3447 {"phi", 0x03C6}, /* 966 - greek small letter phi */
3448 {"chi", 0x03C7}, /* 967 - greek small letter chi */
3449 {"psi", 0x03C8}, /* 968 - greek small letter psi */
3450 {"omega", 0x03C9}, /* 969 - greek small letter omega */
3451 {"thetasym", 0x03D1}, /* 977 - greek theta symbol */
3452 {"upsih", 0x03D2}, /* 978 - greek upsilon with hook symbol */
3453 {"piv", 0x03D6}, /* 982 - greek pi symbol */
3454 {"ensp", 0x2002}, /* 8194 - en space */
3455 {"emsp", 0x2003}, /* 8195 - em space */
3456 {"thinsp", 0x2009}, /* 8201 - thin space */
3457 {"zwnj", 0x200C}, /* 8204 - zero width non-joiner */
3458 {"zwj", 0x200D}, /* 8205 - zero width joiner */
3459 {"lrm", 0x200E}, /* 8206 - left-to-right mark */
3460 {"rlm", 0x200F}, /* 8207 - right-to-left mark */
3461 {"ndash", 0x2013}, /* 8211 - en dash */
3462 {"mdash", 0x2014}, /* 8212 - em dash */
3463 {"#8213", 0x2015, "--"}, /* 2015 - horizontal bar */
3464 {"#8214", 0x2016, "||"}, /* 2016 - double vertical line */
3465 {"#8215", 0x2017, "__"}, /* 2017 - double low line */
3466 {"lsquo", 0x2018}, /* 8216 - left single quotation mark */
3467 {"rsquo", 0x2019}, /* 8217 - right single quotation mark */
3468 {"sbquo", 0x201A}, /* 8218 - single low-9 quotation mark */
3469 {"ldquo", 0x201C}, /* 8220 - left double quotation mark */
3470 {"rdquo", 0x201D}, /* 8221 - right double quotation mark */
3471 {"bdquo", 0x201E, ",,"}, /* 8222 - double low-9 quotation mark */
3472 {"#8223", 0x201F, "``"}, /* 201F - double high reversed-9 quotation mark */
3473 {"dagger", 0x2020}, /* 8224 - dagger */
3474 {"Dagger", 0x2021}, /* 8225 - double dagger */
3475 {"bull", 0x2022, "*"}, /* 8226 - bullet */
3476 {"hellip", 0x2026}, /* 8230 - horizontal ellipsis */
3477 {"permil", 0x2030}, /* 8240 - per mille sign */
3478 {"prime", 0x2032, "\'"}, /* 8242 - prime */
3479 {"Prime", 0x2033, "\'\'"}, /* 8243 - double prime */
3480 {"#8244", 0x2034, "\'\'\'"}, /* 2034 - triple prime */
3481 {"lsaquo", 0x2039}, /* 8249 - single left-pointing angle quotation mark */
3482 {"rsaquo", 0x203A}, /* 8250 - single right-pointing angle quotation mark */
3483 {"#8252", 0x203C, "!!"}, /* 203C - double exclamation mark */
3484 {"oline", 0x203E, "-"}, /* 8254 - overline */
3485 {"frasl", 0x2044}, /* 8260 - fraction slash */
3486 {"#8263", 0x2047, "??"}, /* 2047 - double question mark */
3487 {"#8264", 0x2048, "?!"}, /* 2048 - question exclamation mark */
3488 {"#8265", 0x2049, "!?"}, /* 2049 - exclamation question mark */
3489 {"#8279", 0x2057, "\'\'\'\'"}, /* 2057 - quad prime */
3490 {"euro", 0x20AC, "EUR"}, /* 8364 - euro sign */
3491 {"image", 0x2111}, /* 8465 - black-letter capital i */
3492 {"weierp", 0x2118}, /* 8472 - script capital p (<a href="/wiki/Weierstrass" title="Weierstrass">Weierstrass</a> p) */
3493 {"real", 0x211C}, /* 8476 - black-letter capital r */
3494 {"trade", 0x2122, "[tm]"}, /* 8482 - trademark sign */
3495 {"alefsym", 0x2135}, /* 8501 - alef symbol */
3496 {"larr", 0x2190}, /* 8592 - leftwards arrow */
3497 {"uarr", 0x2191}, /* 8593 - upwards arrow */
3498 {"rarr", 0x2192}, /* 8594 - rightwards arrow */
3499 {"darr", 0x2193}, /* 8595 - downwards arrow */
3500 {"harr", 0x2194}, /* 8596 - left right arrow */
3501 {"crarr", 0x21B5}, /* 8629 - downwards arrow with corner leftwards */
3502 {"lArr", 0x21D0}, /* 8656 - leftwards double arrow */
3503 {"uArr", 0x21D1}, /* 8657 - upwards double arrow */
3504 {"rArr", 0x21D2}, /* 8658 - rightwards double arrow */
3505 {"dArr", 0x21D3}, /* 8659 - downwards double arrow */
3506 {"hArr", 0x21D4}, /* 8660 - left right double arrow */
3507 {"forall", 0x2200}, /* 8704 - for all */
3508 {"part", 0x2202}, /* 8706 - partial differential */
3509 {"exist", 0x2203}, /* 8707 - there exists */
3510 {"empty", 0x2205}, /* 8709 - empty set */
3511 {"nabla", 0x2207}, /* 8711 - nabla */
3512 {"isin", 0x2208}, /* 8712 - element of */
3513 {"notin", 0x2209}, /* 8713 - not an element of */
3514 {"ni", 0x220B}, /* 8715 - contains as member */
3515 {"prod", 0x220F}, /* 8719 - n-ary product */
3516 {"sum", 0x2211}, /* 8721 - n-ary summation */
3517 {"minus", 0x2212}, /* 8722 - minus sign */
3518 {"lowast", 0x2217}, /* 8727 - asterisk operator */
3519 {"radic", 0x221A}, /* 8730 - square root */
3520 {"prop", 0x221D}, /* 8733 - proportional to */
3521 {"infin", 0x221E}, /* 8734 - infinity */
3522 {"ang", 0x2220}, /* 8736 - angle */
3523 {"and", 0x2227}, /* 8743 - logical and */
3524 {"or", 0x2228}, /* 8744 - logical or */
3525 {"cap", 0x2229}, /* 8745 - intersection */
3526 {"cup", 0x222A}, /* 8746 - union */
3527 {"int", 0x222B}, /* 8747 - integral */
3528 {"there4", 0x2234}, /* 8756 - therefore */
3529 {"sim", 0x223C}, /* 8764 - tilde operator */
3530 {"cong", 0x2245}, /* 8773 - congruent to */
3531 {"asymp", 0x2248}, /* 8776 - almost equal to */
3532 {"ne", 0x2260}, /* 8800 - not equal to */
3533 {"equiv", 0x2261}, /* 8801 - identical to (equivalent to) */
3534 {"le", 0x2264}, /* 8804 - less-than or equal to */
3535 {"ge", 0x2265}, /* 8805 - greater-than or equal to */
3536 {"sub", 0x2282}, /* 8834 - subset of */
3537 {"sup", 0x2283}, /* 8835 - superset of */
3538 {"nsub", 0x2284}, /* 8836 - not a subset of */
3539 {"sube", 0x2286}, /* 8838 - subset of or equal to */
3540 {"supe", 0x2287}, /* 8839 - superset of or equal to */
3541 {"oplus", 0x2295}, /* 8853 - circled plus */
3542 {"otimes", 0x2297}, /* 8855 - circled times */
3543 {"perp", 0x22A5}, /* 8869 - up tack */
3544 {"sdot", 0x22C5}, /* 8901 - dot operator */
3545 {"lceil", 0x2308}, /* 8968 - left ceiling */
3546 {"rceil", 0x2309}, /* 8969 - right ceiling */
3547 {"lfloor", 0x230A}, /* 8970 - left floor */
3548 {"rfloor", 0x230B}, /* 8971 - right floor */
3549 {"lang", 0x2329}, /* 9001 - left-pointing angle bracket */
3550 {"rang", 0x232A}, /* 9002 - right-pointing angle bracket */
3551 {"loz", 0x25CA}, /* 9674 - lozenge */
3552 {"spades", 0x2660}, /* 9824 - black spade suit */
3553 {"clubs", 0x2663}, /* 9827 - black club suit */
3554 {"hearts", 0x2665}, /* 9829 - black heart suit */
3555 {"diams", 0x2666} /* 9830 - black diamond suit */
3556 };
3557
3558
3559 /*
3560 * Table of supported elements and corresponding handlers
3561 */
3562 static ELPROP_S html_element_table[] = {
3563 {"HTML", 4}, /* HTML ignore if seen? */
3564 {"HEAD", 4, html_head}, /* slurp until <BODY> ? */
3565 {"TITLE", 5, html_title}, /* Document Title */
3566 {"BASE", 4, html_base}, /* HREF base */
3567 {"BODY", 4, html_body}, /* HTML BODY */
3568 {"A", 1, html_a}, /* Anchor */
3569 {"ABBR", 4, html_abbr}, /* Abbreviation */
3570 {"IMG", 3, html_img}, /* Image */
3571 {"MAP", 3, html_map}, /* Image Map */
3572 {"AREA", 4, html_area}, /* Image Map Area */
3573 {"HR", 2, html_hr, 1, 1}, /* Horizontal Rule */
3574 {"BR", 2, html_br, 0, 1}, /* Line Break */
3575 {"P", 1, html_p, 1}, /* Paragraph */
3576 {"OL", 2, html_ol, 1}, /* Ordered List */
3577 {"UL", 2, html_ul, 1}, /* Unordered List */
3578 {"MENU", 4, html_menu}, /* Menu List */
3579 {"DIR", 3, html_dir}, /* Directory List */
3580 {"LI", 2, html_li}, /* ... List Item */
3581 {"DL", 2, html_dl, 1}, /* Definition List */
3582 {"DT", 2, html_dt}, /* ... Def. Term */
3583 {"DD", 2, html_dd}, /* ... Def. Definition */
3584 {"I", 1, html_i}, /* Italic Text */
3585 {"EM", 2, html_em}, /* Typographic Emphasis */
3586 {"STRONG", 6, html_strong}, /* STRONG Typo Emphasis */
3587 {"VAR", 3, html_i}, /* Variable Name */
3588 {"B", 1, html_b}, /* Bold Text */
3589 {"U", 1, html_u}, /* Underline Text */
3590 {"S", 1, html_s}, /* Strike-Through Text */
3591 {"STRIKE", 6, html_s}, /* Strike-Through Text */
3592 {"BIG", 3, html_big}, /* Big Font Text */
3593 {"SMALL", 5, html_small}, /* Small Font Text */
3594 {"FONT", 4, html_font}, /* Font display directives */
3595 {"BLOCKQUOTE", 10, html_blockquote, 1}, /* Blockquote */
3596 {"ADDRESS", 7, html_address, 1}, /* Address */
3597 {"CENTER", 6, html_center}, /* Centered Text v3.2 */
3598 {"DIV", 3, html_div, 1}, /* Document Division 3.2 */
3599 {"SPAN", 4, html_span}, /* Text Span */
3600 {"H1", 2, html_h1, 1}, /* Headings... */
3601 {"H2", 2, html_h2, 1},
3602 {"H3", 2, html_h3,1},
3603 {"H4", 2, html_h4, 1},
3604 {"H5", 2, html_h5, 1},
3605 {"H6", 2, html_h6, 1},
3606 {"PRE", 3, html_pre, 1}, /* Preformatted Text */
3607 {"KBD", 3, html_kbd}, /* Keyboard Input (NO OP) */
3608 {"DFN", 3, html_dfn}, /* Definition (NO OP) */
3609 {"VAR", 3, html_var}, /* Variable (NO OP) */
3610 {"TT", 2, html_tt}, /* Typetype (NO OP) */
3611 {"SAMP", 4, html_samp}, /* Sample Text (NO OP) */
3612 {"CITE", 4, html_cite}, /* Citation (NO OP) */
3613 {"CODE", 4, html_code}, /* Code Text (NO OP) */
3614 {"INS", 3, html_ins}, /* Text Inserted (NO OP) */
3615 {"DEL", 3, html_del}, /* Text Deleted (NO OP) */
3616 {"SUP", 3, html_sup}, /* Text Superscript (NO OP) */
3617 {"SUB", 3, html_sub}, /* Text Superscript (NO OP) */
3618 {"STYLE", 5, html_style}, /* CSS Definitions */
3619
3620 /*----- Handlers below UNIMPLEMENTED (and won't until later) -----*/
3621
3622 {"FORM", 4, html_form, 1}, /* form within a document */
3623 {"INPUT", 5, html_input}, /* One input field, options */
3624 {"BUTTON", 6, html_button}, /* Push Button */
3625 {"OPTION", 6, html_option}, /* One option within Select */
3626 {"OPTION", 6, html_optgroup}, /* Option Group Definition */
3627 {"SELECT", 6, html_select}, /* Selection from a set */
3628 {"TEXTAREA", 8, html_textarea}, /* A multi-line input field */
3629 {"LABEL", 5, html_label}, /* Control Label */
3630 {"FIELDSET", 8, html_fieldset, 1}, /* Fieldset Control Group */
3631
3632 /*----- Handlers below NEVER TO BE IMPLEMENTED -----*/
3633 {"SCRIPT", 6, html_script}, /* Embedded scripting statements */
3634 {"APPLET", 6, NULL}, /* Embedded applet statements */
3635 {"OBJECT", 6, NULL}, /* Embedded object statements */
3636 {"LINK", 4, NULL}, /* References to external data */
3637 {"PARAM", 5, NULL}, /* Applet/Object parameters */
3638
3639 /*----- Handlers below provide limited support for RFC 1942 Tables -----*/
3640
3641 {"TABLE", 5, html_table, 1}, /* Table */
3642 {"CAPTION", 7, html_caption}, /* Table Caption */
3643 {"TR", 2, html_tr}, /* Table Table Row */
3644 {"TD", 2, html_td}, /* Table Table Data */
3645 {"TH", 2, html_th}, /* Table Table Head */
3646 {"THEAD", 5, html_thead}, /* Table Table Head */
3647 {"TBODY", 5, html_tbody}, /* Table Table Body */
3648 {"TFOOT", 5, html_tfoot}, /* Table Table Foot */
3649 {"COL", 3, html_col}, /* Table Column Attributes */
3650 {"COLGROUP", 8, html_colgroup}, /* Table Column Group Attributes */
3651
3652 {NULL, 0, NULL}
3653 };
3654
3655
/*
 * Table of supported RSS 2.0 elements
 *
 * Same row layout as the HTML element table: element name, length of
 * that name, and the handler function for the element.  The table is
 * terminated by a row whose name is NULL.
 */
static ELPROP_S rss_element_table[] = {
    {"RSS", 3, rss_rss},			/* RSS 2.0 version */
    {"CHANNEL", 7, rss_channel},		/* RSS 2.0 Channel */
    {"TITLE", 5, rss_title},			/* RSS 2.0 Title */
    {"IMAGE", 5, rss_image},			/* RSS 2.0 Channel Image */
    {"LINK", 4, rss_link},			/* RSS 2.0 Channel/Item Link */
    {"DESCRIPTION", 11, rss_description},	/* RSS 2.0 Channel/Item Description */
    {"ITEM", 4, rss_item},			/* RSS 2.0 Channel ITEM */
    {"TTL", 3, rss_ttl},			/* RSS 2.0 Item TTL */
    {NULL, 0, NULL}				/* end-of-table sentinel */
};
3670
3671
3672 /*
3673 * Initialize the given handler, and add it to the stack if it
3674 * requests it.
3675 *
3676 * Returns: 1 if handler chose to get pushed on stack
3677 * 0 if handler declined
3678 */
3679 int
html_push(FILTER_S * fd,ELPROP_S * ep)3680 html_push(FILTER_S *fd, ELPROP_S *ep)
3681 {
3682 HANDLER_S *new;
3683
3684 new = (HANDLER_S *)fs_get(sizeof(HANDLER_S));
3685 memset(new, 0, sizeof(HANDLER_S));
3686 new->html_data = fd;
3687 new->element = ep;
3688 if((*ep->handler)(new, 0, GF_RESET)){ /* stack the handler? */
3689 new->below = HANDLERS(fd);
3690 HANDLERS(fd) = new; /* push */
3691 return(1);
3692 }
3693
3694 fs_give((void **) &new);
3695 return(0);
3696 }
3697
3698
3699 /*
3700 * Remove the most recently installed the given handler
3701 * after letting it accept its demise.
3702 */
3703 void
html_pop(FILTER_S * fd,ELPROP_S * ep)3704 html_pop(FILTER_S *fd, ELPROP_S *ep)
3705 {
3706 HANDLER_S *tp;
3707
3708 for(tp = HANDLERS(fd); tp && ep != EL(tp); tp = tp->below){
3709 HANDLER_S *tp2;
3710
3711 dprint((3, "-- html error: bad nesting: given /%s expected /%s", ep->element, EL(tp)->element));
3712 /* if no evidence of opening tag, ignore given closing tag */
3713 for(tp2 = HANDLERS(fd); tp2 && ep != EL(tp2); tp2 = tp2->below)
3714 ;
3715
3716 if(!tp2){
3717 dprint((3, "-- html error: no opening tag for given tag /%s", ep->element));
3718 return;
3719 }
3720
3721 (void) (*EL(tp)->handler)(tp, 0, GF_EOD);
3722 HANDLERS(fd) = tp->below;
3723 }
3724
3725 if(tp){
3726 (void) (*EL(tp)->handler)(tp, 0, GF_EOD); /* may adjust handler list */
3727 if(tp != HANDLERS(fd)){
3728 HANDLER_S *p;
3729
3730 for(p = HANDLERS(fd); p->below != tp; p = p->below)
3731 ;
3732
3733 if(p)
3734 p->below = tp->below; /* remove from middle of stack */
3735 /* BUG: else programming botch and we should die */
3736 }
3737 else
3738 HANDLERS(fd) = tp->below; /* pop */
3739
3740 fs_give((void **)&tp);
3741 }
3742 else{
3743 /* BUG: should MAKE SURE NOT TO EMIT IT */
3744 dprint((3, "-- html error: end tag without a start: %s", ep->element));
3745 }
3746 }
3747
3748
3749 /*
3750 * Deal with data passed a handler in its GF_DATA state
3751 */
3752 static void
html_handoff(HANDLER_S * hd,int ch)3753 html_handoff(HANDLER_S *hd, int ch)
3754 {
3755 if(hd->below)
3756 (void) (*EL(hd->below)->handler)(hd->below, ch, GF_DATA);
3757 else
3758 html_output(hd->html_data, ch);
3759 }
3760
3761
3762 /*
3763 * HTML <BR> element handler
3764 */
3765 int
html_br(HANDLER_S * hd,int ch,int cmd)3766 html_br(HANDLER_S *hd, int ch, int cmd)
3767 {
3768 if(cmd == GF_RESET){
3769 if(PASS_HTML(hd->html_data)){
3770 html_output_raw_tag(hd->html_data, "br");
3771 }
3772 else{
3773 html_output(hd->html_data, HTML_NEWLINE);
3774 }
3775 }
3776
3777 return(0); /* don't get linked */
3778 }
3779
3780
3781 /*
3782 * HTML <HR> (Horizontal Rule) element handler
3783 */
3784 int
html_hr(HANDLER_S * hd,int ch,int cmd)3785 html_hr(HANDLER_S *hd, int ch, int cmd)
3786 {
3787 if(cmd == GF_RESET){
3788 if(PASS_HTML(hd->html_data)){
3789 html_output_raw_tag(hd->html_data, "hr");
3790 }
3791 else{
3792 int i, old_wrap, width, align;
3793 PARAMETER *p;
3794
3795 width = WRAP_COLS(hd->html_data);
3796 align = 0;
3797 for(p = HD(hd->html_data)->el_data->attribs;
3798 p && p->attribute;
3799 p = p->next)
3800 if(p->value){
3801 if(!strucmp(p->attribute, "ALIGN")){
3802 if(!strucmp(p->value, "LEFT"))
3803 align = 1;
3804 else if(!strucmp(p->value, "RIGHT"))
3805 align = 2;
3806 }
3807 else if(!strucmp(p->attribute, "WIDTH")){
3808 char *cp;
3809
3810 width = 0;
3811 for(cp = p->value; *cp; cp++)
3812 if(*cp == '%'){
3813 width = (WRAP_COLS(hd->html_data)*MIN(100,width))/100;
3814 break;
3815 }
3816 else if(isdigit((unsigned char) *cp))
3817 width = (width * 10) + (*cp - '0');
3818
3819 width = MIN(width, WRAP_COLS(hd->html_data));
3820 }
3821 }
3822
3823 html_blank(hd->html_data, 1); /* at least one blank line */
3824
3825 old_wrap = HD(hd->html_data)->wrapstate;
3826 HD(hd->html_data)->wrapstate = 0;
3827 if((i = MAX(0, WRAP_COLS(hd->html_data) - width))
3828 && ((align == 0) ? i /= 2 : (align == 2)))
3829 for(; i > 0; i--)
3830 html_output(hd->html_data, ' ');
3831
3832 for(i = 0; i < width; i++)
3833 html_output(hd->html_data, '_');
3834
3835 html_blank(hd->html_data, 1);
3836 HD(hd->html_data)->wrapstate = old_wrap;
3837 }
3838 }
3839
3840 return(0); /* don't get linked */
3841 }
3842
3843
3844 /*
3845 * HTML <P> (paragraph) element handler
3846 */
3847 int
html_p(HANDLER_S * hd,int ch,int cmd)3848 html_p(HANDLER_S *hd, int ch, int cmd)
3849 {
3850 if(cmd == GF_DATA){
3851 html_handoff(hd, ch);
3852 }
3853 else if(cmd == GF_RESET){
3854 if(PASS_HTML(hd->html_data)){
3855 html_output_raw_tag(hd->html_data, "p");
3856 }
3857 else{
3858 /* Make sure there's at least 1 blank line */
3859 html_blank(hd->html_data, 1);
3860
3861 /* adjust indent level if needed */
3862 if(HD(hd->html_data)->li_pending){
3863 html_indent(hd->html_data, 4, HTML_ID_INC);
3864 HD(hd->html_data)->li_pending = 0;
3865 }
3866 }
3867 }
3868 else if(cmd == GF_EOD){
3869 if(PASS_HTML(hd->html_data)){
3870 html_output_string(hd->html_data, "</p>");
3871 }
3872 else{
3873 /* Make sure there's at least 1 blank line */
3874 html_blank(hd->html_data, 1);
3875 }
3876 }
3877
3878 return(1); /* GET linked */
3879 }
3880
3881
3882 /*
3883 * HTML Table <TABLE> (paragraph) table row
3884 */
3885 int
html_table(HANDLER_S * hd,int ch,int cmd)3886 html_table(HANDLER_S *hd, int ch, int cmd)
3887 {
3888 if(cmd == GF_DATA){
3889 if(PASS_HTML(hd->html_data)){
3890 html_handoff(hd, ch);
3891 }
3892 }
3893 else if(cmd == GF_RESET){
3894 if(PASS_HTML(hd->html_data)){
3895 html_output_raw_tag(hd->html_data, "table");
3896 }
3897 else
3898 /* Make sure there's at least 1 blank line */
3899 html_blank(hd->html_data, 0);
3900 }
3901 else if(cmd == GF_EOD){
3902 if(PASS_HTML(hd->html_data)){
3903 html_output_string(hd->html_data, "</table>");
3904 }
3905 else
3906 /* Make sure there's at least 1 blank line */
3907 html_blank(hd->html_data, 0);
3908 }
3909 return(PASS_HTML(hd->html_data)); /* maybe get linked */
3910 }
3911
3912
3913 /*
3914 * HTML <CAPTION> (Table Caption) element handler
3915 */
3916 int
html_caption(HANDLER_S * hd,int ch,int cmd)3917 html_caption(HANDLER_S *hd, int ch, int cmd)
3918 {
3919 if(cmd == GF_DATA){
3920 html_handoff(hd, ch);
3921 }
3922 else if(cmd == GF_RESET){
3923 if(PASS_HTML(hd->html_data)){
3924 html_output_raw_tag(hd->html_data, "caption");
3925 }
3926 else{
3927 /* turn ON the centered bit */
3928 CENTER_BIT(hd->html_data) = 1;
3929 }
3930 }
3931 else if(cmd == GF_EOD){
3932 if(PASS_HTML(hd->html_data)){
3933 html_output_string(hd->html_data, "</caption>");
3934 }
3935 else{
3936 /* turn OFF the centered bit */
3937 CENTER_BIT(hd->html_data) = 0;
3938 }
3939 }
3940
3941 return(1);
3942 }
3943
3944
3945 /*
3946 * HTML Table <TR> (paragraph) table row
3947 */
3948 int
html_tr(HANDLER_S * hd,int ch,int cmd)3949 html_tr(HANDLER_S *hd, int ch, int cmd)
3950 {
3951 if(cmd == GF_DATA){
3952 if(PASS_HTML(hd->html_data)){
3953 html_handoff(hd, ch);
3954 }
3955 }
3956 else if(cmd == GF_RESET){
3957 if(PASS_HTML(hd->html_data)){
3958 html_output_raw_tag(hd->html_data, "tr");
3959 }
3960 else
3961 /* Make sure there's at least 1 blank line */
3962 html_blank(hd->html_data, 0);
3963 }
3964 else if(cmd == GF_EOD){
3965 if(PASS_HTML(hd->html_data)){
3966 html_output_string(hd->html_data, "</tr>");
3967 }
3968 else
3969 /* Make sure there's at least 1 blank line */
3970 html_blank(hd->html_data, 0);
3971 }
3972 return(PASS_HTML(hd->html_data)); /* maybe get linked */
3973 }
3974
3975
3976 /*
3977 * HTML Table <TD> (paragraph) table data
3978 */
3979 int
html_td(HANDLER_S * hd,int ch,int cmd)3980 html_td(HANDLER_S *hd, int ch, int cmd)
3981 {
3982 if(cmd == GF_DATA){
3983 if(PASS_HTML(hd->html_data)){
3984 html_handoff(hd, ch);
3985 }
3986 }
3987 else if(cmd == GF_RESET){
3988 if(PASS_HTML(hd->html_data)){
3989 html_output_raw_tag(hd->html_data, "td");
3990 }
3991 else{
3992 PARAMETER *p;
3993
3994 for(p = HD(hd->html_data)->el_data->attribs;
3995 p && p->attribute;
3996 p = p->next)
3997 if(!strucmp(p->attribute, "nowrap")
3998 && (hd->html_data->f2 || hd->html_data->n)){
3999 HTML_DUMP_LIT(hd->html_data, " | ", 3);
4000 break;
4001 }
4002 }
4003 }
4004 else if(cmd == GF_EOD){
4005 if(PASS_HTML(hd->html_data)){
4006 html_output_string(hd->html_data, "</td>");
4007 }
4008 }
4009
4010 return(PASS_HTML(hd->html_data)); /* maybe get linked */
4011 }
4012
4013
4014 /*
4015 * HTML Table <TH> (paragraph) table head
4016 */
4017 int
html_th(HANDLER_S * hd,int ch,int cmd)4018 html_th(HANDLER_S *hd, int ch, int cmd)
4019 {
4020 if(cmd == GF_DATA){
4021 if(PASS_HTML(hd->html_data)){
4022 html_handoff(hd, ch);
4023 }
4024 }
4025 else if(cmd == GF_RESET){
4026 if(PASS_HTML(hd->html_data)){
4027 html_output_raw_tag(hd->html_data, "th");
4028 }
4029 else{
4030 PARAMETER *p;
4031
4032 for(p = HD(hd->html_data)->el_data->attribs;
4033 p && p->attribute;
4034 p = p->next)
4035 if(!strucmp(p->attribute, "nowrap")
4036 && (hd->html_data->f2 || hd->html_data->n)){
4037 HTML_DUMP_LIT(hd->html_data, " | ", 3);
4038 break;
4039 }
4040 }
4041 }
4042 else if(cmd == GF_EOD){
4043 if(PASS_HTML(hd->html_data)){
4044 html_output_string(hd->html_data, "</th>");
4045 }
4046 }
4047
4048 return(PASS_HTML(hd->html_data)); /* don't get linked */
4049 }
4050
4051
4052 /*
4053 * HTML Table <THEAD> table head
4054 */
4055 int
html_thead(HANDLER_S * hd,int ch,int cmd)4056 html_thead(HANDLER_S *hd, int ch, int cmd)
4057 {
4058 if(PASS_HTML(hd->html_data)){
4059 if(cmd == GF_DATA){
4060 html_handoff(hd, ch);
4061 }
4062 else if(cmd == GF_RESET){
4063 html_output_raw_tag(hd->html_data, "thead");
4064 }
4065 else if(cmd == GF_EOD){
4066 html_output_string(hd->html_data, "</thead>");
4067 }
4068
4069 return(1); /* GET linked */
4070 }
4071
4072 return(0); /* don't get linked */
4073 }
4074
4075
4076 /*
4077 * HTML Table <TBODY> table body
4078 */
4079 int
html_tbody(HANDLER_S * hd,int ch,int cmd)4080 html_tbody(HANDLER_S *hd, int ch, int cmd)
4081 {
4082 if(PASS_HTML(hd->html_data)){
4083 if(cmd == GF_DATA){
4084 html_handoff(hd, ch);
4085 }
4086 else if(cmd == GF_RESET){
4087 html_output_raw_tag(hd->html_data, "tbody");
4088 }
4089 else if(cmd == GF_EOD){
4090 html_output_string(hd->html_data, "</tbody>");
4091 }
4092
4093 return(1); /* GET linked */
4094 }
4095
4096 return(0); /* don't get linked */
4097 }
4098
4099
4100 /*
4101 * HTML Table <TFOOT> table body
4102 */
4103 int
html_tfoot(HANDLER_S * hd,int ch,int cmd)4104 html_tfoot(HANDLER_S *hd, int ch, int cmd)
4105 {
4106 if(PASS_HTML(hd->html_data)){
4107 if(cmd == GF_DATA){
4108 html_handoff(hd, ch);
4109 }
4110 else if(cmd == GF_RESET){
4111 html_output_raw_tag(hd->html_data, "tfoot");
4112 }
4113 else if(cmd == GF_EOD){
4114 html_output_string(hd->html_data, "</tfoot>");
4115 }
4116
4117 return(1); /* GET linked */
4118 }
4119
4120 return(0); /* don't get linked */
4121 }
4122
4123
4124 /*
4125 * HTML <COL> (Table Column Attributes) element handler
4126 */
4127 int
html_col(HANDLER_S * hd,int ch,int cmd)4128 html_col(HANDLER_S *hd, int ch, int cmd)
4129 {
4130 if(cmd == GF_RESET){
4131 if(PASS_HTML(hd->html_data)){
4132 html_output_raw_tag(hd->html_data, "col");
4133 }
4134 }
4135
4136 return(0); /* don't get linked */
4137 }
4138
4139
4140 /*
4141 * HTML Table <COLGROUP> table body
4142 */
4143 int
html_colgroup(HANDLER_S * hd,int ch,int cmd)4144 html_colgroup(HANDLER_S *hd, int ch, int cmd)
4145 {
4146 if(PASS_HTML(hd->html_data)){
4147 if(cmd == GF_DATA){
4148 html_handoff(hd, ch);
4149 }
4150 else if(cmd == GF_RESET){
4151 html_output_raw_tag(hd->html_data, "colgroup");
4152 }
4153 else if(cmd == GF_EOD){
4154 html_output_string(hd->html_data, "</colgroup>");
4155 }
4156
4157 return(1); /* GET linked */
4158 }
4159
4160 return(0); /* don't get linked */
4161 }
4162
4163
4164 /*
4165 * HTML <I> (italic text) element handler
4166 */
4167 int
html_i(HANDLER_S * hd,int ch,int cmd)4168 html_i(HANDLER_S *hd, int ch, int cmd)
4169 {
4170 if(cmd == GF_DATA){
4171 /* include LITERAL in spaceness test! */
4172 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4173 HTML_ITALIC(hd->html_data, 1);
4174 hd->x = 0;
4175 }
4176
4177 html_handoff(hd, ch);
4178 }
4179 else if(cmd == GF_RESET){
4180 hd->x = 1;
4181 }
4182 else if(cmd == GF_EOD){
4183 if(!hd->x)
4184 HTML_ITALIC(hd->html_data, 0);
4185 }
4186
4187 return(1); /* get linked */
4188 }
4189
4190
4191 /*
4192 * HTML <EM> element handler
4193 */
4194 int
html_em(HANDLER_S * hd,int ch,int cmd)4195 html_em(HANDLER_S *hd, int ch, int cmd)
4196 {
4197 if(cmd == GF_DATA){
4198 if(!PASS_HTML(hd->html_data)){
4199 /* include LITERAL in spaceness test! */
4200 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4201 HTML_ITALIC(hd->html_data, 1);
4202 hd->x = 0;
4203 }
4204 }
4205
4206 html_handoff(hd, ch);
4207 }
4208 else if(cmd == GF_RESET){
4209 if(PASS_HTML(hd->html_data)){
4210 html_output_raw_tag(hd->html_data, "em");
4211 }
4212 else{
4213 hd->x = 1;
4214 }
4215 }
4216 else if(cmd == GF_EOD){
4217 if(PASS_HTML(hd->html_data)){
4218 html_output_string(hd->html_data, "</em>");
4219 }
4220 else{
4221 if(!hd->x)
4222 HTML_ITALIC(hd->html_data, 0);
4223 }
4224 }
4225
4226 return(1); /* get linked */
4227 }
4228
4229
4230 /*
4231 * HTML <STRONG> element handler
4232 */
4233 int
html_strong(HANDLER_S * hd,int ch,int cmd)4234 html_strong(HANDLER_S *hd, int ch, int cmd)
4235 {
4236 if(cmd == GF_DATA){
4237 if(!PASS_HTML(hd->html_data)){
4238 /* include LITERAL in spaceness test! */
4239 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4240 HTML_ITALIC(hd->html_data, 1);
4241 hd->x = 0;
4242 }
4243 }
4244
4245 html_handoff(hd, ch);
4246 }
4247 else if(cmd == GF_RESET){
4248 if(PASS_HTML(hd->html_data)){
4249 html_output_raw_tag(hd->html_data, "strong");
4250 }
4251 else{
4252 hd->x = 1;
4253 }
4254 }
4255 else if(cmd == GF_EOD){
4256 if(PASS_HTML(hd->html_data)){
4257 html_output_string(hd->html_data, "</strong>");
4258 }
4259 else{
4260 if(!hd->x)
4261 HTML_ITALIC(hd->html_data, 0);
4262 }
4263 }
4264
4265 return(1); /* get linked */
4266 }
4267
4268
4269 /*
4270 * HTML <u> (Underline text) element handler
4271 */
4272 int
html_u(HANDLER_S * hd,int ch,int cmd)4273 html_u(HANDLER_S *hd, int ch, int cmd)
4274 {
4275 if(PASS_HTML(hd->html_data)){
4276 if(cmd == GF_DATA){
4277 html_handoff(hd, ch);
4278 }
4279 else if(cmd == GF_RESET){
4280 html_output_raw_tag(hd->html_data, "u");
4281 }
4282 else if(cmd == GF_EOD){
4283 html_output_string(hd->html_data, "</u>");
4284 }
4285
4286 return(1); /* get linked */
4287 }
4288
4289 return(0); /* do NOT get linked */
4290 }
4291
4292
4293 /*
4294 * HTML <b> (Bold text) element handler
4295 */
4296 int
html_b(HANDLER_S * hd,int ch,int cmd)4297 html_b(HANDLER_S *hd, int ch, int cmd)
4298 {
4299 if(cmd == GF_DATA){
4300 if(!PASS_HTML(hd->html_data)){
4301 /* include LITERAL in spaceness test! */
4302 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4303 HTML_BOLD(hd->html_data, 1);
4304 hd->x = 0;
4305 }
4306 }
4307
4308 html_handoff(hd, ch);
4309 }
4310 else if(cmd == GF_RESET){
4311 if(PASS_HTML(hd->html_data)){
4312 html_output_raw_tag(hd->html_data, "b");
4313 }
4314 else{
4315 hd->x = 1;
4316 }
4317 }
4318 else if(cmd == GF_EOD){
4319 if(PASS_HTML(hd->html_data)){
4320 html_output_string(hd->html_data, "</b>");
4321 }
4322 else{
4323 if(!hd->x)
4324 HTML_BOLD(hd->html_data, 0);
4325 }
4326 }
4327
4328 return(1); /* get linked */
4329 }
4330
4331
4332 /*
4333 * HTML <s> (strike-through text) element handler
4334 */
4335 int
html_s(HANDLER_S * hd,int ch,int cmd)4336 html_s(HANDLER_S *hd, int ch, int cmd)
4337 {
4338 if(cmd == GF_DATA){
4339 if(!PASS_HTML(hd->html_data)){
4340 /* include LITERAL in spaceness test! */
4341 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4342 HTML_STRIKE(hd->html_data, 1);
4343 hd->x = 0;
4344 }
4345 }
4346
4347 html_handoff(hd, ch);
4348 }
4349 else if(cmd == GF_RESET){
4350 if(PASS_HTML(hd->html_data)){
4351 html_output_raw_tag(hd->html_data, "s");
4352 }
4353 else{
4354 hd->x = 1;
4355 }
4356 }
4357 else if(cmd == GF_EOD){
4358 if(PASS_HTML(hd->html_data)){
4359 html_output_string(hd->html_data, "</s>");
4360 }
4361 else{
4362 if(!hd->x)
4363 HTML_STRIKE(hd->html_data, 0);
4364 }
4365 }
4366
4367 return(1); /* get linked */
4368 }
4369
4370
4371 /*
4372 * HTML <big> (BIG text) element handler
4373 */
4374 int
html_big(HANDLER_S * hd,int ch,int cmd)4375 html_big(HANDLER_S *hd, int ch, int cmd)
4376 {
4377 if(cmd == GF_DATA){
4378 /* include LITERAL in spaceness test! */
4379 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4380 HTML_BIG(hd->html_data, 1);
4381 hd->x = 0;
4382 }
4383
4384 html_handoff(hd, ch);
4385 }
4386 else if(cmd == GF_RESET){
4387 hd->x = 1;
4388 }
4389 else if(cmd == GF_EOD){
4390 if(!hd->x)
4391 HTML_BIG(hd->html_data, 0);
4392 }
4393
4394 return(1); /* get linked */
4395 }
4396
4397
4398 /*
4399 * HTML <small> (SMALL text) element handler
4400 */
4401 int
html_small(HANDLER_S * hd,int ch,int cmd)4402 html_small(HANDLER_S *hd, int ch, int cmd)
4403 {
4404 if(cmd == GF_DATA){
4405 /* include LITERAL in spaceness test! */
4406 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4407 HTML_SMALL(hd->html_data, 1);
4408 hd->x = 0;
4409 }
4410
4411 html_handoff(hd, ch);
4412 }
4413 else if(cmd == GF_RESET){
4414 hd->x = 1;
4415 }
4416 else if(cmd == GF_EOD){
4417 if(!hd->x)
4418 HTML_SMALL(hd->html_data, 0);
4419 }
4420
4421 return(1); /* get linked */
4422 }
4423
4424
4425 /*
4426 * HTML <FONT> element handler
4427 */
4428 int
html_font(HANDLER_S * hd,int ch,int cmd)4429 html_font(HANDLER_S *hd, int ch, int cmd)
4430 {
4431 if(PASS_HTML(hd->html_data)){
4432 if(cmd == GF_DATA){
4433 html_handoff(hd, ch);
4434 }
4435 else if(cmd == GF_RESET){
4436 html_output_raw_tag(hd->html_data, "font");
4437 }
4438 else if(cmd == GF_EOD){
4439 html_output_string(hd->html_data, "</font>");
4440 }
4441
4442 return(1); /* get linked */
4443 }
4444
4445 return(0);
4446 }
4447
4448
4449 /*
4450 * HTML <IMG> element handler
4451 */
4452 int
html_img(HANDLER_S * hd,int ch,int cmd)4453 html_img(HANDLER_S *hd, int ch, int cmd)
4454 {
4455 PARAMETER *p;
4456 char *alt = NULL, *src = NULL, *s;
4457
4458 if(cmd == GF_RESET){
4459 if(PASS_HTML(hd->html_data)){
4460 html_output_raw_tag(hd->html_data, "img");
4461 }
4462 else{
4463 for(p = HD(hd->html_data)->el_data->attribs;
4464 p && p->attribute;
4465 p = p->next)
4466 if(p->value && p->value[0]){
4467 if(!strucmp(p->attribute, "alt"))
4468 alt = p->value;
4469 if(!strucmp(p->attribute, "src"))
4470 src = p->value;
4471 }
4472
4473 /*
4474 * Multipart/Related Content ID pointer
4475 * ONLY attached messages are recognized
4476 * if we ever decide web bugs aren't a problem
4477 * anymore then we might expand the scope
4478 */
4479 if(src
4480 && DO_HANDLES(hd->html_data)
4481 && RELATED_OK(hd->html_data)
4482 && struncmp(src, "cid:", 4) == 0){
4483 char buf[32];
4484 int i, n;
4485 HANDLE_S *h = new_handle(HANDLESP(hd->html_data));
4486
4487 h->type = IMG;
4488 h->h.img.src = cpystr(src + 4);
4489 h->h.img.alt = cpystr((alt) ? alt : "Attached Image");
4490
4491 HTML_TEXT(hd->html_data, TAG_EMBED);
4492 HTML_TEXT(hd->html_data, TAG_HANDLE);
4493
4494 sprintf(buf, "%d", h->key);
4495 n = strlen(buf);
4496 HTML_TEXT(hd->html_data, n);
4497 for(i = 0; i < n; i++){
4498 unsigned int uic = buf[i];
4499 HTML_TEXT(hd->html_data, uic);
4500 }
4501
4502 return(0);
4503 }
4504 else if(alt && strlen(alt) < 256){ /* arbitrary "reasonable" limit */
4505 HTML_DUMP_LIT(hd->html_data, alt, strlen(alt));
4506 HTML_TEXT(hd->html_data, ' ');
4507 return(0);
4508 }
4509 else if(src
4510 && (s = strrindex(src, '/'))
4511 && *++s != '\0'){
4512 HTML_TEXT(hd->html_data, '[');
4513 HTML_DUMP_LIT(hd->html_data, s, strlen(s));
4514 HTML_TEXT(hd->html_data, ']');
4515 HTML_TEXT(hd->html_data, ' ');
4516 return(0);
4517 }
4518
4519 /* text filler of last resort */
4520 HTML_DUMP_LIT(hd->html_data, "[IMAGE] ", 7);
4521 }
4522 }
4523
4524 return(0); /* don't get linked */
4525 }
4526
4527
4528 /*
4529 * HTML <MAP> (Image Map) element handler
4530 */
4531 int
html_map(HANDLER_S * hd,int ch,int cmd)4532 html_map(HANDLER_S *hd, int ch, int cmd)
4533 {
4534 if(PASS_HTML(hd->html_data) && PASS_IMAGES(hd->html_data)){
4535 if(cmd == GF_DATA){
4536 html_handoff(hd, ch);
4537 }
4538 else if(cmd == GF_RESET){
4539 html_output_raw_tag(hd->html_data, "map");
4540 }
4541 else if(cmd == GF_EOD){
4542 html_output_string(hd->html_data, "</map>");
4543 }
4544
4545 return(1);
4546 }
4547
4548 return(0);
4549 }
4550
4551
4552 /*
4553 * HTML <AREA> (Image Map Area) element handler
4554 */
4555 int
html_area(HANDLER_S * hd,int ch,int cmd)4556 html_area(HANDLER_S *hd, int ch, int cmd)
4557 {
4558 if(PASS_HTML(hd->html_data) && PASS_IMAGES(hd->html_data)){
4559 if(cmd == GF_DATA){
4560 html_handoff(hd, ch);
4561 }
4562 else if(cmd == GF_RESET){
4563 html_output_raw_tag(hd->html_data, "area");
4564 }
4565 else if(cmd == GF_EOD){
4566 html_output_string(hd->html_data, "</area>");
4567 }
4568
4569 return(1);
4570 }
4571
4572 return(0);
4573 }
4574
4575
4576 /*
4577 * HTML <FORM> (Form) element handler
4578 */
4579 int
html_form(HANDLER_S * hd,int ch,int cmd)4580 html_form(HANDLER_S *hd, int ch, int cmd)
4581 {
4582 if(PASS_HTML(hd->html_data)){
4583 if(cmd == GF_DATA){
4584 html_handoff(hd, ch);
4585 }
4586 else if(cmd == GF_RESET){
4587 PARAMETER **pp;
4588
4589 /* SECURITY: make sure to redirect to new browser instance */
4590 for(pp = &(HD(hd->html_data)->el_data->attribs);
4591 *pp && (*pp)->attribute;
4592 pp = &(*pp)->next)
4593 if(!strucmp((*pp)->attribute, "target")){
4594 if((*pp)->value)
4595 fs_give((void **) &(*pp)->value);
4596
4597 (*pp)->value = cpystr("_blank");
4598 }
4599
4600 if(!*pp){
4601 *pp = (PARAMETER *)fs_get(sizeof(PARAMETER));
4602 memset(*pp, 0, sizeof(PARAMETER));
4603 (*pp)->attribute = cpystr("target");
4604 (*pp)->value = cpystr("_blank");
4605 }
4606
4607 html_output_raw_tag(hd->html_data, "form");
4608 }
4609 else if(cmd == GF_EOD){
4610 html_output_string(hd->html_data, "</form>");
4611 }
4612 }
4613 else{
4614 if(cmd == GF_RESET){
4615 html_blank(hd->html_data, 0);
4616 HTML_DUMP_LIT(hd->html_data, "[FORM]", 6);
4617 html_blank(hd->html_data, 0);
4618 }
4619 }
4620
4621 return(PASS_HTML(hd->html_data)); /* maybe get linked */
4622 }
4623
4624
4625 /*
4626 * HTML <INPUT> (Form) element handler
4627 */
4628 int
html_input(HANDLER_S * hd,int ch,int cmd)4629 html_input(HANDLER_S *hd, int ch, int cmd)
4630 {
4631 if(PASS_HTML(hd->html_data)){
4632 if(cmd == GF_RESET){
4633 html_output_raw_tag(hd->html_data, "input");
4634 }
4635 }
4636
4637 return(0); /* don't get linked */
4638 }
4639
4640
4641 /*
4642 * HTML <BUTTON> (Form) element handler
4643 */
4644 int
html_button(HANDLER_S * hd,int ch,int cmd)4645 html_button(HANDLER_S *hd, int ch, int cmd)
4646 {
4647 if(PASS_HTML(hd->html_data)){
4648 if(cmd == GF_DATA){
4649 html_handoff(hd, ch);
4650 }
4651 else if(cmd == GF_RESET){
4652 html_output_raw_tag(hd->html_data, "button");
4653 }
4654 else if(cmd == GF_EOD){
4655 html_output_string(hd->html_data, "</button>");
4656 }
4657
4658 return(1); /* get linked */
4659 }
4660
4661 return(0);
4662 }
4663
4664
4665 /*
4666 * HTML <OPTION> (Form) element handler
4667 */
4668 int
html_option(HANDLER_S * hd,int ch,int cmd)4669 html_option(HANDLER_S *hd, int ch, int cmd)
4670 {
4671 if(PASS_HTML(hd->html_data)){
4672 if(cmd == GF_DATA){
4673 html_handoff(hd, ch);
4674 }
4675 else if(cmd == GF_RESET){
4676 html_output_raw_tag(hd->html_data, "option");
4677 }
4678 else if(cmd == GF_EOD){
4679 html_output_string(hd->html_data, "</option>");
4680 }
4681
4682 return(1); /* get linked */
4683 }
4684
4685 return(0);
4686 }
4687
4688
4689 /*
4690 * HTML <OPTGROUP> (Form) element handler
4691 */
4692 int
html_optgroup(HANDLER_S * hd,int ch,int cmd)4693 html_optgroup(HANDLER_S *hd, int ch, int cmd)
4694 {
4695 if(PASS_HTML(hd->html_data)){
4696 if(cmd == GF_DATA){
4697 html_handoff(hd, ch);
4698 }
4699 else if(cmd == GF_RESET){
4700 html_output_raw_tag(hd->html_data, "optgroup");
4701 }
4702 else if(cmd == GF_EOD){
4703 html_output_string(hd->html_data, "</optgroup>");
4704 }
4705
4706 return(1); /* get linked */
4707 }
4708
4709 return(0);
4710 }
4711
4712
4713 /*
4714 * HTML <SELECT> (Form) element handler
4715 */
4716 int
html_select(HANDLER_S * hd,int ch,int cmd)4717 html_select(HANDLER_S *hd, int ch, int cmd)
4718 {
4719 if(PASS_HTML(hd->html_data)){
4720 if(cmd == GF_DATA){
4721 html_handoff(hd, ch);
4722 }
4723 else if(cmd == GF_RESET){
4724 html_output_raw_tag(hd->html_data, "select");
4725 }
4726 else if(cmd == GF_EOD){
4727 html_output_string(hd->html_data, "</select>");
4728 }
4729
4730 return(1); /* get linked */
4731 }
4732
4733 return(0);
4734 }
4735
4736
4737 /*
4738 * HTML <TEXTAREA> (Form) element handler
4739 */
4740 int
html_textarea(HANDLER_S * hd,int ch,int cmd)4741 html_textarea(HANDLER_S *hd, int ch, int cmd)
4742 {
4743 if(PASS_HTML(hd->html_data)){
4744 if(cmd == GF_DATA){
4745 html_handoff(hd, ch);
4746 }
4747 else if(cmd == GF_RESET){
4748 html_output_raw_tag(hd->html_data, "textarea");
4749 }
4750 else if(cmd == GF_EOD){
4751 html_output_string(hd->html_data, "</textarea>");
4752 }
4753
4754 return(1); /* get linked */
4755 }
4756
4757 return(0);
4758 }
4759
4760
4761 /*
4762 * HTML <LABEL> (Form) element handler
4763 */
4764 int
html_label(HANDLER_S * hd,int ch,int cmd)4765 html_label(HANDLER_S *hd, int ch, int cmd)
4766 {
4767 if(PASS_HTML(hd->html_data)){
4768 if(cmd == GF_DATA){
4769 html_handoff(hd, ch);
4770 }
4771 else if(cmd == GF_RESET){
4772 html_output_raw_tag(hd->html_data, "label");
4773 }
4774 else if(cmd == GF_EOD){
4775 html_output_string(hd->html_data, "</label>");
4776 }
4777
4778 return(1); /* get linked */
4779 }
4780
4781 return(0);
4782 }
4783
4784
4785 /*
4786 * HTML <FIELDSET> (Form) element handler
4787 */
4788 int
html_fieldset(HANDLER_S * hd,int ch,int cmd)4789 html_fieldset(HANDLER_S *hd, int ch, int cmd)
4790 {
4791 if(PASS_HTML(hd->html_data)){
4792 if(cmd == GF_DATA){
4793 html_handoff(hd, ch);
4794 }
4795 else if(cmd == GF_RESET){
4796 html_output_raw_tag(hd->html_data, "fieldset");
4797 }
4798 else if(cmd == GF_EOD){
4799 html_output_string(hd->html_data, "</fieldset>");
4800 }
4801
4802 return(1); /* get linked */
4803 }
4804
4805 return(0);
4806 }
4807
4808
4809 /*
4810 * HTML <HEAD> element handler
4811 */
4812 int
html_head(HANDLER_S * hd,int ch,int cmd)4813 html_head(HANDLER_S *hd, int ch, int cmd)
4814 {
4815 if(cmd == GF_DATA){
4816 html_handoff(hd, ch);
4817 }
4818 else if(cmd == GF_RESET){
4819 HD(hd->html_data)->head = 1;
4820 }
4821 else if(cmd == GF_EOD){
4822 HD(hd->html_data)->head = 0;
4823 }
4824
4825 return(1); /* get linked */
4826 }
4827
4828
4829 /*
4830 * HTML <BASE> element handler
4831 */
4832 int
html_base(HANDLER_S * hd,int ch,int cmd)4833 html_base(HANDLER_S *hd, int ch, int cmd)
4834 {
4835 if(cmd == GF_RESET){
4836 if(HD(hd->html_data)->head && !HTML_BASE(hd->html_data)){
4837 PARAMETER *p;
4838
4839 for(p = HD(hd->html_data)->el_data->attribs;
4840 p && p->attribute && strucmp(p->attribute, "HREF");
4841 p = p->next)
4842 ;
4843
4844 if(p && p->value && !((HTML_OPT_S *)(hd->html_data)->opt)->base)
4845 ((HTML_OPT_S *)(hd->html_data)->opt)->base = cpystr(p->value);
4846 }
4847 }
4848
4849 return(0); /* DON'T get linked */
4850 }
4851
4852
4853 /*
4854 * HTML <TITLE> element handler
4855 */
4856 int
html_title(HANDLER_S * hd,int ch,int cmd)4857 html_title(HANDLER_S *hd, int ch, int cmd)
4858 {
4859 if(cmd == GF_DATA){
4860 if(hd->x + 1 >= hd->y){
4861 hd->y += 80;
4862 fs_resize((void **)&hd->s, (size_t)hd->y * sizeof(unsigned char));
4863 }
4864
4865 hd->s[hd->x++] = (unsigned char) ch;
4866 }
4867 else if(cmd == GF_RESET){
4868 hd->x = 0L;
4869 hd->y = 80L;
4870 hd->s = (unsigned char *)fs_get((size_t)hd->y * sizeof(unsigned char));
4871 }
4872 else if(cmd == GF_EOD){
4873 /* Down the road we probably want to give these bytes to
4874 * someone...
4875 */
4876 hd->s[hd->x] = '\0';
4877 fs_give((void **)&hd->s);
4878 }
4879
4880 return(1); /* get linked */
4881 }
4882
4883
4884 /*
4885 * HTML <BODY> element handler
4886 */
4887 int
html_body(HANDLER_S * hd,int ch,int cmd)4888 html_body(HANDLER_S *hd, int ch, int cmd)
4889 {
4890 if(cmd == GF_DATA){
4891 html_handoff(hd, ch);
4892 }
4893 else if(cmd == GF_RESET){
4894 if(PASS_HTML(hd->html_data)){
4895 PARAMETER *p, *tp;
4896 char **style = NULL, *text = NULL, *bgcolor = NULL, *pcs;
4897
4898 /* modify any attributes in a useful way? */
4899 for(p = HD(hd->html_data)->el_data->attribs;
4900 p && p->attribute;
4901 p = p->next)
4902 if(p->value){
4903 if(!strucmp(p->attribute, "style"))
4904 style = &p->value;
4905 else if(!strucmp(p->attribute, "text"))
4906 text = p->value;
4907 /*
4908 * bgcolor NOT passed since user setting takes precedence
4909 *
4910 else if(!strucmp(p->attribute, "bgcolor"))
4911 bgcolor = p->value;
4912 */
4913 }
4914
4915 /* colors pretty much it */
4916 if(text || bgcolor){
4917 if(!style){
4918 tp = (PARAMETER *)fs_get(sizeof(PARAMETER));
4919 memset(tp, 0, sizeof(PARAMETER));
4920 tp->next = HD(hd->html_data)->el_data->attribs;
4921 HD(hd->html_data)->el_data->attribs = tp;
4922 tp->attribute = cpystr("style");
4923
4924 tmp_20k_buf[0] = '\0';
4925 style = &tp->value;
4926 pcs = "%s%s%s%s%s";
4927 }
4928 else{
4929 snprintf(tmp_20k_buf, SIZEOF_20KBUF, "%s", *style);
4930 fs_give((void **) style);
4931 pcs = "; %s%s%s%s%s";
4932 }
4933
4934 snprintf(tmp_20k_buf + strlen(tmp_20k_buf),
4935 SIZEOF_20KBUF - strlen(tmp_20k_buf),
4936 pcs,
4937 (text) ? "color: " : "", (text) ? text : "",
4938 (text && bgcolor) ? ";" : "",
4939 (bgcolor) ? "background-color: " : "", (bgcolor) ? bgcolor : "");
4940 *style = cpystr(tmp_20k_buf);
4941 }
4942
4943 html_output_raw_tag(hd->html_data, "div");
4944 }
4945
4946 HD(hd->html_data)->body = 1;
4947 }
4948 else if(cmd == GF_EOD){
4949 if(PASS_HTML(hd->html_data)){
4950 html_output_string(hd->html_data, "</div>");
4951 }
4952
4953 HD(hd->html_data)->body = 0;
4954 }
4955
4956 return(1); /* get linked */
4957 }
4958
4959
4960 /*
4961 * HTML <A> (Anchor) element handler
4962 */
4963 int
html_a(HANDLER_S * hd,int ch,int cmd)4964 html_a(HANDLER_S *hd, int ch, int cmd)
4965 {
4966 if(cmd == GF_DATA){
4967 html_handoff(hd, ch);
4968
4969 if(hd->dp) /* remember text within anchor tags */
4970 so_writec(ch, (STORE_S *) hd->dp);
4971 }
4972 else if(cmd == GF_RESET){
4973 int i, n, x;
4974 char buf[256];
4975 HANDLE_S *h;
4976 PARAMETER *p, *href = NULL, *name = NULL;
4977
4978 /*
4979 * Pending Anchor!?!?
4980 * space insertion/line breaking that's yet to get done...
4981 */
4982 if(HD(hd->html_data)->prefix){
4983 dprint((2, "-- html error: nested or unterminated anchor\n"));
4984 html_a_finish(hd);
4985 }
4986
4987 /*
4988 * Look for valid Anchor data vis the filter installer's parms
4989 * (e.g., Only allow references to our internal URLs if asked)
4990 */
4991 for(p = HD(hd->html_data)->el_data->attribs;
4992 p && p->attribute;
4993 p = p->next)
4994 if(!strucmp(p->attribute, "HREF")
4995 && p->value
4996 && (HANDLES_LOC(hd->html_data)
4997 || struncmp(p->value, "x-alpine-", 9)
4998 || struncmp(p->value, "x-pine-help", 11)
4999 || p->value[0] == '#'))
5000 href = p;
5001 else if(!strucmp(p->attribute, "NAME"))
5002 name = p;
5003
5004 if(DO_HANDLES(hd->html_data) && (href || name)){
5005 h = new_handle(HANDLESP(hd->html_data));
5006
5007 /*
5008 * Enhancement: we might want to get fancier and parse the
5009 * href a bit further such that we can launch images using
5010 * our image viewer, or browse local files or directories
5011 * with our internal tools. Of course, having the jump-off
5012 * point into text/html always be the defined "web-browser",
5013 * just might be the least confusing UI-wise...
5014 */
5015 h->type = URL;
5016
5017 if(name && name->value)
5018 h->h.url.name = cpystr(name->value);
5019
5020 /*
5021 * Prepare to build embedded prefix...
5022 */
5023 HD(hd->html_data)->prefix = (int *) fs_get(64 * sizeof(int));
5024 x = 0;
5025
5026 /*
5027 * Is this something that looks like a URL? If not and
5028 * we were giving some "base" string, proceed ala RFC1808...
5029 */
5030 if(href){
5031 if(href->value)
5032 removing_leading_and_trailing_white_space(href->value);
5033 if(HTML_BASE(hd->html_data) && !rfc1738_scan(href->value, &n)){
5034 html_a_relative(HTML_BASE(hd->html_data), href->value, h);
5035 }
5036 else if(!(NO_RELATIVE(hd->html_data) && html_href_relative(href->value)))
5037 h->h.url.path = cpystr(href->value);
5038
5039 if(pico_usingcolor()){
5040 char *fg = NULL, *bg = NULL, *q;
5041
5042 if(ps_global->VAR_SLCTBL_FORE_COLOR
5043 && colorcmp(ps_global->VAR_SLCTBL_FORE_COLOR,
5044 ps_global->VAR_NORM_FORE_COLOR))
5045 fg = ps_global->VAR_SLCTBL_FORE_COLOR;
5046
5047 if(ps_global->VAR_SLCTBL_BACK_COLOR
5048 && colorcmp(ps_global->VAR_SLCTBL_BACK_COLOR,
5049 ps_global->VAR_NORM_BACK_COLOR))
5050 bg = ps_global->VAR_SLCTBL_BACK_COLOR;
5051
5052 if(fg || bg){
5053 COLOR_PAIR *tmp;
5054
5055 /*
5056 * The blacks are just known good colors for testing
5057 * whether the other color is good.
5058 */
5059 tmp = new_color_pair(fg ? fg : colorx(COL_BLACK),
5060 bg ? bg : colorx(COL_BLACK));
5061 if(pico_is_good_colorpair(tmp)){
5062 q = color_embed(fg, bg);
5063
5064 for(i = 0; q[i]; i++)
5065 HD(hd->html_data)->prefix[x++] = q[i];
5066 }
5067
5068 if(tmp)
5069 free_color_pair(&tmp);
5070 }
5071
5072 if(F_OFF(F_SLCTBL_ITEM_NOBOLD, ps_global))
5073 HD(hd->html_data)->prefix[x++] = HTML_DOBOLD;
5074 }
5075 else
5076 HD(hd->html_data)->prefix[x++] = HTML_DOBOLD;
5077 }
5078
5079 HD(hd->html_data)->prefix[x++] = TAG_EMBED;
5080 HD(hd->html_data)->prefix[x++] = TAG_HANDLE;
5081
5082 snprintf(buf, sizeof(buf), "%ld", hd->x = h->key);
5083 HD(hd->html_data)->prefix[x++] = n = strlen(buf);
5084 for(i = 0; i < n; i++)
5085 HD(hd->html_data)->prefix[x++] = buf[i];
5086
5087 HD(hd->html_data)->prefix_used = x;
5088
5089 hd->dp = (void *) so_get(CharStar, NULL, EDIT_ACCESS);
5090 }
5091 }
5092 else if(cmd == GF_EOD){
5093 html_a_finish(hd);
5094 }
5095
5096 return(1); /* get linked */
5097 }
5098
5099
5100 void
html_a_prefix(FILTER_S * f)5101 html_a_prefix(FILTER_S *f)
5102 {
5103 int *prefix, n;
5104
5105 /* Do this so we don't visit from html_output... */
5106 prefix = HD(f)->prefix;
5107 HD(f)->prefix = NULL;
5108
5109 for(n = 0; n < HD(f)->prefix_used; n++)
5110 html_a_output_prefix(f, prefix[n]);
5111
5112 fs_give((void **) &prefix);
5113 }
5114
5115
5116 /*
5117 * html_a_finish - house keeping associated with end of link tag
5118 */
5119 void
html_a_finish(HANDLER_S * hd)5120 html_a_finish(HANDLER_S *hd)
5121 {
5122 if(DO_HANDLES(hd->html_data)){
5123 if(HD(hd->html_data)->prefix){
5124 if(!PASS_HTML(hd->html_data)){
5125 char *empty_link = "[LINK]";
5126 int i;
5127
5128 html_a_prefix(hd->html_data);
5129 for(i = 0; empty_link[i]; i++)
5130 html_output(hd->html_data, empty_link[i]);
5131 }
5132 }
5133
5134 if(pico_usingcolor()){
5135 char *fg = NULL, *bg = NULL, *p;
5136 int i;
5137
5138 if(ps_global->VAR_SLCTBL_FORE_COLOR
5139 && colorcmp(ps_global->VAR_SLCTBL_FORE_COLOR,
5140 ps_global->VAR_NORM_FORE_COLOR))
5141 fg = ps_global->VAR_NORM_FORE_COLOR;
5142
5143 if(ps_global->VAR_SLCTBL_BACK_COLOR
5144 && colorcmp(ps_global->VAR_SLCTBL_BACK_COLOR,
5145 ps_global->VAR_NORM_BACK_COLOR))
5146 bg = ps_global->VAR_NORM_BACK_COLOR;
5147
5148 if(F_OFF(F_SLCTBL_ITEM_NOBOLD, ps_global))
5149 HTML_BOLD(hd->html_data, 0); /* turn OFF bold */
5150
5151 if(fg || bg){
5152 COLOR_PAIR *tmp;
5153
5154 /*
5155 * The blacks are just known good colors for testing
5156 * whether the other color is good.
5157 */
5158 tmp = new_color_pair(fg ? fg : colorx(COL_BLACK),
5159 bg ? bg : colorx(COL_BLACK));
5160 if(pico_is_good_colorpair(tmp)){
5161 p = color_embed(fg, bg);
5162
5163 for(i = 0; p[i]; i++)
5164 html_output(hd->html_data, p[i]);
5165 }
5166
5167 if(tmp)
5168 free_color_pair(&tmp);
5169 }
5170 }
5171 else
5172 HTML_BOLD(hd->html_data, 0); /* turn OFF bold */
5173
5174 html_output(hd->html_data, TAG_EMBED);
5175 html_output(hd->html_data, TAG_HANDLEOFF);
5176
5177 html_a_output_info(hd);
5178 }
5179 }
5180
5181
5182 /*
5183 * html_output_a_prefix - dump Anchor prefix data
5184 */
5185 void
html_a_output_prefix(FILTER_S * f,int c)5186 html_a_output_prefix(FILTER_S *f, int c)
5187 {
5188 switch(c){
5189 case HTML_DOBOLD :
5190 HTML_BOLD(f, 1);
5191 break;
5192
5193 default :
5194 html_output(f, c);
5195 break;
5196 }
5197 }
5198
5199
5200
5201 /*
5202 * html_a_output_info - dump possibly deceptive link info into text.
5203 * phark the phishers.
5204 */
5205 void
html_a_output_info(HANDLER_S * hd)5206 html_a_output_info(HANDLER_S *hd)
5207 {
5208 int l, risky = 0, hl = 0, tl;
5209 char *url = NULL, *hn = NULL, *txt;
5210 HANDLE_S *h;
5211
5212 /* find host anchor references */
5213 if((h = get_handle(*HANDLESP(hd->html_data), (int) hd->x)) != NULL
5214 && h->h.url.path != NULL
5215 && (hn = rfc1738_scan(rfc1738_str(url = cpystr(h->h.url.path)), &l)) != NULL
5216 && (hn = srchstr(hn,"://")) != NULL){
5217
5218 for(hn += 3, hl = 0; hn[hl] && hn[hl] != '/' && hn[hl] != '?'; hl++)
5219 ;
5220 }
5221
5222 if(hn && hl){
5223 /*
5224 * look over anchor's text to see if there's a
5225 * mismatch between href target and url-ish
5226 * looking text. throw a red flag if so.
5227 * similarly, toss one if the target's referenced
5228 * by a
5229 */
5230 if(hd->dp){
5231 so_writec('\0', (STORE_S *) hd->dp);
5232
5233 if((txt = (char *) so_text((STORE_S *) hd->dp)) != NULL
5234 && (txt = rfc1738_scan(txt, &tl)) != NULL
5235 && (txt = srchstr(txt,"://")) != NULL){
5236
5237 for(txt += 3, tl = 0; txt[tl] && txt[tl] != '/' && txt[tl] != '?'; tl++)
5238 ;
5239
5240 if(tl != hl)
5241 risky++;
5242 else
5243 /* look for non matching text */
5244 for(l = 0; l < tl && l < hl; l++)
5245 if(tolower((unsigned char) txt[l]) != tolower((unsigned char) hn[l])){
5246 risky++;
5247 break;
5248 }
5249 }
5250
5251 so_give((STORE_S **) &hd->dp);
5252 }
5253
5254 /* look for literal IP, anything possibly encoded or auth specifier */
5255 if(!risky){
5256 int digits = 1;
5257
5258 for(l = 0; l < hl; l++){
5259 if(hn[l] == '@' || hn[l] == '%'){
5260 risky++;
5261 break;
5262 }
5263 else if(!(hn[l] == '.' || isdigit((unsigned char) hn[l])))
5264 digits = 0;
5265 }
5266
5267 if(digits)
5268 risky++;
5269 }
5270
5271 /* Insert text of link's domain */
5272 if(SHOWSERVER(hd->html_data)){
5273 char *q;
5274 COLOR_PAIR *col = NULL, *colnorm = NULL;
5275
5276 html_output(hd->html_data, ' ');
5277 html_output(hd->html_data, '[');
5278
5279 if(pico_usingcolor()
5280 && ps_global->VAR_METAMSG_FORE_COLOR
5281 && ps_global->VAR_METAMSG_BACK_COLOR
5282 && (col = new_color_pair(ps_global->VAR_METAMSG_FORE_COLOR,
5283 ps_global->VAR_METAMSG_BACK_COLOR))){
5284 if(!pico_is_good_colorpair(col))
5285 free_color_pair(&col);
5286
5287 if(col){
5288 q = color_embed(col->fg, col->bg);
5289
5290 for(l = 0; q[l]; l++)
5291 html_output(hd->html_data, q[l]);
5292 }
5293 }
5294
5295 for(l = 0; l < hl; l++)
5296 html_output(hd->html_data, hn[l]);
5297
5298 if(col){
5299 if(ps_global->VAR_NORM_FORE_COLOR
5300 && ps_global->VAR_NORM_BACK_COLOR
5301 && (colnorm = new_color_pair(ps_global->VAR_NORM_FORE_COLOR,
5302 ps_global->VAR_NORM_BACK_COLOR))){
5303 if(!pico_is_good_colorpair(colnorm))
5304 free_color_pair(&colnorm);
5305
5306 if(colnorm){
5307 q = color_embed(colnorm->fg, colnorm->bg);
5308 free_color_pair(&colnorm);
5309
5310 for(l = 0; q[l]; l++)
5311 html_output(hd->html_data, q[l]);
5312 }
5313 }
5314
5315 free_color_pair(&col);
5316 }
5317
5318 html_output(hd->html_data, ']');
5319 }
5320 }
5321
5322 /*
5323 * if things look OK so far, make sure nothing within
5324 * the url looks too fishy...
5325 */
5326 while(!risky && hn
5327 && (hn = rfc1738_scan(hn, &l)) != NULL
5328 && (hn = srchstr(hn,"://")) != NULL){
5329 int digits = 1;
5330
5331 for(hn += 3, hl = 0; hn[hl] && hn[hl] != '/' && hn[hl] != '?'; hl++){
5332 /*
5333 * auth spec, encoded characters, or possibly non-standard port
5334 * should raise a red flag
5335 */
5336 if(hn[hl] == '@' || hn[hl] == '%' || hn[hl] == ':'){
5337 risky++;
5338 break;
5339 }
5340 else if(!(hn[hl] == '.' || isdigit((unsigned char) hn[hl])))
5341 digits = 0;
5342 }
5343
5344 /* dotted-dec/raw-int address should cause suspicion as well */
5345 if(digits)
5346 risky++;
5347 }
5348
5349 if(risky && ((HTML_OPT_S *) hd->html_data->opt)->warnrisk_f)
5350 (*((HTML_OPT_S *) hd->html_data->opt)->warnrisk_f)();
5351
5352 if(hd->dp)
5353 so_give((STORE_S **) &hd->dp);
5354
5355
5356 fs_give((void **) &url);
5357 }
5358
5359
5360
5361 /*
5362 * relative_url - put full url path in h based on base and relative url
5363 */
5364 void
html_a_relative(char * base_url,char * rel_url,HANDLE_S * h)5365 html_a_relative(char *base_url, char *rel_url, HANDLE_S *h)
5366 {
5367 size_t len;
5368 char tmp[MAILTMPLEN], *p, *q;
5369 char *scheme = NULL, *net = NULL, *path = NULL,
5370 *parms = NULL, *query = NULL, *frag = NULL,
5371 *base_scheme = NULL, *base_net_loc = NULL,
5372 *base_path = NULL, *base_parms = NULL,
5373 *base_query = NULL, *base_frag = NULL,
5374 *rel_scheme = NULL, *rel_net_loc = NULL,
5375 *rel_path = NULL, *rel_parms = NULL,
5376 *rel_query = NULL, *rel_frag = NULL;
5377
5378 /* Rough parse of base URL */
5379 rfc1808_tokens(base_url, &base_scheme, &base_net_loc, &base_path,
5380 &base_parms, &base_query, &base_frag);
5381
5382 /* Rough parse of this URL */
5383 rfc1808_tokens(rel_url, &rel_scheme, &rel_net_loc, &rel_path,
5384 &rel_parms, &rel_query, &rel_frag);
5385
5386 scheme = rel_scheme; /* defaults */
5387 net = rel_net_loc;
5388 path = rel_path;
5389 parms = rel_parms;
5390 query = rel_query;
5391 frag = rel_frag;
5392 if(!scheme && base_scheme){
5393 scheme = base_scheme;
5394 if(!net){
5395 net = base_net_loc;
5396 if(path){
5397 if(*path != '/'){
5398 if(base_path){
5399 for(p = q = base_path; /* Drop base path's tail */
5400 (p = strchr(p, '/'));
5401 q = ++p)
5402 ;
5403
5404 len = q - base_path;
5405 }
5406 else
5407 len = 0;
5408
5409 if(len + strlen(rel_path) < sizeof(tmp)-1){
5410 if(len)
5411 snprintf(path = tmp, sizeof(tmp), "%.*s", (int) len, base_path);
5412
5413 strncpy(tmp + len, rel_path, sizeof(tmp)-len);
5414 tmp[sizeof(tmp)-1] = '\0';
5415
5416 /* Follow RFC 1808 "Step 6" */
5417 for(p = tmp; (p = strchr(p, '.')); )
5418 switch(*(p+1)){
5419 /*
5420 * a) All occurrences of "./", where "." is a
5421 * complete path segment, are removed.
5422 */
5423 case '/' :
5424 if(p > tmp)
5425 for(q = p; (*q = *(q+2)) != '\0'; q++)
5426 ;
5427 else
5428 p++;
5429
5430 break;
5431
5432 /*
5433 * b) If the path ends with "." as a
5434 * complete path segment, that "." is
5435 * removed.
5436 */
5437 case '\0' :
5438 if(p == tmp || *(p-1) == '/')
5439 *p = '\0';
5440 else
5441 p++;
5442
5443 break;
5444
5445 /*
5446 * c) All occurrences of "<segment>/../",
5447 * where <segment> is a complete path
5448 * segment not equal to "..", are removed.
5449 * Removal of these path segments is
5450 * performed iteratively, removing the
5451 * leftmost matching pattern on each
5452 * iteration, until no matching pattern
5453 * remains.
5454 *
5455 * d) If the path ends with "<segment>/..",
5456 * where <segment> is a complete path
5457 * segment not equal to "..", that
5458 * "<segment>/.." is removed.
5459 */
5460 case '.' :
5461 if(p > tmp + 1){
5462 for(q = p - 2; q > tmp && *q != '/'; q--)
5463 ;
5464
5465 if(*q == '/')
5466 q++;
5467
5468 if(q + 1 == p /* no "//.." */
5469 || (*q == '.' /* and "../.." */
5470 && *(q+1) == '.'
5471 && *(q+2) == '/')){
5472 p += 2;
5473 break;
5474 }
5475
5476 switch(*(p+2)){
5477 case '/' :
5478 len = (p - q) + 3;
5479 p = q;
5480 for(; (*q = *(q+len)) != '\0'; q++)
5481 ;
5482
5483 break;
5484
5485 case '\0':
5486 *(p = q) = '\0';
5487 break;
5488
5489 default:
5490 p += 2;
5491 break;
5492 }
5493 }
5494 else
5495 p += 2;
5496
5497 break;
5498
5499 default :
5500 p++;
5501 break;
5502 }
5503 }
5504 else
5505 path = ""; /* lame. */
5506 }
5507 }
5508 else{
5509 path = base_path;
5510 if(!parms){
5511 parms = base_parms;
5512 if(!query)
5513 query = base_query;
5514 }
5515 }
5516 }
5517 }
5518
5519 len = (scheme ? strlen(scheme) : 0) + (net ? strlen(net) : 0)
5520 + (path ? strlen(path) : 0) + (parms ? strlen(parms) : 0)
5521 + (query ? strlen(query) : 0) + (frag ? strlen(frag ) : 0) + 8;
5522
5523 h->h.url.path = (char *) fs_get(len * sizeof(char));
5524 snprintf(h->h.url.path, len, "%s%s%s%s%s%s%s%s%s%s%s%s",
5525 scheme ? scheme : "", scheme ? ":" : "",
5526 net ? "//" : "", net ? net : "",
5527 (path && *path == '/') ? "" : ((path && net) ? "/" : ""),
5528 path ? path : "",
5529 parms ? ";" : "", parms ? parms : "",
5530 query ? "?" : "", query ? query : "",
5531 frag ? "#" : "", frag ? frag : "");
5532
5533 if(base_scheme)
5534 fs_give((void **) &base_scheme);
5535
5536 if(base_net_loc)
5537 fs_give((void **) &base_net_loc);
5538
5539 if(base_path)
5540 fs_give((void **) &base_path);
5541
5542 if(base_parms)
5543 fs_give((void **) &base_parms);
5544
5545 if(base_query)
5546 fs_give((void **) &base_query);
5547
5548 if(base_frag)
5549 fs_give((void **) &base_frag);
5550
5551 if(rel_scheme)
5552 fs_give((void **) &rel_scheme);
5553
5554 if(rel_net_loc)
5555 fs_give((void **) &rel_net_loc);
5556
5557 if(rel_parms)
5558 fs_give((void **) &rel_parms);
5559
5560 if(rel_query)
5561 fs_give((void **) &rel_query);
5562
5563 if(rel_frag)
5564 fs_give((void **) &rel_frag);
5565
5566 if(rel_path)
5567 fs_give((void **) &rel_path);
5568 }
5569
5570
/*
 * html_href_relative - tell whether an href lacks a leading "scheme:"
 *
 * Scans at most the first 32 characters of url.  Returns FALSE when a
 * ':' is met before any character other than a letter, '_' or '-';
 * returns TRUE otherwise, including for a NULL or empty url.
 */
int
html_href_relative(char *url)
{
    const char *cp;
    int         left;

    if(!url){
        return(TRUE);
    }

    for(cp = url, left = 32; left > 0 && *cp != '\0'; cp++, left--){
        unsigned char c = (unsigned char) *cp;

        if(isalpha(c) || c == '_' || c == '-'){
            continue;                   /* still a plausible scheme name */
        }

        /* first other character decides the matter */
        return((c == ':') ? FALSE : TRUE);
    }

    return(TRUE);
}
5590
5591
5592 /*
5593 * HTML <UL> (Unordered List) element handler
5594 */
5595 int
html_ul(HANDLER_S * hd,int ch,int cmd)5596 html_ul(HANDLER_S *hd, int ch, int cmd)
5597 {
5598 if(cmd == GF_DATA){
5599 html_handoff(hd, ch);
5600 }
5601 else if(cmd == GF_RESET){
5602 if(PASS_HTML(hd->html_data)){
5603 html_output_raw_tag(hd->html_data, "ul");
5604 }
5605 else{
5606 HD(hd->html_data)->li_pending = 1;
5607 html_blank(hd->html_data, 0);
5608 }
5609 }
5610 else if(cmd == GF_EOD){
5611 if(PASS_HTML(hd->html_data)){
5612 html_output_string(hd->html_data, "</ul>");
5613 }
5614 else{
5615 html_blank(hd->html_data, 0);
5616
5617 if(!HD(hd->html_data)->li_pending)
5618 html_indent(hd->html_data, -4, HTML_ID_INC);
5619 else
5620 HD(hd->html_data)->li_pending = 0;
5621 }
5622 }
5623
5624 return(1); /* get linked */
5625 }
5626
5627
5628 /*
5629 * HTML <OL> (Ordered List) element handler
5630 */
5631 int
html_ol(HANDLER_S * hd,int ch,int cmd)5632 html_ol(HANDLER_S *hd, int ch, int cmd)
5633 {
5634 if(cmd == GF_DATA){
5635 html_handoff(hd, ch);
5636 }
5637 else if(cmd == GF_RESET){
5638 if(PASS_HTML(hd->html_data)){
5639 html_output_raw_tag(hd->html_data, "ol");
5640 }
5641 else{
5642 PARAMETER *p;
5643 /*
5644 * Signal that we're expecting to see <LI> as our next element
5645 * and set the the initial ordered count.
5646 */
5647 hd->x = 1L; /* set default */
5648 hd->y = LIST_DECIMAL; /* set default */
5649 for(p = HD(hd->html_data)->el_data->attribs;
5650 p && p->attribute;
5651 p = p->next)
5652 if(p->value){
5653 if(!strucmp(p->attribute, "TYPE")){
5654 if(!strucmp(p->value, "a")) /* alpha, lowercase */
5655 hd->y = LIST_ALPHALO;
5656 else if(!strucmp(p->value, "A")) /* alpha, uppercase */
5657 hd->y = LIST_ALPHAUP;
5658 else if(!strucmp(p->value, "i")) /* roman, lowercase */
5659 hd->y = LIST_ROMANLO;
5660 else if(!strucmp(p->value, "I")) /* roman, uppercase */
5661 hd->y = LIST_ROMANUP;
5662 else if(strucmp(p->value, "1")) /* decimal, the default */
5663 hd->y = LIST_UNKNOWN;
5664 }
5665 else if(!strucmp(p->attribute, "START"))
5666 hd->x = atol(p->value);
5667 // else ADD SUPPORT FOR OTHER ATTRIBUTES... LATER
5668 // this is not so simple. The main missing support
5669 // is for the STYLE attribute, but implementing that
5670 // correctly will take time, so will be implemented
5671 // after version 2.21 is released.
5672 }
5673 HD(hd->html_data)->li_pending = 1;
5674 html_blank(hd->html_data, 0);
5675 }
5676 }
5677 else if(cmd == GF_EOD){
5678 if(PASS_HTML(hd->html_data)){
5679 html_output_string(hd->html_data, "</ol>");
5680 }
5681 else{
5682 html_blank(hd->html_data, 0);
5683
5684 if(!HD(hd->html_data)->li_pending)
5685 html_indent(hd->html_data, -4, HTML_ID_INC);
5686 else
5687 HD(hd->html_data)->li_pending = 0;
5688 }
5689 }
5690
5691 return(1); /* get linked */
5692 }
5693
5694
5695 /*
5696 * HTML <MENU> (Menu List) element handler
5697 */
5698 int
html_menu(HANDLER_S * hd,int ch,int cmd)5699 html_menu(HANDLER_S *hd, int ch, int cmd)
5700 {
5701 if(cmd == GF_DATA){
5702 html_handoff(hd, ch);
5703 }
5704 else if(cmd == GF_RESET){
5705 if(PASS_HTML(hd->html_data)){
5706 html_output_raw_tag(hd->html_data, "menu");
5707 }
5708 else{
5709 HD(hd->html_data)->li_pending = 1;
5710 }
5711 }
5712 else if(cmd == GF_EOD){
5713 if(PASS_HTML(hd->html_data)){
5714 html_output_string(hd->html_data, "</menu>");
5715 }
5716 else{
5717 html_blank(hd->html_data, 0);
5718
5719 if(!HD(hd->html_data)->li_pending)
5720 html_indent(hd->html_data, -4, HTML_ID_INC);
5721 else
5722 HD(hd->html_data)->li_pending = 0;
5723 }
5724 }
5725
5726 return(1); /* get linked */
5727 }
5728
5729
5730 /*
5731 * HTML <DIR> (Directory List) element handler
5732 */
5733 int
html_dir(HANDLER_S * hd,int ch,int cmd)5734 html_dir(HANDLER_S *hd, int ch, int cmd)
5735 {
5736 if(cmd == GF_DATA){
5737 html_handoff(hd, ch);
5738 }
5739 else if(cmd == GF_RESET){
5740 if(PASS_HTML(hd->html_data)){
5741 html_output_raw_tag(hd->html_data, "dir");
5742 }
5743 else{
5744 HD(hd->html_data)->li_pending = 1;
5745 }
5746 }
5747 else if(cmd == GF_EOD){
5748 if(PASS_HTML(hd->html_data)){
5749 html_output_string(hd->html_data, "</dir>");
5750 }
5751 else{
5752 html_blank(hd->html_data, 0);
5753
5754 if(!HD(hd->html_data)->li_pending)
5755 html_indent(hd->html_data, -4, HTML_ID_INC);
5756 else
5757 HD(hd->html_data)->li_pending = 0;
5758 }
5759 }
5760
5761 return(1); /* get linked */
5762 }
5763
5764
5765 /*
5766 * HTML <LI> (List Item) element handler
5767 */
5768 int
html_li(HANDLER_S * hd,int ch,int cmd)5769 html_li(HANDLER_S *hd, int ch, int cmd)
5770 {
5771 if(cmd == GF_DATA){
5772 if(PASS_HTML(hd->html_data)){
5773 html_handoff(hd, ch);
5774 }
5775 }
5776 else if(cmd == GF_RESET){
5777 HANDLER_S *p, *found = NULL;
5778
5779 /*
5780 * There better be a an unordered list, ordered list,
5781 * Menu or Directory handler installed
5782 * or else we crap out...
5783 */
5784 for(p = HANDLERS(hd->html_data); p; p = p->below)
5785 if(EL(p)->handler == html_ul
5786 || EL(p)->handler == html_ol
5787 || EL(p)->handler == html_menu
5788 || EL(p)->handler == html_dir){
5789 found = p;
5790 break;
5791 }
5792
5793 if(found){
5794 if(PASS_HTML(hd->html_data)){
5795 }
5796 else{
5797 char buf[20], tmp[16], *p;
5798 int wrapstate;
5799
5800 /* Start a new line */
5801 html_blank(hd->html_data, 0);
5802
5803 /* adjust indent level if needed */
5804 if(HD(hd->html_data)->li_pending){
5805 html_indent(hd->html_data, 4, HTML_ID_INC);
5806 HD(hd->html_data)->li_pending = 0;
5807 }
5808
5809 if(EL(found)->handler == html_ul){
5810 int l = html_indent(hd->html_data, 0, HTML_ID_GET);
5811
5812 strncpy(buf, " ", sizeof(buf));
5813 buf[1] = (l < 5) ? '*' : (l < 9) ? '+' : (l < 17) ? 'o' : '#';
5814 }
5815 else if(EL(found)->handler == html_ol){
5816 if(found->y == LIST_DECIMAL || found->y == LIST_UNKNOWN)
5817 snprintf(tmp, sizeof(tmp), "%ld", found->x++);
5818 else if(found->y == LIST_ALPHALO)
5819 convert_decimal_to_alpha(tmp, sizeof(tmp), found->x++, 'a');
5820 else if(found->y == LIST_ALPHAUP)
5821 convert_decimal_to_alpha(tmp, sizeof(tmp), found->x++, 'A');
5822 else if(found->y == LIST_ROMANLO)
5823 convert_decimal_to_roman(tmp, sizeof(tmp), found->x++, 'i');
5824 else if(found->y == LIST_ROMANUP)
5825 convert_decimal_to_roman(tmp, sizeof(tmp), found->x++, 'I');
5826 snprintf(buf, sizeof(buf), " %s.", tmp);
5827 buf[sizeof(buf)-1] = '\0';
5828 }
5829 else if(EL(found)->handler == html_menu){
5830 strncpy(buf, " ->", sizeof(buf));
5831 buf[sizeof(buf)-1] = '\0';
5832 }
5833
5834 html_indent(hd->html_data, -4, HTML_ID_INC);
5835
5836 /* So we don't munge whitespace */
5837 wrapstate = HD(hd->html_data)->wrapstate;
5838 HD(hd->html_data)->wrapstate = 0;
5839
5840 html_write_indent(hd->html_data, HD(hd->html_data)->indent_level);
5841 for(p = buf; *p; p++)
5842 html_output(hd->html_data, (int) *p);
5843 HD(hd->html_data)->wrapstate = wrapstate;
5844 html_indent(hd->html_data, 4, HTML_ID_INC);
5845 }
5846 /* else BUG: should really bitch about this */
5847 }
5848
5849 if(PASS_HTML(hd->html_data)){
5850 html_output_raw_tag(hd->html_data, "li");
5851 return(1); /* get linked */
5852 }
5853 }
5854 else if(cmd == GF_EOD){
5855 if(PASS_HTML(hd->html_data)){
5856 html_output_string(hd->html_data, "</li>");
5857 }
5858 }
5859
5860 return(PASS_HTML(hd->html_data)); /* DON'T get linked */
5861 }
5862
5863
5864 /*
5865 * HTML <DL> (Definition List) element handler
5866 */
5867 int
html_dl(HANDLER_S * hd,int ch,int cmd)5868 html_dl(HANDLER_S *hd, int ch, int cmd)
5869 {
5870 if(cmd == GF_DATA){
5871 html_handoff(hd, ch);
5872 }
5873 else if(cmd == GF_RESET){
5874 if(PASS_HTML(hd->html_data)){
5875 html_output_raw_tag(hd->html_data, "dl");
5876 }
5877 else{
5878 /*
5879 * Set indention level for definition terms and definitions...
5880 */
5881 hd->x = html_indent(hd->html_data, 0, HTML_ID_GET);
5882 hd->y = hd->x + 2;
5883 hd->z = hd->y + 4;
5884 }
5885 }
5886 else if(cmd == GF_EOD){
5887 if(PASS_HTML(hd->html_data)){
5888 html_output_string(hd->html_data, "</dl>");
5889 }
5890 else{
5891 html_indent(hd->html_data, (int) hd->x, HTML_ID_SET);
5892 html_blank(hd->html_data, 1);
5893 }
5894 }
5895
5896 return(1); /* get linked */
5897 }
5898
5899
5900 /*
5901 * HTML <DT> (Definition Term) element handler
5902 */
5903 int
html_dt(HANDLER_S * hd,int ch,int cmd)5904 html_dt(HANDLER_S *hd, int ch, int cmd)
5905 {
5906 if(PASS_HTML(hd->html_data)){
5907 if(cmd == GF_DATA){
5908 html_handoff(hd, ch);
5909 }
5910 else if(cmd == GF_RESET){
5911 html_output_raw_tag(hd->html_data, "dt");
5912 }
5913 else if(cmd == GF_EOD){
5914 html_output_string(hd->html_data, "</dt>");
5915 }
5916
5917 return(1); /* get linked */
5918 }
5919
5920 if(cmd == GF_RESET){
5921 HANDLER_S *p;
5922
5923 /*
5924 * There better be a Definition Handler installed
5925 * or else we crap out...
5926 */
5927 for(p = HANDLERS(hd->html_data); p && EL(p)->handler != html_dl; p = p->below)
5928 ;
5929
5930 if(p){ /* adjust indent level if needed */
5931 html_indent(hd->html_data, (int) p->y, HTML_ID_SET);
5932 html_blank(hd->html_data, 1);
5933 }
5934 /* BUG: else should really bitch about this */
5935 }
5936
5937 return(0); /* DON'T get linked */
5938 }
5939
5940
5941 /*
5942 * HTML <DD> (Definition Definition) element handler
5943 */
5944 int
html_dd(HANDLER_S * hd,int ch,int cmd)5945 html_dd(HANDLER_S *hd, int ch, int cmd)
5946 {
5947 if(PASS_HTML(hd->html_data)){
5948 if(cmd == GF_DATA){
5949 html_handoff(hd, ch);
5950 }
5951 else if(cmd == GF_RESET){
5952 html_output_raw_tag(hd->html_data, "dd");
5953 }
5954 else if(cmd == GF_EOD){
5955 html_output_string(hd->html_data, "</dd>");
5956 }
5957
5958 return(1); /* get linked */
5959 }
5960
5961 if(cmd == GF_RESET){
5962 HANDLER_S *p;
5963
5964 /*
5965 * There better be a Definition Handler installed
5966 * or else we crap out...
5967 */
5968 for(p = HANDLERS(hd->html_data); p && EL(p)->handler != html_dl; p = p->below)
5969 ;
5970
5971 if(p){ /* adjust indent level if needed */
5972 html_indent(hd->html_data, (int) p->z, HTML_ID_SET);
5973 html_blank(hd->html_data, 0);
5974 }
5975 /* BUG: should really bitch about this */
5976 }
5977
5978 return(0); /* DON'T get linked */
5979 }
5980
5981
5982 /*
5983 * HTML <H1> (Headings 1) element handler.
5984 *
5985 * Bold, very-large font, CENTERED. One or two blank lines
5986 * above and below. For our silly character cell's that
5987 * means centered and ALL CAPS...
5988 */
5989 int
html_h1(HANDLER_S * hd,int ch,int cmd)5990 html_h1(HANDLER_S *hd, int ch, int cmd)
5991 {
5992 if(cmd == GF_DATA){
5993 html_handoff(hd, ch);
5994 }
5995 else if(cmd == GF_RESET){
5996 if(PASS_HTML(hd->html_data)){
5997 html_output_raw_tag(hd->html_data, "h1");
5998 }
5999 else{
6000 /* turn ON the centered bit */
6001 CENTER_BIT(hd->html_data) = 1;
6002 }
6003 }
6004 else if(cmd == GF_EOD){
6005 if(PASS_HTML(hd->html_data)){
6006 html_output_string(hd->html_data, "</h1>");
6007 }
6008 else{
6009 /* turn OFF the centered bit, add blank line */
6010 CENTER_BIT(hd->html_data) = 0;
6011 html_blank(hd->html_data, 1);
6012 }
6013 }
6014
6015 return(1); /* get linked */
6016 }
6017
6018
6019 /*
6020 * HTML <H2> (Headings 2) element handler
6021 */
6022 int
html_h2(HANDLER_S * hd,int ch,int cmd)6023 html_h2(HANDLER_S *hd, int ch, int cmd)
6024 {
6025 if(cmd == GF_DATA){
6026 if(PASS_HTML(hd->html_data)){
6027 html_handoff(hd, ch);
6028 }
6029 else{
6030 if((hd->x & HTML_HX_ULINE) && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
6031 HTML_ULINE(hd->html_data, 1);
6032 hd->x ^= HTML_HX_ULINE; /* only once! */
6033 }
6034
6035 html_handoff(hd, (ch < 128 && islower((unsigned char) ch))
6036 ? toupper((unsigned char) ch) : ch);
6037 }
6038 }
6039 else if(cmd == GF_RESET){
6040 if(PASS_HTML(hd->html_data)){
6041 html_output_raw_tag(hd->html_data, "h2");
6042 }
6043 else{
6044 /*
6045 * Bold, large font, flush-left. One or two blank lines
6046 * above and below.
6047 */
6048 if(CENTER_BIT(hd->html_data)) /* stop centering for now */
6049 hd->x = HTML_HX_CENTER;
6050 else
6051 hd->x = 0;
6052
6053 hd->x |= HTML_HX_ULINE;
6054
6055 CENTER_BIT(hd->html_data) = 0;
6056 hd->y = html_indent(hd->html_data, 0, HTML_ID_SET);
6057 hd->z = HD(hd->html_data)->wrapcol;
6058 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6059 html_blank(hd->html_data, 1);
6060 }
6061 }
6062 else if(cmd == GF_EOD){
6063 if(PASS_HTML(hd->html_data)){
6064 html_output_string(hd->html_data, "</h2>");
6065 }
6066 else{
6067 /*
6068 * restore previous centering, and indent level
6069 */
6070 if(!(hd->x & HTML_HX_ULINE))
6071 HTML_ULINE(hd->html_data, 0);
6072
6073 html_indent(hd->html_data, hd->y, HTML_ID_SET);
6074 html_blank(hd->html_data, 1);
6075 CENTER_BIT(hd->html_data) = (hd->x & HTML_HX_CENTER) != 0;
6076 HD(hd->html_data)->wrapcol = hd->z;
6077 }
6078 }
6079
6080 return(1); /* get linked */
6081 }
6082
6083
6084 /*
6085 * HTML <H3> (Headings 3) element handler
6086 */
6087 int
html_h3(HANDLER_S * hd,int ch,int cmd)6088 html_h3(HANDLER_S *hd, int ch, int cmd)
6089 {
6090 if(cmd == GF_DATA){
6091 if(!PASS_HTML(hd->html_data)){
6092 if((hd->x & HTML_HX_ULINE) && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
6093 HTML_ULINE(hd->html_data, 1);
6094 hd->x ^= HTML_HX_ULINE; /* only once! */
6095 }
6096 }
6097
6098 html_handoff(hd, ch);
6099 }
6100 else if(cmd == GF_RESET){
6101 if(PASS_HTML(hd->html_data)){
6102 html_output_raw_tag(hd->html_data, "h3");
6103 }
6104 else{
6105 /*
6106 * Italic, large font, slightly indented from the left
6107 * margin. One or two blank lines above and below.
6108 */
6109 if(CENTER_BIT(hd->html_data)) /* stop centering for now */
6110 hd->x = HTML_HX_CENTER;
6111 else
6112 hd->x = 0;
6113
6114 hd->x |= HTML_HX_ULINE;
6115 CENTER_BIT(hd->html_data) = 0;
6116 hd->y = html_indent(hd->html_data, 2, HTML_ID_SET);
6117 hd->z = HD(hd->html_data)->wrapcol;
6118 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6119 html_blank(hd->html_data, 1);
6120 }
6121 }
6122 else if(cmd == GF_EOD){
6123 if(PASS_HTML(hd->html_data)){
6124 html_output_string(hd->html_data, "</h3>");
6125 }
6126 else{
6127 /*
6128 * restore previous centering, and indent level
6129 */
6130 if(!(hd->x & HTML_HX_ULINE))
6131 HTML_ULINE(hd->html_data, 0);
6132
6133 html_indent(hd->html_data, hd->y, HTML_ID_SET);
6134 html_blank(hd->html_data, 1);
6135 CENTER_BIT(hd->html_data) = (hd->x & HTML_HX_CENTER) != 0;
6136 HD(hd->html_data)->wrapcol = hd->z;
6137 }
6138 }
6139
6140 return(1); /* get linked */
6141 }
6142
6143
6144 /*
6145 * HTML <H4> (Headings 4) element handler
6146 */
6147 int
html_h4(HANDLER_S * hd,int ch,int cmd)6148 html_h4(HANDLER_S *hd, int ch, int cmd)
6149 {
6150 if(cmd == GF_DATA){
6151 html_handoff(hd, ch);
6152 }
6153 else if(cmd == GF_RESET){
6154 if(PASS_HTML(hd->html_data)){
6155 html_output_raw_tag(hd->html_data, "h4");
6156 }
6157 else{
6158 /*
6159 * Bold, normal font, indented more than H3. One blank line
6160 * above and below.
6161 */
6162 hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */
6163 CENTER_BIT(hd->html_data) = 0;
6164 hd->y = html_indent(hd->html_data, 4, HTML_ID_SET);
6165 hd->z = HD(hd->html_data)->wrapcol;
6166 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6167 html_blank(hd->html_data, 1);
6168 }
6169 }
6170 else if(cmd == GF_EOD){
6171 if(PASS_HTML(hd->html_data)){
6172 html_output_string(hd->html_data, "</h4>");
6173 }
6174 else{
6175 /*
6176 * restore previous centering, and indent level
6177 */
6178 html_indent(hd->html_data, (int) hd->y, HTML_ID_SET);
6179 html_blank(hd->html_data, 1);
6180 CENTER_BIT(hd->html_data) = hd->x;
6181 HD(hd->html_data)->wrapcol = hd->z;
6182 }
6183 }
6184
6185 return(1); /* get linked */
6186 }
6187
6188
6189 /*
6190 * HTML <H5> (Headings 5) element handler
6191 */
6192 int
html_h5(HANDLER_S * hd,int ch,int cmd)6193 html_h5(HANDLER_S *hd, int ch, int cmd)
6194 {
6195 if(cmd == GF_DATA){
6196 html_handoff(hd, ch);
6197 }
6198 else if(cmd == GF_RESET){
6199 if(PASS_HTML(hd->html_data)){
6200 html_output_raw_tag(hd->html_data, "h5");
6201 }
6202 else{
6203 /*
6204 * Italic, normal font, indented as H4. One blank line
6205 * above.
6206 */
6207 hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */
6208 CENTER_BIT(hd->html_data) = 0;
6209 hd->y = html_indent(hd->html_data, 6, HTML_ID_SET);
6210 hd->z = HD(hd->html_data)->wrapcol;
6211 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6212 html_blank(hd->html_data, 1);
6213 }
6214 }
6215 else if(cmd == GF_EOD){
6216 if(PASS_HTML(hd->html_data)){
6217 html_output_string(hd->html_data, "</h5>");
6218 }
6219 else{
6220 /*
6221 * restore previous centering, and indent level
6222 */
6223 html_indent(hd->html_data, (int) hd->y, HTML_ID_SET);
6224 html_blank(hd->html_data, 1);
6225 CENTER_BIT(hd->html_data) = hd->x;
6226 HD(hd->html_data)->wrapcol = hd->z;
6227 }
6228 }
6229
6230 return(1); /* get linked */
6231 }
6232
6233
6234 /*
6235 * HTML <H6> (Headings 6) element handler
6236 */
6237 int
html_h6(HANDLER_S * hd,int ch,int cmd)6238 html_h6(HANDLER_S *hd, int ch, int cmd)
6239 {
6240 if(cmd == GF_DATA){
6241 html_handoff(hd, ch);
6242 }
6243 else if(cmd == GF_RESET){
6244 if(PASS_HTML(hd->html_data)){
6245 html_output_raw_tag(hd->html_data, "h6");
6246 }
6247 else{
6248 /*
6249 * Bold, indented same as normal text, more than H5. One
6250 * blank line above.
6251 */
6252 hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */
6253 CENTER_BIT(hd->html_data) = 0;
6254 hd->y = html_indent(hd->html_data, 8, HTML_ID_SET);
6255 hd->z = HD(hd->html_data)->wrapcol;
6256 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6257 html_blank(hd->html_data, 1);
6258 }
6259 }
6260 else if(cmd == GF_EOD){
6261 if(PASS_HTML(hd->html_data)){
6262 html_output_string(hd->html_data, "</h6>");
6263 }
6264 else{
6265 /*
6266 * restore previous centering, and indent level
6267 */
6268 html_indent(hd->html_data, (int) hd->y, HTML_ID_SET);
6269 html_blank(hd->html_data, 1);
6270 CENTER_BIT(hd->html_data) = hd->x;
6271 HD(hd->html_data)->wrapcol = hd->z;
6272 }
6273 }
6274
6275 return(1); /* get linked */
6276 }
6277
6278
6279 /*
6280 * HTML <BlockQuote> element handler
6281 */
6282 int
html_blockquote(HANDLER_S * hd,int ch,int cmd)6283 html_blockquote(HANDLER_S *hd, int ch, int cmd)
6284 {
6285 int j;
6286 #define HTML_BQ_INDENT 6
6287
6288 if(cmd == GF_DATA){
6289 html_handoff(hd, ch);
6290 }
6291 else if(cmd == GF_RESET){
6292 if(PASS_HTML(hd->html_data)){
6293 html_output_raw_tag(hd->html_data, "blockquote");
6294 }
6295 else{
6296 /*
6297 * A typical rendering might be a slight extra left and
6298 * right indent, and/or italic font. The Blockquote element
6299 * causes a paragraph break, and typically provides space
6300 * above and below the quote.
6301 */
6302 html_indent(hd->html_data, HTML_BQ_INDENT, HTML_ID_INC);
6303 j = HD(hd->html_data)->wrapstate;
6304 HD(hd->html_data)->wrapstate = 0;
6305 html_blank(hd->html_data, 1);
6306 HD(hd->html_data)->wrapstate = j;
6307 HD(hd->html_data)->wrapcol -= HTML_BQ_INDENT;
6308 }
6309 }
6310 else if(cmd == GF_EOD){
6311 if(PASS_HTML(hd->html_data)){
6312 html_output_string(hd->html_data, "</blockquote>");
6313 }
6314 else{
6315 html_blank(hd->html_data, 1);
6316
6317 j = HD(hd->html_data)->wrapstate;
6318 HD(hd->html_data)->wrapstate = 0;
6319 html_indent(hd->html_data, -(HTML_BQ_INDENT), HTML_ID_INC);
6320 HD(hd->html_data)->wrapstate = j;
6321 HD(hd->html_data)->wrapcol += HTML_BQ_INDENT;
6322 }
6323 }
6324
6325 return(1); /* get linked */
6326 }
6327
6328
6329 /*
6330 * HTML <Address> element handler
6331 */
6332 int
html_address(HANDLER_S * hd,int ch,int cmd)6333 html_address(HANDLER_S *hd, int ch, int cmd)
6334 {
6335 int j;
6336 #define HTML_ADD_INDENT 2
6337
6338 if(cmd == GF_DATA){
6339 html_handoff(hd, ch);
6340 }
6341 else if(cmd == GF_RESET){
6342 if(PASS_HTML(hd->html_data)){
6343 html_output_raw_tag(hd->html_data, "address");
6344 }
6345 else{
6346 /*
6347 * A typical rendering might be a slight extra left and
6348 * right indent, and/or italic font. The Blockquote element
6349 * causes a paragraph break, and typically provides space
6350 * above and below the quote.
6351 */
6352 html_indent(hd->html_data, HTML_ADD_INDENT, HTML_ID_INC);
6353 j = HD(hd->html_data)->wrapstate;
6354 HD(hd->html_data)->wrapstate = 0;
6355 html_blank(hd->html_data, 1);
6356 HD(hd->html_data)->wrapstate = j;
6357 }
6358 }
6359 else if(cmd == GF_EOD){
6360 if(PASS_HTML(hd->html_data)){
6361 html_output_string(hd->html_data, "</address>");
6362 }
6363 else{
6364 html_blank(hd->html_data, 1);
6365
6366 j = HD(hd->html_data)->wrapstate;
6367 HD(hd->html_data)->wrapstate = 0;
6368 html_indent(hd->html_data, -(HTML_ADD_INDENT), HTML_ID_INC);
6369 HD(hd->html_data)->wrapstate = j;
6370 }
6371 }
6372
6373 return(1); /* get linked */
6374 }
6375
6376
6377 /*
6378 * HTML <PRE> (Preformatted Text) element handler
6379 */
6380 int
html_pre(HANDLER_S * hd,int ch,int cmd)6381 html_pre(HANDLER_S *hd, int ch, int cmd)
6382 {
6383 if(cmd == GF_DATA){
6384 /*
6385 * remove CRLF after '>' in element.
6386 * We see CRLF because wrapstate is off.
6387 */
6388 switch(hd->y){
6389 case 2 :
6390 if(ch == '\012'){
6391 hd->y = 3;
6392 return(1);
6393 }
6394 else
6395 html_handoff(hd, '\015');
6396
6397 break;
6398
6399 case 1 :
6400 if(ch == '\015'){
6401 hd->y = 2;
6402 return(1);
6403 }
6404
6405 case 3 :
6406 /* passing tags? replace CRLF with <BR> to make
6407 * sure hard newline survives in the end...
6408 */
6409 if(PASS_HTML(hd->html_data))
6410 hd->y = 4; /* keep looking for CRLF */
6411 else
6412 hd->y = 0; /* stop looking */
6413
6414 break;
6415
6416 case 4 :
6417 if(ch == '\015'){
6418 hd->y = 5;
6419 return(1);
6420 }
6421
6422 break;
6423
6424 case 5 :
6425 hd->y = 4;
6426 if(ch == '\012'){
6427 html_output_string(hd->html_data, "<br />");
6428 return(1);
6429 }
6430 else
6431 html_handoff(hd, '\015'); /* not CRLF, pass raw CR */
6432
6433 break;
6434
6435 default : /* zero case */
6436 break;
6437 }
6438
6439 html_handoff(hd, ch);
6440 }
6441 else if(cmd == GF_RESET){
6442 hd->y = 1;
6443 if(PASS_HTML(hd->html_data)){
6444 html_output_raw_tag(hd->html_data, "pre");
6445 }
6446 else{
6447 if(hd->html_data)
6448 hd->html_data->f1 = DFL; \
6449
6450 html_blank(hd->html_data, 1);
6451 hd->x = HD(hd->html_data)->wrapstate;
6452 HD(hd->html_data)->wrapstate = 0;
6453 }
6454 }
6455 else if(cmd == GF_EOD){
6456 if(PASS_HTML(hd->html_data)){
6457 html_output_string(hd->html_data, "</pre>");
6458 }
6459 else{
6460 HD(hd->html_data)->wrapstate = (hd->x != 0);
6461 html_blank(hd->html_data, 0);
6462 }
6463 }
6464
6465 return(1);
6466 }
6467
6468
6469 /*
6470 * HTML <CENTER> (Centered Text) element handler
6471 */
6472 int
html_center(HANDLER_S * hd,int ch,int cmd)6473 html_center(HANDLER_S *hd, int ch, int cmd)
6474 {
6475 if(cmd == GF_DATA){
6476 html_handoff(hd, ch);
6477 }
6478 else if(cmd == GF_RESET){
6479 if(PASS_HTML(hd->html_data)){
6480 html_output_raw_tag(hd->html_data, "center");
6481 }
6482 else{
6483 /* turn ON the centered bit */
6484 CENTER_BIT(hd->html_data) = 1;
6485 }
6486 }
6487 else if(cmd == GF_EOD){
6488 if(PASS_HTML(hd->html_data)){
6489 html_output_string(hd->html_data, "</center>");
6490 }
6491 else{
6492 /* turn OFF the centered bit */
6493 CENTER_BIT(hd->html_data) = 0;
6494 }
6495 }
6496
6497 return(1);
6498 }
6499
6500
6501 /*
6502 * HTML <DIV> (Document Divisions) element handler
6503 */
6504 int
html_div(HANDLER_S * hd,int ch,int cmd)6505 html_div(HANDLER_S *hd, int ch, int cmd)
6506 {
6507 if(cmd == GF_DATA){
6508 html_handoff(hd, ch);
6509 }
6510 else if(cmd == GF_RESET){
6511 if(PASS_HTML(hd->html_data)){
6512 html_output_raw_tag(hd->html_data, "div");
6513 }
6514 else{
6515 PARAMETER *p;
6516
6517 for(p = HD(hd->html_data)->el_data->attribs;
6518 p && p->attribute;
6519 p = p->next)
6520 if(!strucmp(p->attribute, "ALIGN")){
6521 if(p->value){
6522 /* remember previous values */
6523 hd->x = CENTER_BIT(hd->html_data);
6524 hd->y = html_indent(hd->html_data, 0, HTML_ID_GET);
6525
6526 html_blank(hd->html_data, 0);
6527 CENTER_BIT(hd->html_data) = !strucmp(p->value, "CENTER");
6528 html_indent(hd->html_data, 0, HTML_ID_SET);
6529 /* NOTE: "RIGHT" not supported yet */
6530 }
6531 }
6532 }
6533 }
6534 else if(cmd == GF_EOD){
6535 if(PASS_HTML(hd->html_data)){
6536 html_output_string(hd->html_data, "</div>");
6537 }
6538 else{
6539 /* restore centered bit and indentiousness */
6540 CENTER_BIT(hd->html_data) = hd->y;
6541 html_indent(hd->html_data, hd->y, HTML_ID_SET);
6542 html_blank(hd->html_data, 0);
6543 }
6544 }
6545
6546 return(1);
6547 }
6548
6549
6550 /*
6551 * HTML <SPAN> (Text Span) element handler
6552 */
6553 int
html_span(HANDLER_S * hd,int ch,int cmd)6554 html_span(HANDLER_S *hd, int ch, int cmd)
6555 {
6556 if(PASS_HTML(hd->html_data)){
6557 if(cmd == GF_DATA){
6558 html_handoff(hd, ch);
6559 }
6560 else if(cmd == GF_RESET){
6561 html_output_raw_tag(hd->html_data, "span");
6562 }
6563 else if(cmd == GF_EOD){
6564 html_output_string(hd->html_data, "</span>");
6565 }
6566
6567 return(1);
6568 }
6569
6570 return(0);
6571 }
6572
6573
6574 /*
6575 * HTML <KBD> (Text Kbd) element handler
6576 */
6577 int
html_kbd(HANDLER_S * hd,int ch,int cmd)6578 html_kbd(HANDLER_S *hd, int ch, int cmd)
6579 {
6580 if(PASS_HTML(hd->html_data)){
6581 if(cmd == GF_DATA){
6582 html_handoff(hd, ch);
6583 }
6584 else if(cmd == GF_RESET){
6585 html_output_raw_tag(hd->html_data, "kbd");
6586 }
6587 else if(cmd == GF_EOD){
6588 html_output_string(hd->html_data, "</kbd>");
6589 }
6590
6591 return(1);
6592 }
6593
6594 return(0);
6595 }
6596
6597
6598 /*
6599 * HTML <DFN> (Text Definition) element handler
6600 */
6601 int
html_dfn(HANDLER_S * hd,int ch,int cmd)6602 html_dfn(HANDLER_S *hd, int ch, int cmd)
6603 {
6604 if(PASS_HTML(hd->html_data)){
6605 if(cmd == GF_DATA){
6606 html_handoff(hd, ch);
6607 }
6608 else if(cmd == GF_RESET){
6609 html_output_raw_tag(hd->html_data, "dfn");
6610 }
6611 else if(cmd == GF_EOD){
6612 html_output_string(hd->html_data, "</dfn>");
6613 }
6614
6615 return(1);
6616 }
6617
6618 return(0);
6619 }
6620
6621
6622 /*
6623 * HTML <TT> (Text Tt) element handler
6624 */
6625 int
html_tt(HANDLER_S * hd,int ch,int cmd)6626 html_tt(HANDLER_S *hd, int ch, int cmd)
6627 {
6628 if(PASS_HTML(hd->html_data)){
6629 if(cmd == GF_DATA){
6630 html_handoff(hd, ch);
6631 }
6632 else if(cmd == GF_RESET){
6633 html_output_raw_tag(hd->html_data, "tt");
6634 }
6635 else if(cmd == GF_EOD){
6636 html_output_string(hd->html_data, "</tt>");
6637 }
6638
6639 return(1);
6640 }
6641
6642 return(0);
6643 }
6644
6645
6646 /*
6647 * HTML <VAR> (Text Var) element handler
6648 */
6649 int
html_var(HANDLER_S * hd,int ch,int cmd)6650 html_var(HANDLER_S *hd, int ch, int cmd)
6651 {
6652 if(PASS_HTML(hd->html_data)){
6653 if(cmd == GF_DATA){
6654 html_handoff(hd, ch);
6655 }
6656 else if(cmd == GF_RESET){
6657 html_output_raw_tag(hd->html_data, "var");
6658 }
6659 else if(cmd == GF_EOD){
6660 html_output_string(hd->html_data, "</var>");
6661 }
6662
6663 return(1);
6664 }
6665
6666 return(0);
6667 }
6668
6669
6670 /*
6671 * HTML <SAMP> (Text Samp) element handler
6672 */
6673 int
html_samp(HANDLER_S * hd,int ch,int cmd)6674 html_samp(HANDLER_S *hd, int ch, int cmd)
6675 {
6676 if(PASS_HTML(hd->html_data)){
6677 if(cmd == GF_DATA){
6678 html_handoff(hd, ch);
6679 }
6680 else if(cmd == GF_RESET){
6681 html_output_raw_tag(hd->html_data, "samp");
6682 }
6683 else if(cmd == GF_EOD){
6684 html_output_string(hd->html_data, "</samp>");
6685 }
6686
6687 return(1);
6688 }
6689
6690 return(0);
6691 }
6692
6693
6694 /*
6695 * HTML <SUP> (Text Superscript) element handler
6696 */
6697 int
html_sup(HANDLER_S * hd,int ch,int cmd)6698 html_sup(HANDLER_S *hd, int ch, int cmd)
6699 {
6700 if(PASS_HTML(hd->html_data)){
6701 if(cmd == GF_DATA){
6702 html_handoff(hd, ch);
6703 }
6704 else if(cmd == GF_RESET){
6705 html_output_raw_tag(hd->html_data, "sup");
6706 }
6707 else if(cmd == GF_EOD){
6708 html_output_string(hd->html_data, "</sup>");
6709 }
6710
6711 return(1);
6712 }
6713
6714 return(0);
6715 }
6716
6717
6718 /*
6719 * HTML <SUB> (Text Subscript) element handler
6720 */
6721 int
html_sub(HANDLER_S * hd,int ch,int cmd)6722 html_sub(HANDLER_S *hd, int ch, int cmd)
6723 {
6724 if(PASS_HTML(hd->html_data)){
6725 if(cmd == GF_DATA){
6726 html_handoff(hd, ch);
6727 }
6728 else if(cmd == GF_RESET){
6729 html_output_raw_tag(hd->html_data, "sub");
6730 }
6731 else if(cmd == GF_EOD){
6732 html_output_string(hd->html_data, "</sub>");
6733 }
6734
6735 return(1);
6736 }
6737
6738 return(0);
6739 }
6740
6741
6742 /*
6743 * HTML <CITE> (Text Citation) element handler
6744 */
6745 int
html_cite(HANDLER_S * hd,int ch,int cmd)6746 html_cite(HANDLER_S *hd, int ch, int cmd)
6747 {
6748 if(PASS_HTML(hd->html_data)){
6749 if(cmd == GF_DATA){
6750 html_handoff(hd, ch);
6751 }
6752 else if(cmd == GF_RESET){
6753 html_output_raw_tag(hd->html_data, "cite");
6754 }
6755 else if(cmd == GF_EOD){
6756 html_output_string(hd->html_data, "</cite>");
6757 }
6758
6759 return(1);
6760 }
6761
6762 return(0);
6763 }
6764
6765
6766 /*
6767 * HTML <CODE> (Text Code) element handler
6768 */
6769 int
html_code(HANDLER_S * hd,int ch,int cmd)6770 html_code(HANDLER_S *hd, int ch, int cmd)
6771 {
6772 if(PASS_HTML(hd->html_data)){
6773 if(cmd == GF_DATA){
6774 html_handoff(hd, ch);
6775 }
6776 else if(cmd == GF_RESET){
6777 html_output_raw_tag(hd->html_data, "code");
6778 }
6779 else if(cmd == GF_EOD){
6780 html_output_string(hd->html_data, "</code>");
6781 }
6782
6783 return(1);
6784 }
6785
6786 return(0);
6787 }
6788
6789
6790 /*
6791 * HTML <INS> (Text Inserted) element handler
6792 */
6793 int
html_ins(HANDLER_S * hd,int ch,int cmd)6794 html_ins(HANDLER_S *hd, int ch, int cmd)
6795 {
6796 if(PASS_HTML(hd->html_data)){
6797 if(cmd == GF_DATA){
6798 html_handoff(hd, ch);
6799 }
6800 else if(cmd == GF_RESET){
6801 html_output_raw_tag(hd->html_data, "ins");
6802 }
6803 else if(cmd == GF_EOD){
6804 html_output_string(hd->html_data, "</ins>");
6805 }
6806
6807 return(1);
6808 }
6809
6810 return(0);
6811 }
6812
6813
6814 /*
6815 * HTML <DEL> (Text Deleted) element handler
6816 */
6817 int
html_del(HANDLER_S * hd,int ch,int cmd)6818 html_del(HANDLER_S *hd, int ch, int cmd)
6819 {
6820 if(PASS_HTML(hd->html_data)){
6821 if(cmd == GF_DATA){
6822 html_handoff(hd, ch);
6823 }
6824 else if(cmd == GF_RESET){
6825 html_output_raw_tag(hd->html_data, "del");
6826 }
6827 else if(cmd == GF_EOD){
6828 html_output_string(hd->html_data, "</del>");
6829 }
6830
6831 return(1);
6832 }
6833
6834 return(0);
6835 }
6836
6837
6838 /*
6839 * HTML <ABBR> (Text Abbreviation) element handler
6840 */
6841 int
html_abbr(HANDLER_S * hd,int ch,int cmd)6842 html_abbr(HANDLER_S *hd, int ch, int cmd)
6843 {
6844 if(PASS_HTML(hd->html_data)){
6845 if(cmd == GF_DATA){
6846 html_handoff(hd, ch);
6847 }
6848 else if(cmd == GF_RESET){
6849 html_output_raw_tag(hd->html_data, "abbr");
6850 }
6851 else if(cmd == GF_EOD){
6852 html_output_string(hd->html_data, "</abbr>");
6853 }
6854
6855 return(1);
6856 }
6857
6858 return(0);
6859 }
6860
6861
6862 /*
6863 * HTML <SCRIPT> element handler
6864 */
6865 int
html_script(HANDLER_S * hd,int ch,int cmd)6866 html_script(HANDLER_S *hd, int ch, int cmd)
6867 {
6868 /* Link in and drop everything within on the floor */
6869 return(1);
6870 }
6871
6872
6873 /*
6874 * HTML <APPLET> element handler
6875 */
6876 int
html_applet(HANDLER_S * hd,int ch,int cmd)6877 html_applet(HANDLER_S *hd, int ch, int cmd)
6878 {
6879 /* Link in and drop everything within on the floor */
6880 return(1);
6881 }
6882
6883
6884 /*
6885 * HTML <STYLE> CSS element handler
6886 */
6887 int
html_style(HANDLER_S * hd,int ch,int cmd)6888 html_style(HANDLER_S *hd, int ch, int cmd)
6889 {
6890 static STORE_S *css_stuff ;
6891
6892 if(PASS_HTML(hd->html_data)){
6893 if(cmd == GF_DATA){
6894 /* collect style settings */
6895 so_writec(ch, css_stuff);
6896 }
6897 else if(cmd == GF_RESET){
6898 if(css_stuff)
6899 so_give(&css_stuff);
6900
6901 css_stuff = so_get(CharStar, NULL, EDIT_ACCESS);
6902 }
6903 else if(cmd == GF_EOD){
6904 /*
6905 * TODO: strip anything mischievous and pass on
6906 */
6907
6908 so_give(&css_stuff);
6909 }
6910 }
6911
6912 return(1);
6913 }
6914
6915 /*
6916 * RSS 2.0 <RSS> version
6917 */
6918 int
rss_rss(HANDLER_S * hd,int ch,int cmd)6919 rss_rss(HANDLER_S *hd, int ch, int cmd)
6920 {
6921 if(cmd == GF_RESET){
6922 PARAMETER *p;
6923
6924 for(p = HD(hd->html_data)->el_data->attribs;
6925 p && p->attribute;
6926 p = p->next)
6927 if(!strucmp(p->attribute, "VERSION")){
6928 if(p->value && !strucmp(p->value,"2.0"))
6929 return(0); /* do not link in */
6930 }
6931
6932 gf_error("Incompatible RSS version");
6933 /* NO RETURN */
6934 }
6935
6936 return(0); /* not linked or error means we never get here */
6937 }
6938
6939 /*
6940 * RSS 2.0 <CHANNEL>
6941 */
6942 int
rss_channel(HANDLER_S * hd,int ch,int cmd)6943 rss_channel(HANDLER_S *hd, int ch, int cmd)
6944 {
6945 if(cmd == GF_DATA){
6946 html_handoff(hd, ch);
6947 }
6948 else if(cmd == GF_RESET){
6949 RSS_FEED_S *feed;
6950
6951 feed = RSS_FEED(hd->html_data) = fs_get(sizeof(RSS_FEED_S));
6952 memset(feed, 0, sizeof(RSS_FEED_S));
6953 }
6954
6955 return(1); /* link in */
6956 }
6957
6958 /*
6959 * RSS 2.0 <TITLE>
6960 */
6961 int
rss_title(HANDLER_S * hd,int ch,int cmd)6962 rss_title(HANDLER_S *hd, int ch, int cmd)
6963 {
6964 static STORE_S *title_so;
6965
6966 if(cmd == GF_DATA){
6967 /* collect data */
6968 if(title_so){
6969 so_writec(ch, title_so);
6970 }
6971 }
6972 else if(cmd == GF_RESET){
6973 if(RSS_FEED(hd->html_data)){
6974 /* prepare for data */
6975 if(title_so)
6976 so_give(&title_so);
6977
6978 title_so = so_get(CharStar, NULL, EDIT_ACCESS);
6979 }
6980 }
6981 else if(cmd == GF_EOD){
6982 if(title_so){
6983 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
6984 RSS_ITEM_S *rip;
6985
6986 if(feed){
6987 if((rip = feed->items) != NULL){
6988 for(; rip->next; rip = rip->next)
6989 ;
6990
6991 if(rip->title)
6992 fs_give((void **) &rip->title);
6993
6994 rip->title = cpystr(rss_skip_whitespace(so_text(title_so)));
6995 }
6996 else{
6997 if(feed->title)
6998 fs_give((void **) &feed->title);
6999
7000 feed->title = cpystr(rss_skip_whitespace(so_text(title_so)));
7001 }
7002 }
7003
7004 so_give(&title_so);
7005 }
7006 }
7007
7008 return(1); /* link in */
7009 }
7010
7011 /*
7012 * RSS 2.0 <IMAGE>
7013 */
7014 int
rss_image(HANDLER_S * hd,int ch,int cmd)7015 rss_image(HANDLER_S *hd, int ch, int cmd)
7016 {
7017 static STORE_S *img_so;
7018
7019 if(cmd == GF_DATA){
7020 /* collect data */
7021 if(img_so){
7022 so_writec(ch, img_so);
7023 }
7024 }
7025 else if(cmd == GF_RESET){
7026 if(RSS_FEED(hd->html_data)){
7027 /* prepare to collect data */
7028 if(img_so)
7029 so_give(&img_so);
7030
7031 img_so = so_get(CharStar, NULL, EDIT_ACCESS);
7032 }
7033 }
7034 else if(cmd == GF_EOD){
7035 if(img_so){
7036 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7037
7038 if(feed){
7039 if(feed->image)
7040 fs_give((void **) &feed->image);
7041
7042 feed->image = cpystr(rss_skip_whitespace(so_text(img_so)));
7043 }
7044
7045 so_give(&img_so);
7046 }
7047 }
7048
7049 return(1); /* link in */
7050 }
7051
7052 /*
7053 * RSS 2.0 <LINK>
7054 */
7055 int
rss_link(HANDLER_S * hd,int ch,int cmd)7056 rss_link(HANDLER_S *hd, int ch, int cmd)
7057 {
7058 static STORE_S *link_so;
7059
7060 if(cmd == GF_DATA){
7061 /* collect data */
7062 if(link_so){
7063 so_writec(ch, link_so);
7064 }
7065 }
7066 else if(cmd == GF_RESET){
7067 if(RSS_FEED(hd->html_data)){
7068 /* prepare to collect data */
7069 if(link_so)
7070 so_give(&link_so);
7071
7072 link_so = so_get(CharStar, NULL, EDIT_ACCESS);
7073 }
7074 }
7075 else if(cmd == GF_EOD){
7076 if(link_so){
7077 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7078 RSS_ITEM_S *rip;
7079
7080 if(feed){
7081 if((rip = feed->items) != NULL){
7082 for(; rip->next; rip = rip->next)
7083 ;
7084
7085 if(rip->link)
7086 fs_give((void **) &rip->link);
7087
7088 rip->link = cpystr(rss_skip_whitespace(so_text(link_so)));
7089 }
7090 else{
7091 if(feed->link)
7092 fs_give((void **) &feed->link);
7093
7094 feed->link = cpystr(rss_skip_whitespace(so_text(link_so)));
7095 }
7096 }
7097
7098 so_give(&link_so);
7099 }
7100 }
7101
7102 return(1); /* link in */
7103 }
7104
7105 /*
7106 * RSS 2.0 <DESCRIPTION>
7107 */
7108 int
rss_description(HANDLER_S * hd,int ch,int cmd)7109 rss_description(HANDLER_S *hd, int ch, int cmd)
7110 {
7111 static STORE_S *desc_so;
7112
7113 if(cmd == GF_DATA){
7114 /* collect data */
7115 if(desc_so){
7116 so_writec(ch, desc_so);
7117 }
7118 }
7119 else if(cmd == GF_RESET){
7120 if(RSS_FEED(hd->html_data)){
7121 /* prepare to collect data */
7122 if(desc_so)
7123 so_give(&desc_so);
7124
7125 desc_so = so_get(CharStar, NULL, EDIT_ACCESS);
7126 }
7127 }
7128 else if(cmd == GF_EOD){
7129 if(desc_so){
7130 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7131 RSS_ITEM_S *rip;
7132
7133 if(feed){
7134 if((rip = feed->items) != NULL){
7135 for(; rip->next; rip = rip->next)
7136 ;
7137
7138 if(rip->description)
7139 fs_give((void **) &rip->description);
7140
7141 rip->description = cpystr(rss_skip_whitespace(so_text(desc_so)));
7142 }
7143 else{
7144 if(feed->description)
7145 fs_give((void **) &feed->description);
7146
7147 feed->description = cpystr(rss_skip_whitespace(so_text(desc_so)));
7148 }
7149 }
7150
7151 so_give(&desc_so);
7152 }
7153 }
7154
7155 return(1); /* link in */
7156 }
7157
7158 /*
7159 * RSS 2.0 <TTL> (in minutes)
7160 */
7161 int
rss_ttl(HANDLER_S * hd,int ch,int cmd)7162 rss_ttl(HANDLER_S *hd, int ch, int cmd)
7163 {
7164 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7165
7166 if(cmd == GF_DATA){
7167 if(isdigit((unsigned char) ch))
7168 feed->ttl = ((feed->ttl * 10) + (ch - '0'));
7169 }
7170 else if(cmd == GF_RESET){
7171 /* prepare to collect data */
7172 feed->ttl = 0;
7173 }
7174 else if(cmd == GF_EOD){
7175 }
7176
7177 return(1); /* link in */
7178 }
7179
7180 /*
7181 * RSS 2.0 <ITEM>
7182 */
7183 int
rss_item(HANDLER_S * hd,int ch,int cmd)7184 rss_item(HANDLER_S *hd, int ch, int cmd)
7185 {
7186 /* BUG: verify no ITEM nesting? */
7187 if(cmd == GF_RESET){
7188 RSS_FEED_S *feed;
7189
7190 if((feed = RSS_FEED(hd->html_data)) != NULL){
7191 RSS_ITEM_S **rip;
7192 int n = 0;
7193
7194 for(rip = &feed->items; *rip; rip = &(*rip)->next)
7195 if(++n > RSS_ITEM_LIMIT)
7196 return(0);
7197
7198 *rip = fs_get(sizeof(RSS_ITEM_S));
7199 memset(*rip, 0, sizeof(RSS_ITEM_S));
7200 }
7201 }
7202
7203 return(0); /* don't link in */
7204 }
7205
7206
/*
 * rss_skip_whitespace - return a pointer to the first character of s
 *                       that is not whitespace (per isspace()), or to
 *                       its terminating NUL if there is none.
 */
char *
rss_skip_whitespace(char *s)
{
    while(*s != '\0' && isspace((unsigned char) *s))
      s++;

    return(s);
}
7215
7216
7217 /*
7218 * return the function associated with the given element name
7219 */
7220 ELPROP_S *
element_properties(FILTER_S * fd,char * el_name)7221 element_properties(FILTER_S *fd, char *el_name)
7222 {
7223 register ELPROP_S *el_table = ELEMENTS(fd);
7224 size_t len_name = strlen(el_name);
7225
7226 for(; el_table->element; el_table++)
7227 if(!strucmp(el_name, el_table->element)
7228 || (el_table->alternate
7229 && len_name == el_table->len + 1
7230 && el_name[el_table->len] == '/'
7231 && !struncmp(el_name, el_table->element, el_table->len)))
7232 return(el_table);
7233
7234 return(NULL);
7235 }
7236
7237
7238 /*
7239 * collect element's name and any attribute/value pairs then
7240 * dispatch to the appropriate handler.
7241 *
7242 * Returns 1 : got what we wanted
7243 * 0 : we need more data
7244 * -1 : bogus input
7245 */
7246 int
html_element_collector(FILTER_S * fd,int ch)7247 html_element_collector(FILTER_S *fd, int ch)
7248 {
7249 if(ch == '>'){
7250 if(ED(fd)->overrun){
7251 /*
7252 * If problem processing, don't bother doing anything
7253 * internally, just return such that none of what we've
7254 * digested is displayed.
7255 */
7256 HTML_DEBUG_EL("too long", ED(fd));
7257 return(1); /* Let it go, Jim */
7258 }
7259 else if(ED(fd)->mkup_decl){
7260 if(ED(fd)->badform){
7261 dprint((2, "-- html error: bad form: %.*s\n",
7262 ED(fd)->len, ED(fd)->buf ? ED(fd)->buf : "?"));
7263 /*
7264 * Invalid comment -- make some guesses as
7265 * to whether we should stop with this greater-than...
7266 */
7267 if(ED(fd)->buf[0] != '-'
7268 || ED(fd)->len < 4
7269 || (ED(fd)->buf[1] == '-'
7270 && ED(fd)->buf[ED(fd)->len - 1] == '-'
7271 && ED(fd)->buf[ED(fd)->len - 2] == '-'))
7272 return(1);
7273 }
7274 else{
7275 dprint((5, "-- html: OK: %.*s\n",
7276 ED(fd)->len, ED(fd)->buf ? ED(fd)->buf : "?"));
7277 if(ED(fd)->start_comment == ED(fd)->end_comment){
7278 if(ED(fd)->len > 10){
7279 ED(fd)->buf[ED(fd)->len - 2] = '\0';
7280 html_element_comment(fd, ED(fd)->buf + 2);
7281 }
7282
7283 return(1);
7284 }
7285 /* else keep collecting comment below */
7286 }
7287 }
7288 else if(ED(fd)->proc_inst){
7289 return(1); /* return without display... */
7290 }
7291 else if(!ED(fd)->quoted || ED(fd)->badform){
7292 ELPROP_S *ep;
7293
7294 /*
7295 * We either have the whole thing or all that we could
7296 * salvage from it. Try our best...
7297 */
7298
7299 if(HD(fd)->bitbucket)
7300 return(1); /* element inside chtml clause! */
7301
7302 if(!ED(fd)->badform && html_element_flush(ED(fd)))
7303 return(1); /* return without display... */
7304
7305 /*
7306 * If we ran into an empty tag or we don't know how to deal
7307 * with it, just go on, ignoring it...
7308 */
7309 if(ED(fd)->element && (ep = element_properties(fd, ED(fd)->element))){
7310 if(ep->handler){
7311 /* dispatch the element's handler */
7312 HTML_DEBUG_EL(ED(fd)->end_tag ? "POP" : "PUSH", ED(fd));
7313 if(ED(fd)->end_tag){
7314 html_pop(fd, ep); /* remove it's handler */
7315 }
7316 else{
7317 /* if a block element, pop any open <p>'s */
7318 if(ep->blocklevel){
7319 HANDLER_S *tp;
7320
7321 for(tp = HANDLERS(fd); tp && EL(tp)->handler == html_p; tp = tp->below){
7322 HTML_DEBUG_EL("Unclosed <P>", ED(fd));
7323 html_pop(fd, EL(tp));
7324 break;
7325 }
7326 }
7327
7328 /* enforce table nesting */
7329 if(!strucmp(ep->element, "tr")){
7330 if(!HANDLERS(fd) || (strucmp(EL(HANDLERS(fd))->element, "table") && strucmp(EL(HANDLERS(fd))->element, "tbody") && strucmp(EL(HANDLERS(fd))->element, "thead"))){
7331 dprint((2, "-- html error: bad nesting for <TR>, GOT %s\n", (HANDLERS(fd)) ? EL(HANDLERS(fd))->element : "NO-HANDLERS"));
7332 if(HANDLERS(fd) && !strucmp(EL(HANDLERS(fd))->element,"tr")){
7333 dprint((2, "-- html error: bad nesting popping previous <TR>"));
7334 html_pop(fd, EL(HANDLERS(fd)));
7335 }
7336 else{
7337 dprint((2, "-- html error: bad nesting pusing <TABLE>"));
7338 html_push(fd, element_properties(fd, "table"));
7339 }
7340 }
7341 }
7342 else if(!strucmp(ep->element, "td") || !strucmp(ep->element, "th")){
7343 if(!HANDLERS(fd)){
7344 dprint((2, "-- html error: bad nesting: NO HANDLERS before <TD>"));
7345 html_push(fd, element_properties(fd, "table"));
7346 html_push(fd, element_properties(fd, "tr"));
7347 }
7348 else if(strucmp(EL(HANDLERS(fd))->element, "tr")){
7349 dprint((2, "-- html error: bad nesting for <TD>, GOT %s\n", EL(HANDLERS(fd))->element));
7350 html_push(fd, element_properties(fd, "tr"));
7351 }
7352 else if(!strucmp(EL(HANDLERS(fd))->element, "td")){
7353 dprint((2, "-- html error: bad nesting popping <TD>"));
7354 html_pop(fd, EL(HANDLERS(fd)));
7355 }
7356 }
7357
7358 /* add it's handler */
7359 if(html_push(fd, ep)){
7360 if(ED(fd)->empty){
7361 /* remove empty element */
7362 html_pop(fd, ep);
7363 }
7364 }
7365 }
7366 }
7367 else {
7368 HTML_DEBUG_EL("IGNORED", ED(fd));
7369 }
7370 }
7371 else{ /* else, empty or unrecognized */
7372 HTML_DEBUG_EL("?", ED(fd));
7373 }
7374
7375 return(1); /* all done! see, that didn't hurt */
7376 }
7377 }
7378 else if(ch == '/' && ED(fd)->element && ED(fd)->len){
7379 ED(fd)->empty = 1;
7380 }
7381 else
7382 ED(fd)->empty = 0;
7383
7384 if(ED(fd)->mkup_decl){
7385 if((ch &= 0xff) == '-'){
7386 if(ED(fd)->hyphen){
7387 ED(fd)->hyphen = 0;
7388 if(ED(fd)->start_comment)
7389 ED(fd)->end_comment = 1;
7390 else
7391 ED(fd)->start_comment = 1;
7392 }
7393 else
7394 ED(fd)->hyphen = 1;
7395 }
7396 else{
7397 if(ED(fd)->end_comment)
7398 ED(fd)->start_comment = ED(fd)->end_comment = 0;
7399
7400 /*
7401 * no "--" after ! or non-whitespace between comments - bad
7402 */
7403 if(ED(fd)->len < 2 || (!ED(fd)->start_comment
7404 && !ASCII_ISSPACE((unsigned char) ch)))
7405 ED(fd)->badform = 1; /* non-comment! */
7406
7407 ED(fd)->hyphen = 0;
7408 }
7409
7410 /*
7411 * Remember the comment for possible later processing, if
7412 * it gets too long, remember first and last few chars
7413 * so we know when to terminate (and throw some garbage
7414 * in between when we toss out what's between.
7415 */
7416 if(ED(fd)->len == HTML_BUF_LEN){
7417 ED(fd)->buf[2] = ED(fd)->buf[3] = 'X';
7418 ED(fd)->buf[4] = ED(fd)->buf[ED(fd)->len - 2];
7419 ED(fd)->buf[5] = ED(fd)->buf[ED(fd)->len - 1];
7420 ED(fd)->len = 6;
7421 }
7422
7423 ED(fd)->buf[(ED(fd)->len)++] = ch;
7424 return(0); /* comments go in the bit bucket */
7425 }
7426 else if(ED(fd)->overrun || ED(fd)->badform){
7427 return(0); /* swallow char's until next '>' */
7428 }
7429 else if(!ED(fd)->element && !ED(fd)->len){
7430 if(ch == '/'){ /* validate leading chars */
7431 ED(fd)->end_tag = 1;
7432 return(0);
7433 }
7434 else if(ch == '!'){
7435 ED(fd)->mkup_decl = 1;
7436 return(0);
7437 }
7438 else if(ch == '?'){
7439 ED(fd)->proc_inst = 1;
7440 return(0);
7441 }
7442 else if(!isalpha((unsigned char) ch))
7443 return(-1); /* can't be a tag! */
7444 }
7445 else if(ch == '\"' || ch == '\''){
7446 if(!ED(fd)->hit_equal){
7447 ED(fd)->badform = 1; /* quote in element name?!? */
7448 return(0);
7449 }
7450
7451 if(ED(fd)->quoted){
7452 if(ED(fd)->quoted == (char) ch){
7453 /* end of a quoted value */
7454 ED(fd)->quoted = 0;
7455 if(ED(fd)->len && html_element_flush(ED(fd)))
7456 ED(fd)->badform = 1;
7457
7458 return(0); /* continue collecting chars */
7459 }
7460 /* ELSE fall thru writing other quoting char */
7461 }
7462 else{
7463 ED(fd)->quoted = (char) ch;
7464 ED(fd)->was_quoted = 1;
7465 return(0); /* need more data */
7466 }
7467 }
7468 else if (ASCII_ISSPACE((unsigned char) ch))
7469 ED(fd)->unquoted_data = 0;
7470 else if (ED(fd)->hit_equal)
7471 ED(fd)->unquoted_data = 1;
7472
7473 ch &= 0xff; /* strip any "literal" high bits */
7474 if(ED(fd)->quoted
7475 || ED(fd)->unquoted_data
7476 || isalnum(ch)
7477 || strchr("#-.!", ch)){
7478 if(ED(fd)->len < ((ED(fd)->element || !ED(fd)->hit_equal)
7479 ? HTML_BUF_LEN:MAX_ELEMENT)){
7480 ED(fd)->buf[(ED(fd)->len)++] = ch;
7481 }
7482 else
7483 ED(fd)->overrun = 1; /* flag it broken */
7484 }
7485 else if(ASCII_ISSPACE((unsigned char) ch) || ch == '='){
7486 if((ED(fd)->len || ED(fd)->was_quoted) && html_element_flush(ED(fd))){
7487 ED(fd)->badform = 1;
7488 return(0); /* else, we ain't done yet */
7489 }
7490
7491 if(!ED(fd)->hit_equal)
7492 ED(fd)->hit_equal = (ch == '=');
7493 }
7494 else if(ch == '/' && ED(fd)->len && !ED(fd)->element){
7495 ELPROP_S *ep;
7496 ep = element_properties(fd, ED(fd)->buf);
7497 if(ep){
7498 if(!ep->alternate)
7499 ED(fd)->badform = 1;
7500 else{
7501 if(ED(fd)->len < ((ED(fd)->element || !ED(fd)->hit_equal)
7502 ? HTML_BUF_LEN:MAX_ELEMENT)){
7503 ED(fd)->buf[(ED(fd)->len)++] = ch; /* add this exception */
7504 }
7505 else
7506 ED(fd)->overrun = 1;
7507 }
7508 }
7509 else
7510 ED(fd)->badform = 1;
7511 }
7512 else
7513 ED(fd)->badform = 1; /* unrecognized data?? */
7514
7515 return(0); /* keep collecting */
7516 }
7517
7518
7519 /*
7520 * Element collector found complete string, integrate it and reset
7521 * internal collection buffer.
7522 *
7523 * Returns zero if element collection buffer flushed, error flag otherwise
7524 */
7525 int
html_element_flush(CLCTR_S * el_data)7526 html_element_flush(CLCTR_S *el_data)
7527 {
7528 int rv = 0;
7529
7530 if(el_data->hit_equal){ /* adding a value */
7531 el_data->hit_equal = 0;
7532 if(el_data->cur_attrib){
7533 if(!el_data->cur_attrib->value){
7534 el_data->cur_attrib->value = cpystr(el_data->len
7535 ? el_data->buf : "");
7536 }
7537 else{
7538 dprint((2, "** element: unexpected value: %.10s...\n",
7539 (el_data->len && el_data->buf) ? el_data->buf : "\"\""));
7540 rv = 1;
7541 }
7542 }
7543 else{
7544 dprint((2, "** element: missing attribute name: %.10s...\n",
7545 (el_data->len && el_data->buf) ? el_data->buf : "\"\""));
7546 rv = 2;
7547 }
7548 }
7549 else if(el_data->len){
7550 if(!el_data->element){
7551 el_data->element = cpystr(el_data->buf);
7552 }
7553 else{
7554 PARAMETER *p = (PARAMETER *)fs_get(sizeof(PARAMETER));
7555 memset(p, 0, sizeof(PARAMETER));
7556 if(el_data->attribs){
7557 el_data->cur_attrib->next = p;
7558 el_data->cur_attrib = p;
7559 }
7560 else
7561 el_data->attribs = el_data->cur_attrib = p;
7562
7563 p->attribute = cpystr(el_data->buf);
7564 }
7565
7566 }
7567
7568 el_data->was_quoted = 0; /* reset collector buf and state */
7569 el_data->len = 0;
7570 memset(el_data->buf, 0, HTML_BUF_LEN);
7571 return(rv); /* report whatever happened above */
7572 }
7573
7574
7575 /*
7576 * html_element_comment - "Special" comment handling here
7577 */
7578 void
html_element_comment(FILTER_S * f,char * s)7579 html_element_comment(FILTER_S *f, char *s)
7580 {
7581 char *p;
7582
7583 while(*s && ASCII_ISSPACE((unsigned char) *s))
7584 s++;
7585
7586 /*
7587 * WARNING: "!--chtml" denotes "Conditional HTML", a UW-ism.
7588 */
7589 if(!struncmp(s, "chtml ", 6)){
7590 s += 6;
7591 if(!struncmp(s, "if ", 3)){
7592 HD(f)->bitbucket = 1; /* default is failure! */
7593 switch(*(s += 3)){
7594 case 'P' :
7595 case 'p' :
7596 if(!struncmp(s + 1, "inemode=", 8)){
7597 if(!strucmp(s = removing_quotes(s + 9), "function_key")
7598 && F_ON(F_USE_FK, ps_global))
7599 HD(f)->bitbucket = 0;
7600 else if(!strucmp(s, "running"))
7601 HD(f)->bitbucket = 0;
7602 #ifdef _WINDOWS
7603 else if(!strucmp(s, "os_windows"))
7604 HD(f)->bitbucket = 0;
7605 #endif
7606 }
7607
7608 break;
7609
7610 case '[' : /* test */
7611 if((p = strindex(++s, ']')) != NULL){
7612 *p = '\0'; /* tie off test string */
7613 removing_leading_white_space(s);
7614 removing_trailing_white_space(s);
7615 if(*s == '-' && *(s+1) == 'r'){ /* readable file? */
7616 for(s += 2; *s && ASCII_ISSPACE((unsigned char) *s); s++)
7617 ;
7618
7619
7620 HD(f)->bitbucket = (can_access(CHTML_VAR_EXPAND(removing_quotes(s)),
7621 READ_ACCESS) != 0);
7622 }
7623 }
7624
7625 break;
7626
7627 default :
7628 break;
7629 }
7630 }
7631 else if(!strucmp(s, "else")){
7632 HD(f)->bitbucket = !HD(f)->bitbucket;
7633 }
7634 else if(!strucmp(s, "endif")){
7635 /* Clean up after chtml here */
7636 HD(f)->bitbucket = 0;
7637 }
7638 }
7639 else if(!HD(f)->bitbucket){
7640 if(!struncmp(s, "#include ", 9)){
7641 char buf[MAILTMPLEN], *bufp;
7642 int len, end_of_line;
7643 FILE *fp;
7644
7645 /* Include the named file */
7646 if(!struncmp(s += 9, "file=", 5)
7647 && (fp = our_fopen(CHTML_VAR_EXPAND(removing_quotes(s+5)), "r"))){
7648 html_element_output(f, HTML_NEWLINE);
7649
7650 while(fgets(buf, sizeof(buf), fp)){
7651 if((len = strlen(buf)) && buf[len-1] == '\n'){
7652 end_of_line = 1;
7653 buf[--len] = '\0';
7654 }
7655 else
7656 end_of_line = 0;
7657
7658 for(bufp = buf; len; bufp++, len--)
7659 html_element_output(f, (int) *bufp);
7660
7661 if(end_of_line)
7662 html_element_output(f, HTML_NEWLINE);
7663 }
7664
7665 fclose(fp);
7666 html_element_output(f, HTML_NEWLINE);
7667 HD(f)->blanks = 0;
7668 if(f->f1 == WSPACE)
7669 f->f1 = DFL;
7670 }
7671 }
7672 else if(!struncmp(s, "#echo ", 6)){
7673 if(!struncmp(s += 6, "var=", 4)){
7674 char *p, buf[MAILTMPLEN];
7675 ADDRESS *adr;
7676 extern char datestamp[];
7677
7678 if(!strcmp(s = removing_quotes(s + 4), "ALPINE_VERSION")){
7679 p = ALPINE_VERSION;
7680 }
7681 else if(!strcmp(s, "ALPINE_REVISION")){
7682 p = get_alpine_revision_string(buf, sizeof(buf));
7683 }
7684 else if(!strcmp(s, "C_CLIENT_VERSION")){
7685 p = CCLIENTVERSION;
7686 }
7687 else if(!strcmp(s, "ALPINE_COMPILE_DATE")){
7688 p = datestamp;
7689 }
7690 else if(!strcmp(s, "ALPINE_TODAYS_DATE")){
7691 rfc822_date(p = buf);
7692 }
7693 else if(!strcmp(s, "_LOCAL_FULLNAME_")){
7694 p = (ps_global->VAR_LOCAL_FULLNAME
7695 && ps_global->VAR_LOCAL_FULLNAME[0])
7696 ? ps_global->VAR_LOCAL_FULLNAME
7697 : "Local Support";
7698 }
7699 else if(!strcmp(s, "_LOCAL_ADDRESS_")){
7700 p = (ps_global->VAR_LOCAL_ADDRESS
7701 && ps_global->VAR_LOCAL_ADDRESS[0])
7702 ? ps_global->VAR_LOCAL_ADDRESS
7703 : "postmaster";
7704 adr = rfc822_parse_mailbox(&p, ps_global->maildomain);
7705 snprintf(p = buf, sizeof(buf), "%s@%s", adr->mailbox, adr->host);
7706 mail_free_address(&adr);
7707 }
7708 else if(!strcmp(s, "_BUGS_FULLNAME_")){
7709 p = (ps_global->VAR_BUGS_FULLNAME
7710 && ps_global->VAR_BUGS_FULLNAME[0])
7711 ? ps_global->VAR_BUGS_FULLNAME
7712 : "Place to report Alpine Bugs";
7713 }
7714 else if(!strcmp(s, "_BUGS_ADDRESS_")){
7715 p = (ps_global->VAR_BUGS_ADDRESS
7716 && ps_global->VAR_BUGS_ADDRESS[0])
7717 ? ps_global->VAR_BUGS_ADDRESS : "postmaster";
7718 adr = rfc822_parse_mailbox(&p, ps_global->maildomain);
7719 snprintf(p = buf, sizeof(buf), "%s@%s", adr->mailbox, adr->host);
7720 mail_free_address(&adr);
7721 }
7722 else if(!strcmp(s, "CURRENT_DIR")){
7723 getcwd(p = buf, sizeof(buf));
7724 }
7725 else if(!strcmp(s, "HOME_DIR")){
7726 p = ps_global->home_dir;
7727 }
7728 else if(!strcmp(s, "PINE_CONF_PATH")){
7729 #if defined(_WINDOWS) || !defined(SYSTEM_PINERC)
7730 p = "/usr/local/lib/pine.conf";
7731 #else
7732 p = SYSTEM_PINERC;
7733 #endif
7734 }
7735 else if(!strcmp(s, "PINE_CONF_FIXED_PATH")){
7736 #ifdef SYSTEM_PINERC_FIXED
7737 p = SYSTEM_PINERC_FIXED;
7738 #else
7739 p = "/usr/local/lib/pine.conf.fixed";
7740 #endif
7741 }
7742 else if(!strcmp(s, "PINE_INFO_PATH")){
7743 p = SYSTEM_PINE_INFO_PATH;
7744 }
7745 else if(!strcmp(s, "MAIL_SPOOL_PATH")){
7746 p = sysinbox();
7747 }
7748 else if(!strcmp(s, "MAIL_SPOOL_LOCK_PATH")){
7749 /* Don't put the leading /tmp/. */
7750 int i, j;
7751
7752 p = sysinbox();
7753 if(p){
7754 for(j = 0, i = 0; p[i] && j < MAILTMPLEN - 1; i++){
7755 if(p[i] == '/')
7756 buf[j++] = '\\';
7757 else
7758 buf[j++] = p[i];
7759 }
7760 buf[j++] = '\0';
7761 p = buf;
7762 }
7763 }
7764 else if(!struncmp(s, "VAR_", 4)){
7765 p = s+4;
7766 if(pith_opt_pretty_var_name)
7767 p = (*pith_opt_pretty_var_name)(p);
7768 }
7769 else if(!struncmp(s, "FEAT_", 5)){
7770 p = s+5;
7771 if(pith_opt_pretty_feature_name)
7772 p = (*pith_opt_pretty_feature_name)(p, -1);
7773 }
7774 else
7775 p = NULL;
7776
7777 if(p){
7778 if(f->f1 == WSPACE){
7779 html_element_output(f, ' ');
7780 f->f1 = DFL; /* clear it */
7781 }
7782
7783 while(*p)
7784 html_element_output(f, (int) *p++);
7785 }
7786 }
7787 }
7788 }
7789 }
7790
7791
7792 void
html_element_output(FILTER_S * f,int ch)7793 html_element_output(FILTER_S *f, int ch)
7794 {
7795 if(HANDLERS(f))
7796 (*EL(HANDLERS(f))->handler)(HANDLERS(f), ch, GF_DATA);
7797 else
7798 html_output(f, ch);
7799 }
7800
7801 /*
7802 * collect html entity and return its UCS value when done.
7803 *
7804 * Returns HTML_MOREDATA : we need more data
7805 * HTML_ENTITY : entity collected
7806 * HTML_BADVALUE : good data, but no named match or out of range
7807 * HTML_BADDATA : invalid input
7808 *
7809 * NOTES:
7810 * - entity format is "'&' tag ';'" and represents a literal char
7811 * - named entities are CASE SENSITIVE.
7812 * - numeric char references (where the tag is prefixed with a '#')
7813 * are a char with that numbers value
7814 * - numeric vals are 0-255 except for the ranges: 0-8, 11-31, 127-159.
7815 */
7816 int
html_entity_collector(FILTER_S * f,int ch,UCS * ucs,char ** alt)7817 html_entity_collector(FILTER_S *f, int ch, UCS *ucs, char **alt)
7818 {
7819 static int len = 0;
7820 static char buf[MAX_ENTITY+2];
7821 int rv, i;
7822
7823 if(len == MAX_ENTITY){
7824 rv = HTML_BADDATA;
7825 }
7826 else if((len == 0)
7827 ? (isalpha((unsigned char) ch) || ch == '#')
7828 : ((isdigit((unsigned char) ch)
7829 || (len == 1 && (unsigned char) ch == 'x')
7830 || (len == 1 &&(unsigned char) ch == 'X')
7831 || (len > 1 && isxdigit((unsigned char) ch))
7832 || (isalpha((unsigned char) ch) && buf[0] != '#')))){
7833 buf[len++] = ch;
7834 return(HTML_MOREDATA);
7835 }
7836 else if(ch == ';' || ASCII_ISSPACE((unsigned char) ch)){
7837 buf[len] = '\0'; /* got something! */
7838 if(buf[0] == '#'){
7839 if(buf[1] == 'x' || buf[1] == 'X')
7840 *ucs = (UCS) strtoul(&buf[2], NULL, 16);
7841 else
7842 *ucs = (UCS) strtoul(&buf[1], NULL, 10);
7843
7844 if(alt){
7845 *alt = NULL;
7846 for(i = 0; i < sizeof(entity_tab)/sizeof(struct html_entities); i++)
7847 if(entity_tab[i].value == *ucs){
7848 *alt = entity_tab[i].plain;
7849 break;
7850 }
7851 }
7852
7853 len = 0;
7854 return(HTML_ENTITY);
7855 }
7856 else{
7857 rv = HTML_BADVALUE; /* in case of no match */
7858 for(i = 0; i < sizeof(entity_tab)/sizeof(struct html_entities); i++)
7859 if(strcmp(entity_tab[i].name, buf) == 0){
7860 *ucs = entity_tab[i].value;
7861 if(alt)
7862 *alt = entity_tab[i].plain;
7863
7864 len = 0;
7865 return(HTML_ENTITY);
7866 }
7867 }
7868 }
7869 else
7870 rv = HTML_BADDATA; /* bogus input! */
7871
7872 if(alt){
7873 buf[len] = '\0';
7874 *alt = buf;
7875 }
7876
7877 len = 0;
7878 return(rv);
7879 }
7880
7881
/*----------------------------------------------------------------------
  HTML text to plain text filter

  This basically tries to do the best it can with HTML 2.0 (RFC1866)
  with bits of RFC 1942 (plus some HTML 3.2 thrown in as well) text
  formatting.

  flg selects the operation:

    GF_RESET  allocate and initialize the filter's HTML state and
              line buffer
    GF_DATA   process the queued input characters, collecting
              "&...;" entities first and handing everything else to
              HTML_PROC
    GF_EOD    pop any handlers still open, flush output, release the
              state allocated at GF_RESET and pass GF_DATA then
              GF_EOD to the next filter

 ----*/
void
gf_html2plain(FILTER_S *f, int flg)
{
/* BUG: quote incoming \255 values (see "yuml" above!) */
    if(flg == GF_DATA){
	register int c;
	GF_INIT(f, f->next);

	/* first output: emit the initial indent exactly once */
	if(!HTML_WROTE(f)){
	    int ii;

	    for(ii = HTML_INDENT(f); ii > 0; ii--)
	      html_putc(f, ' ');

	    HTML_WROTE(f) = 1;
	}

	while(GF_GETC(f, c)){
	    /*
	     * First we have to collect any literal entities...
	     * that is, IF we're not already collecting one
	     * AND we're not in element's text or, if we are, we're
	     * not in quoted text.  Whew.
	     */
	    if(f->t){
		/* f->t non-zero: an '&' was seen, we're mid-entity */
		char *alt = NULL;
		UCS   ucs;

		switch(html_entity_collector(f, c, &ucs, &alt)){
		  case HTML_MOREDATA:	/* more data required? */
		    continue;		/* go get another char */

		  case HTML_BADVALUE :
		  case HTML_BADDATA :
		    /* if supplied, process bogus data */
		    /* (the collector points alt at the text it gathered) */
		    HTML_PROC(f, '&');
		    for(; *alt; alt++){
			unsigned int uic = *alt;
			HTML_PROC(f, uic);
		    }

		    /* the char that ended the bad entity may start a new one */
		    if(c == '&' && !HD(f)->quoted){
			f->t = '&';
			continue;
		    }
		    else
		      f->t = 0;		/* don't come back next time */

		    break;

		  default :		/* thing to process */
		    f->t = 0;		/* don't come back */

		    /*
		     * do something with UCS codepoint.  If it's
		     * not displayable then use the alt version
		     * otherwise
		     * cvt UCS to UTF-8 and toss into next filter.
		     */
		    if(ucs > 127 && wcellwidth(ucs) < 0){
			if(alt){
			    for(; *alt; alt++){
				c = MAKE_LITERAL(*alt);
				HTML_PROC(f, c);
			    }

			    continue;
			}
			else
			  c = MAKE_LITERAL('?');
		    }
		    else{
			unsigned char utf8buf[8], *p1, *p2;

			p2 = utf8_put(p1 = (unsigned char *) utf8buf, (unsigned long) ucs);
			for(; p1 < p2; p1++){
			    c = MAKE_LITERAL(*p1);
			    HTML_PROC(f, c);
			}

			continue;
		    }

		    break;
		}
	    }
	    else if(!PASS_HTML(f) && c == '&' && !HD(f)->quoted){
		/* start of an entity: remember it and collect next time */
		f->t = '&';
		continue;
	    }

	    /*
	     * then we process whatever we got...
	     */

	    HTML_PROC(f, c);
	}

	GF_OP_END(f);			/* clean up our input pointers */
    }
    else if(flg == GF_EOD){
	/* close anything the document left open */
	while(HANDLERS(f)){
	    dprint((2, "-- html error: no closing tag for %s",EL(HANDLERS(f))->element));
	    html_pop(f, EL(HANDLERS(f)));
	}

	html_output(f, HTML_NEWLINE);
	/* turn off any attributes still in effect */
	if(ULINE_BIT(f))
	  HTML_ULINE(f, ULINE_BIT(f) = 0);

	if(BOLD_BIT(f))
	  HTML_BOLD(f, BOLD_BIT(f) = 0);

	HTML_FLUSH(f);
	/* release what GF_RESET allocated, plus any options passed in */
	fs_give((void **)&f->line);
	if(HD(f)->color)
	  free_color_pair(&HD(f)->color);

	fs_give(&f->data);
	if(f->opt){
	    if(((HTML_OPT_S *)f->opt)->base)
	      fs_give((void **) &((HTML_OPT_S *)f->opt)->base);

	    fs_give(&f->opt);
	}

	/* hand the next filter its pending data, then end of data */
	(*f->next->f)(f->next, GF_DATA);
	(*f->next->f)(f->next, GF_EOD);
    }
    else if(flg == GF_RESET){
	dprint((9, "-- gf_reset html2plain\n"));
	f->data  = (HTML_DATA_S *) fs_get(sizeof(HTML_DATA_S));
	memset(f->data, 0, sizeof(HTML_DATA_S));
	/* start with flowing text */
	HD(f)->wrapstate = !PASS_HTML(f);
	HD(f)->wrapcol   = WRAP_COLS(f);
	f->f1    = DFL;			/* state */
	f->f2    = 0;			/* chars in wrap buffer */
	f->n     = 0L;			/* chars on line so far */
	f->linep = f->line = (char *)fs_get(HTML_BUF_LEN * sizeof(char));
	HD(f)->line_bufsize = HTML_BUF_LEN; /* initial bufsize of line */
	/* alt_entity is set unless the display charmap is iso-8859-1 */
	HD(f)->alt_entity =  (!ps_global->display_charmap
			      || strucmp(ps_global->display_charmap, "iso-8859-1"));
	/* empty the UTF-8 collection buffer used by html_output() */
	HD(f)->cb.cbufp = HD(f)->cb.cbufend = HD(f)->cb.cbuf;
    }
}
8036
8037
8038
8039 /*
8040 * html_indent - do the requested indent level function with appropriate
8041 * flushing and such.
8042 *
8043 * Returns: indent level prior to set/increment
8044 */
8045 int
html_indent(FILTER_S * f,int val,int func)8046 html_indent(FILTER_S *f, int val, int func)
8047 {
8048 int old = HD(f)->indent_level;
8049
8050 /* flush pending data at old indent level */
8051 switch(func){
8052 case HTML_ID_INC :
8053 html_output_flush(f);
8054 if((HD(f)->indent_level += val) < 0)
8055 HD(f)->indent_level = 0;
8056
8057 break;
8058
8059 case HTML_ID_SET :
8060 html_output_flush(f);
8061 HD(f)->indent_level = val;
8062 break;
8063
8064 default :
8065 break;
8066 }
8067
8068 return(old);
8069 }
8070
8071
8072
8073 /*
8074 * html_blanks - Insert n blank lines into output
8075 */
8076 void
html_blank(FILTER_S * f,int n)8077 html_blank(FILTER_S *f, int n)
8078 {
8079 /* Cap off any flowing text, and then write blank lines */
8080 if(f->f2 || f->n || CENTER_BIT(f) || HD(f)->centered || WRAPPED_LEN(f))
8081 html_output(f, HTML_NEWLINE);
8082
8083 if(HD(f)->wrapstate)
8084 while(HD(f)->blanks < n) /* blanks inc'd by HTML_NEWLINE */
8085 html_output(f, HTML_NEWLINE);
8086 }
8087
8088
8089
8090 /*
8091 * html_newline -- insert a newline mindful of embedded tags
8092 */
8093 void
html_newline(FILTER_S * f)8094 html_newline(FILTER_S *f)
8095 {
8096 html_write_newline(f); /* commit an actual newline */
8097
8098 if(f->n){ /* and keep track of blank lines */
8099 HD(f)->blanks = 0;
8100 f->n = 0L;
8101 }
8102 else
8103 HD(f)->blanks++;
8104 }
8105
8106
8107 /*
8108 * output the given char, handling any requested wrapping.
8109 * It's understood that all whitespace handed us is written. In other
8110 * words, junk whitespace is weeded out before it's given to us here.
8111 *
8112 */
8113 void
html_output(FILTER_S * f,int ch)8114 html_output(FILTER_S *f, int ch)
8115 {
8116 UCS uc;
8117 int width;
8118 void (*o_f)(FILTER_S *, int, int, int) = CENTER_BIT(f) ? html_output_centered : html_output_normal;
8119
8120 /*
8121 * if ch is a control token, just pass it on, else, collect
8122 * utf8-encoded characters to determine width,then feed into
8123 * output routines
8124 */
8125 if(ch == TAG_EMBED || HD(f)->embedded.state || (ch > 0xff && IS_LITERAL(ch) == 0)){
8126 (*o_f)(f, ch, 1, 0);
8127 }
8128 else if(utf8_to_ucs4_oneatatime(ch & 0xff, &(HD(f)->cb), &uc, &width)){
8129 unsigned char *cp;
8130
8131 for(cp = HD(f)->cb.cbuf; cp <= HD(f)->cb.cbufend; cp++){
8132 (*o_f)(f, *cp, width, HD(f)->cb.cbufend - cp);
8133 width = 0; /* only count it once */
8134 }
8135
8136 HD(f)->cb.cbufp = HD(f)->cb.cbufend = HD(f)->cb.cbuf;
8137 }
8138 else
8139 HD(f)->cb.cbufend = HD(f)->cb.cbufp;
8140 /* else do nothing until we have a full character */
8141 }
8142
8143
8144 void
html_output_string(FILTER_S * f,char * s)8145 html_output_string(FILTER_S *f, char *s)
8146 {
8147 for(; *s; s++)
8148 html_output(f, *s);
8149 }
8150
8151
8152 void
html_output_raw_tag(FILTER_S * f,char * tag)8153 html_output_raw_tag(FILTER_S *f, char *tag)
8154 {
8155 PARAMETER *p;
8156 char *vp;
8157 int i;
8158
8159 html_output(f, '<');
8160 html_output_string(f, tag);
8161 for(p = HD(f)->el_data->attribs;
8162 p && p->attribute;
8163 p = p->next){
8164 /* SECURITY: no javascript */
8165 /* PRIVACY: no img src without permission */
8166 /* BUGS: no class collisions since <head> ignored */
8167 if(html_event_attribute(p->attribute)
8168 || !strucmp(p->attribute, "class")
8169 || (!PASS_IMAGES(f) && !strucmp(tag, "img") && !strucmp(p->attribute, "src")))
8170 continue;
8171
8172 /* PRIVACY: sniff out background images */
8173 if(p->value && !PASS_IMAGES(f)){
8174 if(!strucmp(p->attribute, "style")){
8175 if((vp = srchstr(p->value, "background-image")) != NULL){
8176 /* neuter in place */
8177 vp[11] = vp[12] = vp[13] = vp[14] = vp[15] = 'X';
8178 }
8179 else{
8180 for(vp = p->value; (vp = srchstr(vp, "background")) != NULL; vp++)
8181 if(vp[10] == ' ' || vp[10] == ':')
8182 for(i = 11; vp[i] && vp[i] != ';'; i++)
8183 if((vp[i] == 'u' && vp[i+1] == 'r' && vp[i+2] == 'l' && vp[i+3] == '(')
8184 || vp[i] == ':' || vp[i] == '/' || vp[i] == '.')
8185 vp[0] = 'X';
8186 }
8187 }
8188 else if(!strucmp(p->attribute, "background")){
8189 char *ip;
8190
8191 for(ip = p->value; *ip && !(*ip == ':' || *ip == '/' || *ip == '.'); ip++)
8192 ;
8193
8194 if(ip)
8195 continue;
8196 }
8197 }
8198
8199 html_output(f, ' ');
8200 html_output_string(f, p->attribute);
8201 if(p->value){
8202 html_output(f, '=');
8203 html_output(f, '\"');
8204 html_output_string(f, p->value);
8205 html_output(f, '\"');
8206 }
8207 }
8208
8209 /* append warning to form submission */
8210 if(!strucmp(tag, "form")){
8211 html_output_string(f, " onsubmit=\"return window.confirm('This form is submitting information to an outside server.\\nAre you sure?');\"");
8212 }
8213
8214 if(ED(f)->end_tag){
8215 html_output(f, ' ');
8216 html_output(f, '/');
8217 }
8218
8219 html_output(f, '>');
8220 }
8221
8222
8223 int
html_event_attribute(char * attr)8224 html_event_attribute(char *attr)
8225 {
8226 int i;
8227 static char *events[] = {
8228 "onabort", "onblur", "onchange", "onclick", "ondblclick", "ondragdrop",
8229 "onerror", "onfocus", "onkeydown", "onkeypress", "onkeyup", "onload",
8230 "onmousedown", "onmousemove", "onmouseout", "onmouseover", "onmouseup", "onmove",
8231 "onreset", "onresize", "onselec", "onsubmit", "onunload"
8232 };
8233
8234 if((attr[0] == 'o' || attr[0] == 'O') && (attr[1] == 'n' || attr[1] == 'N'))
8235 for(i = 0; i < sizeof(events)/sizeof(events[0]); i++)
8236 if(!strucmp(attr, events[i]))
8237 return(TRUE);
8238
8239 return(FALSE);
8240 }
8241
8242
/*
 * html_output_normal - output handler for non-centered text
 *
 *   f         -- the filter
 *   ch        -- next byte of output
 *   width     -- width associated with ch; only the value seen on the
 *		  first byte of a character is used when wrapping
 *   remaining -- number of bytes still to come for the current
 *		  character; 0 marks its last byte
 *
 * Any pending centered text is flushed and released first.  With
 * wrapping on, bytes are collected via HTML_LINEP_PUTC and flushed at
 * whitespace; with wrapping off, bytes are written straight through
 * with html_putc.  In both modes the bytes following TAG_EMBED are
 * tracked in HD(f)->embedded so they are not counted as text.
 *
 * NOTE: "written" and "cwidth" are function statics, so the
 * multi-byte bookkeeping is shared by every caller of this function.
 */
void
html_output_normal(FILTER_S *f, int ch, int width, int remaining)
{
    static int written = 0;	/* bytes of the current character seen so far */
    static int cwidth;		/* width captured on the character's first byte */

    /* leaving centered mode: flush and release the centering buffers */
    if(HD(f)->centered){
	html_centered_flush(f);
	fs_give((void **) &HD(f)->centered->line.buf);
	fs_give((void **) &HD(f)->centered->word.buf);
	fs_give((void **) &HD(f)->centered);
    }

    if(HD(f)->wrapstate){
	if(ch == HTML_NEWLINE){		/* hard newline */
	    html_output_flush(f);
	    html_newline(f);
	}
	else
	  HD(f)->blanks = 0;		/* reset blank line counter */

	if(ch == TAG_EMBED){	/* takes up no space */
	    HD(f)->embedded.state = -5;
	    HTML_LINEP_PUTC(f, TAG_EMBED);
	}
	else if(HD(f)->embedded.state){	/* ditto */
	    if(HD(f)->embedded.state == -5){
		/* looking for specially handled tags following TAG_EMBED */
		if(ch == TAG_HANDLE)
		  HD(f)->embedded.state = -1;	/* next ch is length */
		else if(ch == TAG_FGCOLOR || ch == TAG_BGCOLOR){
		    if(!HD(f)->color)
		      HD(f)->color = new_color_pair(NULL, NULL);

		    if(ch == TAG_FGCOLOR)
		      HD(f)->embedded.color = HD(f)->color->fg;
		    else
		      HD(f)->embedded.color = HD(f)->color->bg;

		    HD(f)->embedded.state = RGBLEN;
		}
		else
		  HD(f)->embedded.state = 0;	/* non-special */
	    }
	    else if(HD(f)->embedded.state > 0){
		/* collecting up an RGBLEN color or length, ignore tags */
		(HD(f)->embedded.state)--;
		if(HD(f)->embedded.color)
		  *HD(f)->embedded.color++ = ch;

		if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
		    *HD(f)->embedded.color = '\0';
		    HD(f)->embedded.color = NULL;
		}
	    }
	    else if(HD(f)->embedded.state < 0){
		HD(f)->embedded.state = ch;	/* number of embedded chars */
	    }
	    else{
		/*
		 * NOTE(review): state is non-zero here and both the > 0
		 * and < 0 cases are handled above, so this branch looks
		 * unreachable.
		 */
		(HD(f)->embedded.state)--;
		if(HD(f)->embedded.color)
		  *HD(f)->embedded.color++ = ch;

		if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
		    *HD(f)->embedded.color = '\0';
		    HD(f)->embedded.color = NULL;
		}
	    }

	    HTML_LINEP_PUTC(f, ch);
	}
	else if(HTML_ISSPACE(ch)){
	    html_output_flush(f);
	}
	else{
	    if(HD(f)->prefix)
	      html_a_prefix(f);

	    /* remember the width given with the first byte of the char */
	    if(written == 0)
	      cwidth = width;

	    if(f->f2 + cwidth + 1 >= WRAP_COLS(f)){
		HTML_LINEP_PUTC(f, ch & 0xff);
		written++;
		/* only break the line once the whole character is in */
		if(remaining == 0){
		    HTML_FLUSH(f);
		    html_newline(f);
		}
		if(HD(f)->in_anchor)
		  html_write_anchor(f, HD(f)->in_anchor);
	    }
	    else{
		HTML_LINEP_PUTC(f, ch & 0xff);
		written++;
	    }

	    /* last byte of the character: account for its width once */
	    if(remaining == 0){
		written = 0;
		f->f2 += cwidth;
	    }
	}
    }
    else{
	if(HD(f)->prefix)
	  html_a_prefix(f);

	html_output_flush(f);

	switch(HD(f)->embedded.state){
	  case 0 :
	    switch(ch){
	      default :
		/*
		 * It's difficult to both preserve whitespace and wrap at the
		 * same time so we'll do a dumb wrap at the edge of the screen.
		 * Since this shouldn't come up much in real life we'll hope
		 * it is good enough.
		 */
		if(!PASS_HTML(f) && (f->n + width) > WRAP_COLS(f))
		  html_newline(f);

		f->n += width;			/* inc displayed char count */
		HD(f)->blanks = 0;		/* reset blank line counter */
		html_putc(f, ch & 0xff);
		break;

	      case TAG_EMBED :	/* takes up no space */
		html_putc(f, TAG_EMBED);
		HD(f)->embedded.state = -2;
		break;

	      case HTML_NEWLINE :	/* newline handling */
		if(!f->n)
		  break;

		/* text on the line: falls through to write the newline */
	      case '\n' :
		html_newline(f);

		/* falls through: nothing more to do */
	      case '\r' :
		break;
	    }

	    break;

	  case -2 :		/* byte right after TAG_EMBED */
	    HD(f)->embedded.state = 0;
	    switch(ch){
	      case TAG_HANDLE :
		HD(f)->embedded.state = -1;	/* next ch is length */
		break;

	      case TAG_BOLDON :
		BOLD_BIT(f) = 1;
		break;

	      case TAG_BOLDOFF :
		BOLD_BIT(f) = 0;
		break;

	      case TAG_ULINEON :
		ULINE_BIT(f) = 1;
		break;

	      case TAG_ULINEOFF :
		ULINE_BIT(f) = 0;
		break;

	      case TAG_FGCOLOR :
		if(!HD(f)->color)
		  HD(f)->color = new_color_pair(NULL, NULL);

		HD(f)->embedded.color = HD(f)->color->fg;
		/* NOTE(review): the wrapping branch uses RGBLEN here; presumably the same value -- confirm */
		HD(f)->embedded.state = 11;
		break;

	      case TAG_BGCOLOR :
		if(!HD(f)->color)
		  HD(f)->color = new_color_pair(NULL, NULL);

		HD(f)->embedded.color = HD(f)->color->bg;
		/* NOTE(review): see TAG_FGCOLOR above regarding 11 vs RGBLEN */
		HD(f)->embedded.state = 11;
		break;

	      case TAG_HANDLEOFF :
		/* written out as TAG_INVOFF, and we're no longer in an anchor */
		ch = TAG_INVOFF;
		HD(f)->in_anchor = 0;
		break;

	      default :
		break;
	    }

	    html_putc(f, ch);
	    break;

	  case -1 :
	    HD(f)->embedded.state = ch;	/* number of embedded chars */
	    html_putc(f, ch);
	    break;

	  default :		/* counting down embedded bytes */
	    HD(f)->embedded.state--;
	    if(HD(f)->embedded.color)
	      *HD(f)->embedded.color++ = ch;

	    if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
		*HD(f)->embedded.color = '\0';
		HD(f)->embedded.color = NULL;
	    }

	    html_putc(f, ch);
	    break;
	}
    }
}
8458
8459
8460 /*
8461 * flush any buffered chars waiting for wrapping.
8462 */
8463 void
html_output_flush(FILTER_S * f)8464 html_output_flush(FILTER_S *f)
8465 {
8466 if(f->f2){
8467 if(f->n && ((int) f->n) + 1 + f->f2 > HD(f)->wrapcol)
8468 html_newline(f); /* wrap? */
8469
8470 if(f->n){ /* text already on the line? */
8471 html_putc(f, ' ');
8472 f->n++; /* increment count */
8473 }
8474 else{
8475 /* write at start of new line */
8476 html_write_indent(f, HD(f)->indent_level);
8477
8478 if(HD(f)->in_anchor)
8479 html_write_anchor(f, HD(f)->in_anchor);
8480 }
8481
8482 f->n += f->f2;
8483 HTML_FLUSH(f);
8484 }
8485 }
8486
8487
8488
/*
 * html_output_centered - managed writing centered text
 *
 *   f         -- the filter
 *   ch        -- next byte of output
 *   width     -- width associated with ch; only the value seen on the
 *		  first byte of a character is used
 *   remaining -- number of bytes still to come for the current
 *		  character; 0 marks its last byte
 *
 * Text is gathered a "word" at a time into HD(f)->centered->word and
 * appended to HD(f)->centered->line at whitespace; the line is written
 * out by html_centered_flush().  The centering state is allocated on
 * first use here.
 *
 * NOTE: "written" and "cwidth" are function statics, so the
 * multi-byte bookkeeping is shared by every caller of this function.
 */
void
html_output_centered(FILTER_S *f, int ch, int width, int remaining)
{
    static int written;		/* bytes of the current character seen so far */
    static int cwidth;		/* width captured on the character's first byte */

    if(!HD(f)->centered){		/* new text? */
	html_output_flush(f);
	if(f->n)			/* start on blank line */
	  html_newline(f);

	HD(f)->centered = (CENTER_S *) fs_get(sizeof(CENTER_S));
	memset(HD(f)->centered, 0, sizeof(CENTER_S));
	/* and grab a buf to start collecting centered text */
	HD(f)->centered->line.len  = WRAP_COLS(f);
	HD(f)->centered->line.buf  = (char *) fs_get(HD(f)->centered->line.len
							      * sizeof(char));
	HD(f)->centered->line.used = HD(f)->centered->line.width = 0;
	HD(f)->centered->word.len  = 32;
	HD(f)->centered->word.buf  = (char *) fs_get(HD(f)->centered->word.len
							       * sizeof(char));
	HD(f)->centered->word.used = HD(f)->centered->word.width = 0;
    }

    if(ch == HTML_NEWLINE){		/* hard newline */
	html_centered_flush(f);
    }
    else if(ch == TAG_EMBED){		/* takes up no space */
	HD(f)->embedded.state = -5;
	html_centered_putc(&HD(f)->centered->word, TAG_EMBED);
    }
    else if(HD(f)->embedded.state){
	if(HD(f)->embedded.state == -5){
	    /* looking for specially handled tags following TAG_EMBED */
	    if(ch == TAG_HANDLE)
	      HD(f)->embedded.state = -1;	/* next ch is length */
	    else if(ch == TAG_FGCOLOR || ch == TAG_BGCOLOR){
		if(!HD(f)->color)
		  HD(f)->color = new_color_pair(NULL, NULL);

		if(ch == TAG_FGCOLOR)
		  HD(f)->embedded.color = HD(f)->color->fg;
		else
		  HD(f)->embedded.color = HD(f)->color->bg;

		HD(f)->embedded.state = RGBLEN;
	    }
	    else
	      HD(f)->embedded.state = 0;	/* non-special */
	}
	else if(HD(f)->embedded.state > 0){
	    /* collecting up an RGBLEN color or length, ignore tags */
	    (HD(f)->embedded.state)--;
	    if(HD(f)->embedded.color)
	      *HD(f)->embedded.color++ = ch;

	    if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
		*HD(f)->embedded.color = '\0';
		HD(f)->embedded.color = NULL;
	    }
	}
	else if(HD(f)->embedded.state < 0){
	    HD(f)->embedded.state = ch;	/* number of embedded chars */
	}
	else{
	    /*
	     * NOTE(review): state is non-zero here and both the > 0 and
	     * < 0 cases are handled above, so this branch looks
	     * unreachable.
	     */
	    (HD(f)->embedded.state)--;
	    if(HD(f)->embedded.color)
	      *HD(f)->embedded.color++ = ch;

	    if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
		*HD(f)->embedded.color = '\0';
		HD(f)->embedded.color = NULL;
	    }
	}

	/* embedded bytes travel with the word but add no width */
	html_centered_putc(&HD(f)->centered->word, ch);
    }
    else if(ASCII_ISSPACE((unsigned char) ch)){
	/* only the first space of a run ends the word */
	if(!HD(f)->centered->space++){	/* end of a word? flush! */
	    int i;

	    if(WRAPPED_LEN(f) > HD(f)->wrapcol){
		html_centered_flush_line(f);
		/* fall thru to put current "word" on blank "line" */
	    }
	    else if(HD(f)->centered->line.width){
		/* put space char between line and appended word */
		html_centered_putc(&HD(f)->centered->line, ' ');
		HD(f)->centered->line.width++;
	    }

	    /* append the collected word to the line */
	    for(i = 0; i < HD(f)->centered->word.used; i++)
	      html_centered_putc(&HD(f)->centered->line,
				 HD(f)->centered->word.buf[i]);

	    HD(f)->centered->line.width += HD(f)->centered->word.width;
	    HD(f)->centered->word.used  = 0;
	    HD(f)->centered->word.width = 0;
	}
    }
    else{
	if(HD(f)->prefix)
	  html_a_prefix(f);

	/* ch is start of next word */
	HD(f)->centered->space = 0;
	if(HD(f)->centered->word.width >= WRAP_COLS(f))
	  html_centered_flush(f);

	html_centered_putc(&HD(f)->centered->word, ch);

	/* remember the width given with the first byte of the char */
	if(written == 0)
	  cwidth = width;

	written++;

	/* last byte of the character: account for its width once */
	if(remaining == 0){
	    written = 0;
	    HD(f)->centered->word.width += cwidth;
	}
    }
}
8614
8615
8616 /*
8617 * html_centered_putc -- add given char to given WRAPLINE_S
8618 */
8619 void
html_centered_putc(WRAPLINE_S * wp,int ch)8620 html_centered_putc(WRAPLINE_S *wp, int ch)
8621 {
8622 if(wp->used + 1 >= wp->len){
8623 wp->len += 64;
8624 fs_resize((void **) &wp->buf, wp->len * sizeof(char));
8625 }
8626
8627 wp->buf[wp->used++] = ch;
8628 }
8629
8630
8631
8632 /*
8633 * html_centered_flush - finish writing any pending centered output
8634 */
8635 void
html_centered_flush(FILTER_S * f)8636 html_centered_flush(FILTER_S *f)
8637 {
8638 int i;
8639
8640 /*
8641 * If word present (what about line?) we need to deal with
8642 * appending it...
8643 */
8644 if(HD(f)->centered->word.width && WRAPPED_LEN(f) > HD(f)->wrapcol)
8645 html_centered_flush_line(f);
8646
8647 if(WRAPPED_LEN(f)){
8648 /* figure out how much to indent */
8649 if((i = (WRAP_COLS(f) - WRAPPED_LEN(f))/2) > 0)
8650 html_write_indent(f, i);
8651
8652 if(HD(f)->centered->anchor)
8653 html_write_anchor(f, HD(f)->centered->anchor);
8654
8655 html_centered_handle(&HD(f)->centered->anchor,
8656 HD(f)->centered->line.buf,
8657 HD(f)->centered->line.used);
8658 html_write(f, HD(f)->centered->line.buf, HD(f)->centered->line.used);
8659
8660 if(HD(f)->centered->word.used){
8661 if(HD(f)->centered->line.width)
8662 html_putc(f, ' ');
8663
8664 html_centered_handle(&HD(f)->centered->anchor,
8665 HD(f)->centered->word.buf,
8666 HD(f)->centered->word.used);
8667 html_write(f, HD(f)->centered->word.buf,
8668 HD(f)->centered->word.used);
8669 }
8670
8671 HD(f)->centered->line.used = HD(f)->centered->word.used = 0;
8672 HD(f)->centered->line.width = HD(f)->centered->word.width = 0;
8673 }
8674 else{
8675 if(HD(f)->centered->word.used){
8676 html_write(f, HD(f)->centered->word.buf,
8677 HD(f)->centered->word.used);
8678 HD(f)->centered->line.used = HD(f)->centered->word.used = 0;
8679 HD(f)->centered->line.width = HD(f)->centered->word.width = 0;
8680 }
8681 HD(f)->blanks++; /* advance the blank line counter */
8682 }
8683
8684 html_newline(f); /* finish the line */
8685 }
8686
8687
8688 /*
8689 * html_centered_handle - scan the line for embedded handles
8690 */
8691 void
html_centered_handle(int * h,char * line,int len)8692 html_centered_handle(int *h, char *line, int len)
8693 {
8694 int n;
8695
8696 while(len-- > 0)
8697 if(*line++ == TAG_EMBED && len-- > 0)
8698 switch(*line++){
8699 case TAG_HANDLE :
8700 if((n = *line++) >= --len){
8701 *h = 0;
8702 len -= n;
8703 while(n--)
8704 *h = (*h * 10) + (*line++ - '0');
8705 }
8706 break;
8707
8708 case TAG_HANDLEOFF :
8709 case TAG_INVOFF :
8710 *h = 0; /* assumption 23,342: inverse off ends tags */
8711 break;
8712
8713 default :
8714 break;
8715 }
8716 }
8717
8718
8719
8720 /*
8721 * html_centered_flush_line - flush the centered "line" only
8722 */
8723 void
html_centered_flush_line(FILTER_S * f)8724 html_centered_flush_line(FILTER_S *f)
8725 {
8726 if(HD(f)->centered->line.used){
8727 int i, j;
8728
8729 /* hide "word" from flush */
8730 i = HD(f)->centered->word.used;
8731 j = HD(f)->centered->word.width;
8732 HD(f)->centered->word.used = 0;
8733 HD(f)->centered->word.width = 0;
8734 html_centered_flush(f);
8735
8736 HD(f)->centered->word.used = i;
8737 HD(f)->centered->word.width = j;
8738 }
8739 }
8740
8741
8742 /*
8743 * html_write_indent - write indention mindful of display attributes
8744 */
8745 void
html_write_indent(FILTER_S * f,int indent)8746 html_write_indent(FILTER_S *f, int indent)
8747 {
8748 if(! STRIP(f)){
8749 if(BOLD_BIT(f)){
8750 html_putc(f, TAG_EMBED);
8751 html_putc(f, TAG_BOLDOFF);
8752 }
8753
8754 if(ULINE_BIT(f)){
8755 html_putc(f, TAG_EMBED);
8756 html_putc(f, TAG_ULINEOFF);
8757 }
8758 }
8759
8760 f->n = indent;
8761 while(indent-- > 0)
8762 html_putc(f, ' '); /* indent as needed */
8763
8764 /*
8765 * Resume any previous embedded state
8766 */
8767 if(! STRIP(f)){
8768 if(BOLD_BIT(f)){
8769 html_putc(f, TAG_EMBED);
8770 html_putc(f, TAG_BOLDON);
8771 }
8772
8773 if(ULINE_BIT(f)){
8774 html_putc(f, TAG_EMBED);
8775 html_putc(f, TAG_ULINEON);
8776 }
8777 }
8778 }
8779
8780
8781 /*
8782 *
8783 */
8784 void
html_write_anchor(FILTER_S * f,int anchor)8785 html_write_anchor(FILTER_S *f, int anchor)
8786 {
8787 char buf[256];
8788 int i;
8789
8790 html_putc(f, TAG_EMBED);
8791 html_putc(f, TAG_HANDLE);
8792 snprintf(buf, sizeof(buf), "%d", anchor);
8793 html_putc(f, (int) strlen(buf));
8794
8795 for(i = 0; buf[i]; i++)
8796 html_putc(f, buf[i]);
8797 }
8798
8799
8800 /*
8801 * html_write_newline - write a newline mindful of display attributes
8802 */
8803 void
html_write_newline(FILTER_S * f)8804 html_write_newline(FILTER_S *f)
8805 {
8806 int i;
8807
8808 if(! STRIP(f)){ /* First tie, off any embedded state */
8809 if(HD(f)->in_anchor){
8810 html_putc(f, TAG_EMBED);
8811 html_putc(f, TAG_INVOFF);
8812 }
8813
8814 if(BOLD_BIT(f)){
8815 html_putc(f, TAG_EMBED);
8816 html_putc(f, TAG_BOLDOFF);
8817 }
8818
8819 if(ULINE_BIT(f)){
8820 html_putc(f, TAG_EMBED);
8821 html_putc(f, TAG_ULINEOFF);
8822 }
8823
8824 if(HD(f)->color && (HD(f)->color->fg[0] || HD(f)->color->bg[0])){
8825 char *p;
8826 int i;
8827
8828 p = color_embed(ps_global->VAR_NORM_FORE_COLOR,
8829 ps_global->VAR_NORM_BACK_COLOR);
8830 for(i = 0; i < 2 * (RGBLEN + 2); i++)
8831 html_putc(f, p[i]);
8832 }
8833 }
8834
8835 html_write(f, "\015\012", 2);
8836 for(i = HTML_INDENT(f); i > 0; i--)
8837 html_putc(f, ' ');
8838
8839 if(! STRIP(f)){ /* First tie, off any embedded state */
8840 if(BOLD_BIT(f)){
8841 html_putc(f, TAG_EMBED);
8842 html_putc(f, TAG_BOLDON);
8843 }
8844
8845 if(ULINE_BIT(f)){
8846 html_putc(f, TAG_EMBED);
8847 html_putc(f, TAG_ULINEON);
8848 }
8849
8850 if(HD(f)->color && (HD(f)->color->fg[0] || HD(f)->color->bg[0])){
8851 char *p, *tfg, *tbg;
8852 int i;
8853 COLOR_PAIR *tmp;
8854
8855 tfg = HD(f)->color->fg;
8856 tbg = HD(f)->color->bg;
8857 tmp = new_color_pair(tfg[0] ? tfg
8858 : color_to_asciirgb(ps_global->VAR_NORM_FORE_COLOR),
8859 tbg[0] ? tbg
8860 : color_to_asciirgb(ps_global->VAR_NORM_BACK_COLOR));
8861 if(pico_is_good_colorpair(tmp)){
8862 p = color_embed(tfg[0] ? tfg
8863 : ps_global->VAR_NORM_FORE_COLOR,
8864 tbg[0] ? tbg
8865 : ps_global->VAR_NORM_BACK_COLOR);
8866 for(i = 0; i < 2 * (RGBLEN + 2); i++)
8867 html_putc(f, p[i]);
8868 }
8869
8870 if(tmp)
8871 free_color_pair(&tmp);
8872 }
8873 }
8874 }
8875
8876
/*
 * html_write - write given n-length string to next filter
 *
 *   f -- the filter whose successor receives the bytes
 *   s -- bytes to write (need not be NUL-terminated)
 *   n -- number of bytes in s
 *
 * Unless embedded data is being stripped, TAG_EMBED sequences in s are
 * interpreted on the way through so the filter's bold/underline bits
 * and current anchor stay in step with what has been written.
 */
void
html_write(FILTER_S *f, char *s, int n)
{
    GF_INIT(f, f->next);

    while(n-- > 0){
	/* keep track of attribute state?  Not if last char! */
	if(!STRIP(f) && *s == TAG_EMBED && n-- > 0){
	    GF_PUTC(f->next, TAG_EMBED);
	    switch(*++s){		/* s now at the tag byte */
	      case TAG_BOLDON :
		BOLD_BIT(f) = 1;
		break;
	      case TAG_BOLDOFF :
		BOLD_BIT(f) = 0;
		break;
	      case TAG_ULINEON :
		ULINE_BIT(f) = 1;
		break;
	      case TAG_ULINEOFF :
		ULINE_BIT(f) = 0;
		break;
	      case TAG_HANDLEOFF :
		/* written out as TAG_INVOFF; the tag byte itself is consumed */
		HD(f)->in_anchor = 0;
		GF_PUTC(f->next, TAG_INVOFF);
		s++;
		continue;
	      case TAG_HANDLE :
		if(n-- > 0){
		    int i = *++s;	/* length byte: number of digits */

		    GF_PUTC(f->next, TAG_HANDLE);
		    if(i <= n){		/* all the digits are present */
			int	  anum = 0;
			HANDLE_S *h;

			n -= i;
			GF_PUTC(f->next, i);
			/*
			 * Accumulate the handle number.  The final digit
			 * is left for the GF_PUTC at the bottom of the
			 * outer loop to write.
			 * NOTE(review): a length byte of 0 would not stop
			 * this loop at the first digit -- confirm writers
			 * never produce one.
			 */
			while(1){
			    anum = (anum * 10) + (*++s - '0');
			    if(--i)
			      GF_PUTC(f->next, *s);
			    else
			      break;
			}

			/* only URL and Attach handles count as anchors */
			if(DO_HANDLES(f)
			   && (h = get_handle(*HANDLESP(f), anum)) != NULL
			   && (h->type == URL || h->type == Attach)){
			    HD(f)->in_anchor = anum;
			}
		    }
		}

		break;
	      default:
		break;
	    }
	}

	/* write the current byte (the tag byte after a TAG_EMBED) */
	GF_PUTC(f->next, (*s++) & 0xff);
    }

    GF_IP_END(f->next);		/* clean up next's input pointers */
}
8945
8946
/*
 * html_putc -- actual work of writing to next filter.
 *
 *   f  -- the filter whose successor receives the byte
 *   ch -- byte to write; only the low 8 bits are passed along
 *
 *	     NOTE: Small opt not using full GF_END since our input
 *		   pointers don't need adjusting.
 */
void
html_putc(FILTER_S *f, int ch)
{
    GF_INIT(f, f->next);
    GF_PUTC(f->next, ch & 0xff);
    GF_IP_END(f->next);		/* clean up next's input pointers */
}
8959
8960
8961
8962 /*
8963 * Only current option is to turn on embedded data stripping for text
8964 * bound to a printer or composer.
8965 */
8966 void *
gf_html2plain_opt(char * base,int columns,int * margin,HANDLE_S ** handlesp,htmlrisk_t risk_f,int flags)8967 gf_html2plain_opt(char *base,
8968 int columns,
8969 int *margin,
8970 HANDLE_S **handlesp,
8971 htmlrisk_t risk_f,
8972 int flags)
8973 {
8974 HTML_OPT_S *op;
8975 int margin_l, margin_r;
8976
8977 op = (HTML_OPT_S *) fs_get(sizeof(HTML_OPT_S));
8978
8979 op->base = cpystr(base);
8980 margin_l = (margin) ? margin[0] : 0;
8981 margin_r = (margin) ? margin[1] : 0;
8982 op->indent = margin_l;
8983 op->columns = columns - (margin_l + margin_r);
8984 op->strip = ((flags & GFHP_STRIPPED) == GFHP_STRIPPED);
8985 op->handlesp = handlesp;
8986 op->handles_loc = ((flags & GFHP_LOCAL_HANDLES) == GFHP_LOCAL_HANDLES);
8987 op->showserver = ((flags & GFHP_SHOW_SERVER) == GFHP_SHOW_SERVER);
8988 op->warnrisk_f = risk_f;
8989 op->no_relative_links = ((flags & GFHP_NO_RELATIVE) == GFHP_NO_RELATIVE);
8990 op->related_content = ((flags & GFHP_RELATED_CONTENT) == GFHP_RELATED_CONTENT);
8991 op->html = ((flags & GFHP_HTML) == GFHP_HTML);
8992 op->html_imgs = ((flags & GFHP_HTML_IMAGES) == GFHP_HTML_IMAGES);
8993 op->element_table = html_element_table;
8994 return((void *) op);
8995 }
8996
8997
8998 void *
gf_html2plain_rss_opt(RSS_FEED_S ** feedp,int flags)8999 gf_html2plain_rss_opt(RSS_FEED_S **feedp, int flags)
9000 {
9001 HTML_OPT_S *op;
9002
9003 op = (HTML_OPT_S *) fs_get(sizeof(HTML_OPT_S));
9004 memset(op, 0, sizeof(HTML_OPT_S));
9005
9006 op->base = cpystr("");
9007 op->element_table = rss_element_table;
9008 *(op->feedp = feedp) = NULL;
9009 return((void *) op);
9010 }
9011
9012 void
gf_html2plain_rss_free(RSS_FEED_S ** feedp)9013 gf_html2plain_rss_free(RSS_FEED_S **feedp)
9014 {
9015 if(feedp && *feedp){
9016 if((*feedp)->title)
9017 fs_give((void **) &(*feedp)->title);
9018
9019 if((*feedp)->link)
9020 fs_give((void **) &(*feedp)->link);
9021
9022 if((*feedp)->description)
9023 fs_give((void **) &(*feedp)->description);
9024
9025 if((*feedp)->source)
9026 fs_give((void **) &(*feedp)->source);
9027
9028 if((*feedp)->image)
9029 fs_give((void **) &(*feedp)->image);
9030
9031 gf_html2plain_rss_free_items(&((*feedp)->items));
9032 fs_give((void **) feedp);
9033 }
9034 }
9035
9036 void
gf_html2plain_rss_free_items(RSS_ITEM_S ** itemp)9037 gf_html2plain_rss_free_items(RSS_ITEM_S **itemp)
9038 {
9039 if(itemp && *itemp){
9040 if((*itemp)->title)
9041 fs_give((void **) &(*itemp)->title);
9042
9043 if((*itemp)->link)
9044 fs_give((void **) &(*itemp)->link);
9045
9046 if((*itemp)->description)
9047 fs_give((void **) &(*itemp)->description);
9048
9049 if((*itemp)->source)
9050 fs_give((void **) &(*itemp)->source);
9051
9052 gf_html2plain_rss_free_items(&(*itemp)->next);
9053 fs_give((void **) itemp);
9054 }
9055 }
9056
9057 char *
cid_tempfile_name(char * line,long n,int * is_cidp)9058 cid_tempfile_name(char *line, long n, int *is_cidp)
9059 {
9060 int f2 = 0;
9061 int i, found;
9062 char *s, *t = NULL, *u, c;
9063 char imgfile[1024];
9064 char *extp = NULL;
9065
9066 c = line[n];
9067 line[n] = '\0';
9068 s = NULL;
9069 *is_cidp = 0;
9070 if(n > 0){
9071 if (line[0] == '\"')
9072 f2 = 1;
9073 if (n - f2 > 3){
9074 if (!struncmp(line+f2, "cid:", 4)){
9075 *is_cidp = 1;
9076 f2 += 4;
9077 s = fs_get((n - f2 + 4)*sizeof(char));
9078 sprintf(s, "<%s", line+f2);
9079 if (s[strlen(s)-1] == '\"')
9080 s[strlen(s)-1] = '>';
9081 else{
9082 i = strlen(s);
9083 s[i] = '>';
9084 s[i + 1] = '\0';
9085 }
9086 /* find the tmpdir where all these files will be saved to */
9087 if(t == NULL){
9088 for(i = 0; ps_global->atmts[i].tmpdir == NULL && ps_global->atmts[i].description != NULL; i++);
9089 t = ps_global->atmts[i].description ? ps_global->atmts[i].tmpdir : NULL;
9090 }
9091
9092 /* now we need to look for s in the list of attachments */
9093 for (i = 0, found = 0; found == 0 && ps_global->atmts[i].description != NULL; i++)
9094 if (ps_global->atmts[i].body
9095 && ps_global->atmts[i].body->type == TYPEIMAGE
9096 && strcmp(ps_global->atmts[i].body->id, s) == 0){
9097 found++;
9098 break;
9099 }
9100
9101 fs_give((void **) &s);
9102 if(found && ps_global->atmts[i].cid_tmpfile == NULL){
9103 PARAMETER *param;
9104 if (ps_global->atmts[i].cid_tmpfile == NULL){
9105 for(param = ps_global->atmts[i].body->parameter; param ; param = param->next){
9106 if (!strucmp(param->attribute, "NAME")){
9107 strncpy(imgfile, param->value, sizeof(imgfile));
9108 imgfile[sizeof(imgfile)-1] = '\0';
9109 extp = strrchr(imgfile, '.');
9110 if(extp) extp++;
9111 }
9112 }
9113 ps_global->atmts[i].cid_tmpfile = temp_nam_ext(t, "tmp-img-", extp);
9114 }
9115 }
9116 if(found && ps_global->atmts[i].cid_tmpfile != NULL)
9117 s = strstr(ps_global->atmts[i].cid_tmpfile, "tmp-img-");
9118 }
9119 }
9120 }
9121 line[n] = c;
9122 return s;
9123 }
9124
/*
 * COLLECT - append character C to filter X's line buffer, growing the
 *	     buffer by 1024 bytes when it is full.
 *
 * Relies on a variable named "buflen", holding the current size of
 * X's line buffer, being in scope where the macro is expanded.
 * NOTE: expands to a brace block, not a single statement.
 */
#define	COLLECT(X, C)	{						\
	   if((X)->n == buflen){					\
	      fs_resize((void **) &((X)->line), buflen + 1024);	\
	      (X)->linep = (X)->line + buflen;				\
	      buflen += 1024;						\
	   }								\
	   *((X)->linep)++ = (C);					\
	   (X)->n = (X)->linep - (X)->line;				\
	}

/*
 * RESET_FILTER - empty filter X's line buffer without releasing it
 */
#define RESET_FILTER(X)	{						\
	   (X)->linep = (X)->line;					\
	   (X)->n = 0L;							\
	}
9139
9140 void
gf_html_cid2file(FILTER_S * f,int cmd)9141 gf_html_cid2file(FILTER_S *f, int cmd)
9142 {
9143 register char *p;
9144 register unsigned char c;
9145 static long buflen = 0L;
9146
9147 GF_INIT(f, f->next);
9148
9149 if(cmd == GF_DATA){
9150 register int state = f->f1;
9151
9152 while(GF_GETC(f, c)){
9153
9154 if(state == 0){ /* look for "<img " */
9155 if (c == '<') f->f2 = 1;
9156 else if(f->f2 > 0){
9157 if (f->f2 == 1 && (c == 'i' || c == 'I')) f->f2 = 2;
9158 else if (f->f2 == 2 && (c == 'm' || c == 'M')) f->f2 = 3;
9159 else if (f->f2 == 3 && (c == 'g' || c == 'G')) f->f2 = 4;
9160 else if (f->f2 == 4 && ASCII_ISSPACE(c)){ f->f2 = 0; state = 1; }
9161 else f->f2 = 0;
9162 }
9163 }
9164 else if(state == 1){ /* look for "src=" */
9165 if (c == 's' || c == 'S') f->f2 = 1;
9166 else if (f->f2 == 1 && (c == 'r' || c == 'R')) f->f2 = 2;
9167 else if (f->f2 == 2 && (c == 'c' || c == 'C')) f->f2 = 3;
9168 else if (f->f2 == 3 && c == '='){ GF_PUTC(f->next, c); state = 2; }
9169 else if (f->f2 == 3 && !ASCII_ISSPACE(c)) f->f2 = 0;
9170 else f->f2 = 0;
9171 }
9172 else if (state == 2){ /* collect all data */
9173 if(ASCII_ISSPACE(c) || c == '>'){
9174 long n;
9175 int is_cid;
9176 if(f->n > 0){
9177 char *s = cid_tempfile_name(f->line, f->n, &is_cid);
9178 if(is_cid){
9179 RESET_FILTER(f);
9180 if(s != NULL)
9181 for(; *s != '\0'; s++)
9182 COLLECT(f, *s);
9183 }
9184 }
9185 GF_PUTC(f->next, '\"');
9186 if(is_cid || f->t){
9187 for(p = f->line; f->n; f->n--, p++){
9188 if(*p == '\"') continue;
9189 GF_PUTC(f->next, *p);
9190 }
9191 }
9192 else f->n = 0;
9193 GF_PUTC(f->next, '\"');
9194 /* no need to write "c" right now to the stream. It will be written below */
9195 state = ASCII_ISSPACE(c) ? 1 : 0;
9196 RESET_FILTER(f);
9197 }
9198 else COLLECT(f, c); /* collect this data */
9199 }
9200
9201 p = f->line;
9202 if(state < 2)
9203 GF_PUTC(f->next, c);
9204 }
9205
9206 f->f1 = state;
9207 GF_END(f, f->next);
9208 }
9209 else if(cmd == GF_EOD){
9210 if(f->f1 == 2){
9211 char *s = cid_tempfile_name(f->line, f->n, &f->f2);
9212 GF_PUTC(f->next, '\"');
9213 if (f->f2 || f->t){
9214 for(p = s; *p; p++){
9215 if(*p == '\"') continue;
9216 GF_PUTC(f->next, *p);
9217 }
9218 }
9219 GF_PUTC(f->next, '\"');
9220 GF_PUTC(f->next, '>');
9221 }
9222
9223 buflen = 0;
9224 fs_give((void **)&(f->line)); /* free temp line buffer */
9225 (void) GF_FLUSH(f->next);
9226 (*f->next->f)(f->next, GF_EOD);
9227 }
9228 else if(cmd == GF_RESET){
9229 dprint((9, "-- gf_reset cid2file\n"));
9230 f->n = 0L; /* number of bytes in buffer */
9231 f->f1 = 0; /* state */
9232 f->f2 = 0; /* total number of bytes read that match pattern */
9233 f->t = *(char *)f->opt;
9234 }
9235 }
9236
9237 /* END OF HTML-TO-PLAIN text filter */
9238
9239 /*
9240 * ESCAPE CODE FILTER - remove unknown and possibly dangerous escape codes
9241 * from the text stream.
9242 */
9243
9244 #define MAX_ESC_LEN 5
9245
9246 /*
9247 * the simple filter, removes unknown escape codes from the stream
9248 */
9249 void
gf_escape_filter(FILTER_S * f,int flg)9250 gf_escape_filter(FILTER_S *f, int flg)
9251 {
9252 register char *p;
9253 GF_INIT(f, f->next);
9254
9255 if(flg == GF_DATA){
9256 register unsigned char c;
9257 register int state = f->f1;
9258
9259 while(GF_GETC(f, c)){
9260
9261 if(state){
9262 if(c == '\033' || f->n == MAX_ESC_LEN){
9263 f->line[f->n] = '\0';
9264 f->n = 0L;
9265 if(!match_escapes(f->line)){
9266 GF_PUTC(f->next, '^');
9267 GF_PUTC(f->next, '[');
9268 }
9269 else
9270 GF_PUTC(f->next, '\033');
9271
9272 p = f->line;
9273 while(*p)
9274 GF_PUTC(f->next, *p++);
9275
9276 if(c == '\033')
9277 continue;
9278 else
9279 state = 0; /* fall thru */
9280 }
9281 else{
9282 f->line[f->n++] = c; /* collect */
9283 continue;
9284 }
9285 }
9286
9287 if(c == '\033')
9288 state = 1;
9289 else
9290 GF_PUTC(f->next, c);
9291 }
9292
9293 f->f1 = state;
9294 GF_END(f, f->next);
9295 }
9296 else if(flg == GF_EOD){
9297 if(f->f1){
9298 if(!match_escapes(f->line)){
9299 GF_PUTC(f->next, '^');
9300 GF_PUTC(f->next, '[');
9301 }
9302 else
9303 GF_PUTC(f->next, '\033');
9304 }
9305
9306 for(p = f->line; f->n; f->n--, p++)
9307 GF_PUTC(f->next, *p);
9308
9309 fs_give((void **)&(f->line)); /* free temp line buffer */
9310 (void) GF_FLUSH(f->next);
9311 (*f->next->f)(f->next, GF_EOD);
9312 }
9313 else if(flg == GF_RESET){
9314 dprint((9, "-- gf_reset escape\n"));
9315 f->f1 = 0;
9316 f->n = 0L;
9317 f->linep = f->line = (char *)fs_get((MAX_ESC_LEN + 1) * sizeof(char));
9318 }
9319 }
9320
9321
9322
9323 /*
9324 * CONTROL CHARACTER FILTER - transmogrify control characters into their
9325 * corresponding string representations (you know, ^blah and such)...
9326 */
9327
9328 /*
9329 * the simple filter transforms unknown control characters in the stream
9330 * into harmless strings.
9331 */
9332 void
gf_control_filter(FILTER_S * f,int flg)9333 gf_control_filter(FILTER_S *f, int flg)
9334 {
9335 GF_INIT(f, f->next);
9336
9337 if(flg == GF_DATA){
9338 register unsigned char c;
9339 register int filt_only_c0;
9340
9341 filt_only_c0 = f->opt ? (*(int *) f->opt) : 0;
9342
9343 while(GF_GETC(f, c)){
9344
9345 if(((c < 0x20 || c == 0x7f)
9346 || (c >= 0x80 && c < 0xA0 && !filt_only_c0))
9347 && !(ASCII_ISSPACE((unsigned char) c)
9348 || c == '\016' || c == '\017' || c == '\033')){
9349 GF_PUTC(f->next, c >= 0x80 ? '~' : '^');
9350 GF_PUTC(f->next, (c == 0x7f) ? '?' : (c & 0x1f) + '@');
9351 }
9352 else
9353 GF_PUTC(f->next, c);
9354 }
9355
9356 GF_END(f, f->next);
9357 }
9358 else if(flg == GF_EOD){
9359 (void) GF_FLUSH(f->next);
9360 (*f->next->f)(f->next, GF_EOD);
9361 }
9362 }
9363
9364
/*
 * gf_control_filter_opt - package the control filter's option for use
 *			   as a filter's opt pointer.
 *
 * The option tells the control filter to rewrite C0 control characters
 * only and leave C1 control characters alone.  The pointer is handed
 * back unchanged (nothing is copied), so the int it points to must
 * stay valid while the filter runs.
 */
void *
gf_control_filter_opt(int *filt_only_c0)
{
    void *opt = (void *) filt_only_c0;

    return(opt);
}
9376
9377
9378 /*
9379 * TAG FILTER - quote all TAG_EMBED characters by doubling them.
9380 * This prevents the possibility of embedding other tags.
9381 * We assume that this filter should only be used for something
9382 * that is eventually writing to a display, which has the special
9383 * knowledge of quoted TAG_EMBEDs.
9384 */
9385 void
gf_tag_filter(FILTER_S * f,int flg)9386 gf_tag_filter(FILTER_S *f, int flg)
9387 {
9388 GF_INIT(f, f->next);
9389
9390 if(flg == GF_DATA){
9391 register unsigned char c;
9392
9393 while(GF_GETC(f, c)){
9394
9395 if((c & 0xff) == (TAG_EMBED & 0xff)){
9396 GF_PUTC(f->next, TAG_EMBED);
9397 GF_PUTC(f->next, c);
9398 }
9399 else
9400 GF_PUTC(f->next, c);
9401 }
9402
9403 GF_END(f, f->next);
9404 }
9405 else if(flg == GF_EOD){
9406 (void) GF_FLUSH(f->next);
9407 (*f->next->f)(f->next, GF_EOD);
9408 }
9409 }
9410
9411
9412 /*
9413 * LINEWRAP FILTER - insert CRLF's at end of nearest whitespace before
9414 * specified line width
9415 */
9416
9417
/*
 * WRAP_S - state and options for the line-wrap filter.  The filter
 *	    reaches it through its opt pointer by way of the WRAP_*()
 *	    accessor macros.
 */
typedef struct wrap_col_s {
    /* one-bit flags */
    unsigned	bold:1;
    unsigned	uline:1;
    unsigned	inverse:1;
    unsigned	tags:1;
    unsigned	do_indent:1;
    unsigned	on_comma:1;
    unsigned	flowed:1;
    unsigned	delsp:1;
    unsigned	quoted:1;
    unsigned	allwsp:1;
    unsigned	hard_nl:1;
    unsigned	leave_flowed:1;		/* when set, WRAP_COL/WRAP_MAX_COL come out one column smaller */
    unsigned    use_color:1;
    unsigned    hdr_color:1;
    unsigned    for_compose:1;
    unsigned    handle_soft_hyphen:1;
    unsigned    saw_soft_hyphen:1;
    unsigned	trailing_space:1;
    unsigned char  utf8buf[7];		/* presumably bytes of a partially read UTF-8 character -- confirm */
    unsigned char *utf8bufp;		/* presumably the fill position within utf8buf -- confirm */
    COLOR_PAIR *color;
    STORE_S    *spaces;
    short	embedded,
		space_len;
    char       *lineendp;
    int		anchor,
		prefbrk,
		prefbrkn,
		quote_depth,
		quote_count,
		sig,
		state,
		wrap_col,		/* wrap column before the right margin is subtracted (see WRAP_COL) */
		wrap_max,		/* max column before the right margin is subtracted (see WRAP_MAX_COL) */
		margin_l,
		margin_r,
		indent;
    char	special[256];
} WRAP_S;
9458
/*
 * Accessors for the WRAP_S hung off of a filter's opt pointer.  "F" is
 * always the FILTER_S pointer of the wrap filter.
 *
 * WRAP_COL and WRAP_MAX_COL are the configured columns less the right
 * margin, and less one more cell when leave_flowed is set.
 */
#define WRAP_MARG_L(F) (((WRAP_S *)(F)->opt)->margin_l)
#define WRAP_MARG_R(F) (((WRAP_S *)(F)->opt)->margin_r)
#define WRAP_COL(F) (((WRAP_S *)(F)->opt)->wrap_col - WRAP_MARG_R(F) - ((((WRAP_S *)(F)->opt)->leave_flowed) ? 1 : 0))
#define WRAP_MAX_COL(F) (((WRAP_S *)(F)->opt)->wrap_max - WRAP_MARG_R(F) - ((((WRAP_S *)(F)->opt)->leave_flowed) ? 1 : 0))
#define WRAP_INDENT(F) (((WRAP_S *)(F)->opt)->indent)
#define WRAP_DO_IND(F) (((WRAP_S *)(F)->opt)->do_indent)
#define WRAP_COMMA(F) (((WRAP_S *)(F)->opt)->on_comma)
#define WRAP_FLOW(F) (((WRAP_S *)(F)->opt)->flowed)
#define WRAP_DELSP(F) (((WRAP_S *)(F)->opt)->delsp)
#define WRAP_FL_QD(F) (((WRAP_S *)(F)->opt)->quote_depth)
#define WRAP_FL_QC(F) (((WRAP_S *)(F)->opt)->quote_count)
#define WRAP_FL_SIG(F) (((WRAP_S *)(F)->opt)->sig)
#define WRAP_HARD(F) (((WRAP_S *)(F)->opt)->hard_nl)
#define WRAP_LV_FLD(F) (((WRAP_S *)(F)->opt)->leave_flowed)
#define WRAP_USE_CLR(F) (((WRAP_S *)(F)->opt)->use_color)
#define WRAP_HDR_CLR(F) (((WRAP_S *)(F)->opt)->hdr_color)
#define WRAP_FOR_CMPS(F) (((WRAP_S *)(F)->opt)->for_compose)
#define WRAP_HANDLE_SOFT_HYPHEN(F) (((WRAP_S *)(F)->opt)->handle_soft_hyphen)
#define WRAP_SAW_SOFT_HYPHEN(F) (((WRAP_S *)(F)->opt)->saw_soft_hyphen)
#define WRAP_UTF8BUF(F, C) (((WRAP_S *)(F)->opt)->utf8buf[C])
#define WRAP_UTF8BUFP(F) (((WRAP_S *)(F)->opt)->utf8bufp)
#define WRAP_STATE(F) (((WRAP_S *)(F)->opt)->state)
#define WRAP_QUOTED(F) (((WRAP_S *)(F)->opt)->quoted)
#define WRAP_TAGS(F) (((WRAP_S *)(F)->opt)->tags)
#define WRAP_BOLD(F) (((WRAP_S *)(F)->opt)->bold)
#define WRAP_ULINE(F) (((WRAP_S *)(F)->opt)->uline)
#define WRAP_INVERSE(F) (((WRAP_S *)(F)->opt)->inverse)
#define WRAP_LASTC(F) (((WRAP_S *)(F)->opt)->lineendp)
#define WRAP_EMBED(F) (((WRAP_S *)(F)->opt)->embedded)
#define WRAP_ANCHOR(F) (((WRAP_S *)(F)->opt)->anchor)
#define WRAP_PB_OFF(F) (((WRAP_S *)(F)->opt)->prefbrk)
#define WRAP_PB_LEN(F) (((WRAP_S *)(F)->opt)->prefbrkn)
#define WRAP_ALLWSP(F) (((WRAP_S *)(F)->opt)->allwsp)
#define WRAP_SPC_LEN(F) (((WRAP_S *)(F)->opt)->space_len)
#define WRAP_TRL_SPC(F) (((WRAP_S *)(F)->opt)->trailing_space)
#define WRAP_SPEC(F, C) ((WRAP_S *) (F)->opt)->special[C]
#define WRAP_COLOR(F) (((WRAP_S *)(F)->opt)->color)
/* true when a color pair exists and its foreground string is non-empty */
#define WRAP_COLOR_SET(F) ((WRAP_COLOR(F)) && (WRAP_COLOR(F)->fg[0]))
#define WRAP_SPACES(F) (((WRAP_S *)(F)->opt)->spaces)

/*
 * Append octet C to F's line buffer and add W screen cells to F->f2.
 * When the write pointer has reached the buffer's last element the
 * buffer is first resized (via fs_resize) to twice the current offset
 * and the write/end pointers are recomputed from the new base.
 *
 * This expands to a bare { } block and evaluates F several times, so
 * pass a simple expression for F.
 */
#define WRAP_PUTC(F,C,W) { \
 if((F)->linep == WRAP_LASTC(F)){ \
 size_t offset = (F)->linep - (F)->line; \
 fs_resize((void **) &(F)->line, \
 (2 * offset) * sizeof(char)); \
 (F)->linep = &(F)->line[offset]; \
 WRAP_LASTC(F) = &(F)->line[2*offset-1]; \
 } \
 *(F)->linep++ = (C); \
 (F)->f2 += (W); \
 }

/*
 * Store an octet of an embedded tag: it goes into the line buffer (with
 * zero width) when the line buffer already holds something of non-zero
 * width, otherwise into the buffered-separators store.
 */
#define WRAP_EMBED_PUTC(F,C) { \
 if((F)->f2){ \
 WRAP_PUTC((F), C, 0); \
 } \
 else \
 so_writec(C, WRAP_SPACES(F)); \
 }

/*
 * Forget the remembered color by emptying its foreground string, which
 * makes WRAP_COLOR_SET false.  The color pair itself is kept.
 */
#define WRAP_COLOR_UNSET(F) { \
 if(WRAP_COLOR_SET(F)){ \
 WRAP_COLOR(F)->fg[0] = '\0'; \
 } \
 }

/*
 * wrap_flush_embed flags (the "flags" argument of wrap_flush_s)
 */
#define WFE_NONE 0 /* Nothing special */
#define WFE_CNT_HANDLE 1 /* account for/don't write handles */
9529
9530
/*
 * Helpers of the wrap filter.  The four "unsigned char **" arguments of
 * each are the addresses of the calling filter's ip, eib, op and eob
 * queue pointers, in that order (see the calls in gf_wrap).
 */
int wrap_flush(FILTER_S *, unsigned char **, unsigned char **, unsigned char **, unsigned char **);
int wrap_flush_embed(FILTER_S *, unsigned char **, unsigned char **,
 unsigned char **, unsigned char **);
int wrap_flush_s(FILTER_S *,char *, int, int, unsigned char **, unsigned char **,
 unsigned char **, unsigned char **, int);
int wrap_eol(FILTER_S *, int, unsigned char **, unsigned char **,
 unsigned char **, unsigned char **);
int wrap_bol(FILTER_S *, int, int, unsigned char **,
 unsigned char **, unsigned char **, unsigned char **);
int wrap_quote_insert(FILTER_S *, unsigned char **, unsigned char **,
 unsigned char **, unsigned char **);
9542
9543 /*
9544 * the no longer simple filter, breaks lines at end of white space nearest
9545 * to global "gf_wrap_width" in length
9546 * It also supports margins, indents (inverse indenting, really) and
9547 * flowed text (ala RFC 3676)
9548 *
9549 */
9550 void
gf_wrap(FILTER_S * f,int flg)9551 gf_wrap(FILTER_S *f, int flg)
9552 {
9553 register long i;
9554 GF_INIT(f, f->next);
9555
9556 /*
9557 * f->f1 state
9558 * f->line buffer where next "word" being considered is stored
9559 * f->f2 width in screen cells of f->line stuff
9560 * f->n width in screen cells of the part of this line committed to next
9561 * filter so far
9562 */
9563
9564 if(flg == GF_DATA){
9565 register unsigned char c;
9566 register int state = f->f1;
9567 int width, full_character;
9568
9569 while(GF_GETC(f, c)){
9570
9571 switch(state){
9572 case CCR : /* CRLF or CR in text ? */
9573 state = BOL; /* either way, handle start */
9574
9575 if(WRAP_FLOW(f)){
9576 /* wrapped line? */
9577 if(f->f2 == 0 && WRAP_SPC_LEN(f) && WRAP_TRL_SPC(f)){
9578 /*
9579 * whack trailing space char, but be aware
9580 * of embeds in space buffer. grok them just
9581 * in case they contain a 0x20 value
9582 */
9583 if(WRAP_DELSP(f)){
9584 char *sb, *sbp, *scp = NULL;
9585 int x;
9586
9587 for(sb = sbp = (char *)so_text(WRAP_SPACES(f)); *sbp; sbp++){
9588 switch(*sbp){
9589 case ' ' :
9590 scp = sbp;
9591 break;
9592
9593 case TAG_EMBED :
9594 sbp++;
9595 switch (*sbp++){
9596 case TAG_HANDLE :
9597 x = (int) *sbp++;
9598 if(strlen(sbp) >= x)
9599 sbp += (x - 1);
9600
9601 break;
9602
9603 case TAG_FGCOLOR :
9604 case TAG_BGCOLOR :
9605 if(strlen(sbp) >= RGBLEN)
9606 sbp += (RGBLEN - 1);
9607
9608 break;
9609
9610 default :
9611 break;
9612 }
9613
9614 break;
9615
9616 default :
9617 break;
9618 }
9619 }
9620
9621 /* replace space buf without trailing space char */
9622 if(scp){
9623 STORE_S *ns = so_get(CharStar, NULL, EDIT_ACCESS);
9624
9625 *scp++ = '\0';
9626 WRAP_SPC_LEN(f)--;
9627 WRAP_TRL_SPC(f) = 0;
9628
9629 so_puts(ns, sb);
9630 so_puts(ns, scp);
9631
9632 so_give(&WRAP_SPACES(f));
9633 WRAP_SPACES(f) = ns;
9634 }
9635 }
9636 }
9637 else{ /* fixed line */
9638 WRAP_HARD(f) = 1;
9639 wrap_flush(f, &ip, &eib, &op, &eob);
9640 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9641
9642 /*
9643 * When we get to a real end of line, we don't need to
9644 * remember what the special color was anymore because
9645 * we aren't going to be changing back to it. We unset it
9646 * so that we don't keep resetting the color to normal.
9647 */
9648 WRAP_COLOR_UNSET(f);
9649 }
9650
9651 if(c == '\012'){ /* get c following LF */
9652 break;
9653 }
9654 /* else c is first char of new line, fall thru */
9655 }
9656 else{
9657 wrap_flush(f, &ip, &eib, &op, &eob);
9658 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9659 WRAP_COLOR_UNSET(f); /* see note above */
9660 if(c == '\012'){
9661 break;
9662 }
9663 /* else fall thru to deal with beginning of line */
9664 }
9665
9666 case BOL :
9667 if(WRAP_FLOW(f)){
9668 if(c == '>'){
9669 WRAP_FL_QC(f) = 1; /* init it */
9670 state = FL_QLEV; /* go collect it */
9671 }
9672 else {
9673 /* if EMBEDed, process it and return here */
9674 if(c == (unsigned char) TAG_EMBED){
9675 WRAP_EMBED_PUTC(f, TAG_EMBED);
9676 WRAP_STATE(f) = state;
9677 state = TAG;
9678 continue;
9679 }
9680
9681 /* quote level change implies new paragraph */
9682 if(WRAP_FL_QD(f)){
9683 WRAP_FL_QD(f) = 0;
9684 if(WRAP_HARD(f) == 0){
9685 WRAP_HARD(f) = 1;
9686 wrap_flush(f, &ip, &eib, &op, &eob);
9687 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9688 WRAP_COLOR_UNSET(f); /* see note above */
9689 }
9690 }
9691
9692 if(WRAP_HARD(f)){
9693 wrap_bol(f, 0, 1, &ip, &eib, &op,
9694 &eob); /* write quoting prefix */
9695 WRAP_HARD(f) = 0;
9696 }
9697
9698 switch (c) {
9699 case '\015' : /* a blank line? */
9700 wrap_flush(f, &ip, &eib, &op, &eob);
9701 state = CCR; /* go collect it */
9702 break;
9703
9704 case ' ' : /* space stuffed */
9705 state = FL_STF; /* just eat it */
9706 break;
9707
9708 case '-' : /* possible sig-dash */
9709 WRAP_FL_SIG(f) = 1; /* init state */
9710 state = FL_SIG; /* go collect it */
9711 break;
9712
9713 default :
9714 state = DFL; /* go back to normal */
9715 goto case_dfl; /* handle c like DFL case */
9716 }
9717 }
9718 }
9719 else{
9720 state = DFL;
9721 if(WRAP_COMMA(f) && c == TAB){
9722 wrap_bol(f, 1, 0, &ip, &eib, &op,
9723 &eob); /* convert to normal indent */
9724 break;
9725 }
9726
9727 wrap_bol(f,0,0, &ip, &eib, &op, &eob);
9728 goto case_dfl; /* handle c like DFL case */
9729 }
9730
9731 break;
9732
9733 case FL_QLEV :
9734 if(c == '>'){ /* another level */
9735 WRAP_FL_QC(f)++;
9736 }
9737 else {
9738 /* if EMBEDed, process it and return here */
9739 if(c == (unsigned char) TAG_EMBED){
9740 WRAP_EMBED_PUTC(f, TAG_EMBED);
9741 WRAP_STATE(f) = state;
9742 state = TAG;
9743 continue;
9744 }
9745
9746 /* quote level change signals new paragraph */
9747 if(WRAP_FL_QC(f) != WRAP_FL_QD(f)){
9748 WRAP_FL_QD(f) = WRAP_FL_QC(f);
9749 if(WRAP_HARD(f) == 0){ /* add hard newline */
9750 WRAP_HARD(f) = 1; /* hard newline */
9751 wrap_flush(f, &ip, &eib, &op, &eob);
9752 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9753 WRAP_COLOR_UNSET(f); /* see note above */
9754 }
9755 }
9756
9757 if(WRAP_HARD(f)){
9758 wrap_bol(f,0,1, &ip, &eib, &op, &eob);
9759 WRAP_HARD(f) = 0;
9760 }
9761
9762 switch (c) {
9763 case '\015' : /* a blank line? */
9764 wrap_flush(f, &ip, &eib, &op, &eob);
9765 state = CCR; /* go collect it */
9766 break;
9767
9768 case ' ' : /* space-stuffed! */
9769 state = FL_STF; /* just eat it */
9770 break;
9771
9772 case '-' : /* sig dash? */
9773 WRAP_FL_SIG(f) = 1;
9774 state = FL_SIG;
9775 break;
9776
9777 default : /* something else */
9778 state = DFL;
9779 goto case_dfl; /* handle c like DFL */
9780 }
9781 }
9782
9783 break;
9784
9785 case FL_STF : /* space stuffed */
9786 switch (c) {
9787 case '\015' : /* a blank line? */
9788 wrap_flush(f, &ip, &eib, &op, &eob);
9789 state = CCR; /* go collect it */
9790 break;
9791
9792 case (unsigned char) TAG_EMBED : /* process TAG data */
9793 WRAP_EMBED_PUTC(f, TAG_EMBED);
9794 WRAP_STATE(f) = state; /* and return */
9795 state = TAG;
9796 continue;
9797
9798 case '-' : /* sig dash? */
9799 WRAP_FL_SIG(f) = 1;
9800 WRAP_ALLWSP(f) = 0;
9801 state = FL_SIG;
9802 break;
9803
9804 default : /* something else */
9805 state = DFL;
9806 goto case_dfl; /* handle c like DFL */
9807 }
9808
9809 break;
9810
9811 case FL_SIG : /* sig-dash collector */
9812 switch (WRAP_FL_SIG(f)){ /* possible sig-dash? */
9813 case 1 :
9814 if(c != '-'){ /* not a sigdash */
9815 if((f->n + WRAP_SPC_LEN(f) + 1) > WRAP_COL(f)){
9816 wrap_flush_embed(f, &ip, &eib, &op,
9817 &eob); /* note any embedded*/
9818 wrap_eol(f, 1, &ip, &eib,
9819 &op, &eob); /* plunk down newline */
9820 wrap_bol(f, 1, 1, &ip, &eib,
9821 &op, &eob); /* write any prefix */
9822 }
9823
9824 WRAP_PUTC(f,'-', 1); /* write what we got */
9825
9826 WRAP_FL_SIG(f) = 0;
9827 state = DFL;
9828 goto case_dfl;
9829 }
9830
9831 /* don't put anything yet until we know to wrap or not */
9832 WRAP_FL_SIG(f) = 2;
9833 break;
9834
9835 case 2 :
9836 if(c != ' '){ /* not a sigdash */
9837 WRAP_PUTC(f, '-', 1);
9838 if((f->n + WRAP_SPC_LEN(f) + 2) > WRAP_COL(f)){
9839 wrap_flush_embed(f, &ip, &eib, &op,
9840 &eob); /* note any embedded*/
9841 wrap_eol(f, 1, &ip, &eib,
9842 &op, &eob); /* plunk down newline */
9843 wrap_bol(f, 1, 1, &ip, &eib, &op,
9844 &eob); /* write any prefix */
9845 }
9846
9847 WRAP_PUTC(f,'-', 1); /* write what we got */
9848
9849 WRAP_FL_SIG(f) = 0;
9850 state = DFL;
9851 goto case_dfl;
9852 }
9853
9854 /* don't put anything yet until we know to wrap or not */
9855 WRAP_FL_SIG(f) = 3;
9856 break;
9857
9858 case 3 :
9859 if(c == '\015'){ /* success! */
9860 /* known sigdash, newline if soft nl */
9861 if(WRAP_SPC_LEN(f)){
9862 wrap_flush(f, &ip, &eib, &op, &eob);
9863 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9864 wrap_bol(f, 0, 1, &ip, &eib, &op, &eob);
9865 }
9866 WRAP_PUTC(f,'-',1);
9867 WRAP_PUTC(f,'-',1);
9868 WRAP_PUTC(f,' ',1);
9869
9870 state = CCR;
9871 break;
9872 }
9873 else{
9874 WRAP_FL_SIG(f) = 4; /* possible success */
9875 }
9876
9877 case 4 :
9878 switch(c){
9879 case (unsigned char) TAG_EMBED :
9880 /*
9881 * At this point we're almost 100% sure that we've got
9882 * a sigdash. Putc it (adding newline if previous
9883 * was a soft nl) so we get it the right color
9884 * before we store this new embedded stuff
9885 */
9886 if(WRAP_SPC_LEN(f)){
9887 wrap_flush(f, &ip, &eib, &op, &eob);
9888 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9889 wrap_bol(f, 0, 1, &ip, &eib, &op, &eob);
9890 }
9891 WRAP_PUTC(f,'-',1);
9892 WRAP_PUTC(f,'-',1);
9893 WRAP_PUTC(f,' ',1);
9894
9895 WRAP_FL_SIG(f) = 5;
9896 break;
9897
9898 case '\015' : /* success! */
9899 /*
9900 * We shouldn't get here, but in case we do, we have
9901 * not yet put the sigdash
9902 */
9903 if(WRAP_SPC_LEN(f)){
9904 wrap_flush(f, &ip, &eib, &op, &eob);
9905 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9906 wrap_bol(f, 0, 1, &ip, &eib, &op, &eob);
9907 }
9908 WRAP_PUTC(f,'-',1);
9909 WRAP_PUTC(f,'-',1);
9910 WRAP_PUTC(f,' ',1);
9911
9912 state = CCR;
9913 break;
9914
9915 default : /* that's no sigdash! */
9916 /* write what we got but didn't put yet */
9917 WRAP_PUTC(f,'-', 1);
9918 WRAP_PUTC(f,'-', 1);
9919 WRAP_PUTC(f,' ', 1);
9920
9921 WRAP_FL_SIG(f) = 0;
9922 wrap_flush(f, &ip, &eib, &op, &eob);
9923 WRAP_SPC_LEN(f) = 1;
9924 state = DFL; /* set normal state */
9925 goto case_dfl; /* and go do "c" */
9926 }
9927
9928 break;
9929
9930 case 5 :
9931 WRAP_STATE(f) = FL_SIG; /* come back here */
9932 WRAP_FL_SIG(f) = 6; /* and seek EOL */
9933 WRAP_EMBED_PUTC(f, TAG_EMBED);
9934 state = TAG; /* process embed */
9935 goto case_tag;
9936
9937 case 6 :
9938 /*
9939 * at this point we've already putc the sigdash in case 4
9940 */
9941 switch(c){
9942 case (unsigned char) TAG_EMBED :
9943 WRAP_FL_SIG(f) = 5;
9944 break;
9945
9946 case '\015' : /* success! */
9947 state = CCR;
9948 break;
9949
9950 default : /* that's no sigdash! */
9951 /*
9952 * probably never reached (fake sigdash with embedded
9953 * stuff) but if this did get reached, then we
9954 * might have accidentally disobeyed a soft nl
9955 */
9956 WRAP_FL_SIG(f) = 0;
9957 wrap_flush(f, &ip, &eib, &op, &eob);
9958 WRAP_SPC_LEN(f) = 1;
9959 state = DFL; /* set normal state */
9960 goto case_dfl; /* and go do "c" */
9961 }
9962
9963 break;
9964
9965
9966 default :
9967 dprint((2, "-- gf_wrap: BROKEN FLOW STATE: %d\n",
9968 WRAP_FL_SIG(f)));
9969 WRAP_FL_SIG(f) = 0;
9970 state = DFL; /* set normal state */
9971 goto case_dfl; /* and go process "c" */
9972 }
9973
9974 break;
9975
9976 case_dfl :
9977 case DFL :
9978 /*
9979 * This was just if(WRAP_SPEC(f, c)) before the change to add
9980 * the == 0 test. This isn't quite right, either. We should really
9981 * be looking for special characters in the UCS characters, not
9982 * in the incoming stream of UTF-8. It is not right to
9983 * call this on bytes that are in the middle of a UTF-8 character,
9984 * hence the == 0 test which restricts it to the first byte
9985 * of a character. This isn't right, either, but it's closer.
9986 * Also change the definition of WRAP_SPEC so that isspace only
9987 * matches ascii characters, which will never be in the middle
9988 * of a UTF-8 multi-byte character.
9989 */
9990 if((WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0)) == 0 && WRAP_SPEC(f, c)){
9991 WRAP_SAW_SOFT_HYPHEN(f) = 0;
9992 switch(c){
9993 default :
9994 if(WRAP_QUOTED(f))
9995 break;
9996
9997 if(f->f2){ /* any non-lwsp to flush? */
9998 if(WRAP_COMMA(f)){
9999 /* remember our second best break point */
10000 WRAP_PB_OFF(f) = f->linep - f->line;
10001 WRAP_PB_LEN(f) = f->f2;
10002 break;
10003 }
10004 else
10005 wrap_flush(f, &ip, &eib, &op, &eob);
10006 }
10007
10008 switch(c){ /* remember separator */
10009 case ' ' :
10010 WRAP_SPC_LEN(f)++;
10011 WRAP_TRL_SPC(f) = 1;
10012 so_writec(' ',WRAP_SPACES(f));
10013 break;
10014
10015 case TAB :
10016 {
10017 int i = (int) f->n + WRAP_SPC_LEN(f);
10018
10019 do
10020 WRAP_SPC_LEN(f)++;
10021 while(++i & 0x07);
10022
10023 so_writec(TAB,WRAP_SPACES(f));
10024 WRAP_TRL_SPC(f) = 0;
10025 }
10026
10027 break;
10028
10029 default : /* some control char? */
10030 WRAP_SPC_LEN(f) += 2;
10031 WRAP_TRL_SPC(f) = 0;
10032 break;
10033 }
10034
10035 continue;
10036
10037 case '\"' :
10038 WRAP_QUOTED(f) = !WRAP_QUOTED(f);
10039 break;
10040
10041 case '\015' : /* already has newline? */
10042 state = CCR;
10043 continue;
10044
10045 case '\012' : /* bare LF in text? */
10046 wrap_flush(f, &ip, &eib, &op, &eob); /* they must've */
10047 wrap_eol(f, 0, &ip, &eib, &op, &eob); /* meant */
10048 wrap_bol(f,1,1, &ip, &eib, &op, &eob); /* newline... */
10049 continue;
10050
10051 case (unsigned char) TAG_EMBED :
10052 WRAP_EMBED_PUTC(f, TAG_EMBED);
10053 WRAP_STATE(f) = state;
10054 state = TAG;
10055 continue;
10056
10057 case ',' :
10058 if(!WRAP_QUOTED(f)){
10059 /* handle this special case in general code below */
10060 if(f->n + WRAP_SPC_LEN(f) + f->f2 + 1 > WRAP_MAX_COL(f)
10061 && WRAP_ALLWSP(f) && WRAP_PB_OFF(f))
10062 break;
10063
10064 if(f->n + WRAP_SPC_LEN(f) + f->f2 + 1 > WRAP_COL(f)){
10065 if(WRAP_ALLWSP(f)) /* if anything visible */
10066 wrap_flush(f, &ip, &eib, &op,
10067 &eob); /* ... blat buf'd chars */
10068
10069 wrap_eol(f, 1, &ip, &eib, &op,
10070 &eob); /* plunk down newline */
10071 wrap_bol(f, 1, 1, &ip, &eib, &op,
10072 &eob); /* write any prefix */
10073 }
10074
10075 WRAP_PUTC(f, ',', 1); /* put out comma */
10076 wrap_flush(f, &ip, &eib, &op,
10077 &eob); /* write buf'd chars */
10078 continue;
10079 }
10080
10081 break;
10082 }
10083 }
10084 else if(WRAP_HANDLE_SOFT_HYPHEN(f)
10085 && (WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0)) == 1
10086 && WRAP_UTF8BUF(f, 0) == 0xC2 && c == 0xAD){
10087 /*
10088 * This is a soft hyphen. If there is enough space for
10089 * a real hyphen to fit on the line here then we can
10090 * flush everything up to before the soft hyphen,
10091 * and simply remember that we saw a soft hyphen.
10092 * If it turns out that we can't fit the next piece in
10093 * then wrap_eol will append a real hyphen to the line.
10094 * If we can fit another piece in it will be because we've
10095 * reached the next break point. At that point we'll flush
10096 * everything but won't include the unneeded hyphen. We erase
10097 * the fact that we saw this soft hyphen because it have
10098 * become irrelevant.
10099 *
10100 * If the hyphen is the character that puts us over the edge
10101 * we go through the else case.
10102 */
10103
10104 /* erase this soft hyphen character from buffer */
10105 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10106
10107 if((f->n + WRAP_SPC_LEN(f) + f->f2 + 1) <= WRAP_COL(f)){
10108 if(f->f2) /* any non-lwsp to flush? */
10109 wrap_flush(f, &ip, &eib, &op, &eob);
10110
10111 /* remember that we saw the soft hyphen */
10112 WRAP_SAW_SOFT_HYPHEN(f) = 1;
10113 }
10114 else{
10115 /*
10116 * Everything up to the hyphen fits, otherwise it
10117 * would have already been flushed the last time
10118 * through the loop. But the hyphen won't fit. So
10119 * we need to go back to the last line break and
10120 * break there instead. Then start a new line with
10121 * the buffered up characters and the soft hyphen.
10122 */
10123 wrap_flush_embed(f, &ip, &eib, &op, &eob);
10124 wrap_eol(f, 1, &ip, &eib, &op,
10125 &eob); /* plunk down newline */
10126 wrap_bol(f,1,1, &ip, &eib, &op,
10127 &eob); /* write any prefix */
10128
10129 /*
10130 * Now we're in the same situation as we would have
10131 * been above except we're on a new line. Try to
10132 * flush out the characters seen up to the hyphen.
10133 */
10134 if((f->n + WRAP_SPC_LEN(f) + f->f2 + 1) <= WRAP_COL(f)){
10135 if(f->f2) /* any non-lwsp to flush? */
10136 wrap_flush(f, &ip, &eib, &op, &eob);
10137
10138 /* remember that we saw the soft hyphen */
10139 WRAP_SAW_SOFT_HYPHEN(f) = 1;
10140 }
10141 else
10142 WRAP_SAW_SOFT_HYPHEN(f) = 0;
10143 }
10144
10145 continue;
10146 }
10147
10148 full_character = 0;
10149
10150 {
10151 unsigned char *inputp;
10152 unsigned long remaining_octets;
10153 UCS ucs;
10154
10155 if(WRAP_UTF8BUFP(f) < &WRAP_UTF8BUF(f, 0) + 6){ /* always true */
10156
10157 *WRAP_UTF8BUFP(f)++ = c;
10158 remaining_octets = WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0);
10159 if(remaining_octets == 1 && isascii(WRAP_UTF8BUF(f, 0))){
10160 full_character++;
10161 if(c == TAB){
10162 int i = (int) f->n;
10163
10164 while(i & 0x07)
10165 i++;
10166
10167 width = i - f->n;
10168 }
10169 else if(c < 0x80 && iscntrl((unsigned char) c))
10170 width = 2;
10171 else
10172 width = 1;
10173 }
10174 else{
10175 inputp = &WRAP_UTF8BUF(f, 0);
10176 ucs = (UCS) utf8_get(&inputp, &remaining_octets);
10177 switch(ucs){
10178 case U8G_ENDSTRG: /* incomplete character, wait */
10179 case U8G_ENDSTRI: /* incomplete character, wait */
10180 width = 0;
10181 break;
10182
10183 default:
10184 if(ucs & U8G_ERROR || ucs == UBOGON){
10185 /*
10186 * None of these cases is supposed to happen. If it
10187 * does happen then the input stream isn't UTF-8
10188 * so something is wrong. Writechar will treat
10189 * each octet in the input buffer as a separate
10190 * error character and print a '?' for each,
10191 * so the width will be the number of octets.
10192 */
10193 width = WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0);
10194 full_character++;
10195 }
10196 else{
10197 /* got a character */
10198 width = wcellwidth(ucs);
10199 full_character++;
10200
10201 if(width < 0){
10202 /*
10203 * This happens when we have a UTF-8 character that
10204 * we aren't able to print in our locale. For example,
10205 * if the locale is setup with the terminal
10206 * expecting ISO-8859-1 characters then there are
10207 * lots of UTF-8 characters that can't be printed.
10208 * Print a '?' instead.
10209 */
10210 width = 1;
10211 }
10212 }
10213
10214 break;
10215 }
10216 }
10217 }
10218 else{
10219 /*
10220 * This cannot happen because an error would have
10221 * happened at least by character #6. So if we get
10222 * here there is a bug in utf8_get().
10223 */
10224 if(WRAP_UTF8BUFP(f) == &WRAP_UTF8BUF(f, 0) + 6){
10225 *WRAP_UTF8BUFP(f)++ = c;
10226 }
10227
10228 /*
10229 * We could possibly do some more sophisticated
10230 * resynchronization here, but we aren't doing
10231 * anything in Writechar so it wouldn't match up
10232 * with that anyway. Just figure each character will
10233 * end up being printed as a ? character.
10234 */
10235 width = WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0);
10236 full_character++;
10237 }
10238 }
10239
10240 if(WRAP_ALLWSP(f)){
10241 /*
10242 * Nothing is visible yet but the first word may be too long
10243 * all by itself. We need to break early.
10244 */
10245 if(f->n + WRAP_SPC_LEN(f) + f->f2 + width > WRAP_MAX_COL(f)){
10246 /*
10247 * A little reaching behind the curtain here.
10248 * if there's at least a preferable break point, use
10249 * it and stuff what's left back into the wrap buffer.
10250 * The "nwsp" latch is used to skip leading whitespace
10251 * The second half of the test prevents us from wrapping
10252 * at the preferred break point in the case that it
10253 * is so early in the line that it doesn't help.
10254 * That is, the width of the indent is even more than
10255 * the width of the first part before the preferred
10256 * break point. An example would be breaking after
10257 * "To:" when the indent is 4 which is > 3.
10258 */
10259 if(WRAP_PB_OFF(f) && WRAP_PB_LEN(f) >= WRAP_INDENT(f)){
10260 char *p1 = f->line + WRAP_PB_OFF(f);
10261 char *p2 = f->linep;
10262 char c2;
10263 int nwsp = 0, left_after_wrap;
10264
10265 left_after_wrap = f->f2 - WRAP_PB_LEN(f);
10266
10267 f->f2 = WRAP_PB_LEN(f);
10268 f->linep = p1;
10269
10270 wrap_flush(f, &ip, &eib, &op, &eob); /* flush shortened buf */
10271
10272 /* put back rest of characters */
10273 while(p1 < p2){
10274 c2 = *p1++;
10275 if(!(c2 == ' ' || c2 == '\t') || nwsp){
10276 WRAP_PUTC(f, c2, 0);
10277 nwsp = 1;
10278 }
10279 else
10280 left_after_wrap--; /* wrong if a tab! */
10281 }
10282
10283 f->f2 = MAX(left_after_wrap, 0);
10284
10285 wrap_eol(f, 1, &ip, &eib, &op,
10286 &eob); /* plunk down newline */
10287 wrap_bol(f,1,1, &ip, &eib, &op,
10288 &eob); /* write any prefix */
10289
10290 /*
10291 * What's this for?
10292 * If we do the less preferable break point at
10293 * the space we don't want to lose the fact that
10294 * we might be able to break at this comma for
10295 * the next one.
10296 */
10297 if(full_character && c == ','){
10298 WRAP_PUTC(f, c, 1);
10299 wrap_flush(f, &ip, &eib, &op, &eob);
10300 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10301 }
10302 }
10303 else{
10304 wrap_flush(f, &ip, &eib, &op, &eob);
10305
10306 wrap_eol(f, 1, &ip, &eib, &op,
10307 &eob); /* plunk down newline */
10308 wrap_bol(f,1,1, &ip, &eib, &op,
10309 &eob); /* write any prefix */
10310 }
10311 }
10312 }
10313 else if((f->n + WRAP_SPC_LEN(f) + f->f2 + width) > WRAP_COL(f)){
10314 wrap_flush_embed(f, &ip, &eib, &op, &eob);
10315 wrap_eol(f, 1, &ip, &eib, &op,
10316 &eob); /* plunk down newline */
10317 wrap_bol(f,1,1, &ip, &eib, &op,
10318 &eob); /* write any prefix */
10319 }
10320
10321 /*
10322 * Commit entire multibyte UTF-8 character at once
10323 * instead of writing partial characters into the
10324 * buffer.
10325 */
10326 if(full_character){
10327 unsigned char *q;
10328
10329 for(q = &WRAP_UTF8BUF(f, 0); q < WRAP_UTF8BUFP(f); q++){
10330 WRAP_PUTC(f, *q, width);
10331 width = 0;
10332 }
10333
10334 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10335 }
10336
10337 break;
10338
10339 case_tag :
10340 case TAG :
10341 WRAP_EMBED_PUTC(f, c);
10342 switch(c){
10343 case TAG_HANDLE :
10344 WRAP_EMBED(f) = -1;
10345 state = HANDLE;
10346 break;
10347
10348 case TAG_FGCOLOR :
10349 case TAG_BGCOLOR :
10350 WRAP_EMBED(f) = RGBLEN;
10351 state = HDATA;
10352 break;
10353
10354 default :
10355 state = WRAP_STATE(f);
10356 break;
10357 }
10358
10359 break;
10360
10361 case HANDLE :
10362 WRAP_EMBED_PUTC(f, c);
10363 WRAP_EMBED(f) = c;
10364 state = HDATA;
10365 break;
10366
10367 case HDATA :
10368 if(f->f2){
10369 WRAP_PUTC(f, c, 0);
10370 }
10371 else
10372 so_writec(c, WRAP_SPACES(f));
10373
10374 if(!(WRAP_EMBED(f) -= 1)){
10375 state = WRAP_STATE(f);
10376 }
10377
10378 break;
10379 }
10380 }
10381
10382 f->f1 = state;
10383 GF_END(f, f->next);
10384 }
10385 else if(flg == GF_EOD){
10386 wrap_flush(f, &ip, &eib, &op, &eob);
10387 if(WRAP_COLOR(f))
10388 free_color_pair(&WRAP_COLOR(f));
10389
10390 fs_give((void **) &f->line); /* free temp line buffer */
10391 so_give(&WRAP_SPACES(f));
10392 fs_give((void **) &f->opt); /* free wrap widths struct */
10393 (void) GF_FLUSH(f->next);
10394 (*f->next->f)(f->next, GF_EOD);
10395 }
10396 else if(flg == GF_RESET){
10397 dprint((9, "-- gf_reset wrap\n"));
10398 f->f1 = BOL;
10399 f->n = 0L; /* displayed length of line so far */
10400 f->f2 = 0; /* displayed length of buffered chars */
10401 WRAP_HARD(f) = 1; /* starting at beginning of line */
10402 if(! (WRAP_S *) f->opt)
10403 f->opt = gf_wrap_filter_opt(75, 80, NULL, 0, 0);
10404
10405 while(WRAP_INDENT(f) >= WRAP_MAX_COL(f))
10406 WRAP_INDENT(f) /= 2;
10407
10408 f->line = (char *) fs_get(WRAP_MAX_COL(f) * sizeof(char));
10409 f->linep = f->line;
10410 WRAP_LASTC(f) = &f->line[WRAP_MAX_COL(f) - 1];
10411
10412 for(i = 0; i < 256; i++)
10413 ((WRAP_S *) f->opt)->special[i] = ((i == '\"' && WRAP_COMMA(f))
10414 || i == '\015'
10415 || i == '\012'
10416 || (i == (unsigned char) TAG_EMBED
10417 && WRAP_TAGS(f))
10418 || (i == ',' && WRAP_COMMA(f)
10419 && !WRAP_QUOTED(f))
10420 || ASCII_ISSPACE(i));
10421 WRAP_SPACES(f) = so_get(CharStar, NULL, EDIT_ACCESS);
10422 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10423 }
10424 }
10425
10426 int
wrap_flush(FILTER_S * f,unsigned char ** ipp,unsigned char ** eibp,unsigned char ** opp,unsigned char ** eobp)10427 wrap_flush(FILTER_S *f, unsigned char **ipp, unsigned char **eibp,
10428 unsigned char **opp, unsigned char **eobp)
10429 {
10430 register char *s;
10431 register int n;
10432
10433 s = (char *)so_text(WRAP_SPACES(f));
10434 n = so_tell(WRAP_SPACES(f));
10435 so_seek(WRAP_SPACES(f), 0L, 0);
10436 wrap_flush_s(f, s, n, WRAP_SPC_LEN(f), ipp, eibp, opp, eobp, WFE_NONE);
10437 so_truncate(WRAP_SPACES(f), 0L);
10438 WRAP_SPC_LEN(f) = 0;
10439 WRAP_TRL_SPC(f) = 0;
10440 s = f->line;
10441 n = f->linep - f->line;
10442 wrap_flush_s(f, s, n, f->f2, ipp, eibp, opp, eobp, WFE_NONE);
10443 f->f2 = 0;
10444 f->linep = f->line;
10445 WRAP_PB_OFF(f) = 0;
10446 WRAP_PB_LEN(f) = 0;
10447
10448 return 0;
10449 }
10450
10451 int
wrap_flush_embed(FILTER_S * f,unsigned char ** ipp,unsigned char ** eibp,unsigned char ** opp,unsigned char ** eobp)10452 wrap_flush_embed(FILTER_S *f, unsigned char **ipp, unsigned char **eibp, unsigned char **opp, unsigned char **eobp)
10453 {
10454 register char *s;
10455 register int n;
10456 s = (char *)so_text(WRAP_SPACES(f));
10457 n = so_tell(WRAP_SPACES(f));
10458 so_seek(WRAP_SPACES(f), 0L, 0);
10459 wrap_flush_s(f, s, n, 0, ipp, eibp, opp, eobp, WFE_CNT_HANDLE);
10460 so_truncate(WRAP_SPACES(f), 0L);
10461 WRAP_SPC_LEN(f) = 0;
10462 WRAP_TRL_SPC(f) = 0;
10463
10464 return 0;
10465 }
10466
10467 int
wrap_flush_s(FILTER_S * f,char * s,int n,int w,unsigned char ** ipp,unsigned char ** eibp,unsigned char ** opp,unsigned char ** eobp,int flags)10468 wrap_flush_s(FILTER_S *f, char *s, int n, int w, unsigned char **ipp,
10469 unsigned char **eibp, unsigned char **opp, unsigned char **eobp, int flags)
10470 {
10471 f->n += w;
10472
10473 for(; n > 0; n--,s++){
10474 if(*s == TAG_EMBED){
10475 if(n-- > 0){
10476 switch(*++s){
10477 case TAG_BOLDON :
10478 GF_PUTC_GLO(f->next,TAG_EMBED);
10479 GF_PUTC_GLO(f->next,TAG_BOLDON);
10480 WRAP_BOLD(f) = 1;
10481 break;
10482 case TAG_BOLDOFF :
10483 GF_PUTC_GLO(f->next,TAG_EMBED);
10484 GF_PUTC_GLO(f->next,TAG_BOLDOFF);
10485 WRAP_BOLD(f) = 0;
10486 break;
10487 case TAG_ULINEON :
10488 GF_PUTC_GLO(f->next,TAG_EMBED);
10489 GF_PUTC_GLO(f->next,TAG_ULINEON);
10490 WRAP_ULINE(f) = 1;
10491 break;
10492 case TAG_ULINEOFF :
10493 GF_PUTC_GLO(f->next,TAG_EMBED);
10494 GF_PUTC_GLO(f->next,TAG_ULINEOFF);
10495 WRAP_ULINE(f) = 0;
10496 break;
10497 case TAG_INVOFF :
10498 GF_PUTC_GLO(f->next,TAG_EMBED);
10499 GF_PUTC_GLO(f->next,TAG_INVOFF);
10500 WRAP_ANCHOR(f) = 0;
10501 break;
10502 case TAG_HANDLE :
10503 if((flags & WFE_CNT_HANDLE) == 0)
10504 GF_PUTC_GLO(f->next,TAG_EMBED);
10505
10506 if(n-- > 0){
10507 int i = *++s;
10508
10509 if((flags & WFE_CNT_HANDLE) == 0)
10510 GF_PUTC_GLO(f->next, TAG_HANDLE);
10511
10512 if(i <= n){
10513 n -= i;
10514
10515 if((flags & WFE_CNT_HANDLE) == 0)
10516 GF_PUTC_GLO(f->next, i);
10517
10518 WRAP_ANCHOR(f) = 0;
10519 while(i-- > 0){
10520 WRAP_ANCHOR(f) = (WRAP_ANCHOR(f) * 10) + (*++s-'0');
10521
10522 if((flags & WFE_CNT_HANDLE) == 0)
10523 GF_PUTC_GLO(f->next,*s);
10524 }
10525
10526 }
10527 }
10528 break;
10529 case TAG_FGCOLOR :
10530 if(pico_usingcolor() && n >= RGBLEN){
10531 int i;
10532 GF_PUTC_GLO(f->next,TAG_EMBED);
10533 GF_PUTC_GLO(f->next,TAG_FGCOLOR);
10534 if(!WRAP_COLOR(f))
10535 WRAP_COLOR(f)=new_color_pair(NULL,NULL);
10536 strncpy(WRAP_COLOR(f)->fg, s+1, RGBLEN);
10537 WRAP_COLOR(f)->fg[RGBLEN]='\0';
10538 i = RGBLEN;
10539 n -= i;
10540 while(i-- > 0)
10541 GF_PUTC_GLO(f->next,
10542 (*++s) & 0xff);
10543 }
10544 break;
10545 case TAG_BGCOLOR :
10546 if(pico_usingcolor() && n >= RGBLEN){
10547 int i;
10548 GF_PUTC_GLO(f->next,TAG_EMBED);
10549 GF_PUTC_GLO(f->next,TAG_BGCOLOR);
10550 if(!WRAP_COLOR(f))
10551 WRAP_COLOR(f)=new_color_pair(NULL,NULL);
10552 strncpy(WRAP_COLOR(f)->bg, s+1, RGBLEN);
10553 WRAP_COLOR(f)->bg[RGBLEN]='\0';
10554 i = RGBLEN;
10555 n -= i;
10556 while(i-- > 0)
10557 GF_PUTC_GLO(f->next,
10558 (*++s) & 0xff);
10559 }
10560 break;
10561 default :
10562 break;
10563 }
10564 }
10565 }
10566 else if(w){
10567
10568 if(f->n <= WRAP_MAX_COL(f)){
10569 GF_PUTC_GLO(f->next, (*s) & 0xff);
10570 }
10571 else{
10572 dprint((2, "-- gf_wrap: OVERRUN: %c\n", (*s) & 0xff));
10573 }
10574
10575 WRAP_ALLWSP(f) = 0;
10576 }
10577 }
10578
10579 return 0;
10580 }
10581
10582 int
wrap_eol(FILTER_S * f,int c,unsigned char ** ipp,unsigned char ** eibp,unsigned char ** opp,unsigned char ** eobp)10583 wrap_eol(FILTER_S *f, int c, unsigned char **ipp, unsigned char **eibp,
10584 unsigned char **opp, unsigned char **eobp)
10585 {
10586 if(WRAP_SAW_SOFT_HYPHEN(f)){
10587 WRAP_SAW_SOFT_HYPHEN(f) = 0;
10588 GF_PUTC_GLO(f->next, '-'); /* real hyphen */
10589 }
10590
10591 if(c && WRAP_LV_FLD(f))
10592 GF_PUTC_GLO(f->next, ' ');
10593
10594 if(WRAP_BOLD(f)){
10595 GF_PUTC_GLO(f->next, TAG_EMBED);
10596 GF_PUTC_GLO(f->next, TAG_BOLDOFF);
10597 }
10598
10599 if(WRAP_ULINE(f)){
10600 GF_PUTC_GLO(f->next, TAG_EMBED);
10601 GF_PUTC_GLO(f->next, TAG_ULINEOFF);
10602 }
10603
10604 if(WRAP_INVERSE(f) || WRAP_ANCHOR(f)){
10605 GF_PUTC_GLO(f->next, TAG_EMBED);
10606 GF_PUTC_GLO(f->next, TAG_INVOFF);
10607 }
10608
10609 if(WRAP_COLOR_SET(f)){
10610 char *p;
10611 char cb[RGBLEN+1];
10612 GF_PUTC_GLO(f->next, TAG_EMBED);
10613 GF_PUTC_GLO(f->next, TAG_FGCOLOR);
10614 strncpy(cb, color_to_asciirgb(ps_global->VAR_NORM_FORE_COLOR), sizeof(cb));
10615 cb[sizeof(cb)-1] = '\0';
10616 p = cb;
10617 for(; *p; p++)
10618 GF_PUTC_GLO(f->next, *p);
10619 GF_PUTC_GLO(f->next, TAG_EMBED);
10620 GF_PUTC_GLO(f->next, TAG_BGCOLOR);
10621 strncpy(cb, color_to_asciirgb(ps_global->VAR_NORM_BACK_COLOR), sizeof(cb));
10622 cb[sizeof(cb)-1] = '\0';
10623 p = cb;
10624 for(; *p; p++)
10625 GF_PUTC_GLO(f->next, *p);
10626 }
10627
10628 GF_PUTC_GLO(f->next, '\015');
10629 GF_PUTC_GLO(f->next, '\012');
10630 f->n = 0L;
10631 so_truncate(WRAP_SPACES(f), 0L);
10632 WRAP_SPC_LEN(f) = 0;
10633 WRAP_TRL_SPC(f) = 0;
10634
10635 return 0;
10636 }
10637
10638 int
wrap_bol(FILTER_S * f,int ivar,int q,unsigned char ** ipp,unsigned char ** eibp,unsigned char ** opp,unsigned char ** eobp)10639 wrap_bol(FILTER_S *f, int ivar, int q, unsigned char **ipp, unsigned char **eibp,
10640 unsigned char **opp, unsigned char **eobp)
10641 {
10642 int n = WRAP_MARG_L(f) + (ivar ? WRAP_INDENT(f) : 0);
10643
10644 if(WRAP_HDR_CLR(f)){
10645 char *p;
10646 char cbuf[RGBLEN+1];
10647 int k;
10648
10649 if((k = WRAP_MARG_L(f)) > 0)
10650 while(k-- > 0){
10651 n--;
10652 f->n++;
10653 GF_PUTC_GLO(f->next, ' ');
10654 }
10655
10656 GF_PUTC_GLO(f->next, TAG_EMBED);
10657 GF_PUTC_GLO(f->next, TAG_FGCOLOR);
10658 strncpy(cbuf,
10659 color_to_asciirgb(ps_global->VAR_HEADER_GENERAL_FORE_COLOR),
10660 sizeof(cbuf));
10661 cbuf[sizeof(cbuf)-1] = '\0';
10662 p = cbuf;
10663 for(; *p; p++)
10664 GF_PUTC_GLO(f->next, *p);
10665 GF_PUTC_GLO(f->next, TAG_EMBED);
10666 GF_PUTC_GLO(f->next, TAG_BGCOLOR);
10667 strncpy(cbuf,
10668 color_to_asciirgb(ps_global->VAR_HEADER_GENERAL_BACK_COLOR),
10669 sizeof(cbuf));
10670 cbuf[sizeof(cbuf)-1] = '\0';
10671 p = cbuf;
10672 for(; *p; p++)
10673 GF_PUTC_GLO(f->next, *p);
10674 }
10675
10676 while(n-- > 0){
10677 f->n++;
10678 GF_PUTC_GLO(f->next, ' ');
10679 }
10680
10681 WRAP_ALLWSP(f) = 1;
10682
10683 if(q)
10684 wrap_quote_insert(f, ipp, eibp, opp, eobp);
10685
10686 if(WRAP_BOLD(f)){
10687 GF_PUTC_GLO(f->next, TAG_EMBED);
10688 GF_PUTC_GLO(f->next, TAG_BOLDON);
10689 }
10690 if(WRAP_ULINE(f)){
10691 GF_PUTC_GLO(f->next, TAG_EMBED);
10692 GF_PUTC_GLO(f->next, TAG_ULINEON);
10693 }
10694 if(WRAP_INVERSE(f)){
10695 GF_PUTC_GLO(f->next, TAG_EMBED);
10696 GF_PUTC_GLO(f->next, TAG_INVON);
10697 }
10698 if(WRAP_COLOR_SET(f)){
10699 char *p;
10700 if(WRAP_COLOR(f)->fg[0]){
10701 char cb[RGBLEN+1];
10702 GF_PUTC_GLO(f->next, TAG_EMBED);
10703 GF_PUTC_GLO(f->next, TAG_FGCOLOR);
10704 strncpy(cb, color_to_asciirgb(WRAP_COLOR(f)->fg), sizeof(cb));
10705 cb[sizeof(cb)-1] = '\0';
10706 p = cb;
10707 for(; *p; p++)
10708 GF_PUTC_GLO(f->next, *p);
10709 }
10710 if(WRAP_COLOR(f)->bg[0]){
10711 char cb[RGBLEN+1];
10712 GF_PUTC_GLO(f->next, TAG_EMBED);
10713 GF_PUTC_GLO(f->next, TAG_BGCOLOR);
10714 strncpy(cb, color_to_asciirgb(WRAP_COLOR(f)->bg), sizeof(cb));
10715 cb[sizeof(cb)-1] = '\0';
10716 p = cb;
10717 for(; *p; p++)
10718 GF_PUTC_GLO(f->next, *p);
10719 }
10720 }
10721 if(WRAP_ANCHOR(f)){
10722 char buf[64]; int i;
10723 GF_PUTC_GLO(f->next, TAG_EMBED);
10724 GF_PUTC_GLO(f->next, TAG_HANDLE);
10725 snprintf(buf, sizeof(buf), "%d", WRAP_ANCHOR(f));
10726 GF_PUTC_GLO(f->next, (int) strlen(buf));
10727 for(i = 0; buf[i]; i++)
10728 GF_PUTC_GLO(f->next, buf[i]);
10729 }
10730
10731 return 0;
10732 }
10733
10734 int
wrap_quote_insert(FILTER_S * f,unsigned char ** ipp,unsigned char ** eibp,unsigned char ** opp,unsigned char ** eobp)10735 wrap_quote_insert(FILTER_S *f, unsigned char **ipp, unsigned char **eibp,
10736 unsigned char **opp, unsigned char **eobp)
10737 {
10738 int j, i;
10739 COLOR_PAIR *col = NULL;
10740 char *prefix = NULL, *last_prefix = NULL;
10741
10742 if(ps_global->VAR_QUOTE_REPLACE_STRING){
10743 get_pair(ps_global->VAR_QUOTE_REPLACE_STRING, &prefix, &last_prefix, 0, 0);
10744 if(!prefix && last_prefix){
10745 prefix = last_prefix;
10746 last_prefix = NULL;
10747 }
10748 }
10749
10750 for(j = 0; j < WRAP_FL_QD(f); j++){
10751 if(WRAP_USE_CLR(f)){
10752 if((j % 3) == 0
10753 && ps_global->VAR_QUOTE1_FORE_COLOR
10754 && ps_global->VAR_QUOTE1_BACK_COLOR
10755 && (col = new_color_pair(ps_global->VAR_QUOTE1_FORE_COLOR,
10756 ps_global->VAR_QUOTE1_BACK_COLOR))
10757 && pico_is_good_colorpair(col)){
10758 GF_COLOR_PUTC(f, col);
10759 }
10760 else if((j % 3) == 1
10761 && ps_global->VAR_QUOTE2_FORE_COLOR
10762 && ps_global->VAR_QUOTE2_BACK_COLOR
10763 && (col = new_color_pair(ps_global->VAR_QUOTE2_FORE_COLOR,
10764 ps_global->VAR_QUOTE2_BACK_COLOR))
10765 && pico_is_good_colorpair(col)){
10766 GF_COLOR_PUTC(f, col);
10767 }
10768 else if((j % 3) == 2
10769 && ps_global->VAR_QUOTE3_FORE_COLOR
10770 && ps_global->VAR_QUOTE3_BACK_COLOR
10771 && (col = new_color_pair(ps_global->VAR_QUOTE3_FORE_COLOR,
10772 ps_global->VAR_QUOTE3_BACK_COLOR))
10773 && pico_is_good_colorpair(col)){
10774 GF_COLOR_PUTC(f, col);
10775 }
10776 if(col){
10777 free_color_pair(&col);
10778 col = NULL;
10779 }
10780 }
10781
10782 if(!WRAP_LV_FLD(f)){
10783 if(!WRAP_FOR_CMPS(f) && ps_global->VAR_QUOTE_REPLACE_STRING && prefix){
10784 for(i = 0; prefix[i]; i++)
10785 GF_PUTC_GLO(f->next, prefix[i]);
10786 f->n += utf8_width(prefix);
10787 }
10788 else if(ps_global->VAR_REPLY_STRING
10789 && (!strcmp(ps_global->VAR_REPLY_STRING, ">")
10790 || !strcmp(ps_global->VAR_REPLY_STRING, "\">\""))){
10791 GF_PUTC_GLO(f->next, '>');
10792 f->n += 1;
10793 }
10794 else{
10795 GF_PUTC_GLO(f->next, '>');
10796 GF_PUTC_GLO(f->next, ' ');
10797 f->n += 2;
10798 }
10799 }
10800 else{
10801 GF_PUTC_GLO(f->next, '>');
10802 f->n += 1;
10803 }
10804 }
10805 if(j && WRAP_LV_FLD(f)){
10806 GF_PUTC_GLO(f->next, ' ');
10807 f->n++;
10808 }
10809 else if(j && last_prefix){
10810 for(i = 0; last_prefix[i]; i++)
10811 GF_PUTC_GLO(f->next, last_prefix[i]);
10812 f->n += utf8_width(last_prefix);
10813 }
10814
10815 if(prefix)
10816 fs_give((void **)&prefix);
10817 if(last_prefix)
10818 fs_give((void **)&last_prefix);
10819
10820 return 0;
10821 }
10822
10823
10824 /*
10825 * function called from the outside to set
10826 * wrap filter's width option
10827 */
10828 void *
gf_wrap_filter_opt(int width,int width_max,int * margin,int indent,int flags)10829 gf_wrap_filter_opt(int width, int width_max, int *margin, int indent, int flags)
10830 {
10831 WRAP_S *wrap;
10832
10833 /* NOTE: variables MUST be sanity checked before they get here */
10834 wrap = (WRAP_S *) fs_get(sizeof(WRAP_S));
10835 memset(wrap, 0, sizeof(WRAP_S));
10836 wrap->wrap_col = width;
10837 wrap->wrap_max = width_max;
10838 wrap->indent = indent;
10839 wrap->margin_l = (margin) ? margin[0] : 0;
10840 wrap->margin_r = (margin) ? margin[1] : 0;
10841 wrap->tags = (GFW_HANDLES & flags) == GFW_HANDLES;
10842 wrap->on_comma = (GFW_ONCOMMA & flags) == GFW_ONCOMMA;
10843 wrap->flowed = (GFW_FLOWED & flags) == GFW_FLOWED;
10844 wrap->leave_flowed = (GFW_FLOW_RESULT & flags) == GFW_FLOW_RESULT;
10845 wrap->delsp = (GFW_DELSP & flags) == GFW_DELSP;
10846 wrap->use_color = (GFW_USECOLOR & flags) == GFW_USECOLOR;
10847 wrap->hdr_color = (GFW_HDRCOLOR & flags) == GFW_HDRCOLOR;
10848 wrap->for_compose = (GFW_FORCOMPOSE & flags) == GFW_FORCOMPOSE;
10849 wrap->handle_soft_hyphen = (GFW_SOFTHYPHEN & flags) == GFW_SOFTHYPHEN;
10850
10851 return((void *) wrap);
10852 }
10853
10854
10855 void *
gf_url_hilite_opt(URL_HILITE_S * uh,HANDLE_S ** handlesp,int flags)10856 gf_url_hilite_opt(URL_HILITE_S *uh, HANDLE_S **handlesp, int flags)
10857 {
10858 if(uh){
10859 memset(uh, 0, sizeof(URL_HILITE_S));
10860 uh->handlesp = handlesp;
10861 uh->hdr_color = (URH_HDRCOLOR & flags) == URH_HDRCOLOR;
10862 }
10863
10864 return((void *) uh);
10865 }
10866
10867
/*
 * Accessors for the preflow filter's private state.  F is a FILTER_S *
 * whose opt member points at a PREFLOW_S (allocated by gf_preflow on
 * GF_RESET).
 */
#define PF_QD(F)  (((PREFLOW_S *)(F)->opt)->quote_depth)
#define PF_QC(F)  (((PREFLOW_S *)(F)->opt)->quote_count)
#define PF_SIG(F) (((PREFLOW_S *)(F)->opt)->sig)

/* per-filter state kept by gf_preflow between calls */
typedef struct preflow_s {
    int  quote_depth,   /* number of '>' counted on the previous line */
         quote_count,   /* number of '>' counted so far on the current line */
         sig;           /* progress matching "-- " CR: 0 none, 1 '-', 2 '--', 3 '-- ', 4 '-- ' then CR */
} PREFLOW_S;
10877
/*
 * This would normally be handled in gf_wrap. If there is a possibility
 * that a url we want to recognize is cut in half by a soft newline we
 * want to fix that up by putting the halves back together. We do that
 * by deleting the soft newline and putting it all in one line. It will
 * still get wrapped later in gf_wrap. It isn't pretty with all the
 * goto's, but whatta ya gonna do?
 *
 * How it works: a line ending is never written when it is seen.  It is
 * remembered in "pending" (1 = CRLF, 2 = space followed by CRLF, i.e. a
 * soft newline) and the decision is made in the BOL state once the
 * leading '>' characters, an optional stuffed space and a possible
 * signature indicator at the start of the next line have been examined.
 * A soft newline is deleted, together with the next line's quote marks,
 * when that line has the same quote count as the previous one and is not
 * a complete signature line; otherwise a plain CRLF is written.
 *
 * The state (f->f1) and pending (f->f2) values persist across GF_DATA
 * calls; quote depth, quote count and signature progress live in the
 * PREFLOW_S hung off f->opt, allocated on GF_RESET and freed on GF_EOD.
 *
 * NOTE(review): nothing is written for a line ending or a space that is
 * still held back when GF_EOD arrives -- confirm that is intended.
 */
void
gf_preflow(FILTER_S *f, int flg)
{
    GF_INIT(f, f->next);

    if(flg == GF_DATA){
        register unsigned char c;
        register int state = f->f1;
        register int pending = f->f2;

        while(GF_GETC(f, c)){
            switch(state){
              case DFL:                 /* ordinary text in mid-line */
default_case:
                switch(c){
                  case ' ':             /* hold the space, it may precede CRLF */
                    state = WSPACE;
                    break;

                  case '\015':
                    state = CCR;
                    break;

                  default:
                    GF_PUTC(f->next, c);
                    break;
                }

                break;

              case CCR:                 /* saw CR */
                switch(c){
                  case '\012':          /* hard newline, decide at BOL */
                    pending = 1;
                    state = BOL;
                    break;

                  default:
                    /* NOTE(review): a LF, not the CR that was seen, is written here */
                    GF_PUTC(f->next, '\012');
                    state = DFL;
                    goto default_case;
                    break;
                }

                break;

              case WSPACE:              /* saw a space */
                switch(c){
                  case '\015':
                    state = SPACECR;
                    break;

                  default:              /* plain space, release it */
                    GF_PUTC(f->next, ' ');
                    state = DFL;
                    goto default_case;
                    break;
                }

                break;

              case SPACECR:             /* saw space, CR */
                switch(c){
                  case '\012':          /* soft newline, decide at BOL */
                    pending = 2;
                    state = BOL;
                    break;

                  default:
                    GF_PUTC(f->next, ' ');
                    GF_PUTC(f->next, '\012');
                    state = DFL;
                    goto default_case;
                    break;
                }

                break;

              case BOL:                 /* first character of a line */
                PF_QC(f) = 0;
                if(c == '>'){           /* count quote level */
                    PF_QC(f)++;
                    state = FL_QLEV;
                }
                else{
done_counting_quotes:
                    if(c == ' '){       /* eat stuffed space */
                        state = FL_STF;
                        break;
                    }

done_with_stuffed_space:
                    if(c == '-'){       /* look for signature */
                        PF_SIG(f) = 1;
                        state = FL_SIG;
                        break;
                    }

done_with_sig:
                    /* now settle what to do with the held-back line ending */
                    if(pending == 2){
                        if(PF_QD(f) == PF_QC(f) && PF_SIG(f) < 4){
                            /* delete pending */

                            PF_QD(f) = PF_QC(f);

                            /* suppress quotes, too */
                            PF_QC(f) = 0;
                        }
                        else{
                            /*
                             * This should have been a hard new line
                             * instead so leave out the trailing space.
                             */
                            GF_PUTC(f->next, '\015');
                            GF_PUTC(f->next, '\012');

                            PF_QD(f) = PF_QC(f);
                        }
                    }
                    else if(pending == 1){
                        GF_PUTC(f->next, '\015');
                        GF_PUTC(f->next, '\012');
                        PF_QD(f) = PF_QC(f);
                    }
                    else{
                        PF_QD(f) = PF_QC(f);
                    }

                    pending = 0;
                    state = DFL;

                    /* replay the quote marks that were swallowed while counting */
                    while(PF_QC(f)-- > 0)
                      GF_PUTC(f->next, '>');

                    /* replay whatever part of "-- " was swallowed */
                    switch(PF_SIG(f)){
                      case 0:
                      default:
                        break;

                      case 1:
                        GF_PUTC(f->next, '-');
                        break;

                      case 2:
                        GF_PUTC(f->next, '-');
                        GF_PUTC(f->next, '-');
                        break;

                      case 3:
                      case 4:
                        GF_PUTC(f->next, '-');
                        GF_PUTC(f->next, '-');
                        GF_PUTC(f->next, ' ');
                        break;
                    }

                    PF_SIG(f) = 0;
                    goto default_case;  /* to handle c */
                }

                break;

              case FL_QLEV:             /* count quote level */
                if(c == '>')
                  PF_QC(f)++;
                else
                  goto done_counting_quotes;

                break;

              case FL_STF:              /* eat stuffed space */
                goto done_with_stuffed_space;
                break;

              case FL_SIG:              /* deal with sig indicator */
                switch(PF_SIG(f)){
                  case 1:               /* saw '-' */
                    if(c == '-')
                      PF_SIG(f) = 2;
                    else
                      goto done_with_sig;

                    break;

                  case 2:               /* saw '--' */
                    if(c == ' ')
                      PF_SIG(f) = 3;
                    else
                      goto done_with_sig;

                    break;

                  case 3:               /* saw '-- ' */
                    if(c == '\015')
                      PF_SIG(f) = 4;    /* it really is a sig line */

                    goto done_with_sig;
                    break;
                }

                break;
            }
        }

        /* remember where we are for the next chunk of data */
        f->f1 = state;
        f->f2 = pending;
        GF_END(f, f->next);
    }
    else if(flg == GF_EOD){
        fs_give((void **) &f->opt);
        (void) GF_FLUSH(f->next);
        (*f->next->f)(f->next, GF_EOD);
    }
    else if(flg == GF_RESET){
        PREFLOW_S *pf;

        pf = (PREFLOW_S *) fs_get(sizeof(*pf));
        memset(pf, 0, sizeof(*pf));
        f->opt = (void *) pf;

        f->f1 = BOL;            /* state */
        f->f2 = 0;              /* pending */
        PF_QD(f) = 0;           /* quote depth */
        PF_QC(f) = 0;           /* quote count */
        PF_SIG(f) = 0;          /* sig level */
    }
}
11112
11113
11114
11115
11116 /*
11117 * LINE PREFIX FILTER - insert given text at beginning of each
11118 * line
11119 */
11120
11121
/*
 * GF_PREFIX_WRITE - write the NUL terminated string s, one character at
 * a time, to f->next.  Nothing is written when s is NULL.  The expansion
 * refers to a variable named f and uses GF_PUTC, so it can only be used
 * where both are usable (gf_prefix below).
 */
#define GF_PREFIX_WRITE(s) { \
                               register char *p; \
                               if((p = (s)) != NULL) \
                                 while(*p) \
                                   GF_PUTC(f->next, *p++); \
                           }
11128
11129
11130 /*
11131 * the simple filter, prepends each line with the requested prefix.
11132 * if prefix is null, does nothing, and as with all filters, assumes
11133 * NVT end of lines.
11134 */
11135 void
gf_prefix(FILTER_S * f,int flg)11136 gf_prefix(FILTER_S *f, int flg)
11137 {
11138 GF_INIT(f, f->next);
11139
11140 if(flg == GF_DATA){
11141 register unsigned char c;
11142 register int state = f->f1;
11143 register int first = f->f2;
11144
11145 while(GF_GETC(f, c)){
11146
11147 if(first){ /* write initial prefix!! */
11148 first = 0; /* but just once */
11149 GF_PREFIX_WRITE((char *) f->opt);
11150 }
11151
11152 /*
11153 * State == 0 is the starting state and the usual state.
11154 * State == 1 means we saw a CR and haven't acted on it yet.
11155 * We are looking for a LF to get the CRLF end of line.
11156 * However, we also treat bare CR and bare LF as if they
11157 * were CRLF sequences. What else could it mean in text?
11158 * This filter is only used for text so that is probably
11159 * a reasonable interpretation of the bad input.
11160 */
11161 if(c == '\015'){ /* CR */
11162 if(state){ /* Treat pending CR as endofline, */
11163 GF_PUTC(f->next, '\015'); /* and remain in saw-a-CR state. */
11164 GF_PUTC(f->next, '\012');
11165 GF_PREFIX_WRITE((char *) f->opt);
11166 }
11167 else{
11168 state = 1;
11169 }
11170 }
11171 else if(c == '\012'){ /* LF */
11172 GF_PUTC(f->next, '\015'); /* Got either a CRLF or a bare LF, */
11173 GF_PUTC(f->next, '\012'); /* treat both as if a CRLF. */
11174 GF_PREFIX_WRITE((char *) f->opt);
11175 state = 0;
11176 }
11177 else{ /* any other character */
11178 if(state){
11179 GF_PUTC(f->next, '\015'); /* Treat pending CR as endofline. */
11180 GF_PUTC(f->next, '\012');
11181 GF_PREFIX_WRITE((char *) f->opt);
11182 state = 0;
11183 }
11184
11185 GF_PUTC(f->next, c);
11186 }
11187 }
11188
11189 f->f1 = state; /* save state for next chunk of data */
11190 f->f2 = first;
11191 GF_END(f, f->next);
11192 }
11193 else if(flg == GF_EOD){
11194 (void) GF_FLUSH(f->next);
11195 (*f->next->f)(f->next, GF_EOD);
11196 }
11197 else if(flg == GF_RESET){
11198 dprint((9, "-- gf_reset prefix\n"));
11199 f->f1 = 0;
11200 f->f2 = 1; /* nothing written yet */
11201 }
11202 }
11203
11204
/*
 * gf_prefix_opt - package the prefix string for the prefix filter
 *
 * Args: prefix -- string to hand to the filter; it is not copied
 *
 * Returns: the very same pointer, as a void *
 */
void *
gf_prefix_opt(char *prefix)
{
    void *opt = (void *) prefix;

    return(opt);
}
11214
11215
11216 /*
11217 * LINE TEST FILTER - accumulate lines and offer each to the provided
11218 * test function.
11219 */
11220
/* option block for gf_line_test, built by gf_line_test_opt() */
typedef struct _linetest_s {
    linetest_t  f;      /* function each accumulated line is offered to */
    void       *local;  /* passed to f as its last argument */
} LINETEST_S;
11225
11226
/* accumulator growth increment */
#define LINE_TEST_BLOCK 1024

/* address of the last byte of the line accumulator, which is f->f2 bytes long */
#define GF_LINE_TEST_EOB(f) \
                        ((f)->line + ((f)->f2 - 1))

/*
 * Append c to the line accumulator.  Uses the caller's locals p (next
 * free position) and eobuf (last byte of the buffer).  When p has
 * reached eobuf the buffer is grown by LINE_TEST_BLOCK bytes and p and
 * eobuf are recomputed relative to the possibly relocated buffer.
 */
#define GF_LINE_TEST_ADD(f, c) \
                        { \
                            if(p >= eobuf){ \
                                f->f2 += LINE_TEST_BLOCK; \
                                fs_resize((void **)&f->line, \
                                          (size_t) f->f2 * sizeof(char)); \
                                eobuf = GF_LINE_TEST_EOB(f); \
                                p = eobuf - LINE_TEST_BLOCK; \
                            } \
                            *p++ = c; \
                        }

/*
 * Offer the accumulated line to the test function and act on its answer,
 * which is left in D.  Uses the caller's local p (end of the accumulated
 * text); the line is NUL terminated there first.  The test function gets
 * the line number ((F)->n, incremented afterwards), the line, the address
 * of an insertion list to fill in, and the "local" pointer from the
 * option block.
 *
 *   D < 0   the line buffer and option block are freed and gf_error() is
 *           called, which does not return
 *   D < 2   the line is written to the next filter with the insertion
 *           list applied: an entry with a positive len has its text
 *           written when the copy reaches its "where" position, an entry
 *           with a negative len makes the copy skip -len characters of
 *           the line.  Entries left over once the line is exhausted are
 *           written after it.  The list is then freed.
 *   D >= 2  nothing is written here
 *
 * NOTE(review): the insertion list is only freed on the D < 2 path;
 * confirm test functions never build a list when returning 2 or more.
 */
#define GF_LINE_TEST_TEST(F, D) \
                        { \
                            unsigned char c; \
                            register char *cp; \
                            register int l; \
                            LT_INS_S *ins = NULL, *insp; \
                            *p = '\0'; \
                            (D) = (*((LINETEST_S *) (F)->opt)->f)((F)->n++, \
                                           (F)->line, &ins, \
                                           ((LINETEST_S *) (F)->opt)->local); \
                            if((D) < 2){ \
                                if((D) < 0){ \
                                    if((F)->line) \
                                      fs_give((void **) &(F)->line); \
                                    if((F)->opt) \
                                      fs_give((void **) &(F)->opt); \
                                    gf_error(_("translation error")); \
                                    /* NO RETURN */ \
                                } \
                                for(insp = ins, cp = (F)->line; cp < p; ){ \
                                    if(insp && cp == insp->where){ \
                                        if(insp->len > 0){ \
                                            for(l = 0; l < insp->len; l++){ \
                                                c = (unsigned char) insp->text[l]; \
                                                GF_PUTC((F)->next, c); \
                                            } \
                                            insp = insp->next; \
                                            continue; \
                                        } else if(insp->len < 0){ \
                                            cp -= insp->len; \
                                            insp = insp->next; \
                                            continue; \
                                        } \
                                    } \
                                    GF_PUTC((F)->next, *cp); \
                                    cp++; \
                                } \
                                while(insp){ \
                                    for(l = 0; l < insp->len; l++){ \
                                        c = (unsigned char) insp->text[l]; \
                                        GF_PUTC((F)->next, c); \
                                    } \
                                    insp = insp->next; \
                                } \
                                gf_line_test_free_ins(&ins); \
                            } \
                        }
11292
11293
11294
11295 /*
11296 * this simple filter accumulates characters until a newline, offers it
11297 * to the provided test function, and then passes it on. It assumes
11298 * NVT EOLs.
11299 */
11300 void
gf_line_test(FILTER_S * f,int flg)11301 gf_line_test(FILTER_S *f, int flg)
11302 {
11303 register char *p = f->linep;
11304 register char *eobuf = GF_LINE_TEST_EOB(f);
11305 GF_INIT(f, f->next);
11306
11307 if(flg == GF_DATA){
11308 register unsigned char c;
11309 register int state = f->f1;
11310
11311 while(GF_GETC(f, c)){
11312
11313 if(state){
11314 state = 0;
11315 if(c == '\012'){
11316 int done;
11317
11318 GF_LINE_TEST_TEST(f, done);
11319
11320 p = (f)->line;
11321
11322 if(done == 2) /* skip this line! */
11323 continue;
11324
11325 GF_PUTC(f->next, '\015');
11326 GF_PUTC(f->next, '\012');
11327 /*
11328 * if the line tester returns TRUE, it's
11329 * telling us its seen enough and doesn't
11330 * want to see any more. Remove ourself
11331 * from the pipeline...
11332 */
11333 if(done){
11334 if(gf_master == f){
11335 gf_master = f->next;
11336 }
11337 else{
11338 FILTER_S *fprev;
11339
11340 for(fprev = gf_master;
11341 fprev && fprev->next != f;
11342 fprev = fprev->next)
11343 ;
11344
11345 if(fprev) /* wha??? */
11346 fprev->next = f->next;
11347 else
11348 continue;
11349 }
11350
11351 while(GF_GETC(f, c)) /* pass input */
11352 GF_PUTC(f->next, c);
11353
11354 (void) GF_FLUSH(f->next); /* and drain queue */
11355 fs_give((void **)&f->line);
11356 fs_give((void **)&f); /* wax our data */
11357 return;
11358 }
11359 else
11360 continue;
11361 }
11362 else /* add CR to buffer */
11363 GF_LINE_TEST_ADD(f, '\015');
11364 } /* fall thru to handle 'c' */
11365
11366 if(c == '\015') /* newline? */
11367 state = 1;
11368 else
11369 GF_LINE_TEST_ADD(f, c);
11370 }
11371
11372 f->f1 = state;
11373 GF_END(f, f->next);
11374 }
11375 else if(flg == GF_EOD){
11376 int i;
11377
11378 GF_LINE_TEST_TEST(f, i); /* examine remaining data */
11379 fs_give((void **) &f->line); /* free line buffer */
11380 fs_give((void **) &f->opt); /* free test struct */
11381 (void) GF_FLUSH(f->next);
11382 (*f->next->f)(f->next, GF_EOD);
11383 }
11384 else if(flg == GF_RESET){
11385 dprint((9, "-- gf_reset line_test\n"));
11386 f->f1 = 0; /* state */
11387 f->n = 0L; /* line number */
11388 f->f2 = LINE_TEST_BLOCK; /* size of alloc'd line */
11389 f->line = p = (char *) fs_get(f->f2 * sizeof(char));
11390 }
11391
11392 f->linep = p;
11393 }
11394
11395
11396 /*
11397 * function called from the outside to operate on accumulated line.
11398 */
11399 void *
gf_line_test_opt(linetest_t test_f,void * local)11400 gf_line_test_opt(linetest_t test_f, void *local)
11401 {
11402 LINETEST_S *ltp;
11403
11404 ltp = (LINETEST_S *) fs_get(sizeof(LINETEST_S));
11405 memset(ltp, 0, sizeof(LINETEST_S));
11406 ltp->f = test_f;
11407 ltp->local = local;
11408 return((void *) ltp);
11409 }
11410
11411
11412
11413 LT_INS_S **
gf_line_test_new_ins(LT_INS_S ** ins,char * p,char * s,int n)11414 gf_line_test_new_ins(LT_INS_S **ins, char *p, char *s, int n)
11415 {
11416 *ins = (LT_INS_S *) fs_get(sizeof(LT_INS_S));
11417 if(((*ins)->len = n) > 0)
11418 strncpy((*ins)->text = (char *) fs_get(n * sizeof(char)), s, n);
11419 else
11420 (*ins)->text = NULL;
11421
11422 (*ins)->where = p;
11423 (*ins)->next = NULL;
11424 return(&(*ins)->next);
11425 }
11426
11427
11428 void
gf_line_test_free_ins(LT_INS_S ** ins)11429 gf_line_test_free_ins(LT_INS_S **ins)
11430 {
11431 if(ins && *ins){
11432 if((*ins)->next)
11433 gf_line_test_free_ins(&(*ins)->next);
11434
11435 if((*ins)->text)
11436 fs_give((void **) &(*ins)->text);
11437
11438 fs_give((void **) ins);
11439 }
11440 }
11441
11442
11443 /*
11444 * PREPEND EDITORIAL FILTER - conditionally prepend output text
11445 * with editorial comment
11446 */
11447
/* option block for gf_prepend_editorial, built by gf_prepend_editorial_opt() */
typedef struct _preped_s {
    prepedtest_t  f;     /* optional test; when set, text is only written if it returns non-zero */
    char         *text;  /* editorial text written ahead of the filtered output */
} PREPED_S;
11452
11453
11454 /*
11455 * gf_prepend_editorial - accumulate filtered text and prepend its
11456 * output with given text
11457 *
11458 *
11459 */
11460 void
gf_prepend_editorial(FILTER_S * f,int flg)11461 gf_prepend_editorial(FILTER_S *f, int flg)
11462 {
11463 GF_INIT(f, f->next);
11464
11465 if(flg == GF_DATA){
11466 register unsigned char c;
11467
11468 while(GF_GETC(f, c)){
11469 so_writec(c, (STORE_S *) f->data);
11470 }
11471
11472 GF_END(f, f->next);
11473 }
11474 else if(flg == GF_EOD){
11475 unsigned char c;
11476
11477 if(!((PREPED_S *)(f)->opt)->f || (*((PREPED_S *)(f)->opt)->f)()){
11478 char *p = ((PREPED_S *)(f)->opt)->text;
11479
11480 for( ; p && *p; p++)
11481 GF_PUTC(f->next, *p);
11482 }
11483
11484 so_seek((STORE_S *) f->data, 0L, 0);
11485 while(so_readc(&c, (STORE_S *) f->data)){
11486 GF_PUTC(f->next, c);
11487 }
11488
11489 so_give((STORE_S **) &f->data);
11490 fs_give((void **) &f->opt);
11491 (void) GF_FLUSH(f->next);
11492 (*f->next->f)(f->next, GF_EOD);
11493 }
11494 else if(flg == GF_RESET){
11495 dprint((9, "-- gf_reset line_test\n"));
11496 f->data = (void *) so_get(CharStar, NULL, EDIT_ACCESS);
11497 }
11498 }
11499
11500
11501 /*
11502 * function called from the outside to setup prepending editorial
11503 * to output text
11504 */
11505 void *
gf_prepend_editorial_opt(prepedtest_t test_f,char * text)11506 gf_prepend_editorial_opt(prepedtest_t test_f, char *text)
11507 {
11508 PREPED_S *pep;
11509
11510 pep = (PREPED_S *) fs_get(sizeof(PREPED_S));
11511 memset(pep, 0, sizeof(PREPED_S));
11512 pep->f = test_f;
11513 pep->text = text;
11514 return((void *) pep);
11515 }
11516
11517
11518 /*
11519 * Network virtual terminal to local newline convention filter
11520 */
11521 void
gf_nvtnl_local(FILTER_S * f,int flg)11522 gf_nvtnl_local(FILTER_S *f, int flg)
11523 {
11524 GF_INIT(f, f->next);
11525
11526 if(flg == GF_DATA){
11527 register unsigned char c;
11528 register int state = f->f1;
11529
11530 while(GF_GETC(f, c)){
11531 if(state){
11532 state = 0;
11533 if(c == '\012'){
11534 GF_PUTC(f->next, '\012');
11535 continue;
11536 }
11537 else
11538 GF_PUTC(f->next, '\015');
11539 /* fall thru to deal with 'c' */
11540 }
11541
11542 if(c == '\015')
11543 state = 1;
11544 else
11545 GF_PUTC(f->next, c);
11546 }
11547
11548 f->f1 = state;
11549 GF_END(f, f->next);
11550 }
11551 else if(flg == GF_EOD){
11552 (void) GF_FLUSH(f->next);
11553 (*f->next->f)(f->next, GF_EOD);
11554 }
11555 else if(flg == GF_RESET){
11556 dprint((9, "-- gf_reset nvtnl_local\n"));
11557 f->f1 = 0;
11558 }
11559 }
11560
11561
11562 /*
11563 * local to network newline convention filter
11564 */
11565 void
gf_local_nvtnl(FILTER_S * f,int flg)11566 gf_local_nvtnl(FILTER_S *f, int flg)
11567 {
11568 GF_INIT(f, f->next);
11569
11570 if(flg == GF_DATA){
11571 register unsigned char c;
11572
11573 while(GF_GETC(f, c)){
11574 if(c == '\012'){
11575 GF_PUTC(f->next, '\015');
11576 GF_PUTC(f->next, '\012');
11577 }
11578 else if(c != '\015') /* do not copy isolated \015 into source */
11579 GF_PUTC(f->next, c);
11580 }
11581
11582 GF_END(f, f->next);
11583 }
11584 else if(flg == GF_EOD){
11585 (void) GF_FLUSH(f->next);
11586 (*f->next->f)(f->next, GF_EOD);
11587 }
11588 else if(GF_RESET){
11589 dprint((9, "-- gf_reset local_nvtnl\n"));
11590 /* no op */
11591 }
11592
11593 }
11594
11595 void
free_filter_module_globals(void)11596 free_filter_module_globals(void)
11597 {
11598 FILTER_S *flt, *fltn = gf_master;
11599
11600 while((flt = fltn) != NULL){ /* free list of old filters */
11601 fltn = flt->next;
11602 fs_give((void **)&flt);
11603 }
11604 }
11605