1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1996-2011 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                                                                      *
19 ***********************************************************************/
20 #pragma prototyped
21 /*
22  * Glenn Fowler
23  * AT&T Research
24  *
25  * html to rtf filter
26  */
27 
/*
 * command usage / help text, built from adjacent string literals
 * start_html() reads the text that follows the first newline of this
 * string (the "$Id: ... $" entry) for the RTF generator comment
 * USAGE_LICENSE is not defined in this file
 */
28 static const char usage[] =
29 "[-?\n@(#)$Id: html2rtf (AT&T Research) 1999-01-01 $\n]"
30 USAGE_LICENSE
31 "[+NAME?html2rtf - html to rtf filter]"
32 "[+DESCRIPTION?\bhtml2rtf\b converts input \bhtml\b documents to an \bRTF\b"
33 "	document on the standard output. \bhtml2rtf\b expects properly nested"
34 "	begin/end tags in the input \bhtml\b and warns about imbalance.]"
35 
36 "[d:debug?Set the debug trace level to \alevel\a. Higher levels produce"
37 "	more output.]#[level]"
38 "[f:font-size?Set the initial font size to \asize\a points.]#[size:=12]"
39 "[p:project-file?Appends MS HELP project information to the help project file"
40 "	\afile\a. This file combines individual RTF files into a"
41 "	hyper-linked collection. Note that MS expects \afile\a to have a"
42 "	\b.hlp\b extension.]:[file]"
43 "[v:verbose?Enable verbose error and warning messages. Some \bhtml\b source"
44 "	can't stand the heat.]"
45 
46 "\n"
47 "\n[ file ... ]\n"
48 "\n"
49 
50 "[+SEE ALSO?\bman\b(1), \bmm\b(1), \bmm2html\b(1), \btroff\b(1),"
51 "	\btroff2html\b(1)]"
52 ;
53 
54 #include "html2rtf.h"
55 
56 #include <error.h>
57 
/* amount added to state.hanging / state.indent for each nested list or block quote */
58 #define LIST_INDENT		140
/* stack entry flag set by start_dl/start_ol/start_ul when COMPACT is present */
59 #define STK_LIST_COMPACT	(STK_TAG<<0)
60 
/*
 * the names below alias slots of the data[] array of a tag stack entry
 * (used as state.sp->NAME); different tag kinds reuse the same slots
 */

/* anchor tags */
61 #define a_close			data[0].number
62 #define a_label			data[1].string
63 
/* font and heading tags: font size to restore in the end handler */
64 #define font_size		data[0].number
65 
/* list and block quote tags */
66 #define list_counter		data[0].number
67 #define list_hanging		data[1].number
68 #define list_indent		data[2].number
69 #define list_label		data[3].string
70 #define list_type		data[4].number
71 
/* TAG_IGNORE tags: process() state saved at the start tag and restored at the end tag */
72 #define title_cc		data[0].number
73 #define title_lastlastc		data[1].number
74 #define title_op		data[2].io
75 #define title_tc		data[3].number
76 
/* global conversion state shared by all handlers */
77 State_t			state;
78 
79 /*
80  * return the attribute pointer for name in ap
81  */
82 
83 static Attribute_t*
attribute(register Attribute_t * ap,const char * name)84 attribute(register Attribute_t* ap, const char* name)
85 {
86 	if (ap)
87 		for (; ap->name; ap++)
88 			if (!strcasecmp(ap->name, name))
89 				return ap;
90 	return 0;
91 }
92 
93 /*
94  * new paragraph with optional hanging indent
95  */
96 
97 static void
par(int hanging,const char * tail)98 par(int hanging, const char* tail)
99 {
100 	if (hanging && !(state.sp->flags & STK_LIST_COMPACT))
101 		sfputr(state.out, "\\line", -1);
102 	sfputr(state.out, "\\par\\pard", -1);
103 	if (state.center)
104 		sfputr(state.out, "\\qc", -1);
105 	if (hanging)
106 		sfprintf(state.out, "\\fi%d", twips(state.hanging - state.indent));
107 	sfprintf(state.out, "\\li%d\\tx%d\\tx20000%s", twips(state.indent), twips(state.indent), tail ? tail : "");
108 	state.sep = 1;
109 }
110 
111 static void
anchor(int ref,register char * s)112 anchor(int ref, register char* s)
113 {
114 	register int	c;
115 
116 	if (s)
117 	{
118 		if (ref)
119 		{
120 			if (*s != '#')
121 			{
122 				error(1, "%s: unknown link", s);
123 				return;
124 			}
125 			s++;
126 			sfprintf(state.out, "{\\uldb %s}{\\v", s);
127 		}
128 		else
129 			sfputr(state.out, "#{\\footnote", -1);
130 		sfprintf(state.out, " %s.", state.prefix);
131 		while (c = *s++)
132 			sfputc(state.out, isalnum(c) ? c : '.');
133 		sfputc(state.out, '}');
134 		if (ref)
135 		{
136 			sfputr(state.out, "{\\*\\comment", -1);
137 			state.sep = 1;
138 			state.sp->a_close = 1;
139 		}
140 	}
141 }
142 
143 static int
start_a(Tag_t * tp,Attribute_t * ap)144 start_a(Tag_t* tp, Attribute_t* ap)
145 {
146 	Attribute_t*	op;
147 
148 	NoP(tp);
149 	state.sp->a_close = 0;
150 	if (op = attribute(ap, "HREF"))
151 		anchor(1, op->value);
152 	if (op = attribute(ap, "NAME"))
153 		anchor(0, op->value);
154 	return 1;
155 }
156 
157 static int
end_a(Tag_t * tp,Attribute_t * ap)158 end_a(Tag_t* tp, Attribute_t* ap)
159 {
160 	NoP(tp);
161 	NoP(ap);
162 	if (state.sp->a_close)
163 	{
164 		sfputc(state.out, '}');
165 		state.sep = 0;
166 	}
167 	return 0;
168 }
169 
170 static int
start_b(Tag_t * tp,Attribute_t * ap)171 start_b(Tag_t* tp, Attribute_t* ap)
172 {
173 	NoP(tp);
174 	NoP(ap);
175 	sfputr(state.out, "{\\b", -1);
176 	state.sep = 1;
177 	return 1;
178 }
179 
180 static int
start_bq(Tag_t * tp,Attribute_t * ap)181 start_bq(Tag_t* tp, Attribute_t* ap)
182 {
183 	NoP(tp);
184 	NoP(ap);
185 	state.sp->list_hanging = state.hanging;
186 	state.sp->list_indent = state.indent;
187 	state.hanging += LIST_INDENT;
188 	state.indent = state.hanging;
189 	par(0, NiL);
190 	sfprintf(state.out, "\\ri%d", state.indent);
191 	state.sep = 1;
192 	return 1;
193 }
194 
195 static int
end_bq(Tag_t * tp,Attribute_t * ap)196 end_bq(Tag_t* tp, Attribute_t* ap)
197 {
198 	NoP(tp);
199 	NoP(ap);
200 	state.hanging = state.sp->list_hanging;
201 	state.indent = state.sp->list_indent;
202 	sfprintf(state.out, "\\ri0");
203 	state.sep = 1;
204 	return 1;
205 }
206 
207 static int
start_body(Tag_t * tp,Attribute_t * ap)208 start_body(Tag_t* tp, Attribute_t* ap)
209 {
210 	NoP(tp);
211 	NoP(ap);
212 	return 0;
213 }
214 
215 static int
start_br(Tag_t * tp,Attribute_t * ap)216 start_br(Tag_t* tp, Attribute_t* ap)
217 {
218 	NoP(tp);
219 	NoP(ap);
220 	sfputr(state.out, "\\line", -1);
221 	state.sep = 1;
222 	return 0;
223 }
224 
225 static int
start_caption(Tag_t * tp,Attribute_t * ap)226 start_caption(Tag_t* tp, Attribute_t* ap)
227 {
228 	NoP(tp);
229 	NoP(ap);
230 	return 0;
231 }
232 
233 static int
start_center(Tag_t * tp,Attribute_t * ap)234 start_center(Tag_t* tp, Attribute_t* ap)
235 {
236 	NoP(tp);
237 	NoP(ap);
238 	state.center++;
239 	state.pre++;
240 	par(0, NiL);
241 	return 1;
242 }
243 
244 static int
end_center(Tag_t * tp,Attribute_t * ap)245 end_center(Tag_t* tp, Attribute_t* ap)
246 {
247 	NoP(tp);
248 	NoP(ap);
249 	if (state.center > 0)
250 		state.center--;
251 	if (state.pre > 0)
252 		state.pre--;
253 	return 1;
254 }
255 
256 static int
start_dd(Tag_t * tp,Attribute_t * ap)257 start_dd(Tag_t* tp, Attribute_t* ap)
258 {
259 	NoP(tp);
260 	NoP(ap);
261 	if (state.sp->flags & STK_HEADING)
262 		par(0, NiL);
263 	else
264 		sfputr(state.out, "\\tab", -1);
265 	state.sep = 1;
266 	return 1;
267 }
268 
269 static int
start_dl(Tag_t * tp,Attribute_t * ap)270 start_dl(Tag_t* tp, Attribute_t* ap)
271 {
272 	NoP(tp);
273 	if (attribute(ap, "COMPACT"))
274 		state.sp->flags |= STK_LIST_COMPACT;
275 	state.sp->list_hanging = state.hanging;
276 	state.sp->list_indent = state.indent;
277 	state.hanging += LIST_INDENT;
278 	state.indent = state.hanging + LIST_INDENT * 2;
279 	return 1;
280 }
281 
282 static int
end_LIST(Tag_t * tp,Attribute_t * ap)283 end_LIST(Tag_t* tp, Attribute_t* ap)
284 {
285 	NoP(tp);
286 	NoP(ap);
287 	state.hanging = state.sp->list_hanging;
288 	state.indent = state.sp->list_indent;
289 	return 1;
290 }
291 
292 static int
start_dt(Tag_t * tp,Attribute_t * ap)293 start_dt(Tag_t* tp, Attribute_t* ap)
294 {
295 	NoP(tp);
296 	NoP(ap);
297 	par(1, NiL);
298 	return 1;
299 }
300 
301 static int
start_fn(Tag_t * tp,Attribute_t * ap)302 start_fn(Tag_t* tp, Attribute_t* ap)
303 {
304 	NoP(tp);
305 	NoP(ap);
306 	sfputr(state.out, "\\~[\\~", -1);
307 	return 1;
308 }
309 
310 static int
end_fn(Tag_t * tp,Attribute_t * ap)311 end_fn(Tag_t* tp, Attribute_t* ap)
312 {
313 	NoP(tp);
314 	NoP(ap);
315 	sfputr(state.out, "\\~]\\~", -1);
316 	return 0;
317 }
318 
319 static int
start_font(Tag_t * tp,Attribute_t * ap)320 start_font(Tag_t* tp, Attribute_t* ap)
321 {
322 	char*		s;
323 	char*		e;
324 	int		n;
325 	Attribute_t*	op;
326 
327 	NoP(tp);
328 	if ((op = attribute(ap, "SIZE")) && (s = op->value) && (n = strtol(s, &e, 10)) && !*e)
329 	{
330 		if (*s == '+' || *s == '-')
331 			n += state.fontsize;
332 		state.sp->font_size = state.fontsize;
333 		state.fontsize = n;
334 		sfprintf(state.out, "{\\fs%d", twips(n));
335 		state.sep = 1;
336 		return 1;
337 	}
338 	return 0;
339 }
340 
341 static int
end_font(Tag_t * tp,Attribute_t * ap)342 end_font(Tag_t* tp, Attribute_t* ap)
343 {
344 	NoP(tp);
345 	NoP(ap);
346 	state.fontsize = state.sp->font_size;
347 	sfputc(state.out, '}');
348 	state.sep = 0;
349 	return 1;
350 }
351 
352 static int
start_H(Tag_t * tp,Attribute_t * ap)353 start_H(Tag_t* tp, Attribute_t* ap)
354 {
355 	NoP(tp);
356 	NoP(ap);
357 	(state.sp - 1)->flags |= STK_HEADING;
358 	state.sp->font_size = state.fontsize;
359 	state.fontsize += (7 - (tp->name[1] - '0')) * 1;
360 	sfprintf(state.out, "{\\b\\fs%d", twips(state.fontsize));
361 	state.sep = 1;
362 	return 1;
363 }
364 
365 static int
end_H(Tag_t * tp,Attribute_t * ap)366 end_H(Tag_t* tp, Attribute_t* ap)
367 {
368 	NoP(tp);
369 	NoP(ap);
370 	state.fontsize = state.sp->font_size;
371 	sfputc(state.out, '}');
372 	state.sep = 0;
373 	return 1;
374 }
375 
376 static int
start_head(Tag_t * tp,Attribute_t * ap)377 start_head(Tag_t* tp, Attribute_t* ap)
378 {
379 	NoP(tp);
380 	NoP(ap);
381 	return 0;
382 }
383 
384 static int
end_head(Tag_t * tp,Attribute_t * ap)385 end_head(Tag_t* tp, Attribute_t* ap)
386 {
387 	NoP(tp);
388 	NoP(ap);
389 	return 0;
390 }
391 
392 static int
start_hr(Tag_t * tp,Attribute_t * ap)393 start_hr(Tag_t* tp, Attribute_t* ap)
394 {
395 	NoP(tp);
396 	NoP(ap);
397 	sfputr(state.out, "{\\brdrt\\brdrsh\\par}", -1);
398 	state.sep = 0;
399 	return 0;
400 }
401 
402 static int
start_html(Tag_t * tp,Attribute_t * ap)403 start_html(Tag_t* tp, Attribute_t* ap)
404 {
405 	char*	s;
406 
407 	NoP(tp);
408 	NoP(ap);
409 	sfputr(state.out, "{\\rtf1 \\ansi \\deff0", '\n');
410 	s = strchr(usage, '\n') + 5;
411 	sfprintf(state.out, "{\\*\\comment generator: %-.*s}\n", strchr(usage, '\n') - s, s);
412 	sfputr(state.out, "{\\fonttbl", '\n');
413 	sfputr(state.out, "{\\f0 \\froman Times New Roman;}", '\n');
414 	sfputr(state.out, "{\\f1 \\fmodern Line Printer;}", '\n');
415 	sfputr(state.out, "{\\f2 \\froman Symbol;}", '\n');
416 	sfputr(state.out, "{\\f3 \\fswiss Ariel;}", '\n');
417 	sfputr(state.out, "}", '\n');
418 	sfprintf(state.out, "\\fs%d\n", twips(state.fontsize));
419 	return 1;
420 }
421 
422 static int
end_html(Tag_t * tp,Attribute_t * ap)423 end_html(Tag_t* tp, Attribute_t* ap)
424 {
425 	NoP(tp);
426 	NoP(ap);
427 	sfputr(state.out, "\n}", '\n');
428 	return 1;
429 }
430 
431 static int
start_i(Tag_t * tp,Attribute_t * ap)432 start_i(Tag_t* tp, Attribute_t* ap)
433 {
434 	NoP(tp);
435 	NoP(ap);
436 	sfputr(state.out, "{\\i", -1);
437 	state.sep = 1;
438 	return 1;
439 }
440 
441 static int
start_img(Tag_t * tp,Attribute_t * ap)442 start_img(Tag_t* tp, Attribute_t* ap)
443 {
444 	NoP(tp);
445 	NoP(ap);
446 	return 0;
447 }
448 
449 /*
450  * NOTE: roman() transcribed from GNU groff
451  */
452 
453 static void
roman(register int n,int format)454 roman(register int n, int format)
455 {
456 	register char*	dig;
457 	register int	i;
458 	register int	m;
459 
460 	dig = islower(format) ? "zwmdclxvi" : "ZWMDCLXVI";
461 	if (n <= -40000 || n >= 40000)
462 	{
463 		sfprintf(state.out, "<%d>", n);
464 		return;
465 	}
466 	if (n == 0)
467 	{
468 		sfputc(state.out, '0');
469 		return;
470 	}
471 	if (n < 0)
472 	{
473 		n = -n;
474 		sfputc(state.out, '-');
475 	}
476 	while (n >= 10000)
477 	{
478 		n -= 10000;
479 		sfputc(state.out, dig[0]);
480 	}
481 	for (i = 1000; i > 0; i /= 10, dig += 2)
482 	{
483 		m = n / i;
484 		n -= m * i;
485 		switch (m)
486 		{
487 		case 9:
488 			sfputc(state.out, dig[2]);
489 			sfputc(state.out, dig[0]);
490 			break;
491 		case 8:
492 			sfputc(state.out, dig[1]);
493 			sfputc(state.out, dig[2]);
494 			sfputc(state.out, dig[2]);
495 			sfputc(state.out, dig[2]);
496 			break;
497 		case 7:
498 			sfputc(state.out, dig[1]);
499 			sfputc(state.out, dig[2]);
500 			sfputc(state.out, dig[2]);
501 			break;
502 		case 6:
503 			sfputc(state.out, dig[1]);
504 			sfputc(state.out, dig[2]);
505 			break;
506 		case 5:
507 			sfputc(state.out, dig[1]);
508 			break;
509 		case 4:
510 			sfputc(state.out, dig[2]);
511 			sfputc(state.out, dig[1]);
512 			break;
513 		case 3:
514 			sfputc(state.out, dig[2]);
515 			/*FALLTHROUGH*/
516 		case 2:
517 			sfputc(state.out, dig[2]);
518 			/*FALLTHROUGH*/
519 		case 1:
520 			sfputc(state.out, dig[2]);
521 			break;
522 		}
523 	}
524 }
525 
526 static int
start_li(Tag_t * tp,Attribute_t * ap)527 start_li(Tag_t* tp, Attribute_t* ap)
528 {
529 	NoP(tp);
530 	NoP(ap);
531 	par(1, "{\\b ");
532 	switch (state.sp->list_type)
533 	{
534 	case '1':
535 		sfprintf(state.out, "%d.", state.sp->list_counter);
536 		break;
537 	case 'A':
538 		sfprintf(state.out, "%c)", 'A' + state.sp->list_counter);
539 		break;
540 	case 'a':
541 		sfprintf(state.out, "%c)", 'a' + state.sp->list_counter);
542 		break;
543 	case 'I':
544 	case 'i':
545 		roman(state.sp->list_counter, state.sp->list_type);
546 		sfputc(state.out, ')');
547 		break;
548 	default:
549 		sfputr(state.out, state.sp && state.sp->list_label ? state.sp->list_label : "\\bullet", -1);
550 		break;
551 	}
552 	state.sp->list_counter++;
553 	sfputr(state.out, "}\\tab", -1);
554 	state.sep = 1;
555 	return 1;
556 }
557 
558 static int
start_meta(Tag_t * tp,Attribute_t * ap)559 start_meta(Tag_t* tp, Attribute_t* ap)
560 {
561 	Attribute_t*	op;
562 
563 	NoP(tp);
564 	if ((op = attribute(ap, "NAME")) && op->value)
565 	{
566 		sfprintf(state.out, "{\\*\\comment %s", op->value);
567 		if ((op = attribute(ap, "CONTENT")) && op->value)
568 			sfprintf(state.out, ": %s", op->value);
569 		sfputr(state.out, "}", '\n');
570 	}
571 	return 0;
572 }
573 
574 static int
start_ol(Tag_t * tp,Attribute_t * ap)575 start_ol(Tag_t* tp, Attribute_t* ap)
576 {
577 	char*		e;
578 	Attribute_t*	op;
579 
580 	NoP(tp);
581 	if (attribute(ap, "COMPACT"))
582 		state.sp->flags |= STK_LIST_COMPACT;
583 	if (!(op = attribute(ap, "START")) || !op->value || (state.sp->list_counter = strtol(op->value, &e, 10)) < 0 || *e)
584 		state.sp->list_counter = 1;
585 	state.sp->list_type = (op = attribute(ap, "TYPE")) && op->value ? *op->value : '1';
586 	state.sp->list_hanging = state.hanging;
587 	state.sp->list_indent = state.indent;
588 	state.hanging += LIST_INDENT;
589 	state.indent = state.hanging + LIST_INDENT;
590 	return 1;
591 }
592 
593 static int
start_p(Tag_t * tp,Attribute_t * ap)594 start_p(Tag_t* tp, Attribute_t* ap)
595 {
596 	register char*	s;
597 	Attribute_t*	op;
598 
599 	NoP(tp);
600 	par(0, NiL);
601 	if ((op = attribute(ap, "ALIGN")) && (s = op->value))
602 	{
603 		if (!strcasecmp(s, "CENTER"))
604 			sfputr(state.out, "\\qc", -1);
605 		else if (!strcasecmp(s, "LEFT"))
606 			sfputr(state.out, "\\ql", -1);
607 		else if (!strcasecmp(s, "RIGHT"))
608 			sfputr(state.out, "\\qr", -1);
609 	}
610 	return 1;
611 }
612 
613 static int
start_pre(Tag_t * tp,Attribute_t * ap)614 start_pre(Tag_t* tp, Attribute_t* ap)
615 {
616 	NoP(tp);
617 	NoP(ap);
618 	state.pre++;
619 	return 1;
620 }
621 
622 static int
end_pre(Tag_t * tp,Attribute_t * ap)623 end_pre(Tag_t* tp, Attribute_t* ap)
624 {
625 	NoP(tp);
626 	NoP(ap);
627 	if (state.pre > 0)
628 		state.pre--;
629 	return 1;
630 }
631 
632 static int
start_rendering(register Tag_t * tp,Attribute_t * ap)633 start_rendering(register Tag_t* tp, Attribute_t* ap)
634 {
635 	register Render_t*	rp;
636 	register int		i;
637 
638 	if (rp = (Render_t*)tp->data)
639 		for (i = 0; i < rp->tags; i++)
640 			if ((tp = rp->tag[i]) && tp->start)
641 				(*tp->start)(tp, ap);
642 	return 1;
643 }
644 
645 static int
end_rendering(register Tag_t * tp,Attribute_t * ap)646 end_rendering(register Tag_t* tp, Attribute_t* ap)
647 {
648 	register Render_t*	rp;
649 	register int		i;
650 
651 	if (rp = (Render_t*)tp->data)
652 		for (i = rp->tags - 1; i > 0; i--)
653 			if ((tp = rp->tag[i]) && tp->end)
654 				(*tp->end)(tp, ap);
655 	return 1;
656 }
657 
658 static int
start_render(register Tag_t * tp,Attribute_t * ap)659 start_render(register Tag_t* tp, Attribute_t* ap)
660 {
661 	register Render_t*	rp;
662 	register char*		s;
663 	register char*		e;
664 	register int		n;
665 	Attribute_t*		op;
666 
667 	if ((op = attribute(ap, "TAG")) && (s = op->value))
668 	{
669 		if (tp = (Tag_t*)hashget(state.tags, s))
670 		{
671 			if (tp->data)
672 				free(tp->data);
673 			tp->start = 0;
674 			tp->end = 0;
675 			tp->data = 0;
676 		}
677 		else if (!(tp = newof(NiL, Tag_t, 1, 0)) || !(tp->name = hashput(state.tags, 0, tp)))
678 			error(ERROR_SYSTEM|3, "out of space [tag]");
679 		if ((op = attribute(ap, "STYLE")) && (s = op->value))
680 		{
681 			for (n = 0, e = s; e && (e = strchr(e, ',')); n++, e++);
682 			if (!(rp = newof(NiL, Render_t, 1, n * sizeof(Tag_t*))))
683 				error(ERROR_SYSTEM|3, "out of space [render]");
684 			n = 0;
685 			do
686 			{
687 				if (e = strchr(s, ','))
688 					*e++ = 0;
689 				if (rp->tag[n] = (Tag_t*)hashget(state.tags, s))
690 					n++;
691 			} while (s = e);
692 			if (!(rp->tags = n))
693 				free(rp);
694 			else
695 			{
696 				tp->start = start_rendering;
697 				tp->end = end_rendering;
698 				tp->data = (void*)rp;
699 			}
700 		}
701 	}
702 	return 0;
703 }
704 
705 static int
start_sub(Tag_t * tp,Attribute_t * ap)706 start_sub(Tag_t* tp, Attribute_t* ap)
707 {
708 	NoP(tp);
709 	NoP(ap);
710 	return 0;
711 }
712 
713 static int
end_sub(Tag_t * tp,Attribute_t * ap)714 end_sub(Tag_t* tp, Attribute_t* ap)
715 {
716 	NoP(tp);
717 	NoP(ap);
718 	return 0;
719 }
720 
721 static int
start_sup(Tag_t * tp,Attribute_t * ap)722 start_sup(Tag_t* tp, Attribute_t* ap)
723 {
724 	NoP(tp);
725 	NoP(ap);
726 	return 0;
727 }
728 
729 static int
end_sup(Tag_t * tp,Attribute_t * ap)730 end_sup(Tag_t* tp, Attribute_t* ap)
731 {
732 	NoP(tp);
733 	NoP(ap);
734 	return 0;
735 }
736 
737 static int
start_table(Tag_t * tp,Attribute_t * ap)738 start_table(Tag_t* tp, Attribute_t* ap)
739 {
740 	NoP(tp);
741 	NoP(ap);
742 	state.center++;
743 	par(0, NiL);
744 	return 1;
745 }
746 
747 static int
end_table(Tag_t * tp,Attribute_t * ap)748 end_table(Tag_t* tp, Attribute_t* ap)
749 {
750 	NoP(tp);
751 	NoP(ap);
752 	if (state.center > 0)
753 		state.center++;
754 	sfputr(state.out, "}", '\n');
755 	return 0;
756 }
757 
758 static int
start_td(Tag_t * tp,Attribute_t * ap)759 start_td(Tag_t* tp, Attribute_t* ap)
760 {
761 	NoP(tp);
762 	NoP(ap);
763 	return 0;
764 }
765 
766 static int
end_td(Tag_t * tp,Attribute_t * ap)767 end_td(Tag_t* tp, Attribute_t* ap)
768 {
769 	NoP(tp);
770 	NoP(ap);
771 	return 1;
772 }
773 
774 static int
start_th(Tag_t * tp,Attribute_t * ap)775 start_th(Tag_t* tp, Attribute_t* ap)
776 {
777 	register Attribute_t*	op;
778 
779 	NoP(tp);
780 	if (!(op = attribute(ap, "ALIGN")) || !op->value || *op->value != 'l' && *op->value != 'L')
781 		sfputr(state.out, "\\~\\~\\~\\~\\~\\~\\~\\~\\~\\~\\~\\~", -1);
782 	return 0;
783 }
784 
785 static int
start_title(Tag_t * tp,Attribute_t * ap)786 start_title(Tag_t* tp, Attribute_t* ap)
787 {
788 	NoP(tp);
789 	NoP(ap);
790 	state.pre++;
791 	return 1;
792 }
793 
794 static int
end_title(Tag_t * tp,Attribute_t * ap)795 end_title(Tag_t* tp, Attribute_t* ap)
796 {
797 	NoP(tp);
798 	NoP(ap);
799 	if (state.pre > 0)
800 		state.pre--;
801 	return 1;
802 }
803 
804 static int
start_tr(Tag_t * tp,Attribute_t * ap)805 start_tr(Tag_t* tp, Attribute_t* ap)
806 {
807 	NoP(tp);
808 	NoP(ap);
809 	return 0;
810 }
811 
812 static int
end_tr(Tag_t * tp,Attribute_t * ap)813 end_tr(Tag_t* tp, Attribute_t* ap)
814 {
815 	NoP(tp);
816 	NoP(ap);
817 	return 1;
818 }
819 
820 static int
start_tt(Tag_t * tp,Attribute_t * ap)821 start_tt(Tag_t* tp, Attribute_t* ap)
822 {
823 	NoP(tp);
824 	NoP(ap);
825 	sfputr(state.out, "{\\f1", -1);
826 	state.sep = 1;
827 	return 1;
828 }
829 
830 static int
start_ul(Tag_t * tp,Attribute_t * ap)831 start_ul(Tag_t* tp, Attribute_t* ap)
832 {
833 	Attribute_t*	op;
834 
835 	NoP(tp);
836 	if (attribute(ap, "COMPACT"))
837 		state.sp->flags |= STK_LIST_COMPACT;
838 	state.sp->list_type = 0;
839 	switch ((op = attribute(ap, "TYPE")) && op->value ? *op->value : 0)
840 	{
841 	case 'c':
842 		state.sp->list_label = "\\'b0";
843 		break;
844 	case 's':
845 		state.sp->list_label = "\\'a4";
846 		break;
847 	default:
848 		state.sp->list_label = "\\bullet";
849 		break;
850 	}
851 	state.sp->list_hanging = state.hanging;
852 	state.sp->list_indent = state.indent;
853 	state.hanging += LIST_INDENT;
854 	state.indent = state.hanging + LIST_INDENT;
855 	return 1;
856 }
857 
858 static int
start_var(Tag_t * tp,Attribute_t * ap)859 start_var(Tag_t* tp, Attribute_t* ap)
860 {
861 	NoP(tp);
862 	NoP(ap);
863 	sfputr(state.out, "{\\f3\\i", -1);
864 	state.sep = 1;
865 	return 1;
866 }
867 
868 /*
869  * generic tag end
870  */
871 
872 static int
end(Tag_t * tp,Attribute_t * ap)873 end(Tag_t* tp, Attribute_t* ap)
874 {
875 	sfputc(state.out, '}');
876 	state.sep = 0;
877 	return 1;
878 }
879 
880 /*
881  * convert html file in to rtf file out
882  */
883 
884 #define COMMENT	1
885 #define PUN	4
886 #define STRING	2
887 
888 static void
process(char * file,register Sfio_t * ip,register Sfio_t * op)889 process(char* file, register Sfio_t* ip, register Sfio_t* op)
890 {
891 	register int	c;
892 	register int	lastc;
893 	register int	item;
894 	register int	cc;
895 	register int	tc;
896 	register char*	s;
897 	int		lastlastc;
898 	int		quote;
899 	int		n;
900 	Entity_t*	ep;
901 	Tag_t*		tp;
902 	Attribute_t	attributes[16];
903 	Attribute_t*	ap;
904 	Stack_t*	sp;
905 
906 	error_info.file = file;
907 	error_info.line = 1;
908 	state.center = 0;
909 	state.in = ip;
910 	state.out = op;
911 	state.pre = 0;
912 	state.sp = state.sp_min;
913 	ap = 0;
914 	item = 0;
915 	lastc = 0;
916 	cc = tc = 0;
917 	for (;;)
918 	{
919 		switch (c = sfgetc(ip))
920 		{
921 		case EOF:
922 			goto done;
923 		case '<':
924 			if (!item)
925 			{
926 				item = c;
927 				lastlastc = lastc;
928 				quote = 0;
929 				ap = attributes;
930 				ap->name = 0;
931 				ap->value = 0;
932 				op = state.tmp;
933 				if ((c = sfgetc(ip)) != EOF)
934 				{
935 					sfungetc(ip, c);
936 					if (c == '!')
937 						quote |= COMMENT;
938 				}
939 				continue;
940 			}
941 			break;
942 		case '>':
943 			if (item == '<' && !(quote & STRING))
944 			{
945 				item = 0;
946 				if (!(s = sfstruse(op)))
947 					error(ERROR_SYSTEM|3, "out of space");
948 				op = state.out;
949 				if (*s == '!')
950 				{
951 					if ((cc -= strlen(s)) <= 0)
952 					{
953 						cc = 0;
954 						if ((c = sfgetc(ip)) != EOF)
955 						{
956 							if (c == '\n')
957 								error_info.line++;
958 							else
959 								sfungetc(ip, c);
960 						}
961 					}
962 					continue;
963 				}
964 				(ap + 1)->name = 0;
965 				for (;;)
966 				{
967 					ap->name = s + (((unsigned int)ap->name) >> PUN);
968 					if (!*ap->name)
969 						ap->name = 0;
970 					else if (ap->value)
971 					{
972 						ap->value = s + (((unsigned int)ap->value) >> PUN);
973 						if (!*ap->value)
974 							ap->value = 0;
975 					}
976 					if (ap == attributes)
977 						break;
978 					ap--;
979 				}
980 				if (c = *s == '/')
981 					s++;
982 				if (!(tp = (Tag_t*)hashget(state.tags, s)))
983 					error(1, "<%s>: unknown tag", s);
984 				else if (!c)
985 				{
986 					if (tp->end)
987 					{
988 						if (state.sp >= state.sp_max)
989 						{
990 							c = state.sp - state.sp_min;
991 							n = (state.sp_max - state.sp_min + 1) * 2;
992 							if (!(state.sp_min = oldof(state.sp_min, Stack_t, n, 0)))
993 								error(ERROR_SYSTEM|3, "out of space [tag stack]");
994 							state.sp_max = state.sp_min + n - 1;
995 							state.sp = state.sp_min + c;
996 						}
997 						state.sp++;
998 						state.sp->tag = tp;
999 						state.sp->line = error_info.line;
1000 						state.sp->flags = 0;
1001 						if (tp->flags & TAG_IGNORE)
1002 						{
1003 							state.sp->title_cc = cc;
1004 							state.sp->title_lastlastc = lastlastc;
1005 							state.sp->title_op = op;
1006 							state.sp->title_tc = tc;
1007 							op = state.nul;
1008 							sfstrseek(op, 0, SEEK_SET);
1009 						}
1010 					}
1011 					if (tp->start && !(*tp->start)(tp, ap) && tp->end)
1012 						state.sp->flags |= STK_NOEND;
1013 				}
1014 				else
1015 				{
1016 					sp = state.sp;
1017 					if (state.sp->tag != tp)
1018 					{
1019 						for (;;)
1020 						{
1021 							if (sp == state.sp_min)
1022 							{
1023 								if (!(tp->flags & TAG_UNBALANCED))
1024 									error(1, "</%s> has no matching <%s>", tp->name, tp->name);
1025 								sp = 0;
1026 								break;
1027 							}
1028 							if (sp->tag == tp)
1029 								break;
1030 							sp--;
1031 						}
1032 						if (sp)
1033 						{
1034 							while (state.sp > sp)
1035 							{
1036 								if (state.sp->tag->end && !(state.sp->flags & STK_NOEND))
1037 								{
1038 									if (!(state.sp->tag->flags & TAG_UNBALANCED))
1039 										error(1, "<%s> on line %d has no matching </%s>", state.sp->tag->name, state.sp->line, state.sp->tag->name);
1040 									(*state.sp->tag->end)(state.sp->tag, NiL);
1041 								}
1042 								state.sp--;
1043 							}
1044 						}
1045 					}
1046 					if (sp)
1047 					{
1048 						if (tp->end && !(state.sp->flags & STK_NOEND))
1049 							(*tp->end)(tp, ap);
1050 						if (tp->flags & TAG_IGNORE)
1051 						{
1052 							cc = state.sp->title_cc;
1053 							lastlastc = state.sp->title_lastlastc;
1054 							op = state.sp->title_op;
1055 							tc = state.sp->title_tc;
1056 						}
1057 						state.sp--;
1058 					}
1059 				}
1060 				ap = 0;
1061 				lastc = lastlastc;
1062 				continue;
1063 			}
1064 			break;
1065 		case '=':
1066 			if (ap && !ap->value)
1067 			{
1068 				sfputc(op, 0);
1069 				ap->value = (char*)(sfstrtell(op) << PUN);
1070 				continue;
1071 			}
1072 			break;
1073 		case '"':
1074 			if (ap)
1075 			{
1076 				quote ^= STRING;
1077 				if (!(quote & COMMENT))
1078 					continue;
1079 			}
1080 			break;
1081 		case '&':
1082 			if (!item)
1083 			{
1084 				item = c;
1085 				op = state.tmp;
1086 				continue;
1087 			}
1088 			break;
1089 		case ';':
1090 			if (item == '&')
1091 			{
1092 				item = 0;
1093 				if (!(s = sfstruse(op)))
1094 					error(ERROR_SYSTEM|3, "out of space");
1095 				op = state.out;
1096 				if (*s == '#')
1097 				{
1098 					n = (int)strtol(s + 1, NiL, 10) & 0377;
1099 					cc += sfprintf(op, "\\'%02x", n);
1100 					tc++;
1101 					if (isspace(n))
1102 						lastc = ' ';
1103 				}
1104 				else if (ep = (Entity_t*)hashget(state.entities, s))
1105 				{
1106 					cc += sfputr(op, ep->value, -1);
1107 					tc++;
1108 					if (ep->flags & ENT_SPACE)
1109 						lastc = ' ';
1110 				}
1111 				else
1112 				{
1113 					error(1, "&%s;: unknown entity reference", s);
1114 					cc += sfprintf(op, "&%s;", s);
1115 					tc++;
1116 				}
1117 				continue;
1118 			}
1119 			break;
1120 		case '{':
1121 		case '}':
1122 		case '\\':
1123 			sfputc(op, '\\');
1124 			cc++;
1125 			state.sep = 0;
1126 			break;
1127 		case '\n':
1128 			error_info.line++;
1129 			if (state.pre && !item)
1130 			{
1131 				state.sep = 0;
1132 				sfputr(op, "\\line", -1);
1133 				cc += 5;
1134 				tc = 0;
1135 				break;
1136 			}
1137 			/*FALLTHROUGH*/
1138 		case ' ':
1139 		case '\t':
1140 		case '\v':
1141 			if (ap)
1142 			{
1143 				if (!quote)
1144 				{
1145 					if (lastc != ' ' && ap < &attributes[elementsof(attributes) - 1])
1146 					{
1147 						sfputc(op, 0);
1148 						ap++;
1149 						ap->name = (char*)(sfstrtell(op) << PUN);
1150 						ap->value = 0;
1151 						lastc = ' ';
1152 					}
1153 					continue;
1154 				}
1155 			}
1156 			else if (!state.pre)
1157 			{
1158 				if (lastc == ' ')
1159 					continue;
1160 				c = ' ';
1161 				if (cc >= 72)
1162 				{
1163 					cc = 0;
1164 					sfputc(op, '\n');
1165 				}
1166 			}
1167 			else if (c == ' ')
1168 			{
1169 				sfputr(op, "\\~", -1);
1170 				cc += 2;
1171 				tc++;
1172 				state.sep = 0;
1173 				continue;
1174 			}
1175 			else if (c == '\t')
1176 			{
1177 				do
1178 				{
1179 					sfputr(op, "\\~", -1);
1180 					cc += 2;
1181 					tc++;
1182 				} while (tc % 8);
1183 				state.sep = 0;
1184 				continue;
1185 			}
1186 			break;
1187 		default:
1188 			if (iscntrl(c))
1189 				continue;
1190 			if (c > 0177)
1191 			{
1192 				cc += sfprintf(op, "\\'%02x", c & 0377);
1193 				tc++;
1194 				continue;
1195 			}
1196 			break;
1197 		}
1198 		if (state.sep && op == state.out)
1199 		{
1200 			state.sep = 0;
1201 			if (c != ' ')
1202 			{
1203 				sfputc(op, ' ');
1204 				cc++;
1205 				tc++;
1206 			}
1207 		}
1208 		lastc = c;
1209 		sfputc(op, c);
1210 		cc++;
1211 		tc++;
1212 	}
1213  done:
1214 	while (state.sp > state.sp_min)
1215 	{
1216 		error(1, "<%s> on line %d has no matching </%s>", state.sp->tag->name, state.sp->line, state.sp->tag->name);
1217 		state.sp--;
1218 	}
1219 	error_info.file = 0;
1220 	error_info.line = 0;
1221 }
1222 
/*
 * hashwalk() callback for project()
 * return 1 if project file must be updated: that is the case when
 * the entry value v is the handle h
 * the entry name s is not used
 */

static int
project_update(const char* s, char* v, void* h)
{
	char*	mark;

	NoP(s);
	mark = (char*)h;
	return v == mark;
}
1233 
1234 /*
1235  * list project file names
1236  */
1237 
1238 static int
project_list(const char * s,char * v,void * h)1239 project_list(const char* s, char* v, void* h)
1240 {
1241 	NoP(v);
1242 	sfputr((Sfio_t*)h, s, '\n');
1243 	return 0;
1244 }
1245 
1246 /*
1247  * create/update help project file
1248  */
1249 
1250 static void
project(char * file)1251 project(char* file)
1252 {
1253 	register char*	s;
1254 	Sfio_t*		fp;
1255 
1256 	if (state.files)
1257 	{
1258 		if (fp = sfopen(NiL, file, "r"))
1259 		{
1260 			while (s = sfgetr(fp, '\n', 1))
1261 			{
1262 				if (*s == '[' && !strncasecmp(s, "[FILES]", 7))
1263 				{
1264 					while ((s = sfgetr(fp, '\n', 1)) && *s != '[')
1265 						hashput(state.files, s, &state);
1266 					if (!s)
1267 						break;
1268 				}
1269 				sfputr(state.tmp, s, '\n');
1270 			}
1271 			sfclose(fp);
1272 			if (!(s = sfstruse(state.tmp)))
1273 				error(ERROR_SYSTEM|3, "out of space");
1274 		}
1275 		else
1276 			s = "\
1277 [OPTIONS]\n\
1278 COMPRESS=TRUE\n\
1279 REPORT=ON\n\
1280 TITLE=Manual\n\
1281 ";
1282 		if (hashwalk(state.files, 0, project_update, state.files))
1283 		{
1284 			if (!(fp = sfopen(NiL, file, "w")))
1285 				error(ERROR_SYSTEM|2, "%s: cannot write", file);
1286 			else
1287 			{
1288 				sfputr(fp, s, -1);
1289 				sfputr(fp, "[FILES]", '\n');
1290 				hashwalk(state.files, 0, project_list, fp);
1291 				sfclose(fp);
1292 			}
1293 		}
1294 	}
1295 }
1296 
/*
 * html to rtf entity reference map
 *
 * each entry has three initializers:
 *
 *	the entity name -- init() enters the entry in state.entities
 *	under this name
 *	the replacement string -- presumably the text emitted to the
 *	rtf output in place of the entity reference; most are \'xx
 *	hex escapes, a few are rtf control words or plain characters
 *	a flags word -- 0 or ENT_SPACE; ENT_SPACE is set only on the
 *	three spacing entities (emspace, enspace, nbsp)
 *
 * NOTE(review): Entity_t is declared outside this section; confirm
 * the member roles there
 */

static const Entity_t entities[] =
{
	"AElig",	"\\'c6",	0,
	"Aacute",	"\\'c1",	0,
	"Acirc",	"\\'c2",	0,
	"Agrave",	"\\'c0",	0,
	"Aring",	"\\'c5",	0,
	"Atilde",	"\\'c3",	0,
	"Auml",		"\\'c4",	0,
	"Ccedil",	"\\'c7",	0,
	"ETH",		"\\'d0",	0,
	"Eacute",	"\\'c9",	0,
	"Ecirc",	"\\'ca",	0,
	"Egrave",	"\\'c8",	0,
	"Euml",		"\\'cb",	0,
	"Iacute",	"\\'cd",	0,
	"Icirc",	"\\'ce",	0,
	"Igrave",	"\\'cc",	0,
	"Iuml",		"\\'cf",	0,
	"Ntilde",	"\\'d1",	0,
	"Oacute",	"\\'d3",	0,
	"Ocirc",	"\\'d4",	0,
	"Ograve",	"\\'d2",	0,
	"Oslash",	"\\'d8",	0,
	"Otilde",	"\\'d5",	0,
	"Ouml",		"\\'d6",	0,
	"THORN",	"\\'de",	0,
	"Uacute",	"\\'da",	0,
	"Ucirc",	"\\'db",	0,
	"Ugrave",	"\\'d9",	0,
	"Uuml",		"\\'dc",	0,
	"Yacute",	"\\'dd",	0,
	"aacute",	"\\'e1",	0,
	"acirc",	"\\'e2",	0,
	"acute",	"\\'b4",	0,
	"aelig",	"\\'e6",	0,
	"agrave",	"\\'e0",	0,
	"amp",		"&",		0,
	"aring",	"\\'e5",	0,
	"atilde",	"\\'e3",	0,
	"auml",		"\\'e4",	0,
	"brvbar",	"\\'a6",	0,
	"ccedil",	"\\'e7",	0,
	"cedil",	"\\'b8",	0,
	"cent",		"\\'a2",	0,
	"copy",		"\\'a9",	0,
	"curren",	"\\'a4",	0,
	"deg",		"\\'b0",	0,
	"divide",	"\\'f7",	0,
	"eacute",	"\\'e9",	0,
	"ecirc",	"\\'ea",	0,
	"egrave",	"\\'e8",	0,
	"emdash",	"\\emdash",	0,
	"emspace",	"\\emspace",	ENT_SPACE,
	"endash",	"\\endash",	0,
	"enspace",	"\\enspace",	ENT_SPACE,
	"eth",		"\\'f0",	0,
	"euml",		"\\'eb",	0,
	"frac12",	"\\'bd",	0,
	"frac14",	"\\'bc",	0,
	"frac34",	"\\'be",	0,
	"gt",		">",		0,
	"iacute",	"\\'ed",	0,
	"icirc",	"\\'ee",	0,
	"iexcl",	"\\'a1",	0,
	"igrave",	"\\'ec",	0,
	"iquest",	"\\'bf",	0,
	"iuml",		"\\'ef",	0,
	"laquo",	"\\'ab",	0,
	"lt",		"<",		0,
	"macr",		"\\'af",	0,
	"micro",	"\\'b5",	0,
	"middot",	"\\bullet",	0,
	"nbsp",		"\\~",		ENT_SPACE,
	"not",		"\\'ac",	0,
	"ntilde",	"\\'f1",	0,
	"oacute",	"\\'f3",	0,
	"ocirc",	"\\'f4",	0,
	"ograve",	"\\'f2",	0,
	"ordf",		"\\'aa",	0,
	"ordm",		"\\'ba",	0,
	"oslash",	"\\'f8",	0,
	"otilde",	"\\'f5",	0,
	"ouml",		"\\'f6",	0,
	"para",		"\\'b6",	0,
	"plusmn",	"\\'b1",	0,
	"pound",	"\\'a3",	0,
	"quot",		"\"",		0,
	"raquo",	"\\'bb",	0,
	"reg",		"\\'ae",	0,
	"sect",		"\\'a7",	0,
	"shy",		"\\'ad",	0,
	"sup1",		"\\'b9",	0,
	"sup2",		"\\'b2",	0,
	"sup3",		"\\'b3",	0,
	"szlig",	"\\'df",	0,
	"thorn",	"\\'fe",	0,
	"times",	"\\'d7",	0,
	"uacute",	"\\'fa",	0,
	"ucirc",	"\\'fb",	0,
	"ugrave",	"\\'f9",	0,
	"uml",		"\\'a8",	0,
	"uuml",		"\\'fc",	0,
	"yacute",	"\\'fd",	0,
	"yen",		"\\'a5",	0,
	"yuml",		"\\'ff",	0,
	/* disabled: this entry has an empty replacement string */
#if 0
	"trademark",	"",		0,
#endif
};
1411 
/*
 * html tag table
 *
 * each entry has five initializers:
 *
 *	the tag name -- init() enters the entry in state.tags under
 *	this name; that table compares and hashes names ignoring case
 *	the start function for the tag, or 0
 *	the end function for the tag, or 0
 *	a field that is 0 in every entry here
 *	a flags word -- 0, TAG_UNBALANCED or TAG_IGNORE
 *
 * NOTE(review): Tag_t, the start_* and end* functions and the TAG_*
 * flags are declared outside this section; presumably a 0 function
 * means nothing is done for that event, and NULL and UNKNOWN are
 * internal pseudo tags -- confirm against their uses
 */

static const Tag_t tags[] =
{
	"A",		start_a,	end_a,		0,0,
	"ADDRESS",	start_i,	end,		0,0,
	"B",		start_b,	end,		0,0,
	"BLOCKQUOTE",	start_bq,	end_bq,		0,0,
	"BQ",		start_bq,	end_bq,		0,0,
	"BODY",		start_body,	end,		0,0,
	"BR",		start_br,	0,		0,0,
	"CAPTION",	start_caption,	end,		0,0,
	"CENTER",	start_center,	end_center,	0,0,
	"CITE",		start_i,	end,		0,0,
	"CODE",		start_tt,	end,		0,0,
	"DD",		start_dd,	0,		0,0,
	"DIR",		start_ul,	end_LIST,	0,0,
	"DL",		start_dl,	end_LIST,	0,0,
	"DT",		start_dt,	0,		0,0,
	"EM",		start_i,	end,		0,0,
	"FN",		start_fn,	end_fn,		0,0,
	"FONT",		start_font,	end_font,	0,0,
	"H1",		start_H,	end_H,		0,0,
	"H2",		start_H,	end_H,		0,0,
	"H3",		start_H,	end_H,		0,0,
	"H4",		start_H,	end_H,		0,0,
	"H5",		start_H,	end_H,		0,0,
	"H6",		start_H,	end_H,		0,0,
	"HEAD",		start_head,	end_head,	0,TAG_UNBALANCED,
	"HR",		start_hr,	0,		0,0,
	"HTML",		start_html,	end_html,	0,0,
	"I",		start_i,	end,		0,0,
	"IMG",		start_img,	0,		0,0,
	"KBD",		start_tt,	end,		0,0,
	"LI",		start_li,	0,		0,TAG_UNBALANCED,
	"META",		start_meta,	0,		0,0,
	"MENU",		start_ul,	end_LIST,	0,0,
	"NULL",		0,		0,		0,0,
	"OL",		start_ol,	end_LIST,	0,0,
	"P",		start_p,	0,		0,TAG_UNBALANCED,
	"PRE",		start_pre,	end_pre,	0,0,
	"RENDER",	start_render,	0,		0,0,
	"SAMP",		start_tt,	end,		0,0,
	"STRONG",	start_b,	end,		0,0,
	"SUB",		start_sub,	end_sub,	0,0,
	"SUP",		start_sup,	end_sup,	0,0,
	"TABLE",	start_table,	end_table,	0,0,
	"TD",		start_td,	end_td,		0,0,
	"TH",		start_th,	0,		0,0,
	"TITLE",	start_title,	end_title,	0,TAG_IGNORE,
	"TR",		start_tr,	end_tr,		0,0,
	"TT",		start_tt,	end,		0,0,
	"UL",		start_ul,	end_LIST,	0,0,
	"UNKNOWN",	0,		0,		0,0,
	"VAR",		start_var,	end,		0,0,
};
1470 
/*
 * hash the null terminated string s ignoring case
 *
 * upper case characters are folded to lower case before being mixed
 * into the hash with HASHPART(), so names that differ only in case
 * get the same value; init() pairs this with strcasecmp() for the
 * tag table
 */

static unsigned int
strcasehash(const char* s)
{
	register const unsigned char*	cp;
	register unsigned int		hash = 0;
	register unsigned int		ch;

	for (cp = (const unsigned char*)s; (ch = *cp) != 0; cp++)
	{
		if (isupper(ch))
			ch = tolower(ch);
		HASHPART(hash, ch);
	}
	return hash;
}
1490 
1491 /*
1492  * initialize the global data
1493  */
1494 
1495 static void
init(void)1496 init(void)
1497 {
1498 	register int	i;
1499 
1500 	if (!state.nul && !(state.nul = sfstropen()))
1501 		error(ERROR_SYSTEM|3, "out of space [nul buffer]");
1502 	if (!state.tmp && !(state.tmp = sfstropen()))
1503 		error(ERROR_SYSTEM|3, "out of space [tmp buffer]");
1504 	i = 1024;
1505 	if (!(state.sp_min = oldof(NiL, Stack_t, i, 0)))
1506 		error(ERROR_SYSTEM|3, "out of space [tag stack]");
1507 	state.sp_max = state.sp_min + i - 1;
1508 	if (!(state.entities = hashalloc(NiL, HASH_name, "entities", 0)))
1509 		error(ERROR_SYSTEM|3, "out of space [entity hash]");
1510 	if (!(state.tags = hashalloc(NiL, HASH_compare, strcasecmp, HASH_hash, strcasehash, HASH_name, "tags", 0)))
1511 		error(ERROR_SYSTEM|3, "out of space [tag hash]");
1512 	if (state.project && !(state.files = hashalloc(state.tags, HASH_set, HASH_ALLOCATE, HASH_name, "files", 0)))
1513 		error(ERROR_SYSTEM|3, "out of space [file hash]");
1514 	for (i = 0; i < elementsof(entities); i++)
1515 		if (!(hashput(state.entities, entities[i].name, &entities[i])))
1516 			error(ERROR_SYSTEM|3, "out of space [entity hash put]");
1517 	for (i = 0; i < elementsof(tags); i++)
1518 		if (!(hashput(state.tags, tags[i].name, &tags[i])))
1519 			error(ERROR_SYSTEM|3, "out of space [tag hash put]");
1520 	hashset(state.tags, HASH_ALLOCATE);
1521 }
1522 
1523 int
main(int argc,char ** argv)1524 main(int argc, char** argv)
1525 {
1526 	register int		c;
1527 	register char*		s;
1528 	register char*		t;
1529 	register char*		u;
1530 	register Sfio_t*	ip;
1531 	register Sfio_t*	op;
1532 
1533 	NoP(argc);
1534 	error_info.id = "html2rtf";
1535 	state.fontsize = FONTSIZE;
1536 	for (;;)
1537 	{
1538 		switch (optget(argv, usage))
1539 		{
1540 		case 'd':
1541 			error_info.trace = -opt_info.num;
1542 			continue;
1543 		case 'f':
1544 			state.fontsize = opt_info.num;
1545 			continue;
1546 		case 'p':
1547 			state.project = opt_info.arg;
1548 			continue;
1549 		case 'v':
1550 			state.verbose = 1;
1551 			continue;
1552 		case '?':
1553 			error(ERROR_USAGE|4, "%s", opt_info.arg);
1554 			continue;
1555 		case ':':
1556 			error(2, "%s", opt_info.arg);
1557 			continue;
1558 		}
1559 		break;
1560 	}
1561 	argv += opt_info.index;
1562 	if (error_info.errors)
1563 		error(ERROR_USAGE|4, "%s", optusage(NiL));
1564 	init();
1565 	if (!*argv)
1566 	{
1567 		if (state.project)
1568 			error(ERROR_SYSTEM|3, "%s: input files required when project file specified", state.project);
1569 		process(NiL, sfstdin, sfstdout);
1570 	}
1571 	else while (s = *argv++)
1572 	{
1573 		if (ip = sfopen(NiL, s, "r"))
1574 		{
1575 			if (state.project)
1576 			{
1577 				if (!(t = strrchr(s, '/')))
1578 					t = s;
1579 				if (u = strrchr(t, '.'))
1580 					c = u - t;
1581 				else
1582 					c = strlen(t);
1583 				sfprintf(state.tmp, "%-.*s.rtf", c, t);
1584 				if (!(u = sfstruse(state.tmp)))
1585 					error(ERROR_SYSTEM|3, "out of space");
1586 				if (!(op = sfopen(NiL, u, "w")))
1587 				{
1588 					error(ERROR_SYSTEM|2, "%s: cannot write", u);
1589 					sfclose(ip);
1590 					continue;
1591 				}
1592 				hashput(state.files, u, state.files);
1593 				while (c = *t++)
1594 					sfputc(state.tmp, isalnum(c) ? c : '.');
1595 				if (!(state.prefix = strdup(sfstruse(state.tmp))))
1596 					error(ERROR_SYSTEM|3, "out of space");
1597 			}
1598 			else
1599 			{
1600 				state.prefix = "HTML2RTF";
1601 				op = sfstdout;
1602 			}
1603 			process(s, ip, op);
1604 			sfclose(ip);
1605 			if (state.project)
1606 			{
1607 				sfclose(op);
1608 				free(state.prefix);
1609 			}
1610 		}
1611 		else error(ERROR_SYSTEM|2, "%s: cannot read", s);
1612 	}
1613 	if (state.project)
1614 		project(state.project);
1615 	exit(error_info.errors != 0);
1616 }
1617