xref: /illumos-gate/usr/src/cmd/deroff/deroff.c (revision 7c478bd9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <assert.h>
34 #include <errno.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <locale.h>
39 #include <sys/varargs.h>
40 
41 /*
42  * Deroff command -- strip troff, eqn, and Tbl sequences from a file.
 * Accepts three kinds of flag arguments:
 * -w causes output of one word per line rather than in the
 * original format.
 * -mm (or -ms) causes the corresponding macros to be interpreted
 * so that just sentences are output.
 * -ml also gets rid of lists.
 * -i causes deroff to ignore .so and .nx commands.
49  * Deroff follows .so and .nx commands, removes contents of macro
50  * definitions, equations (both .EQ ... .EN and $...$),
51  * Tbl command sequences, and Troff backslash constructions.
52  *
53  * All input is through the C macro; the most recently read character
54  * is in c.
55  */
56 
/*
 * C reads the next input character into c and yields it.  At end of
 * file it yields the result of eof() instead, and when the character
 * equals ldelim while no included file is being read (filesp == files)
 * it yields the result of skeqn().
 */
#define	C	((c = getc(infile)) == EOF ? eof() : \
		    ((c == ldelim) && (filesp == files) ? skeqn() : c))
/* C1 is C without the ldelim check. */
#define	C1	((c = getc(infile)) == EOF ? eof() : c)
/* SKIP, followed by a semicolon, reads up to and including a newline. */
#define	SKIP	while (C != '\n')
/*
 * SKIP_TO_COM skips two lines and then reads on until it has seen a
 * '.' that follows a newline and is itself followed by a character
 * that is not greater than 'Z'.  It expands to several statements, so
 * it must not be used as the unbraced body of an if or a loop.
 */
#define	SKIP_TO_COM	SKIP; SKIP; pc = c; \
			while ((C != '.') || (pc != '\n') || \
			    (C > 'Z')) { \
				pc = c; \
			}

#define	YES 1
#define	NO 0
/* values of mac */
#define	MS 0
#define	MM 1
/* values passed as the cnst argument of regline() and putmac() */
#define	ONE 1
#define	TWO 2

/* value of ldelim and rdelim while no eqn delimiter is set */
#define	NOCHAR -2
/* character classes stored in chars[] */
#define	SPECIAL 0
#define	APOS 1
#define	DIGIT 2
#define	LETTER 3

/* initial size of the line buffer; regline() doubles it on demand */
#define	MAXLINESZ	512

static int wordflag = NO;	/* set by -w */
static int msflag = NO;		/* set by any -m option */
static int iflag = NO;		/* set by -i */
static int mac = MM;		/* MM or MS, chosen by -mm / -ms */
static int disp = 0;		/* set by -ml */
static int inmacro = NO;
static int intable = NO;	/* set by tbl(), cleared on .TE */
static int lindx;		/* current index into line */
static size_t linesize = MAXLINESZ;	/* allocated size of line */

static char chars[128];  /* SPECIAL, APOS, DIGIT, or LETTER */

static char *line = NULL;

/*
 * Most recently read character.  This has to be an int: getc() returns
 * an int and the macros above compare c with EOF, which a char cannot
 * represent separately from a data byte.
 */
static int c;
static int pc;
static int ldelim	= NOCHAR;
static int rdelim	= NOCHAR;

static int argc;
static char **argv;

extern int optind;
extern char *optarg;
static char fname[50];		/* file name read by getfname() */
static FILE *files[15];		/* stack of open input files */
static FILE **filesp;		/* current top of the files stack */
static FILE *infile;		/* file currently being read */

static void backsl(void);
static void comline(void);
static char *copys(char *);
static int eof(void);
static void eqn(void);
static void fatal(const char *, ...);
static void fatal_msg(char *);
static void getfname(void);
static void macro(void);
static FILE *opn(char *);
static void putmac(char *, int);
static void putwords(int);
static void regline(int, int);
static void sce(void);
static int skeqn(void);
static void sdis(char, char);
static void stbl(void);
static void tbl(void);
static void usage(void);
static void work(void);
131 
132 void
133 main(int ac, char **av)
134 {
135 	int i;
136 	int errflg = 0;
137 	int optchar;
138 
139 	(void) setlocale(LC_ALL, "");
140 #if !defined(TEXT_DOMAIN)
141 #define	TEXT_DOMAIN "SYS_TEST"
142 #endif
143 	(void) textdomain(TEXT_DOMAIN);
144 	argc = ac;
145 	argv = av;
146 	while ((optchar = getopt(argc, argv, "wim:")) != EOF) {
147 		switch (optchar) {
148 		case 'w':
149 			wordflag = YES;
150 			break;
151 		case 'm':
152 			msflag = YES;
153 			if (*optarg == 'm')
154 				mac = MM;
155 			else if (*optarg == 's')
156 				mac = MS;
157 			else if (*optarg == 'l')
158 				disp = 1;
159 			else
160 				errflg++;
161 			break;
162 		case 'i':
163 			iflag = YES;
164 			break;
165 		case '?':
166 			errflg++;
167 		}
168 	}
169 	if (errflg)
170 		usage();
171 	if (optind == argc)
172 		infile = stdin;
173 	else
174 		infile = opn(argv[optind++]);
175 	files[0] = infile;
176 	filesp = &files[0];
177 
178 	for (i = 'a'; i <= 'z'; ++i)
179 		chars[i] = LETTER;
180 	for (i = 'A'; i <= 'Z'; ++i)
181 		chars[i] = LETTER;
182 	for (i = '0'; i <= '9'; ++i)
183 		chars[i] = DIGIT;
184 	chars['\''] = APOS;
185 	chars['&'] = APOS;
186 	work();
187 }
188 
189 
190 
191 
192 
193 
194 static int
195 skeqn()
196 {
197 	while ((c = getc(infile)) != rdelim) {
198 		if (c == EOF) {
199 			c = eof();
200 		} else if (c == '"') {
201 			while ((c = getc(infile)) != '"') {
202 				if (c == EOF) {
203 					c = eof();
204 				} else if (c == '\\') {
205 					if ((c = getc(infile)) == EOF) {
206 						c = eof();
207 					}
208 				}
209 			}
210 		}
211 	}
212 	if (msflag) {
213 		return (c = 'x');
214 	}
215 	return (c = ' ');
216 }
217 
218 
/*
 * Open the file named by 'p' for reading and return its stream.
 * Callers must pass a non-null name.  If the file cannot be opened,
 * the name and the system error text are passed to fatal().
 */
static FILE *
opn(char *p)
{
	FILE *fp;

	assert(p != NULL);
	fp = fopen(p, "r");
	if (fp == NULL) {
		fatal(gettext("Cannot open file %s: %s\n"), p,
		    strerror(errno));
	}

	return (fp);
}
231 
232 
233 
234 static int
235 eof(void)
236 {
237 	if (infile != stdin)
238 		(void) fclose(infile);
239 	if (filesp > files) {
240 		infile = *--filesp;
241 	} else if (optind < argc) {
242 		infile = opn(argv[optind++]);
243 	} else {
244 		exit(0);
245 	}
246 
247 	return (C);
248 }
249 
250 
251 
252 static void
253 getfname(void)
254 {
255 	char *p;
256 	struct chain {
257 		struct chain *nextp;
258 		char *datap;
259 	};
260 	struct chain *q;
261 	static struct chain *namechain = NULL;
262 
263 	while (C == ' ')
264 		;
265 
266 	for (p = fname; ((*p = c) != '\n') && (c != ' ') && (c != '\t') &&
267 	    (c != '\\'); ++p) {
268 		(void) C;
269 	}
270 	*p = '\0';
271 	while (c != '\n') {
272 		(void) C;
273 	}
274 
275 	/* see if this name has already been used */
276 	for (q = namechain; q; q = q->nextp)
277 		if (strcmp(fname, q->datap) != 0) {
278 			fname[0] = '\0';
279 			return;
280 		}
281 
282 	q = (struct chain *)calloc(1, sizeof (*namechain));
283 	q->nextp = namechain;
284 	q->datap = copys(fname);
285 	namechain = q;
286 }
287 
288 
/*
 * Print "deroff: " followed by a printf-style formatted message on
 * stderr and exit with status 1.
 *
 * Functions calling fatal() should ensure 'format' and
 * arguments are non-null.
 */
static void
fatal(const char *format, ...)
{
	va_list	alist;

	assert(format != NULL);
	(void) fputs(gettext("deroff: "), stderr);
	va_start(alist, format);
	(void) vfprintf(stderr, format, alist);
	va_end(alist);	/* every va_start needs a matching va_end */
	exit(1);
}
304 
/*
 * Print the message 's', prefixed with "deroff: ", on stderr and exit
 * with status 1.  Callers must pass a non-null message.
 */
static void
fatal_msg(char *s)
{
	const char *fmt;

	assert(s != NULL);
	fmt = gettext("deroff: %s\n");
	(void) fprintf(stderr, fmt, s);
	exit(1);
}
313 
/* Print the usage message on stderr and exit with status 1. */
static void
usage(void)
{
	const char *msg = gettext("usage: deroff [ -w ] [ -m (m s l) ] "
	    "[ -i ] [ file ] ... \n");

	(void) fputs(msg, stderr);
	exit(1);
}
322 
323 static void
324 work(void)
325 {
326 
327 	for (;;) {
328 		if ((C == '.') || (c == '\''))
329 			comline();
330 		else
331 			regline(NO, TWO);
332 	}
333 }
334 
335 
336 static void
337 regline(int macline, int cnst)
338 {
339 
340 	if (line == NULL) {
341 		if ((line = (char *)malloc(linesize * sizeof (char))) == NULL) {
342 			fatal_msg(gettext("Cannot allocate memory"));
343 		}
344 	}
345 
346 	lindx = 0;
347 	line[lindx] = c;
348 	for (;;) {
349 		if (c == '\\') {
350 			line[lindx] = ' ';
351 			backsl();
352 			if (c == '%') {	/* no blank for hyphenation char */
353 				lindx--;
354 			}
355 		}
356 		if (c == '\n') {
357 			break;
358 		}
359 		/*
360 		 * We're just about to add another character to the line
361 		 * buffer so ensure we don't overrun it.
362 		 */
363 		if (++lindx >= linesize - 1) {
364 			linesize = linesize * 2;
365 			if ((line = (char *)realloc(line,
366 			    linesize * sizeof (char))) == NULL) {
367 				fatal_msg(gettext("Cannot allocate memory"));
368 			}
369 		}
370 		if (intable && (c == 'T')) {
371 			line[lindx] = C;
372 			if ((c == '{') || (c == '}')) {
373 				line[lindx - 1] = ' ';
374 				line[lindx] = C;
375 			}
376 		} else {
377 			line[lindx] = C;
378 		}
379 	}
380 
381 	line[lindx] = '\0';
382 
383 	if (line[0] != '\0') {
384 		if (wordflag) {
385 			putwords(macline);
386 		} else if (macline) {
387 			putmac(line, cnst);
388 		} else {
389 			(void) puts(line);
390 		}
391 	}
392 }
393 
394 
395 
396 
397 static void
398 putmac(char *s, int cnst)
399 {
400 	char *t;
401 
402 	while (*s) {
403 		while ((*s == ' ') || (*s == '\t')) {
404 			(void) putchar(*s++);
405 		}
406 		for (t = s; (*t != ' ') && (*t != '\t') && (*t != '\0'); ++t)
407 			;
408 		if (*s == '\"')
409 			s++;
410 		if ((t > s + cnst) && (chars[s[0]] == LETTER) &&
411 		    (chars[s[1]] == LETTER)) {
412 			while (s < t) {
413 				if (*s == '\"')
414 					s++;
415 				else
416 					(void) putchar(*s++);
417 			}
418 		} else {
419 			s = t;
420 		}
421 	}
422 	(void) putchar('\n');
423 }
424 
425 
426 
427 static void
428 putwords(int macline)	/* break into words for -w option */
429 {
430 	char *p, *p1;
431 	int i, nlet;
432 
433 	for (p1 = line; ; ) {
434 		/* skip initial specials ampersands and apostrophes */
435 		while (chars[*p1] < DIGIT) {
436 			if (*p1++ == '\0')
437 				return;
438 		}
439 		nlet = 0;
440 		for (p = p1; (i = chars[*p]) != SPECIAL; ++p) {
441 			if (i == LETTER)
442 				++nlet;
443 		}
444 
445 		if ((!macline && (nlet > 1)) /* MDM definition of word */ ||
446 		    (macline && (nlet > 2) && (chars[p1[0]] == LETTER) &&
447 		    (chars[p1[1]] == LETTER))) {
448 			/* delete trailing ampersands and apostrophes */
449 			while ((p[-1] == '\'') || (p[-1] == '&')) {
450 				--p;
451 			}
452 			while (p1 < p) {
453 				(void) putchar(*p1++);
454 			}
455 			(void) putchar('\n');
456 		} else {
457 			p1 = p;
458 		}
459 	}
460 }
461 
462 
463 
464 static void
465 comline(void)
466 {
467 	int c1, c2;
468 
469 com:
470 	while ((C == ' ') || (c == '\t'))
471 		;
472 comx:
473 	if ((c1 = c) == '\n')
474 		return;
475 	c2 = C;
476 	if ((c1 == '.') && (c2 != '.'))
477 		inmacro = NO;
478 	if (c2 == '\n')
479 		return;
480 
481 	if ((c1 == 'E') && (c2 == 'Q') && (filesp == files)) {
482 		eqn();
483 	} else if ((c1 == 'T') && ((c2 == 'S') || (c2 == 'C') ||
484 	    (c2 == '&')) && (filesp == files)) {
485 		if (msflag) {
486 			stbl();
487 		} else {
488 			tbl();
489 		}
490 	} else if ((c1 == 'T') && (c2 == 'E')) {
491 		intable = NO;
492 	} else if (!inmacro && (c1 == 'd') && (c2 == 'e')) {
493 		macro();
494 	} else if (!inmacro && (c1 == 'i') && (c2 == 'g')) {
495 		macro();
496 	} else if (!inmacro && (c1 == 'a') && (c2 == 'm')) {
497 		macro();
498 	} else if ((c1 == 's') && (c2 == 'o')) {
499 		if (iflag) {
500 			SKIP;
501 		} else {
502 			getfname();
503 			if (fname[0]) {
504 				infile = *++filesp = opn(fname);
505 			}
506 		}
507 	} else if ((c1 == 'n') && (c2 == 'x')) {
508 		if (iflag) {
509 			SKIP;
510 		} else {
511 			getfname();
512 			if (fname[0] == '\0') {
513 				exit(0);
514 			}
515 			if (infile != stdin) {
516 				(void) fclose(infile);
517 			}
518 			infile = *filesp = opn(fname);
519 		}
520 	} else if ((c1 == 'h') && (c2 == 'w')) {
521 		SKIP;
522 	} else if (msflag && (c1 == 'T') && (c2 == 'L')) {
523 		SKIP_TO_COM;
524 		goto comx;
525 	} else if (msflag && (c1 == 'N') && (c2 == 'R')) {
526 		SKIP;
527 	} else if (msflag && (c1 == 'A') && ((c2 == 'U') || (c2 == 'I'))) {
528 		if (mac == MM) {
529 			SKIP;
530 		} else {
531 			SKIP_TO_COM;
532 			goto comx;
533 		}
534 	} else if (msflag && (c1 == 'F') && (c2 == 'S')) {
535 		SKIP_TO_COM;
536 		goto comx;
537 	} else if (msflag && (c1 == 'S') && (c2 == 'H')) {
538 		SKIP_TO_COM;
539 		goto comx;
540 	} else if (msflag && (c1 == 'N') && (c2 == 'H')) {
541 		SKIP_TO_COM;
542 		goto comx;
543 	} else if (msflag && (c1 == 'O') && (c2 == 'K')) {
544 		SKIP_TO_COM;
545 		goto comx;
546 	} else if (msflag && (c1 == 'N') && (c2 == 'D')) {
547 		SKIP;
548 	} else if (msflag && (mac == MM) && (c1 == 'H') &&
549 	    ((c2 == ' ') || (c2 == 'U'))) {
550 		SKIP;
551 	} else if (msflag && (mac == MM) && (c2 == 'L')) {
552 		if (disp || (c1 == 'R')) {
553 			sdis('L', 'E');
554 		} else {
555 			SKIP;
556 			(void) putchar('.');
557 		}
558 	} else if (msflag && ((c1 == 'D') || (c1 == 'N') ||
559 	    (c1 == 'K') || (c1 == 'P')) && (c2 == 'S')) {
560 		sdis(c1, 'E');		/* removed RS-RE */
561 	} else if (msflag && (c1 == 'K' && c2 == 'F')) {
562 		sdis(c1, 'E');
563 	} else if (msflag && (c1 == 'n') && (c2 == 'f')) {
564 		sdis('f', 'i');
565 	} else if (msflag && (c1 == 'c') && (c2 == 'e')) {
566 		sce();
567 	} else {
568 		if ((c1 == '.') && (c2 == '.')) {
569 			while (C == '.')
570 				;
571 		}
572 		++inmacro;
573 		if ((c1 <= 'Z') && msflag) {
574 			regline(YES, ONE);
575 		} else {
576 			regline(YES, TWO);
577 		}
578 		--inmacro;
579 	}
580 }
581 
582 
583 
584 static void
585 macro(void)
586 {
587 	if (msflag) {
588 		/* look for  .. */
589 		do {
590 			SKIP;
591 		} while ((C != '.') || (C != '.') || (C == '.'));
592 		if (c != '\n') {
593 			SKIP;
594 		}
595 		return;
596 	}
597 	SKIP;
598 	inmacro = YES;
599 }
600 
601 
602 
603 
604 static void
605 sdis(char a1, char a2)
606 {
607 	int c1, c2;
608 	int eqnf;
609 	int notdone = 1;
610 	eqnf = 1;
611 	SKIP;
612 	while (notdone) {
613 		while (C != '.')
614 			SKIP;
615 		if ((c1 = C) == '\n')
616 			continue;
617 		if ((c2 = C) == '\n')
618 			continue;
619 		if ((c1 == a1) && (c2 == a2)) {
620 			SKIP;
621 			if (eqnf)
622 				(void) putchar('.');
623 			(void) putchar('\n');
624 			return;
625 		} else if ((a1 == 'D') && (c1 == 'E') && (c2 == 'Q')) {
626 			eqn();
627 			eqnf = 0;
628 		} else {
629 			SKIP;
630 		}
631 	}
632 }
633 
634 static void
635 tbl(void)
636 {
637 	while (C != '.')
638 		;
639 	SKIP;
640 	intable = YES;
641 }
642 
643 static void
644 stbl(void)
645 {
646 	while (C != '.')
647 		;
648 	SKIP_TO_COM;
649 	if ((c != 'T') || (C != 'E')) {
650 		SKIP;
651 		pc = c;
652 		while ((C != '.') || (pc != '\n') ||
653 		    (C != 'T') || (C != 'E')) {
654 			pc = c;
655 		}
656 	}
657 }
658 
/*
 * Skip an equation: discard input up to and including the line that
 * starts with '.' or '\'' followed by EN.  Input is read with C1, so
 * the in-line equation delimiter is not interpreted here.
 *
 * While skipping, a line that starts with "delim" sets ldelim and
 * rdelim to the two characters that follow; they are reset to NOCHAR
 * if the line ends first or the characters spell "off".
 *
 * With msflag set, and unless a line starting with "del" was seen,
 * "x " is written when the terminating line is reached, followed by
 * ". " if 'last' is set.
 */
static void
eqn(void)
{
	int c1, c2;
	int dflg;
	int last;

	last = 0;
	dflg = 1;
	/* skip the rest of the line that started the equation */
	SKIP;

	/* each iteration handles one input line */
	for (;;) {
		if ((C1 == '.') || (c == '\'')) {
			while ((C1 == ' ') || (c == '\t'))
				;
			if ((c == 'E') && (C1 == 'N')) {
				SKIP;
				if (msflag && dflg) {
					(void) putchar('x');
					(void) putchar(' ');
					if (last) {
						(void) putchar('.');
						(void) putchar(' ');
					}
				}
				return;
			}
		} else if (c == 'd') {	/* look for delim */
			if ((C1 == 'e') && (C1 == 'l')) {
				if ((C1 == 'i') && (C1 == 'm')) {
					while (C1 == ' ')
						;
					/*
					 * c1 and c2 are the two delimiter
					 * characters; the third read is only
					 * made to complete "off".
					 */
					if (((c1 = c) == '\n') ||
					    ((c2 = C1) == '\n') ||
					    ((c1 == 'o') && (c2 == 'f') &&
					    (C1 == 'f'))) {
						ldelim = NOCHAR;
						rdelim = NOCHAR;
					} else {
						ldelim = c1;
						rdelim = c2;
					}
				}
				/* cleared as soon as "del" has matched */
				dflg = 0;
			}
		}

		/*
		 * Consume the rest of the line.  'last' is updated for
		 * every character read here, so afterwards it tells
		 * whether the final one before the newline was a '.'.
		 */
		if (c != '\n') {
			while (C1 != '\n') {
				if (c == '.') {
					last = 1;
				} else {
					last = 0;
				}
			}
		}
	}
}
717 
718 
719 
/*
 * Skip over a complete backslash construction.  The backslash itself
 * has already been read; this function reads the character after it
 * and whatever else belongs to the construction.  Several cases step
 * lindx back, and c is left at the last character read (or set to
 * '0' after a size change).
 */
static void
backsl(void)	/* skip over a complete backslash construction */
{
	int bdelim;

sw:	switch (C) {
	case '"':
		/* skip the rest of the line */
		SKIP;
		return;
	case 's':
		/*
		 * Either another backslash construction follows, or the
		 * digits after \s are skipped and the first non-digit is
		 * pushed back onto the input.
		 */
		if (C == '\\') {
			backsl();
		} else {
			while ((C >= '0') && (c <= '9'))
				;
			(void) ungetc(c, infile);
			c = '0';
		}
		lindx--;
		return;

	case 'f':
	case 'n':
	case '*':
		/* one character follows unless it is '(' */
		if (C != '(')
			return;
		/* FALLTHROUGH */

	case '(':
		/* two characters follow; stop early at a newline */
		if (C != '\n') {
			(void) C;
		}
		return;

	case '$':
		(void) C;	/* discard argument number */
		return;

	case 'b':
	case 'x':
	case 'v':
	case 'h':
	case 'w':
	case 'o':
	case 'l':
	case 'L':
		/*
		 * The next character is the delimiter; skip up to the
		 * matching delimiter or the end of the line, handling
		 * nested backslash constructions on the way.
		 */
		if ((bdelim = C) == '\n')
			return;
		while ((C != '\n') && (c != bdelim))
			if (c == '\\')
				backsl();
		return;

	case '\\':
		/* with inmacro set, "\\" starts the construction over */
		if (inmacro)
			goto sw;
		/* FALLTHROUGH */
	default:
		return;
	}
}
780 
781 
782 
783 
/*
 * Return a newly allocated copy of the string 's'.  An allocation
 * failure is reported through fatal_msg().
 */
static char *
copys(char *s)
{
	char *dup;

	dup = calloc((unsigned)(strlen(s) + 1), sizeof (*dup));
	if (dup == NULL)
		fatal_msg(gettext("Cannot allocate memory"));

	(void) strcpy(dup, s);
	return (dup);
}
796 
797 static void
798 sce(void)
799 {
800 	char *ap;
801 	int n, i;
802 	char a[10];
803 
804 	for (ap = a; C != '\n'; ap++) {
805 		*ap = c;
806 		if (ap == &a[9]) {
807 			SKIP;
808 			ap = a;
809 			break;
810 		}
811 	}
812 	if (ap != a) {
813 		n = atoi(a);
814 	} else {
815 		n = 1;
816 	}
817 	for (i = 0; i < n; ) {
818 		if (C == '.') {
819 			if (C == 'c') {
820 				if (C == 'e') {
821 					while (C == ' ')
822 						;
823 					if (c == '0') {
824 						break;
825 					} else {
826 						SKIP;
827 					}
828 				} else {
829 					SKIP;
830 				}
831 			} else {
832 				SKIP;
833 			}
834 		} else {
835 			SKIP;
836 			i++;
837 		}
838 	}
839 }
840