xref: /openbsd/usr.bin/deroff/deroff.c (revision d89ec533)
1 /*	$OpenBSD: deroff.c,v 1.15 2016/09/04 15:29:21 tb Exp $	*/
2 
3 /*-
4  * Copyright (c) 1988, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 /*
32  * Copyright (C) Caldera International Inc.  2001-2002.
33  * All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code and documentation must retain the above
39  *    copyright notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgement:
45  *	This product includes software developed or owned by Caldera
46  *	International, Inc.
47  * 4. Neither the name of Caldera International, Inc. nor the names of other
48  *    contributors may be used to endorse or promote products derived from
49  *    this software without specific prior written permission.
50  *
51  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
52  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
53  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
54  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
55  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
56  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
57  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
58  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
60  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
61  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62  * POSSIBILITY OF SUCH DAMAGE.
63  */
64 
65 #include <err.h>
66 #include <limits.h>
67 #include <stdio.h>
68 #include <stdlib.h>
69 #include <string.h>
70 #include <unistd.h>
71 
/*
 *	Deroff command -- strip troff, eqn, and Tbl sequences from
 *	a file.  Has two flag arguments: -w, to cause output of one word
 *	per line rather than in the original format, and
 *	-mm (or -ms), which causes the corresponding macros to be interpreted
 *	so that just sentences are output.
 *	-ml  also gets rid of lists.
 *	Deroff follows .so and .nx commands, removes contents of macro
 *	definitions, equations (both .EQ ... .EN and $...$),
 *	Tbl command sequences, and Troff backslash constructions.
 *
 *	All input is through the Cget macro;
 *	the most recently read character is in c.
 *
 *	Modified by Robert Henry to process -me and -man macros.
 */
88 
/*
 * Cget reads the next character from infile into c.  On EOF it defers to
 * eof(); when the character equals ldelim and no nested file is open
 * (filesp == files) it defers to skeqn() to skip an in-line equation.
 * C1get is the same without the ldelim check.
 */
#define Cget ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() : c) )
#define C1get ( (c=getc(infile)) == EOF ? eof() :  c)

/* With DEBUG the readers are real functions (_C, _C1) instead of macros. */
#ifdef DEBUG
#  define C	_C()
#  define C1	_C1()
#else /* not DEBUG */
#  define C	Cget
#  define C1	C1get
#endif /* not DEBUG */

/*
 * SKIP expands to a while-loop header with no body; the ';' written after
 * it at the use site is the (empty) body, so it reads up to and including
 * the next newline.
 *
 * SKIP_TO_COM expands to several statements (it is not wrapped in
 * do { } while (0)), so it must only be used where a statement sequence is
 * valid.  It skips two newlines, then keeps reading until it has seen a
 * '.' directly after a newline followed by a character <= 'Z'.
 */
#define SKIP while (C != '\n')
#define SKIP_TO_COM SKIP; SKIP; pc=c; while (C != '.' || pc != '\n' || C > 'Z')pc=c
102 
/* Boolean values used for the option flags. */
#define	YES 1
#define	NO 0
/* Values of the global `mac': which macro package was selected with -m. */
#define	MS 0	/* -ms */
#define	MM 1	/* -mm */
#define	ME 2	/* -me */
#define	MA 3	/* -man */

#ifdef DEBUG
/* Printable package names, indexed by the MS/MM/ME/MA values above. */
char *mactab[] = { "-ms", "-mm", "-me", "-ma" };
#endif /* DEBUG */

/* Values passed as the `constant' (minimum word length cut-off) argument. */
#define	ONE 1
#define	TWO 2

/* Value of ldelim/rdelim when no eqn delimiter is in effect. */
#define NOCHAR -2
/* Character classes stored in chars[]. */
#define SPECIAL 0
#define APOS 1
#define PUNCT 2
#define DIGIT 3
#define LETTER 4

/* Number of slots in files[], the stack of open input files. */
#define MAXFILES 20
125 
126 int	iflag;
127 int	wordflag;
128 int	msflag;		/* processing a source written using a mac package */
129 int	mac;		/* which package */
130 int	disp;
131 int	parag;
132 int	inmacro;
133 int	intable;
134 int	keepblock;	/* keep blocks of text; normally false when msflag */
135 
136 char chars[128];  /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */
137 
138 char line[LINE_MAX];
139 char *lp;
140 
141 int c;
142 int pc;
143 int ldelim;
144 int rdelim;
145 
146 char fname[PATH_MAX];
147 FILE *files[MAXFILES];
148 FILE **filesp;
149 FILE *infile;
150 
151 int argc;
152 char **argv;
153 
154 /*
155  *	Macro processing
156  *
157  *	Macro table definitions
158  */
typedef	int pacmac;		/* compressed macro name */
int	argconcat = 0;		/* concat arguments together (-me only) */

/*
 * tomac packs two characters into a pacmac (first character in bits 8-15,
 * second in bits 0-7); frommac unpacks a pacmac into the lvalues c1 and c2.
 */
#define	tomac(c1, c2)		((((c1) & 0xFF) << 8) | ((c2) & 0xFF))
#define	frommac(src, c1, c2)	(((c1)=((src)>>8)&0xFF),((c2) =(src)&0xFF))

/*
 * One macro table entry: the macro applies when `condition' (one of the
 * NONE/FNEST/... flags) holds, and is handled by calling `func'.
 * A macname of 0 terminates a table (see sizetab() and macfill()).
 */
struct mactab{
	int	condition;
	pacmac	macname;
	int	(*func)();	/* XXX - args */
};

/* Macro tables; buildtab() merges the applicable ones into one table. */
struct	mactab	troffmactab[];
struct	mactab	ppmactab[];
struct	mactab	msmactab[];
struct	mactab	mmmactab[];
struct	mactab	memactab[];
struct	mactab	manmactab[];
177 
178 /*
179  *	Macro table initialization
180  */
/* Builds one struct mactab initializer from a condition, two name chars and a handler. */
#define	M(cond, c1, c2, func) {cond, tomac(c1, c2), func}

/*
 *	Flags for matching conditions other than
 *	the macro name.  comline() evaluates them; it has no case for MSF,
 *	so an entry using MSF never matches there.
 */
#define	NONE		0
#define	FNEST		1		/* no nested files */
#define	NOMAC		2		/* no macro */
#define	MAC		3		/* macro */
#define	PARAG		4		/* in a paragraph */
#define	MSF		5		/* msflag is on */
#define	NBLK		6		/* set if no blocks to be kept */

/*
 *	Return codes from macro minions, determine where to jump,
 *	how to repeat/reprocess text.  Any other value makes comline()
 *	return.
 */
#define	COMX		1		/* goto comx */
#define	COM		2		/* goto com */
201 
/*
 * Forward declarations.  The int-returning functions that take either no
 * argument or a pacmac have the shape called through struct mactab.func;
 * the void functions are the helpers they are built on.
 */
int	 skeqn(void);
int	 eof(void);
int	 _C1(void);
int	 _C(void);
int	 EQ(void);
int	 domacro(void);
int	 PS(void);
int	 skip(void);
int	 intbl(void);
int	 outtbl(void);
int	 so(void);
int	 nx(void);
int	 skiptocom(void);
int	 PP(pacmac);
int	 AU(void);
int	 SH(pacmac);
int	 UX(void);
int	 MMHU(pacmac);
int	 mesnblock(pacmac);
int	 mssnblock(pacmac);
int	 nf(void);
int	 ce(void);
int	 meip(pacmac);
int	 mepp(pacmac);
int	 mesh(pacmac);
int	 mefont(pacmac);
int	 manfont(pacmac);
int	 manpp(pacmac);
int	 macsort(const void *, const void *);
int	 sizetab(struct mactab *);
void	 getfname(void);
void	 textline(char *, int);
void	 work(void);
void	 regline(void (*)(char *, int), int);
void	 macro(void);
void	 tbl(void);
void	 stbl(void);
void	 eqn(void);
void	 backsl(void);
void	 sce(void);
void	 refer(int);
void	 inpic(void);
void	 msputmac(char *, int);
void	 msputwords(int);
void	 meputmac(char *, int);
void	 meputwords(int);
void	 noblock(char, char);
void	 defcomline(pacmac);
void	 comline(void);
void	 buildtab(struct mactab **, int *);
FILE	*opn(char *);
struct mactab *macfill(struct mactab *, struct mactab *);
__dead void usage(void);
255 
256 int
257 main(int ac, char **av)
258 {
259 	int	i, ch;
260 	int	errflg = 0;
261 	int	kflag = NO;
262 
263 	if (pledge("stdio rpath", NULL) == -1)
264 		err(1, "pledge");
265 
266 	iflag = NO;
267 	wordflag = NO;
268 	msflag = NO;
269 	mac = ME;
270 	disp = NO;
271 	parag = NO;
272 	inmacro = NO;
273 	intable = NO;
274 	ldelim	= NOCHAR;
275 	rdelim	= NOCHAR;
276 	keepblock = YES;
277 
278 	while ((ch = getopt(ac, av, "ikpwm:")) != -1) {
279 		switch (ch) {
280 		case 'i':
281 			iflag = YES;
282 			break;
283 		case 'k':
284 			kflag = YES;
285 			break;
286 		case 'm':
287 			msflag = YES;
288 			keepblock = NO;
289 			switch (optarg[0]) {
290 			case 'm':
291 				mac = MM;
292 				break;
293 			case 's':
294 				mac = MS;
295 				break;
296 			case 'e':
297 				mac = ME;
298 				break;
299 			case 'a':
300 				mac = MA;
301 				break;
302 			case 'l':
303 				disp = YES;
304 				break;
305 			default:
306 				errflg = 1;
307 				break;
308 			}
309 			if (optarg[1] != '\0')
310 				errflg = 1;
311 			break;
312 		case 'p':
313 			parag = YES;
314 			break;
315 		case 'w':
316 			wordflag = YES;
317 			kflag = YES;
318 			break;
319 		default:
320 			errflg = 1;
321 		}
322 	}
323 	argc = ac - optind;
324 	argv = av + optind;
325 
326 	if (kflag)
327 		keepblock = YES;
328 	if (errflg)
329 		usage();
330 
331 #ifdef DEBUG
332 	printf("msflag = %d, mac = %s, keepblock = %d, disp = %d\n",
333 		msflag, mactab[mac], keepblock, disp);
334 #endif /* DEBUG */
335 	if (argc == 0) {
336 		infile = stdin;
337 	} else {
338 		infile = opn(argv[0]);
339 		--argc;
340 		++argv;
341 	}
342 	files[0] = infile;
343 	filesp = &files[0];
344 
345 	for (i = 'a'; i <= 'z'; ++i)
346 		chars[i] = LETTER;
347 	for (i = 'A'; i <= 'Z'; ++i)
348 		chars[i] = LETTER;
349 	for (i = '0'; i <= '9'; ++i)
350 		chars[i] = DIGIT;
351 	chars['\''] = APOS;
352 	chars['&'] = APOS;
353 	chars['.'] = PUNCT;
354 	chars[','] = PUNCT;
355 	chars[';'] = PUNCT;
356 	chars['?'] = PUNCT;
357 	chars[':'] = PUNCT;
358 	work();
359 	exit(0);
360 }
361 
/*
 * Skip an in-line equation: consume input up to the right delimiter
 * (rdelim).  Delimiters inside a double-quoted string do not end the
 * equation.  Stores and returns 'x' in c when msflag is set, ' ' otherwise.
 */
int
skeqn(void)
{

	while ((c = getc(infile)) != rdelim) {
		if (c == EOF)
			c = eof();
		else if (c == '"') {
			/* inside quotes a backslash consumes the next character */
			while ((c = getc(infile)) != '"') {
				if (c == EOF ||
				    (c == '\\' && (c = getc(infile)) == EOF))
					c = eof();
			}
		}
	}
	if (msflag)
		return((c = 'x'));
	return((c = ' '));
}
381 
/*
 * Open the file named p for reading and return the stream.
 * Exits through err() if the file cannot be opened.
 */
FILE *
opn(char *p)
{
	FILE *stream = fopen(p, "r");

	if (stream == NULL)
		err(1, "fopen %s", p);
	return stream;
}
392 
393 int
394 eof(void)
395 {
396 
397 	if (infile != stdin)
398 		fclose(infile);
399 	if (filesp > files)
400 		infile = *--filesp;
401 	else if (argc > 0) {
402 		infile = opn(argv[0]);
403 		--argc;
404 		++argv;
405 	} else
406 		exit(0);
407 	return(C);
408 }
409 
410 void
411 getfname(void)
412 {
413 	char *p;
414 	struct chain {
415 		struct chain *nextp;
416 		char *datap;
417 	} *q;
418 	static struct chain *namechain= NULL;
419 
420 	while (C == ' ')
421 		;	/* nothing */
422 
423 	for (p = fname ; p - fname < sizeof(fname) && (*p = c) != '\n' &&
424 	    c != ' ' && c != '\t' && c != '\\'; ++p)
425 		C;
426 	*p = '\0';
427 	while (c != '\n')
428 		C;
429 
430 	/* see if this name has already been used */
431 	for (q = namechain ; q; q = q->nextp)
432 		if (strcmp(fname, q->datap) == 0) {
433 			fname[0] = '\0';
434 			return;
435 		}
436 
437 	q = malloc(sizeof(struct chain));
438 	if (q == NULL)
439 		err(1, NULL);
440 	q->nextp = namechain;
441 	q->datap = strdup(fname);
442 	if (q->datap == NULL)
443 		err(1, NULL);
444 	namechain = q;
445 }
446 
447 /*ARGSUSED*/
448 void
449 textline(char *str, int constant)
450 {
451 
452 	if (wordflag) {
453 		msputwords(0);
454 		return;
455 	}
456 	puts(str);
457 }
458 
459 void
460 work(void)
461 {
462 
463 	for (;;) {
464 		C;
465 #ifdef FULLDEBUG
466 		printf("Starting work with `%c'\n", c);
467 #endif /* FULLDEBUG */
468 		if (c == '.' || c == '\'')
469 			comline();
470 		else
471 			regline(textline, TWO);
472 	}
473 }
474 
475 void
476 regline(void (*pfunc)(char *, int), int constant)
477 {
478 
479 	line[0] = c;
480 	lp = line;
481 	while (lp - line < sizeof(line)) {
482 		if (c == '\\') {
483 			*lp = ' ';
484 			backsl();
485 		}
486 		if (c == '\n')
487 			break;
488 		if (intable && c == 'T') {
489 			*++lp = C;
490 			if (c == '{' || c == '}') {
491 				lp[-1] = ' ';
492 				*lp = C;
493 			}
494 		} else {
495 			*++lp = C;
496 		}
497 	}
498 	*lp = '\0';
499 
500 	if (line[0] != '\0')
501 		(*pfunc)(line, constant);
502 }
503 
/*
 * Handle the start of a macro definition.  With msflag set the whole
 * definition is discarded; otherwise only the current line is skipped and
 * inmacro is set.
 */
void
macro(void)
{

	if (msflag) {
		/*
		 * The condition reads up to three characters per pass,
		 * relying on short-circuit evaluation: the loop ends at a
		 * line that starts with ".." not followed by a third '.'.
		 */
		do {
			SKIP;
		} while (C!='.' || C!='.' || C=='.');	/* look for  .. */
		if (c != '\n')
			SKIP;
		return;
	}
	SKIP;
	inmacro = YES;
}
519 
520 void
521 tbl(void)
522 {
523 
524 	while (C != '.')
525 		;	/* nothing */
526 	SKIP;
527 	intable = YES;
528 }
529 
/*
 * Table handling used by intbl() when msflag is set.  Reads up to the
 * next '.', runs SKIP_TO_COM, and unless the characters found there are
 * "TE" keeps reading until a '.' directly after a newline is followed
 * by "TE".
 */
void
stbl(void)
{

	while (C != '.')
		;	/* nothing */
	SKIP_TO_COM;
	if (c != 'T' || C != 'E') {
		SKIP;
		pc = c;
		/* each C in the condition consumes one more input character */
		while (C != '.' || pc != '\n' || C != 'T' || C != 'E')
			pc = c;
	}
}
544 
545 void
546 eqn(void)
547 {
548 	int c1, c2;
549 	int dflg;
550 	char last;
551 
552 	last=0;
553 	dflg = 1;
554 	SKIP;
555 
556 	for (;;) {
557 		if (C1 == '.'  || c == '\'') {
558 			while (C1 == ' ' || c == '\t')
559 				;
560 			if (c == 'E' && C1 == 'N') {
561 				SKIP;
562 				if (msflag && dflg) {
563 					putchar('x');
564 					putchar(' ');
565 					if (last) {
566 						putchar(last);
567 						putchar('\n');
568 					}
569 				}
570 				return;
571 			}
572 		} else if (c == 'd') {
573 			/* look for delim */
574 			if (C1 == 'e' && C1 == 'l')
575 				if (C1 == 'i' && C1 == 'm') {
576 					while (C1 == ' ')
577 						;	/* nothing */
578 
579 					if ((c1 = c) == '\n' ||
580 					    (c2 = C1) == '\n' ||
581 					    (c1 == 'o' && c2 == 'f' && C1=='f')) {
582 						ldelim = NOCHAR;
583 						rdelim = NOCHAR;
584 					} else {
585 						ldelim = c1;
586 						rdelim = c2;
587 					}
588 				}
589 			dflg = 0;
590 		}
591 
592 		if (c != '\n')
593 			while (C1 != '\n') {
594 				if (chars[c] == PUNCT)
595 					last = c;
596 				else if (c != ' ')
597 					last = 0;
598 			}
599 	}
600 }
601 
/*
 * skip over a complete backslash construction
 *
 * Called with the backslash already read.  Some cases adjust the global
 * line pointer lp or overwrite *lp, so this is meant to be used while a
 * line is being collected in line[].
 */
void
backsl(void)
{
	int bdelim;

sw:
	switch (C) {
	case '"':
		/* comment: discard the rest of the line */
		SKIP;
		return;

	case 's':
		/* size change: skip a nested escape or a run of digits */
		if (C == '\\')
			backsl();
		else {
			while (C >= '0' && c <= '9')
				;	/* nothing */
			ungetc(c, infile);
			c = '0';
		}
		--lp;
		return;

	case 'f':
	case 'n':
	case '*':
		if (C != '(')
			return;
		/* FALLTHROUGH */

	case '(':
		/* two-character name; with msflag "em" is turned into '-' */
		if (msflag) {
			if (C == 'e') {
				if (C == 'm') {
					*lp = '-';
					return;
				}
			}
			else if (c != '\n')
				C;
			return;
		}
		if (C != '\n')
			C;
		return;

	case '$':
		C;	/* discard argument number */
		return;

	case 'b':
	case 'x':
	case 'v':
	case 'h':
	case 'w':
	case 'o':
	case 'l':
	case 'L':
		/* delimited argument: skip up to the matching delimiter */
		if ((bdelim = C) == '\n')
			return;
		while (C != '\n' && c != bdelim)
			if (c == '\\')
				backsl();
		return;

	case '\\':
		/* doubled backslash: re-dispatch on the next char in a macro */
		if (inmacro)
			goto sw;
		/* FALLTHROUGH */

	default:
		return;
	}
}
675 
676 void
677 sce(void)
678 {
679 	char *ap;
680 	int n, i;
681 	char a[10];
682 
683 	for (ap = a; C != '\n'; ap++) {
684 		*ap = c;
685 		if (ap == &a[9]) {
686 			SKIP;
687 			ap = a;
688 			break;
689 		}
690 	}
691 	if (ap != a)
692 		n = atoi(a);
693 	else
694 		n = 1;
695 	for (i = 0; i < n;) {
696 		if (C == '.') {
697 			if (C == 'c') {
698 				if (C == 'e') {
699 					while (C == ' ')
700 						;	/* nothing */
701 					if (c == '0') {
702 						SKIP;
703 						break;
704 					} else
705 						SKIP;
706 				}
707 				else
708 					SKIP;
709 			} else if (c == 'P' || C == 'P') {
710 				if (c != '\n')
711 					SKIP;
712 				break;
713 			} else if (c != '\n')
714 				SKIP;
715 		} else {
716 			SKIP;
717 			i++;
718 		}
719 	}
720 }
721 
722 void
723 refer(int c1)
724 {
725 	int c2;
726 
727 	if (c1 != '\n')
728 		SKIP;
729 
730 	for (c2 = -1;;) {
731 		if (C != '.')
732 			SKIP;
733 		else {
734 			if (C != ']')
735 				SKIP;
736 			else {
737 				while (C != '\n')
738 					c2 = c;
739 				if (c2 != -1 && chars[c2] == PUNCT)
740 					putchar(c2);
741 				return;
742 			}
743 		}
744 	}
745 }
746 
747 void
748 inpic(void)
749 {
750 	int c1;
751 	char *p1, *ep;
752 
753 	SKIP;
754 	p1 = line;
755 	ep = line + sizeof(line) - 1;
756 	c = '\n';
757 	for (;;) {
758 		c1 = c;
759 		if (C == '.' && c1 == '\n') {
760 			if (C != 'P') {
761 				if (c == '\n')
762 					continue;
763 				else {
764 					SKIP;
765 					c = '\n';
766 					continue;
767 				}
768 			}
769 			if (C != 'E') {
770 				if (c == '\n')
771 					continue;
772 				else {
773 					SKIP;
774 					c = '\n';
775 					continue;
776 				}
777 			}
778 			SKIP;
779 			return;
780 		}
781 		else if (c == '\"') {
782 			while (C != '\"') {
783 				if (c == '\\') {
784 					if (C == '\"')
785 						continue;
786 					ungetc(c, infile);
787 					backsl();
788 				} else if (p1 + 1 >= ep) {
789 					errx(1, ".PS length exceeds limit");
790 				} else {
791 					*p1++ = c;
792 				}
793 			}
794 			*p1++ = ' ';
795 		}
796 		else if (c == '\n' && p1 != line) {
797 			*p1 = '\0';
798 			if (wordflag)
799 				msputwords(NO);
800 			else {
801 				puts(line);
802 				putchar('\n');
803 			}
804 			p1 = line;
805 		}
806 	}
807 }
808 
#ifdef DEBUG
/*
 * Function forms of the C1get and Cget reader macros; with DEBUG
 * defined, C1 and C expand to calls of these.
 */
int
_C1(void)
{
	return C1get;
}

int
_C(void)
{
	return Cget;
}
#endif /* DEBUG */
824 
825 /*
826  *	Put out a macro line, using ms and mm conventions.
827  */
828 void
829 msputmac(char *s, int constant)
830 {
831 	char *t;
832 	int found;
833 	int last;
834 
835 	last = 0;
836 	found = 0;
837 	if (wordflag) {
838 		msputwords(YES);
839 		return;
840 	}
841 	while (*s) {
842 		while (*s == ' ' || *s == '\t')
843 			putchar(*s++);
844 		for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t)
845 			;	/* nothing */
846 		if (*s == '\"')
847 			s++;
848 		if (t > s + constant && chars[(unsigned char)s[0]] == LETTER &&
849 		    chars[(unsigned char)s[1]] == LETTER) {
850 			while (s < t)
851 				if (*s == '\"')
852 					s++;
853 				else
854 					putchar(*s++);
855 			last = *(t-1);
856 			found++;
857 		} else if (found && chars[(unsigned char)s[0]] == PUNCT &&
858 		    s[1] == '\0') {
859 			putchar(*s++);
860 		} else {
861 			last = *(t - 1);
862 			s = t;
863 		}
864 	}
865 	putchar('\n');
866 	if (msflag && chars[last] == PUNCT) {
867 		putchar(last);
868 		putchar('\n');
869 	}
870 }
871 
/*
 *	put out words (for the -w option) with ms and mm conventions
 *
 *	Works on the global line[] buffer.  A candidate word is a run of
 *	non-SPECIAL characters starting at a digit or letter; it is printed
 *	on its own line only when it starts with a letter and contains at
 *	least two letters.  The macline argument is not used.
 */
void
msputwords(int macline)
{
	char *p, *p1;
	int i, nlet;

	for (p1 = line;;) {
		/*
		 *	skip initial specials ampersands and apostrophes
		 *	(the terminating NUL is SPECIAL too and ends the scan)
		 */
		while (chars[(unsigned char)*p1] < DIGIT)
			if (*p1++ == '\0')
				return;
		nlet = 0;
		/* find the end of the candidate word and count its letters */
		for (p = p1 ; (i = chars[(unsigned char)*p]) != SPECIAL ; ++p)
			if (i == LETTER)
				++nlet;

		if (nlet > 1 && chars[(unsigned char)p1[0]] == LETTER) {
			/*
			 *	delete trailing ampersands and apostrophes
			 *	(and punctuation)
			 */
			while ((i = chars[(unsigned char)p[-1]]) == PUNCT ||
			    i == APOS )
				--p;
			while (p1 < p)
				putchar(*p1++);
			putchar('\n');
		} else {
			p1 = p;
		}
	}
}
908 
909 /*
910  *	put out a macro using the me conventions
911  */
/*
 * Advance the char pointer cp past blanks and tabs (SKIPBLANK), or up to
 * the next blank, tab or terminating NUL (SKIPNONBLANK).  cp must be a
 * modifiable lvalue and is evaluated several times.
 */
#define SKIPBLANK(cp)	while (*(cp) == ' ' || *(cp) == '\t') { (cp)++; }
#define SKIPNONBLANK(cp) while (*(cp) != ' ' && *(cp) != '\t' && *(cp) != '\0') { (cp)++; }
914 
915 void
916 meputmac(char *cp, int constant)
917 {
918 	char	*np;
919 	int	found;
920 	int	argno;
921 	int	last;
922 	int	inquote;
923 
924 	last = 0;
925 	found = 0;
926 	if (wordflag) {
927 		meputwords(YES);
928 		return;
929 	}
930 	for (argno = 0; *cp; argno++) {
931 		SKIPBLANK(cp);
932 		inquote = (*cp == '"');
933 		if (inquote)
934 			cp++;
935 		for (np = cp; *np; np++) {
936 			switch (*np) {
937 			case '\n':
938 			case '\0':
939 				break;
940 
941 			case '\t':
942 			case ' ':
943 				if (inquote)
944 					continue;
945 				else
946 					goto endarg;
947 
948 			case '"':
949 				if (inquote && np[1] == '"') {
950 					memmove(np, np + 1, strlen(np));
951 					np++;
952 					continue;
953 				} else {
954 					*np = ' ';	/* bye bye " */
955 					goto endarg;
956 				}
957 
958 			default:
959 				continue;
960 			}
961 		}
962 		endarg: ;
963 		/*
964 		 *	cp points at the first char in the arg
965 		 *	np points one beyond the last char in the arg
966 		 */
967 		if ((argconcat == 0) || (argconcat != argno))
968 			putchar(' ');
969 #ifdef FULLDEBUG
970 		{
971 			char	*p;
972 			printf("[%d,%d: ", argno, np - cp);
973 			for (p = cp; p < np; p++) {
974 				putchar(*p);
975 			}
976 			printf("]");
977 		}
978 #endif /* FULLDEBUG */
979 		/*
980 		 *	Determine if the argument merits being printed
981 		 *
982 		 *	constant is the cut off point below which something
983 		 *	is not a word.
984 		 */
985 		if (((np - cp) > constant) &&
986 		    (inquote || (chars[(unsigned char)cp[0]] == LETTER))) {
987 			for (cp = cp; cp < np; cp++)
988 				putchar(*cp);
989 			last = np[-1];
990 			found++;
991 		} else if (found && (np - cp == 1) &&
992 		    chars[(unsigned char)*cp] == PUNCT) {
993 			putchar(*cp);
994 		} else {
995 			last = np[-1];
996 		}
997 		cp = np;
998 	}
999 	if (msflag && chars[last] == PUNCT)
1000 		putchar(last);
1001 	putchar('\n');
1002 }
1003 
/*
 *	Put out words (for the -w option) for the me package; this simply
 *	forwards to msputwords().
 */
void
meputwords(int macline)
{
	msputwords(macline);
}
1013 
/*
 *
 *	Skip over a nested set of macros
 *
 *	Reads and discards input until a line starting with '.' followed
 *	by the two characters a1 and a2 is found.
 *
 *	Possible arguments to noblock are:
 *
 *	fi	end of unfilled text
 *	PE	pic ending
 *	DE	display ending
 *
 *	for ms and mm only:
 *		KE	keep ending
 *
 *		NE	undocumented match to NS (for mm?)
 *		LE	mm only: matches RL or *L (for lists)
 *
 *	for me:
 *		([lqbzcdf]
 */
void
noblock(char a1, char a2)
{
	int c1,c2;
	int eqnf;
	int lct;

	lct = 0;	/* nesting depth of lists seen while looking for LE */
	eqnf = 1;	/* cleared once a nested equation has been skipped */
	SKIP;
	for (;;) {
		/* find the next line that starts with '.' */
		while (C != '.')
			if (c == '\n')
				continue;
			else
				SKIP;
		if ((c1 = C) == '\n')
			continue;
		if ((c2 = C) == '\n')
			continue;
		if (c1 == a1 && c2 == a2) {
			SKIP;
			if (lct != 0) {
				/* this only closes a nested list */
				lct--;
				continue;
			}
			if (eqnf)
				putchar('.');
			putchar('\n');
			return;
		} else if (a1 == 'L' && c2 == 'L') {
			lct++;
			SKIP;
		}
		/*
		 *	equations (EQ) nested within a display
		 */
		else if (c1 == 'E' && c2 == 'Q') {
			if ((mac == ME && a1 == ')')
			    || (mac != ME && a1 == 'D')) {
				eqn();
				eqnf=0;
			}
		}
		/*
		 *	turning on filling is done by the paragraphing
		 *	macros
		 */
		else if (a1 == 'f') {	/* .fi */
			if  ((mac == ME && (c2 == 'h' || c2 == 'p'))
			    || (mac != ME && (c1 == 'P' || c2 == 'P'))) {
				SKIP;
				return;
			}
		} else {
			SKIP;
		}
	}
}
1092 
/*
 * Wrapper giving eqn() the int-returning handler shape.
 * Always returns 0.
 */
int
EQ(void)
{
	eqn();
	return 0;
}
1100 
/*
 * Wrapper giving macro() the int-returning handler shape.
 * Always returns 0.
 */
int
domacro(void)
{
	macro();
	return 0;
}
1108 
1109 int
1110 PS(void)
1111 {
1112 
1113 	for (C; c == ' ' || c == '\t'; C)
1114 		;	/* nothing */
1115 
1116 	if (c == '<') {		/* ".PS < file" -- don't expect a .PE */
1117 		SKIP;
1118 		return(0);
1119 	}
1120 	if (!msflag)
1121 		inpic();
1122 	else
1123 		noblock('P', 'E');
1124 	return(0);
1125 }
1126 
1127 int
1128 skip(void)
1129 {
1130 
1131 	SKIP;
1132 	return(0);
1133 }
1134 
1135 int
1136 intbl(void)
1137 {
1138 
1139 	if (msflag)
1140 		stbl();
1141 	else
1142 		tbl();
1143 	return(0);
1144 }
1145 
1146 int
1147 outtbl(void)
1148 {
1149 
1150 	intable = NO;
1151 	return(0);
1152 }
1153 
1154 int
1155 so(void)
1156 {
1157 
1158 	if (!iflag) {
1159 		getfname();
1160 		if (fname[0]) {
1161 			if (++filesp - &files[0] > MAXFILES)
1162 				err(1, "too many nested files (max %d)",
1163 				    MAXFILES);
1164 			infile = *filesp = opn(fname);
1165 		}
1166 	}
1167 	return(0);
1168 }
1169 
1170 int
1171 nx(void)
1172 {
1173 
1174 	if (!iflag) {
1175 		getfname();
1176 		if (fname[0] == '\0')
1177 			exit(0);
1178 		if (infile != stdin)
1179 			fclose(infile);
1180 		infile = *filesp = opn(fname);
1181 	}
1182 	return(0);
1183 }
1184 
1185 int
1186 skiptocom(void)
1187 {
1188 
1189 	SKIP_TO_COM;
1190 	return(COMX);
1191 }
1192 
1193 int
1194 PP(pacmac c12)
1195 {
1196 	int c1, c2;
1197 
1198 	frommac(c12, c1, c2);
1199 	printf(".%c%c", c1, c2);
1200 	while (C != '\n')
1201 		putchar(c);
1202 	putchar('\n');
1203 	return(0);
1204 }
1205 
1206 int
1207 AU(void)
1208 {
1209 
1210 	if (mac == MM)
1211 		return(0);
1212 	SKIP_TO_COM;
1213 	return(COMX);
1214 }
1215 
/*
 * Section-heading handler.  With parag set, the macro line is echoed and
 * every following line is printed prefixed with '!' until a line starting
 * with '.' is reached; COM is then returned so that comline() processes
 * that line.  Without parag it runs SKIP_TO_COM and returns COMX.
 */
int
SH(pacmac c12)
{
	int c1, c2;

	frommac(c12, c1, c2);

	if (parag) {
		printf(".%c%c", c1, c2);
		while (C != '\n')
			putchar(c);
		putchar(c);
		putchar('!');
		for (;;) {
			while (C != '\n')
				putchar(c);
			putchar('\n');
			/* a leading '.' ends the heading text */
			if (C == '.')
				return(COM);
			putchar('!');
			putchar(c);
		}
		/*NOTREACHED*/
	} else {
		SKIP_TO_COM;
		return(COMX);
	}
}
1244 
1245 int
1246 UX(void)
1247 {
1248 
1249 	if (wordflag)
1250 		printf("UNIX\n");
1251 	else
1252 		printf("UNIX ");
1253 	return(0);
1254 }
1255 
1256 int
1257 MMHU(pacmac c12)
1258 {
1259 	int c1, c2;
1260 
1261 	frommac(c12, c1, c2);
1262 	if (parag) {
1263 		printf(".%c%c", c1, c2);
1264 		while (C != '\n')
1265 			putchar(c);
1266 		putchar('\n');
1267 	} else {
1268 		SKIP;
1269 	}
1270 	return(0);
1271 }
1272 
1273 int
1274 mesnblock(pacmac c12)
1275 {
1276 	int c1, c2;
1277 
1278 	frommac(c12, c1, c2);
1279 	noblock(')', c2);
1280 	return(0);
1281 }
1282 
1283 int
1284 mssnblock(pacmac c12)
1285 {
1286 	int c1, c2;
1287 
1288 	frommac(c12, c1, c2);
1289 	noblock(c1, 'E');
1290 	return(0);
1291 }
1292 
/*
 * Skip unfilled text up to the matching "fi" via noblock().
 * Always returns 0.
 */
int
nf(void)
{
	noblock('f', 'i');
	return 0;
}
1300 
/*
 * Wrapper giving sce() the int-returning handler shape.
 * Always returns 0.
 */
int
ce(void)
{
	sce();
	return 0;
}
1308 
1309 int
1310 meip(pacmac c12)
1311 {
1312 
1313 	if (parag)
1314 		mepp(c12);
1315 	else if (wordflag)	/* save the tag */
1316 		regline(meputmac, ONE);
1317 	else
1318 		SKIP;
1319 	return(0);
1320 }
1321 
1322 /*
1323  *	only called for -me .pp or .sh, when parag is on
1324  */
1325 int
1326 mepp(pacmac c12)
1327 {
1328 
1329 	PP(c12);		/* eats the line */
1330 	return(0);
1331 }
1332 
1333 /*
1334  *	Start of a section heading; output the section name if doing words
1335  */
1336 int
1337 mesh(pacmac c12)
1338 {
1339 
1340 	if (parag)
1341 		mepp(c12);
1342 	else if (wordflag)
1343 		defcomline(c12);
1344 	else
1345 		SKIP;
1346 	return(0);
1347 }
1348 
1349 /*
1350  *	process a font setting
1351  */
1352 int
1353 mefont(pacmac c12)
1354 {
1355 
1356 	argconcat = 1;
1357 	defcomline(c12);
1358 	argconcat = 0;
1359 	return(0);
1360 }
1361 
1362 int
1363 manfont(pacmac c12)
1364 {
1365 
1366 	return(mefont(c12));
1367 }
1368 
1369 int
1370 manpp(pacmac c12)
1371 {
1372 
1373 	return(mepp(c12));
1374 }
1375 
/*
 * Default handling of a macro line whose (packed) name is c12: after two
 * special cases the arguments of the line are collected with regline()
 * and put out by msputmac() or meputmac(), depending on the package.
 * inmacro is incremented for the duration of that processing.
 */
void
defcomline(pacmac c12)
{
	int c1, c2;

	frommac(c12, c1, c2);
	/* mm macros ending in 'L': skip the whole list, or just the line */
	if (msflag && mac == MM && c2 == 'L') {
		if (disp || c1 == 'R') {
			noblock('L', 'E');
		} else {
			SKIP;
			putchar('.');
		}
	}
	/* a line starting with ".." */
	else if (c1 == '.' && c2 == '.') {
		if (msflag) {
			SKIP;
			return;
		}
		while (C == '.')
			/*VOID*/;
	}
	++inmacro;
	/*
	 *	Process the arguments to the macro
	 */
	switch (mac) {
	default:
	case MM:
	case MS:
		/* lower cut-off for names whose first char is <= 'Z' */
		if (c1 <= 'Z' && msflag)
			regline(msputmac, ONE);
		else
			regline(msputmac, TWO);
		break;
	case ME:
		regline(meputmac, ONE);
		break;
	}
	--inmacro;
}
1417 
/*
 * Process a request/macro line (work() calls this when a line starts
 * with '.' or '\'').  The two characters of the name are packed into a
 * pacmac and looked up in the merged macro table, which is built on the
 * first call.  If an entry is found and its condition holds, its handler
 * is called; a return value of COMX or COM restarts processing at the
 * matching label, anything else ends the call.  Lines without a matching
 * entry are handed to defcomline().
 */
void
comline(void)
{
	int	c1;
	int	c2;
	pacmac	c12;
	int	mid;
	int	lb, ub;
	int	hit;
	static	int	tabsize = 0;
	static	struct	mactab	*mactab = NULL;
	struct	mactab	*mp;

	if (mactab == 0)
		 buildtab(&mactab, &tabsize);
com:
	/* skip blanks and tabs between the control character and the name */
	while (C == ' ' || c == '\t')
		;
comx:
	if ((c1 = c) == '\n')
		return;
	c2 = C;
	if (c1 == '.' && c2 != '.')
		inmacro = NO;
	if (msflag && c1 == '[') {
		refer(c2);
		return;
	}
	if (parag && mac==MM && c1 == 'P' && c2 == '\n') {
		printf(".P\n");
		return;
	}
	if (c2 == '\n')
		return;
	/*
	 *	Single letter macro
	 */
	if (mac == ME && (c2 == ' ' || c2 == '\t') )
		c2 = ' ';
	c12 = tomac(c1, c2);
	/*
	 *	binary search through the table of macros
	 */
	lb = 0;
	ub = tabsize - 1;
	while (lb <= ub) {
		mid = (ub + lb) / 2;
		mp = &mactab[mid];
		if (mp->macname < c12)
			lb = mid + 1;
		else if (mp->macname > c12)
			ub = mid - 1;
		else {
			hit = 1;
#ifdef FULLDEBUG
			printf("preliminary hit macro %c%c ", c1, c2);
#endif /* FULLDEBUG */
			/* the name matches; now check the entry's condition */
			switch (mp->condition) {
			case NONE:
				hit = YES;
				break;
			case FNEST:
				hit = (filesp == files);
				break;
			case NOMAC:
				hit = !inmacro;
				break;
			case MAC:
				hit = inmacro;
				break;
			case PARAG:
				hit = parag;
				break;
			case NBLK:
				hit = !keepblock;
				break;
			default:
				hit = 0;
			}

			if (hit) {
#ifdef FULLDEBUG
				printf("MATCH\n");
#endif /* FULLDEBUG */
				switch ((*(mp->func))(c12)) {
				default:
					return;
				case COMX:
					goto comx;
				case COM:
					goto com;
				}
			}
#ifdef FULLDEBUG
			printf("FAIL\n");
#endif /* FULLDEBUG */
			/* condition not met: fall back to defcomline() */
			break;
		}
	}
	defcomline(c12);
}
1519 
1520 int
1521 macsort(const void *p1, const void *p2)
1522 {
1523 	struct mactab *t1 = (struct mactab *)p1;
1524 	struct mactab *t2 = (struct mactab *)p2;
1525 
1526 	return(t1->macname - t2->macname);
1527 }
1528 
1529 int
1530 sizetab(struct mactab *mp)
1531 {
1532 	int i;
1533 
1534 	i = 0;
1535 	if (mp) {
1536 		for (; mp->macname; mp++, i++)
1537 			/*VOID*/ ;
1538 	}
1539 	return(i);
1540 }
1541 
1542 struct mactab *
1543 macfill(struct mactab *dst, struct mactab *src)
1544 {
1545 
1546 	if (src) {
1547 		while (src->macname)
1548 			*dst++ = *src++;
1549 	}
1550 	return(dst);
1551 }
1552 
1553 __dead void
1554 usage(void)
1555 {
1556 	extern char *__progname;
1557 
1558 	fprintf(stderr, "usage: %s [-ikpw] [-m a | e | l | m | s] [file ...]\n", __progname);
1559 	exit(1);
1560 }
1561 
1562 void
1563 buildtab(struct mactab **r_back, int *r_size)
1564 {
1565 	int	size;
1566 	struct	mactab	*p, *p1, *p2;
1567 	struct	mactab	*back;
1568 
1569 	size = sizetab(troffmactab) + sizetab(ppmactab);
1570 	p1 = p2 = NULL;
1571 	if (msflag) {
1572 		switch (mac) {
1573 		case ME:
1574 			p1 = memactab;
1575 			break;
1576 		case MM:
1577 			p1 = msmactab;
1578 			p2 = mmmactab;
1579 			break;
1580 		case MS:
1581 			p1 = msmactab;
1582 			break;
1583 		case MA:
1584 			p1 = manmactab;
1585 			break;
1586 		default:
1587 			break;
1588 		}
1589 	}
1590 	size += sizetab(p1);
1591 	size += sizetab(p2);
1592 	back = calloc(size+2, sizeof(struct mactab));
1593 	if (back == NULL)
1594 		err(1, NULL);
1595 
1596 	p = macfill(back, troffmactab);
1597 	p = macfill(p, ppmactab);
1598 	p = macfill(p, p1);
1599 	p = macfill(p, p2);
1600 
1601 	qsort(back, size, sizeof(struct mactab), macsort);
1602 	*r_size = size;
1603 	*r_back = back;
1604 }
1605 
1606 /*
1607  *	troff commands
1608  */
1609 struct	mactab	troffmactab[] = {
1610 	M(NONE,		'\\','"',	skip),	/* comment */
1611 	M(NOMAC,	'd','e',	domacro),	/* define */
1612 	M(NOMAC,	'i','g',	domacro),	/* ignore till .. */
1613 	M(NOMAC,	'a','m',	domacro),	/* append macro */
1614 	M(NBLK,		'n','f',	nf),	/* filled */
1615 	M(NBLK,		'c','e',	ce),	/* centered */
1616 
1617 	M(NONE,		's','o',	so),	/* source a file */
1618 	M(NONE,		'n','x',	nx),	/* go to next file */
1619 
1620 	M(NONE,		't','m',	skip),	/* print string on tty */
1621 	M(NONE,		'h','w',	skip),	/* exception hyphen words */
1622 	M(NONE,		0,0,		0)
1623 };
1624 
1625 /*
1626  *	Preprocessor output
1627  */
1628 struct	mactab	ppmactab[] = {
1629 	M(FNEST,	'E','Q',	EQ),	/* equation starting */
1630 	M(FNEST,	'T','S',	intbl),	/* table starting */
1631 	M(FNEST,	'T','C',	intbl),	/* alternative table? */
1632 	M(FNEST,	'T','&',	intbl),	/* table reformatting */
1633 	M(NONE,		'T','E',	outtbl),/* table ending */
1634 	M(NONE,		'P','S',	PS),	/* picture starting */
1635 	M(NONE,		0,0,		0)
1636 };
1637 
1638 /*
1639  *	Particular to ms and mm
1640  */
1641 struct	mactab	msmactab[] = {
1642 	M(NONE,		'T','L',	skiptocom),	/* title follows */
1643 	M(NONE,		'F','S',	skiptocom),	/* start footnote */
1644 	M(NONE,		'O','K',	skiptocom),	/* Other kws */
1645 
1646 	M(NONE,		'N','R',	skip),	/* undocumented */
1647 	M(NONE,		'N','D',	skip),	/* use supplied date */
1648 
1649 	M(PARAG,	'P','P',	PP),	/* begin parag */
1650 	M(PARAG,	'I','P',	PP),	/* begin indent parag, tag x */
1651 	M(PARAG,	'L','P',	PP),	/* left blocked parag */
1652 
1653 	M(NONE,		'A','U',	AU),	/* author */
1654 	M(NONE,		'A','I',	AU),	/* authors institution */
1655 
1656 	M(NONE,		'S','H',	SH),	/* section heading */
1657 	M(NONE,		'S','N',	SH),	/* undocumented */
1658 	M(NONE,		'U','X',	UX),	/* unix */
1659 
1660 	M(NBLK,		'D','S',	mssnblock),	/* start display text */
1661 	M(NBLK,		'K','S',	mssnblock),	/* start keep */
1662 	M(NBLK,		'K','F',	mssnblock),	/* start float keep */
1663 	M(NONE,		0,0,		0)
1664 };
1665 
1666 struct	mactab	mmmactab[] = {
1667 	M(NONE,		'H',' ',	MMHU),	/* -mm ? */
1668 	M(NONE,		'H','U',	MMHU),	/* -mm ? */
1669 	M(PARAG,	'P',' ',	PP),	/* paragraph for -mm */
1670 	M(NBLK,		'N','S',	mssnblock),	/* undocumented */
1671 	M(NONE,		0,0,		0)
1672 };
1673 
1674 struct	mactab	memactab[] = {
1675 	M(PARAG,	'p','p',	mepp),
1676 	M(PARAG,	'l','p',	mepp),
1677 	M(PARAG,	'n','p',	mepp),
1678 	M(NONE,		'i','p',	meip),
1679 
1680 	M(NONE,		's','h',	mesh),
1681 	M(NONE,		'u','h',	mesh),
1682 
1683 	M(NBLK,		'(','l',	mesnblock),
1684 	M(NBLK,		'(','q',	mesnblock),
1685 	M(NBLK,		'(','b',	mesnblock),
1686 	M(NBLK,		'(','z',	mesnblock),
1687 	M(NBLK,		'(','c',	mesnblock),
1688 
1689 	M(NBLK,		'(','d',	mesnblock),
1690 	M(NBLK,		'(','f',	mesnblock),
1691 	M(NBLK,		'(','x',	mesnblock),
1692 
1693 	M(NONE,		'r',' ',	mefont),
1694 	M(NONE,		'i',' ',	mefont),
1695 	M(NONE,		'b',' ',	mefont),
1696 	M(NONE,		'u',' ',	mefont),
1697 	M(NONE,		'q',' ',	mefont),
1698 	M(NONE,		'r','b',	mefont),
1699 	M(NONE,		'b','i',	mefont),
1700 	M(NONE,		'b','x',	mefont),
1701 	M(NONE,		0,0,		0)
1702 };
1703 
1704 struct	mactab	manmactab[] = {
1705 	M(PARAG,	'B','I',	manfont),
1706 	M(PARAG,	'B','R',	manfont),
1707 	M(PARAG,	'I','B',	manfont),
1708 	M(PARAG,	'I','R',	manfont),
1709 	M(PARAG,	'R','B',	manfont),
1710 	M(PARAG,	'R','I',	manfont),
1711 
1712 	M(PARAG,	'P','P',	manpp),
1713 	M(PARAG,	'L','P',	manpp),
1714 	M(PARAG,	'H','P',	manpp),
1715 	M(NONE,		0,0,		0)
1716 };
1717