xref: /openbsd/usr.bin/deroff/deroff.c (revision cecf84d4)
1 /*	$OpenBSD: deroff.c,v 1.11 2015/02/09 11:39:17 tedu Exp $	*/
2 
3 /*-
4  * Copyright (c) 1988, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 /*
32  * Copyright (C) Caldera International Inc.  2001-2002.
33  * All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code and documentation must retain the above
39  *    copyright notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgement:
45  *	This product includes software developed or owned by Caldera
46  *	International, Inc.
47  * 4. Neither the name of Caldera International, Inc. nor the names of other
48  *    contributors may be used to endorse or promote products derived from
49  *    this software without specific prior written permission.
50  *
51  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
52  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
53  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
54  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
55  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
56  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
57  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
58  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
60  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
61  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62  * POSSIBILITY OF SUCH DAMAGE.
63  */
64 
65 #include <err.h>
66 #include <limits.h>
67 #include <stdio.h>
68 #include <stdlib.h>
69 #include <string.h>
70 #include <unistd.h>
71 
72 /*
 *	Deroff command -- strip troff, eqn, and Tbl sequences from
 *	a file.  The -w flag causes output of one word per line
 *	rather than in the original format.
 *	-mm (or -ms) causes the corresponding macros to be interpreted
 *	so that just sentences are output
 *	-ml  also gets rid of lists.
79  *	Deroff follows .so and .nx commands, removes contents of macro
80  *	definitions, equations (both .EQ ... .EN and $...$),
81  *	Tbl command sequences, and Troff backslash constructions.
82  *
83  *	All input is through the Cget macro;
84  *	the most recently read character is in c.
85  *
86  *	Modified by Robert Henry to process -me and -man macros.
87  */
88 
/*
 * Cget reads the next character from infile into the global c.  At end of
 * file it defers to eof(); when the character equals ldelim and no nested
 * file is active (filesp == files) it defers to skeqn(), which reads on
 * until rdelim.  The expression evaluates to the resulting character.
 * C1get is the same without the ldelim check.
 */
#define Cget ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() : c) )
#define C1get ( (c=getc(infile)) == EOF ? eof() :  c)

/* With DEBUG the readers go through the functions _C() and _C1(). */
#ifdef DEBUG
#  define C	_C()
#  define C1	_C1()
#else /* not DEBUG */
#  define C	Cget
#  define C1	C1get
#endif /* not DEBUG */

/* SKIP reads up to and including the next newline. */
#define SKIP while (C != '\n')
/*
 * SKIP_TO_COM expands to several statements without a do/while wrapper, so
 * it may only be used where a statement sequence is valid.  It skips two
 * lines, then reads until it finds a '.' that directly follows a newline
 * and is itself followed by a character <= 'Z'.
 */
#define SKIP_TO_COM SKIP; SKIP; pc=c; while (C != '.' || pc != '\n' || C > 'Z')pc=c
102 
#define	YES 1
#define	NO 0
/* Values of the global mac, selected by the -m option in main(). */
#define	MS 0	/* -ms */
#define	MM 1	/* -mm */
#define	ME 2	/* -me */
#define	MA 3	/* -man */

#ifdef DEBUG
/* Printable package names, indexed by the MS/MM/ME/MA values. */
char *mactab[] = { "-ms", "-mm", "-me", "-ma" };
#endif /* DEBUG */

/* Values passed as the "constant" argument of regline(). */
#define	ONE 1
#define	TWO 2

#define NOCHAR -2	/* ldelim/rdelim value when no delimiter is set */
/* Character classes stored in chars[]. */
#define SPECIAL 0
#define APOS 1
#define PUNCT 2
#define DIGIT 3
#define LETTER 4

#define MAXFILES 20	/* number of slots in files[] */
125 
int	iflag;		/* -i: so() and nx() do nothing when set */
int	wordflag;	/* -w: output one word per line */
int	msflag;		/* processing a source written using a mac package */
int	mac;		/* which package */
int	disp;		/* set by -ml; consulted by defcomline() */
int	parag;		/* -p */
int	inmacro;
int	intable;
int	keepblock;	/* keep blocks of text; normally false when msflag */

/*
 * SPECIAL, PUNCT, APOS, DIGIT, or LETTER.
 *
 * The table is indexed with (unsigned char) values and with characters
 * returned by getc(), so it needs one slot for every possible byte;
 * entries that main() does not set stay 0 (SPECIAL).
 */
char chars[UCHAR_MAX + 1];

char line[LINE_MAX];	/* buffer for the line being assembled by regline() */
char *lp;		/* current position in line[] */

int c;			/* most recently read character */
int pc;			/* previous character, used by SKIP_TO_COM and stbl() */
int ldelim;		/* eqn delimiters, NOCHAR when unset */
int rdelim;

char fname[PATH_MAX];	/* file name filled in by getfname() */
FILE *files[MAXFILES];	/* stack of open input files */
FILE **filesp;		/* current top of files[] */
FILE *infile;		/* file currently being read */

int argc;		/* remaining file operands */
char **argv;
153 
154 /*
155  *	Macro processing
156  *
157  *	Macro table definitions
158  */
typedef	int pacmac;		/* compressed macro name */
int	argconcat = 0;		/* concat arguments together (-me only) */

/*
 * tomac packs two characters into one pacmac (first character in bits
 * 8-15, second in bits 0-7); frommac unpacks them again into c1 and c2.
 */
#define	tomac(c1, c2)		((((c1) & 0xFF) << 8) | ((c2) & 0xFF))
#define	frommac(src, c1, c2)	(((c1)=((src)>>8)&0xFF),((c2) =(src)&0xFF))

/* One entry of a macro table. */
struct mactab{
	int	condition;	/* one of the NONE ... NBLK flags */
	pacmac	macname;	/* packed name; 0 terminates a table */
	/* handler; comline() calls it with the packed name as argument */
	int	(*func)();	/* XXX - args */
};

/* The tables themselves are defined at the end of the file. */
struct	mactab	troffmactab[];
struct	mactab	ppmactab[];
struct	mactab	msmactab[];
struct	mactab	mmmactab[];
struct	mactab	memactab[];
struct	mactab	manmactab[];
177 
178 /*
179  *	Macro table initialization
180  */
/* Build one struct mactab initializer from a condition, two characters and a handler. */
#define	M(cond, c1, c2, func) {cond, tomac(c1, c2), func}

/*
 *	Flags for matching conditions other than
 *	the macro name
 */
#define	NONE		0
#define	FNEST		1		/* no nested files */
#define	NOMAC		2		/* no macro */
#define	MAC		3		/* macro */
#define	PARAG		4		/* in a paragraph */
/* NOTE(review): comline() has no case for MSF, so such an entry never matches. */
#define	MSF		5		/* msflag is on */
#define	NBLK		6		/* set if no blocks to be kept */

/*
 *	Return codes from macro minions, determine where to jump,
 *	how to repeat/reprocess text
 *	(any other value makes comline() return)
 */
#define	COMX		1		/* goto comx */
#define	COM		2		/* goto com */
201 
/* Forward declarations. */

/* called from the Cget/C1get input macros */
int	 skeqn(void);
int	 eof(void);
/* function forms of the input macros, defined only with DEBUG */
int	 _C1(void);
int	 _C(void);
int	 EQ(void);
int	 domacro(void);
int	 PS(void);
int	 skip(void);
int	 intbl(void);
int	 outtbl(void);
int	 so(void);
int	 nx(void);
int	 skiptocom(void);
int	 PP(pacmac);
int	 AU(void);
int	 SH(pacmac);
int	 UX(void);
int	 MMHU(pacmac);
int	 mesnblock(pacmac);
int	 mssnblock(pacmac);
int	 nf(void);
int	 ce(void);
int	 meip(pacmac);
int	 mepp(pacmac);
int	 mesh(pacmac);
int	 mefont(pacmac);
int	 manfont(pacmac);
int	 manpp(pacmac);
/* qsort() comparison function used by buildtab() */
int	 macsort(const void *, const void *);
int	 sizetab(struct mactab *);
void	 getfname(void);
void	 textline(char *, int);
void	 work(void);
void	 regline(void (*)(char *, int), int);
void	 macro(void);
void	 tbl(void);
void	 stbl(void);
void	 eqn(void);
void	 backsl(void);
void	 sce(void);
void	 refer(int);
void	 inpic(void);
void	 msputmac(char *, int);
void	 msputwords(int);
void	 meputmac(char *, int);
void	 meputwords(int);
void	 noblock(char, char);
void	 defcomline(pacmac);
void	 comline(void);
void	 buildtab(struct mactab **, int *);
FILE	*opn(char *);
struct mactab *macfill(struct mactab *, struct mactab *);
__dead void usage(void);
255 
256 int
257 main(int ac, char **av)
258 {
259 	int	i, ch;
260 	int	errflg = 0;
261 	int	kflag = NO;
262 
263 	iflag = NO;
264 	wordflag = NO;
265 	msflag = NO;
266 	mac = ME;
267 	disp = NO;
268 	parag = NO;
269 	inmacro = NO;
270 	intable = NO;
271 	ldelim	= NOCHAR;
272 	rdelim	= NOCHAR;
273 	keepblock = YES;
274 
275 	while ((ch = getopt(ac, av, "ikpwm:")) != -1) {
276 		switch (ch) {
277 		case 'i':
278 			iflag = YES;
279 			break;
280 		case 'k':
281 			kflag = YES;
282 			break;
283 		case 'm':
284 			msflag = YES;
285 			keepblock = NO;
286 			switch (optarg[0]) {
287 			case 'm':
288 				mac = MM;
289 				break;
290 			case 's':
291 				mac = MS;
292 				break;
293 			case 'e':
294 				mac = ME;
295 				break;
296 			case 'a':
297 				mac = MA;
298 				break;
299 			case 'l':
300 				disp = YES;
301 				break;
302 			default:
303 				errflg = 1;
304 				break;
305 			}
306 			if (optarg[1] != '\0')
307 				errflg = 1;
308 			break;
309 		case 'p':
310 			parag = YES;
311 			break;
312 		case 'w':
313 			wordflag = YES;
314 			kflag = YES;
315 			break;
316 		default:
317 			errflg = 1;
318 		}
319 	}
320 	argc = ac - optind;
321 	argv = av + optind;
322 
323 	if (kflag)
324 		keepblock = YES;
325 	if (errflg)
326 		usage();
327 
328 #ifdef DEBUG
329 	printf("msflag = %d, mac = %s, keepblock = %d, disp = %d\n",
330 		msflag, mactab[mac], keepblock, disp);
331 #endif /* DEBUG */
332 	if (argc == 0) {
333 		infile = stdin;
334 	} else {
335 		infile = opn(argv[0]);
336 		--argc;
337 		++argv;
338 	}
339 	files[0] = infile;
340 	filesp = &files[0];
341 
342 	for (i = 'a'; i <= 'z' ; ++i)
343 		chars[i] = LETTER;
344 	for (i = 'A'; i <= 'Z'; ++i)
345 		chars[i] = LETTER;
346 	for (i = '0'; i <= '9'; ++i)
347 		chars[i] = DIGIT;
348 	chars['\''] = APOS;
349 	chars['&'] = APOS;
350 	chars['.'] = PUNCT;
351 	chars[','] = PUNCT;
352 	chars[';'] = PUNCT;
353 	chars['?'] = PUNCT;
354 	chars[':'] = PUNCT;
355 	work();
356 	exit(0);
357 }
358 
359 int
360 skeqn(void)
361 {
362 
363 	while ((c = getc(infile)) != rdelim) {
364 		if (c == EOF)
365 			c = eof();
366 		else if (c == '"') {
367 			while ((c = getc(infile)) != '"') {
368 				if (c == EOF ||
369 				    (c == '\\' && (c = getc(infile)) == EOF))
370 					c = eof();
371 			}
372 		}
373 	}
374 	if (msflag)
375 		return((c = 'x'));
376 	return((c = ' '));
377 }
378 
/*
 * Open the file named p for reading and return the stream.
 * Exits through err() when the file cannot be opened.
 */
FILE *
opn(char *p)
{
	FILE *stream;

	stream = fopen(p, "r");
	if (stream == NULL)
		err(1, "fopen %s", p);
	return(stream);
}
389 
390 int
391 eof(void)
392 {
393 
394 	if (infile != stdin)
395 		fclose(infile);
396 	if (filesp > files)
397 		infile = *--filesp;
398 	else if (argc > 0) {
399 		infile = opn(argv[0]);
400 		--argc;
401 		++argv;
402 	} else
403 		exit(0);
404 	return(C);
405 }
406 
407 void
408 getfname(void)
409 {
410 	char *p;
411 	struct chain {
412 		struct chain *nextp;
413 		char *datap;
414 	} *q;
415 	static struct chain *namechain= NULL;
416 
417 	while (C == ' ')
418 		;	/* nothing */
419 
420 	for (p = fname ; p - fname < sizeof(fname) && (*p = c) != '\n' &&
421 	    c != ' ' && c != '\t' && c != '\\'; ++p)
422 		C;
423 	*p = '\0';
424 	while (c != '\n')
425 		C;
426 
427 	/* see if this name has already been used */
428 	for (q = namechain ; q; q = q->nextp)
429 		if (strcmp(fname, q->datap) == 0) {
430 			fname[0] = '\0';
431 			return;
432 		}
433 
434 	q = malloc(sizeof(struct chain));
435 	if (q == NULL)
436 		err(1, NULL);
437 	q->nextp = namechain;
438 	q->datap = strdup(fname);
439 	if (q->datap == NULL)
440 		err(1, NULL);
441 	namechain = q;
442 }
443 
444 /*ARGSUSED*/
445 void
446 textline(char *str, int constant)
447 {
448 
449 	if (wordflag) {
450 		msputwords(0);
451 		return;
452 	}
453 	puts(str);
454 }
455 
456 void
457 work(void)
458 {
459 
460 	for (;;) {
461 		C;
462 #ifdef FULLDEBUG
463 		printf("Starting work with `%c'\n", c);
464 #endif /* FULLDEBUG */
465 		if (c == '.' || c == '\'')
466 			comline();
467 		else
468 			regline(textline, TWO);
469 	}
470 }
471 
472 void
473 regline(void (*pfunc)(char *, int), int constant)
474 {
475 
476 	line[0] = c;
477 	lp = line;
478 	while (lp - line < sizeof(line)) {
479 		if (c == '\\') {
480 			*lp = ' ';
481 			backsl();
482 		}
483 		if (c == '\n')
484 			break;
485 		if (intable && c == 'T') {
486 			*++lp = C;
487 			if (c == '{' || c == '}') {
488 				lp[-1] = ' ';
489 				*lp = C;
490 			}
491 		} else {
492 			*++lp = C;
493 		}
494 	}
495 	*lp = '\0';
496 
497 	if (line[0] != '\0')
498 		(*pfunc)(line, constant);
499 }
500 
501 void
502 macro(void)
503 {
504 
505 	if (msflag) {
506 		do {
507 			SKIP;
508 		} while (C!='.' || C!='.' || C=='.');	/* look for  .. */
509 		if (c != '\n')
510 			SKIP;
511 		return;
512 	}
513 	SKIP;
514 	inmacro = YES;
515 }
516 
517 void
518 tbl(void)
519 {
520 
521 	while (C != '.')
522 		;	/* nothing */
523 	SKIP;
524 	intable = YES;
525 }
526 
527 void
528 stbl(void)
529 {
530 
531 	while (C != '.')
532 		;	/* nothing */
533 	SKIP_TO_COM;
534 	if (c != 'T' || C != 'E') {
535 		SKIP;
536 		pc = c;
537 		while (C != '.' || pc != '\n' || C != 'T' || C != 'E')
538 			pc = c;
539 	}
540 }
541 
542 void
543 eqn(void)
544 {
545 	int c1, c2;
546 	int dflg;
547 	char last;
548 
549 	last=0;
550 	dflg = 1;
551 	SKIP;
552 
553 	for (;;) {
554 		if (C1 == '.'  || c == '\'') {
555 			while (C1 == ' ' || c == '\t')
556 				;
557 			if (c == 'E' && C1 == 'N') {
558 				SKIP;
559 				if (msflag && dflg) {
560 					putchar('x');
561 					putchar(' ');
562 					if (last) {
563 						putchar(last);
564 						putchar('\n');
565 					}
566 				}
567 				return;
568 			}
569 		} else if (c == 'd') {
570 			/* look for delim */
571 			if (C1 == 'e' && C1 == 'l')
572 				if (C1 == 'i' && C1 == 'm') {
573 					while (C1 == ' ')
574 						;	/* nothing */
575 
576 					if ((c1 = c) == '\n' ||
577 					    (c2 = C1) == '\n' ||
578 					    (c1 == 'o' && c2 == 'f' && C1=='f')) {
579 						ldelim = NOCHAR;
580 						rdelim = NOCHAR;
581 					} else {
582 						ldelim = c1;
583 						rdelim = c2;
584 					}
585 				}
586 			dflg = 0;
587 		}
588 
589 		if (c != '\n')
590 			while (C1 != '\n') {
591 				if (chars[c] == PUNCT)
592 					last = c;
593 				else if (c != ' ')
594 					last = 0;
595 			}
596 	}
597 }
598 
/*
 * skip over a complete backslash construction
 *
 * Called with the backslash already read; dispatches on the character
 * that follows it.  May modify *lp and lp, the output position used by
 * regline().
 */
void
backsl(void)
{
	int bdelim;

sw:
	switch (C) {
	case '"':
		/* discard the rest of the line */
		SKIP;
		return;

	case 's':
		if (C == '\\')
			backsl();
		else {
			/* read digits, push back the first non-digit */
			while (C >= '0' && c <= '9')
				;	/* nothing */
			ungetc(c, infile);
			c = '0';
		}
		/* back up over the blank regline() stored for the backslash */
		--lp;
		return;

	case 'f':
	case 'n':
	case '*':
		/* one more character, unless it is '(' */
		if (C != '(')
			return;
		/* FALLTHROUGH */

	case '(':
		/* two-character name; with msflag "em" becomes '-' */
		if (msflag) {
			if (C == 'e') {
				if (C == 'm') {
					*lp = '-';
					return;
				}
			}
			else if (c != '\n')
				C;
			return;
		}
		if (C != '\n')
			C;
		return;

	case '$':
		C;	/* discard argument number */
		return;

	case 'b':
	case 'x':
	case 'v':
	case 'h':
	case 'w':
	case 'o':
	case 'l':
	case 'L':
		/*
		 * The next character is a delimiter; read up to its next
		 * occurrence or the end of the line, handling nested
		 * backslash constructions on the way.
		 */
		if ((bdelim = C) == '\n')
			return;
		while (C != '\n' && c != bdelim)
			if (c == '\\')
				backsl();
		return;

	case '\\':
		/* inside a macro, dispatch again on the following character */
		if (inmacro)
			goto sw;
		/* FALLTHROUGH */

	default:
		return;
	}
}
672 
673 void
674 sce(void)
675 {
676 	char *ap;
677 	int n, i;
678 	char a[10];
679 
680 	for (ap = a; C != '\n'; ap++) {
681 		*ap = c;
682 		if (ap == &a[9]) {
683 			SKIP;
684 			ap = a;
685 			break;
686 		}
687 	}
688 	if (ap != a)
689 		n = atoi(a);
690 	else
691 		n = 1;
692 	for (i = 0; i < n;) {
693 		if (C == '.') {
694 			if (C == 'c') {
695 				if (C == 'e') {
696 					while (C == ' ')
697 						;	/* nothing */
698 					if (c == '0') {
699 						SKIP;
700 						break;
701 					} else
702 						SKIP;
703 				}
704 				else
705 					SKIP;
706 			} else if (c == 'P' || C == 'P') {
707 				if (c != '\n')
708 					SKIP;
709 				break;
710 			} else if (c != '\n')
711 				SKIP;
712 		} else {
713 			SKIP;
714 			i++;
715 		}
716 	}
717 }
718 
719 void
720 refer(int c1)
721 {
722 	int c2;
723 
724 	if (c1 != '\n')
725 		SKIP;
726 
727 	for (c2 = -1;;) {
728 		if (C != '.')
729 			SKIP;
730 		else {
731 			if (C != ']')
732 				SKIP;
733 			else {
734 				while (C != '\n')
735 					c2 = c;
736 				if (c2 != -1 && chars[c2] == PUNCT)
737 					putchar(c2);
738 				return;
739 			}
740 		}
741 	}
742 }
743 
744 void
745 inpic(void)
746 {
747 	int c1;
748 	char *p1, *ep;
749 
750 	SKIP;
751 	p1 = line;
752 	ep = line + sizeof(line) - 1;
753 	c = '\n';
754 	for (;;) {
755 		c1 = c;
756 		if (C == '.' && c1 == '\n') {
757 			if (C != 'P') {
758 				if (c == '\n')
759 					continue;
760 				else {
761 					SKIP;
762 					c = '\n';
763 					continue;
764 				}
765 			}
766 			if (C != 'E') {
767 				if (c == '\n')
768 					continue;
769 				else {
770 					SKIP;
771 					c = '\n';
772 					continue;
773 				}
774 			}
775 			SKIP;
776 			return;
777 		}
778 		else if (c == '\"') {
779 			while (C != '\"') {
780 				if (c == '\\') {
781 					if (C == '\"')
782 						continue;
783 					ungetc(c, infile);
784 					backsl();
785 				} else if (p1 + 1 >= ep) {
786 					errx(1, ".PS length exceeds limit");
787 				} else {
788 					*p1++ = c;
789 				}
790 			}
791 			*p1++ = ' ';
792 		}
793 		else if (c == '\n' && p1 != line) {
794 			*p1 = '\0';
795 			if (wordflag)
796 				msputwords(NO);
797 			else {
798 				puts(line);
799 				putchar('\n');
800 			}
801 			p1 = line;
802 		}
803 	}
804 }
805 
#ifdef DEBUG
/*
 * Function forms of the input macros; with DEBUG defined, C1 and C
 * expand to calls of these.
 */
int
_C1(void)
{
	int ch;

	ch = C1get;
	return(ch);
}

int
_C(void)
{
	int ch;

	ch = Cget;
	return(ch);
}
#endif /* DEBUG */
821 
822 /*
823  *	Put out a macro line, using ms and mm conventions.
824  */
825 void
826 msputmac(char *s, int constant)
827 {
828 	char *t;
829 	int found;
830 	int last;
831 
832 	last = 0;
833 	found = 0;
834 	if (wordflag) {
835 		msputwords(YES);
836 		return;
837 	}
838 	while (*s) {
839 		while (*s == ' ' || *s == '\t')
840 			putchar(*s++);
841 		for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t)
842 			;	/* nothing */
843 		if (*s == '\"')
844 			s++;
845 		if (t > s + constant && chars[(unsigned char)s[0]] == LETTER &&
846 		    chars[(unsigned char)s[1]] == LETTER) {
847 			while (s < t)
848 				if (*s == '\"')
849 					s++;
850 				else
851 					putchar(*s++);
852 			last = *(t-1);
853 			found++;
854 		} else if (found && chars[(unsigned char)s[0]] == PUNCT &&
855 		    s[1] == '\0') {
856 			putchar(*s++);
857 		} else {
858 			last = *(t - 1);
859 			s = t;
860 		}
861 	}
862 	putchar('\n');
863 	if (msflag && chars[last] == PUNCT) {
864 		putchar(last);
865 		putchar('\n');
866 	}
867 }
868 
869 /*
870  *	put out words (for the -w option) with ms and mm conventions
871  */
872 void
873 msputwords(int macline)
874 {
875 	char *p, *p1;
876 	int i, nlet;
877 
878 	for (p1 = line;;) {
879 		/*
880 		 *	skip initial specials ampersands and apostrophes
881 		 */
882 		while (chars[(unsigned char)*p1] < DIGIT)
883 			if (*p1++ == '\0')
884 				return;
885 		nlet = 0;
886 		for (p = p1 ; (i = chars[(unsigned char)*p]) != SPECIAL ; ++p)
887 			if (i == LETTER)
888 				++nlet;
889 
890 		if (nlet > 1 && chars[(unsigned char)p1[0]] == LETTER) {
891 			/*
892 			 *	delete trailing ampersands and apostrophes
893 			 */
894 			while ((i = chars[(unsigned char)p[-1]]) == PUNCT ||
895 			    i == APOS )
896 				--p;
897 			while (p1 < p)
898 				putchar(*p1++);
899 			putchar('\n');
900 		} else {
901 			p1 = p;
902 		}
903 	}
904 }
905 
906 /*
907  *	put out a macro using the me conventions
908  */
/*
 * SKIPBLANK advances cp over blanks and tabs; SKIPNONBLANK advances it
 * to the next blank, tab or NUL.  cp must be a modifiable pointer.
 */
#define SKIPBLANK(cp)	while (*(cp) == ' ' || *(cp) == '\t') { (cp)++; }
#define SKIPNONBLANK(cp) while (*(cp) !=' ' && *(cp) !='\t' && *(cp) !='\0') { (cp)++; }
911 
912 void
913 meputmac(char *cp, int constant)
914 {
915 	char	*np;
916 	int	found;
917 	int	argno;
918 	int	last;
919 	int	inquote;
920 
921 	last = 0;
922 	found = 0;
923 	if (wordflag) {
924 		meputwords(YES);
925 		return;
926 	}
927 	for (argno = 0; *cp; argno++) {
928 		SKIPBLANK(cp);
929 		inquote = (*cp == '"');
930 		if (inquote)
931 			cp++;
932 		for (np = cp; *np; np++) {
933 			switch (*np) {
934 			case '\n':
935 			case '\0':
936 				break;
937 
938 			case '\t':
939 			case ' ':
940 				if (inquote)
941 					continue;
942 				else
943 					goto endarg;
944 
945 			case '"':
946 				if (inquote && np[1] == '"') {
947 					memmove(np, np + 1, strlen(np));
948 					np++;
949 					continue;
950 				} else {
951 					*np = ' '; 	/* bye bye " */
952 					goto endarg;
953 				}
954 
955 			default:
956 				continue;
957 			}
958 		}
959 		endarg: ;
960 		/*
961 		 *	cp points at the first char in the arg
962 		 *	np points one beyond the last char in the arg
963 		 */
964 		if ((argconcat == 0) || (argconcat != argno))
965 			putchar(' ');
966 #ifdef FULLDEBUG
967 		{
968 			char	*p;
969 			printf("[%d,%d: ", argno, np - cp);
970 			for (p = cp; p < np; p++) {
971 				putchar(*p);
972 			}
973 			printf("]");
974 		}
975 #endif /* FULLDEBUG */
976 		/*
977 		 *	Determine if the argument merits being printed
978 		 *
979 		 *	constant is the cut off point below which something
980 		 *	is not a word.
981 		 */
982 		if (((np - cp) > constant) &&
983 		    (inquote || (chars[(unsigned char)cp[0]] == LETTER))) {
984 			for (cp = cp; cp < np; cp++)
985 				putchar(*cp);
986 			last = np[-1];
987 			found++;
988 		} else if (found && (np - cp == 1) &&
989 		    chars[(unsigned char)*cp] == PUNCT) {
990 			putchar(*cp);
991 		} else {
992 			last = np[-1];
993 		}
994 		cp = np;
995 	}
996 	if (msflag && chars[last] == PUNCT)
997 		putchar(last);
998 	putchar('\n');
999 }
1000 
/*
 *	put out words (for the -w option) for the me package;
 *	this simply forwards to msputwords()
 */
void
meputwords(int macline)
{
	msputwords(macline);
	return;
}
1010 
1011 /*
1012  *
1013  *	Skip over a nested set of macros
1014  *
1015  *	Possible arguments to noblock are:
1016  *
1017  *	fi	end of unfilled text
1018  *	PE	pic ending
1019  *	DE	display ending
1020  *
1021  *	for ms and mm only:
1022  *		KE	keep ending
1023  *
1024  *		NE	undocumented match to NS (for mm?)
1025  *		LE	mm only: matches RL or *L (for lists)
1026  *
1027  *	for me:
1028  *		([lqbzcdf]
1029  */
1030 void
1031 noblock(char a1, char a2)
1032 {
1033 	int c1,c2;
1034 	int eqnf;
1035 	int lct;
1036 
1037 	lct = 0;
1038 	eqnf = 1;
1039 	SKIP;
1040 	for (;;) {
1041 		while (C != '.')
1042 			if (c == '\n')
1043 				continue;
1044 			else
1045 				SKIP;
1046 		if ((c1 = C) == '\n')
1047 			continue;
1048 		if ((c2 = C) == '\n')
1049 			continue;
1050 		if (c1 == a1 && c2 == a2) {
1051 			SKIP;
1052 			if (lct != 0) {
1053 				lct--;
1054 				continue;
1055 			}
1056 			if (eqnf)
1057 				putchar('.');
1058 			putchar('\n');
1059 			return;
1060 		} else if (a1 == 'L' && c2 == 'L') {
1061 			lct++;
1062 			SKIP;
1063 		}
1064 		/*
1065 		 *	equations (EQ) nested within a display
1066 		 */
1067 		else if (c1 == 'E' && c2 == 'Q') {
1068 			if ((mac == ME && a1 == ')')
1069 			    || (mac != ME && a1 == 'D')) {
1070 				eqn();
1071 				eqnf=0;
1072 			}
1073 		}
1074 		/*
1075 		 *	turning on filling is done by the paragraphing
1076 		 *	macros
1077 		 */
1078 		else if (a1 == 'f') {	/* .fi */
1079 			if  ((mac == ME && (c2 == 'h' || c2 == 'p'))
1080 			    || (mac != ME && (c1 == 'P' || c2 == 'P'))) {
1081 				SKIP;
1082 				return;
1083 			}
1084 		} else {
1085 			SKIP;
1086 		}
1087 	}
1088 }
1089 
/* Macro table handler: run eqn() and return 0. */
int
EQ(void)
{
	eqn();

	return 0;
}
1097 
/* Macro table handler: run macro() and return 0. */
int
domacro(void)
{
	macro();

	return 0;
}
1105 
1106 int
1107 PS(void)
1108 {
1109 
1110 	for (C; c == ' ' || c == '\t'; C)
1111 		;	/* nothing */
1112 
1113 	if (c == '<') {		/* ".PS < file" -- don't expect a .PE */
1114 		SKIP;
1115 		return(0);
1116 	}
1117 	if (!msflag)
1118 		inpic();
1119 	else
1120 		noblock('P', 'E');
1121 	return(0);
1122 }
1123 
1124 int
1125 skip(void)
1126 {
1127 
1128 	SKIP;
1129 	return(0);
1130 }
1131 
1132 int
1133 intbl(void)
1134 {
1135 
1136 	if (msflag)
1137 		stbl();
1138 	else
1139 		tbl();
1140 	return(0);
1141 }
1142 
1143 int
1144 outtbl(void)
1145 {
1146 
1147 	intable = NO;
1148 	return(0);
1149 }
1150 
1151 int
1152 so(void)
1153 {
1154 
1155 	if (!iflag) {
1156 		getfname();
1157 		if (fname[0]) {
1158 			if (++filesp - &files[0] > MAXFILES)
1159 				err(1, "too many nested files (max %d)",
1160 				    MAXFILES);
1161 			infile = *filesp = opn(fname);
1162 		}
1163 	}
1164 	return(0);
1165 }
1166 
1167 int
1168 nx(void)
1169 {
1170 
1171 	if (!iflag) {
1172 		getfname();
1173 		if (fname[0] == '\0')
1174 			exit(0);
1175 		if (infile != stdin)
1176 			fclose(infile);
1177 		infile = *filesp = opn(fname);
1178 	}
1179 	return(0);
1180 }
1181 
1182 int
1183 skiptocom(void)
1184 {
1185 
1186 	SKIP_TO_COM;
1187 	return(COMX);
1188 }
1189 
1190 int
1191 PP(pacmac c12)
1192 {
1193 	int c1, c2;
1194 
1195 	frommac(c12, c1, c2);
1196 	printf(".%c%c", c1, c2);
1197 	while (C != '\n')
1198 		putchar(c);
1199 	putchar('\n');
1200 	return(0);
1201 }
1202 
1203 int
1204 AU(void)
1205 {
1206 
1207 	if (mac == MM)
1208 		return(0);
1209 	SKIP_TO_COM;
1210 	return(COMX);
1211 }
1212 
1213 int
1214 SH(pacmac c12)
1215 {
1216 	int c1, c2;
1217 
1218 	frommac(c12, c1, c2);
1219 
1220 	if (parag) {
1221 		printf(".%c%c", c1, c2);
1222 		while (C != '\n')
1223 			putchar(c);
1224 		putchar(c);
1225 		putchar('!');
1226 		for (;;) {
1227 			while (C != '\n')
1228 				putchar(c);
1229 			putchar('\n');
1230 			if (C == '.')
1231 				return(COM);
1232 			putchar('!');
1233 			putchar(c);
1234 		}
1235 		/*NOTREACHED*/
1236 	} else {
1237 		SKIP_TO_COM;
1238 		return(COMX);
1239 	}
1240 }
1241 
1242 int
1243 UX(void)
1244 {
1245 
1246 	if (wordflag)
1247 		printf("UNIX\n");
1248 	else
1249 		printf("UNIX ");
1250 	return(0);
1251 }
1252 
1253 int
1254 MMHU(pacmac c12)
1255 {
1256 	int c1, c2;
1257 
1258 	frommac(c12, c1, c2);
1259 	if (parag) {
1260 		printf(".%c%c", c1, c2);
1261 		while (C != '\n')
1262 			putchar(c);
1263 		putchar('\n');
1264 	} else {
1265 		SKIP;
1266 	}
1267 	return(0);
1268 }
1269 
1270 int
1271 mesnblock(pacmac c12)
1272 {
1273 	int c1, c2;
1274 
1275 	frommac(c12, c1, c2);
1276 	noblock(')', c2);
1277 	return(0);
1278 }
1279 
1280 int
1281 mssnblock(pacmac c12)
1282 {
1283 	int c1, c2;
1284 
1285 	frommac(c12, c1, c2);
1286 	noblock(c1, 'E');
1287 	return(0);
1288 }
1289 
/* Macro table handler: skip unfilled text up to ".fi"; returns 0. */
int
nf(void)
{
	noblock('f', 'i');

	return 0;
}
1297 
/* Macro table handler: run sce() and return 0. */
int
ce(void)
{
	sce();

	return 0;
}
1305 
1306 int
1307 meip(pacmac c12)
1308 {
1309 
1310 	if (parag)
1311 		mepp(c12);
1312 	else if (wordflag)	/* save the tag */
1313 		regline(meputmac, ONE);
1314 	else
1315 		SKIP;
1316 	return(0);
1317 }
1318 
1319 /*
1320  *	only called for -me .pp or .sh, when parag is on
1321  */
1322 int
1323 mepp(pacmac c12)
1324 {
1325 
1326 	PP(c12);		/* eats the line */
1327 	return(0);
1328 }
1329 
1330 /*
1331  *	Start of a section heading; output the section name if doing words
1332  */
1333 int
1334 mesh(pacmac c12)
1335 {
1336 
1337 	if (parag)
1338 		mepp(c12);
1339 	else if (wordflag)
1340 		defcomline(c12);
1341 	else
1342 		SKIP;
1343 	return(0);
1344 }
1345 
1346 /*
1347  *	process a font setting
1348  */
1349 int
1350 mefont(pacmac c12)
1351 {
1352 
1353 	argconcat = 1;
1354 	defcomline(c12);
1355 	argconcat = 0;
1356 	return(0);
1357 }
1358 
1359 int
1360 manfont(pacmac c12)
1361 {
1362 
1363 	return(mefont(c12));
1364 }
1365 
1366 int
1367 manpp(pacmac c12)
1368 {
1369 
1370 	return(mepp(c12));
1371 }
1372 
1373 void
1374 defcomline(pacmac c12)
1375 {
1376 	int c1, c2;
1377 
1378 	frommac(c12, c1, c2);
1379 	if (msflag && mac == MM && c2 == 'L') {
1380 		if (disp || c1 == 'R') {
1381 			noblock('L', 'E');
1382 		} else {
1383 			SKIP;
1384 			putchar('.');
1385 		}
1386 	}
1387 	else if (c1 == '.' && c2 == '.') {
1388 		if (msflag) {
1389 			SKIP;
1390 			return;
1391 		}
1392 		while (C == '.')
1393 			/*VOID*/;
1394 	}
1395 	++inmacro;
1396 	/*
1397 	 *	Process the arguments to the macro
1398 	 */
1399 	switch (mac) {
1400 	default:
1401 	case MM:
1402 	case MS:
1403 		if (c1 <= 'Z' && msflag)
1404 			regline(msputmac, ONE);
1405 		else
1406 			regline(msputmac, TWO);
1407 		break;
1408 	case ME:
1409 		regline(meputmac, ONE);
1410 		break;
1411 	}
1412 	--inmacro;
1413 }
1414 
/*
 * Process a request line; called after its leading '.' or '\'' has been
 * read.  The two-character name is looked up in the macro table (built
 * on the first call).  When an entry is found and its condition holds,
 * its handler is called; the handler's return value decides whether to
 * return or to continue at the com or comx label.  Everything else is
 * handed to defcomline().
 */
void
comline(void)
{
	int	c1;
	int	c2;
	pacmac	c12;
	int	mid;
	int	lb, ub;
	int	hit;
	static	int	tabsize = 0;
	static	struct	mactab	*mactab = (struct mactab *)0;
	struct	mactab	*mp;

	if (mactab == 0)
		 buildtab(&mactab, &tabsize);
com:
	/* skip blanks and tabs in front of the name */
	while (C == ' ' || c == '\t')
		;
comx:
	/* entered with the first character of the name already in c */
	if ((c1 = c) == '\n')
		return;
	c2 = C;
	if (c1 == '.' && c2 != '.')
		inmacro = NO;
	if (msflag && c1 == '[') {
		refer(c2);
		return;
	}
	if (parag && mac==MM && c1 == 'P' && c2 == '\n') {
		printf(".P\n");
		return;
	}
	if (c2 == '\n')
		return;
	/*
	 *	Single letter macro
	 */
	if (mac == ME && (c2 == ' ' || c2 == '\t') )
		c2 = ' ';
	c12 = tomac(c1, c2);
	/*
	 *	binary search through the table of macros
	 */
	lb = 0;
	ub = tabsize - 1;
	while (lb <= ub) {
		mid = (ub + lb) / 2;
		mp = &mactab[mid];
		if (mp->macname < c12)
			lb = mid + 1;
		else if (mp->macname > c12)
			ub = mid - 1;
		else {
			/* name found; now check the entry's condition */
			hit = 1;
#ifdef FULLDEBUG
			printf("preliminary hit macro %c%c ", c1, c2);
#endif /* FULLDEBUG */
			switch (mp->condition) {
			case NONE:
				hit = YES;
				break;
			case FNEST:
				hit = (filesp == files);
				break;
			case NOMAC:
				hit = !inmacro;
				break;
			case MAC:
				hit = inmacro;
				break;
			case PARAG:
				hit = parag;
				break;
			case NBLK:
				hit = !keepblock;
				break;
			default:
				hit = 0;
			}

			if (hit) {
#ifdef FULLDEBUG
				printf("MATCH\n");
#endif /* FULLDEBUG */
				switch ((*(mp->func))(c12)) {
				default:
					return;
				case COMX:
					goto comx;
				case COM:
					goto com;
				}
			}
#ifdef FULLDEBUG
			printf("FAIL\n");
#endif /* FULLDEBUG */
			/* condition failed: leave the search, use the default */
			break;
		}
	}
	defcomline(c12);
}
1516 
1517 int
1518 macsort(const void *p1, const void *p2)
1519 {
1520 	struct mactab *t1 = (struct mactab *)p1;
1521 	struct mactab *t2 = (struct mactab *)p2;
1522 
1523 	return(t1->macname - t2->macname);
1524 }
1525 
1526 int
1527 sizetab(struct mactab *mp)
1528 {
1529 	int i;
1530 
1531 	i = 0;
1532 	if (mp) {
1533 		for (; mp->macname; mp++, i++)
1534 			/*VOID*/ ;
1535 	}
1536 	return(i);
1537 }
1538 
1539 struct mactab *
1540 macfill(struct mactab *dst, struct mactab *src)
1541 {
1542 
1543 	if (src) {
1544 		while (src->macname)
1545 			*dst++ = *src++;
1546 	}
1547 	return(dst);
1548 }
1549 
1550 __dead void
1551 usage(void)
1552 {
1553 	extern char *__progname;
1554 
1555 	fprintf(stderr, "usage: %s [-ikpw] [-m a | e | l | m | s] [file ...]\n", __progname);
1556 	exit(1);
1557 }
1558 
1559 void
1560 buildtab(struct mactab **r_back, int *r_size)
1561 {
1562 	int	size;
1563 	struct	mactab	*p, *p1, *p2;
1564 	struct	mactab	*back;
1565 
1566 	size = sizetab(troffmactab) + sizetab(ppmactab);
1567 	p1 = p2 = NULL;
1568 	if (msflag) {
1569 		switch (mac) {
1570 		case ME:
1571 			p1 = memactab;
1572 			break;
1573 		case MM:
1574 			p1 = msmactab;
1575 			p2 = mmmactab;
1576 			break;
1577 		case MS:
1578 			p1 = msmactab;
1579 			break;
1580 		case MA:
1581 			p1 = manmactab;
1582 			break;
1583 		default:
1584 			break;
1585 		}
1586 	}
1587 	size += sizetab(p1);
1588 	size += sizetab(p2);
1589 	back = calloc(size+2, sizeof(struct mactab));
1590 	if (back == NULL)
1591 		err(1, NULL);
1592 
1593 	p = macfill(back, troffmactab);
1594 	p = macfill(p, ppmactab);
1595 	p = macfill(p, p1);
1596 	p = macfill(p, p2);
1597 
1598 	qsort(back, size, sizeof(struct mactab), macsort);
1599 	*r_size = size;
1600 	*r_back = back;
1601 }
1602 
1603 /*
1604  *	troff commands
1605  */
1606 struct	mactab	troffmactab[] = {
1607 	M(NONE,		'\\','"',	skip),	/* comment */
1608 	M(NOMAC,	'd','e',	domacro),	/* define */
1609 	M(NOMAC,	'i','g',	domacro),	/* ignore till .. */
1610 	M(NOMAC,	'a','m',	domacro),	/* append macro */
1611 	M(NBLK,		'n','f',	nf),	/* filled */
1612 	M(NBLK,		'c','e',	ce),	/* centered */
1613 
1614 	M(NONE,		's','o',	so),	/* source a file */
1615 	M(NONE,		'n','x',	nx),	/* go to next file */
1616 
1617 	M(NONE,		't','m',	skip),	/* print string on tty */
1618 	M(NONE,		'h','w',	skip),	/* exception hyphen words */
1619 	M(NONE,		0,0,		0)
1620 };
1621 
1622 /*
1623  *	Preprocessor output
1624  */
1625 struct	mactab	ppmactab[] = {
1626 	M(FNEST,	'E','Q',	EQ),	/* equation starting */
1627 	M(FNEST,	'T','S',	intbl),	/* table starting */
1628 	M(FNEST,	'T','C',	intbl),	/* alternative table? */
1629 	M(FNEST,	'T','&',	intbl),	/* table reformatting */
1630 	M(NONE,		'T','E',	outtbl),/* table ending */
1631 	M(NONE,		'P','S',	PS),	/* picture starting */
1632 	M(NONE,		0,0,		0)
1633 };
1634 
1635 /*
1636  *	Particular to ms and mm
1637  */
1638 struct	mactab	msmactab[] = {
1639 	M(NONE,		'T','L',	skiptocom),	/* title follows */
1640 	M(NONE,		'F','S',	skiptocom),	/* start footnote */
1641 	M(NONE,		'O','K',	skiptocom),	/* Other kws */
1642 
1643 	M(NONE,		'N','R',	skip),	/* undocumented */
1644 	M(NONE,		'N','D',	skip),	/* use supplied date */
1645 
1646 	M(PARAG,	'P','P',	PP),	/* begin parag */
1647 	M(PARAG,	'I','P',	PP),	/* begin indent parag, tag x */
1648 	M(PARAG,	'L','P',	PP),	/* left blocked parag */
1649 
1650 	M(NONE,		'A','U',	AU),	/* author */
1651 	M(NONE,		'A','I',	AU),	/* authors institution */
1652 
1653 	M(NONE,		'S','H',	SH),	/* section heading */
1654 	M(NONE,		'S','N',	SH),	/* undocumented */
1655 	M(NONE,		'U','X',	UX),	/* unix */
1656 
1657 	M(NBLK,		'D','S',	mssnblock),	/* start display text */
1658 	M(NBLK,		'K','S',	mssnblock),	/* start keep */
1659 	M(NBLK,		'K','F',	mssnblock),	/* start float keep */
1660 	M(NONE,		0,0,		0)
1661 };
1662 
1663 struct	mactab	mmmactab[] = {
1664 	M(NONE,		'H',' ',	MMHU),	/* -mm ? */
1665 	M(NONE,		'H','U',	MMHU),	/* -mm ? */
1666 	M(PARAG,	'P',' ',	PP),	/* paragraph for -mm */
1667 	M(NBLK,		'N','S',	mssnblock),	/* undocumented */
1668 	M(NONE,		0,0,		0)
1669 };
1670 
1671 struct	mactab	memactab[] = {
1672 	M(PARAG,	'p','p',	mepp),
1673 	M(PARAG,	'l','p',	mepp),
1674 	M(PARAG,	'n','p',	mepp),
1675 	M(NONE,		'i','p',	meip),
1676 
1677 	M(NONE,		's','h',	mesh),
1678 	M(NONE,		'u','h',	mesh),
1679 
1680 	M(NBLK,		'(','l',	mesnblock),
1681 	M(NBLK,		'(','q',	mesnblock),
1682 	M(NBLK,		'(','b',	mesnblock),
1683 	M(NBLK,		'(','z',	mesnblock),
1684 	M(NBLK,		'(','c',	mesnblock),
1685 
1686 	M(NBLK,		'(','d',	mesnblock),
1687 	M(NBLK,		'(','f',	mesnblock),
1688 	M(NBLK,		'(','x',	mesnblock),
1689 
1690 	M(NONE,		'r',' ',	mefont),
1691 	M(NONE,		'i',' ',	mefont),
1692 	M(NONE,		'b',' ',	mefont),
1693 	M(NONE,		'u',' ',	mefont),
1694 	M(NONE,		'q',' ',	mefont),
1695 	M(NONE,		'r','b',	mefont),
1696 	M(NONE,		'b','i',	mefont),
1697 	M(NONE,		'b','x',	mefont),
1698 	M(NONE,		0,0,		0)
1699 };
1700 
1701 struct	mactab	manmactab[] = {
1702 	M(PARAG,	'B','I',	manfont),
1703 	M(PARAG,	'B','R',	manfont),
1704 	M(PARAG,	'I','B',	manfont),
1705 	M(PARAG,	'I','R',	manfont),
1706 	M(PARAG,	'R','B',	manfont),
1707 	M(PARAG,	'R','I',	manfont),
1708 
1709 	M(PARAG,	'P','P',	manpp),
1710 	M(PARAG,	'L','P',	manpp),
1711 	M(PARAG,	'H','P',	manpp),
1712 	M(NONE,		0,0,		0)
1713 };
1714