1 /* $NetBSD: deroff.c,v 1.12 2019/02/03 03:19:29 mrg Exp $ */
2
3 /* taken from: OpenBSD: deroff.c,v 1.6 2004/06/02 14:58:46 tom Exp */
4
5 /*-
6 * Copyright (c) 1988, 1993
7 * The Regents of the University of California. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33 /*
34 * Copyright (C) Caldera International Inc. 2001-2002.
35 * All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code and documentation must retain the above
41 * copyright notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed or owned by Caldera
48 * International, Inc.
49 * 4. Neither the name of Caldera International, Inc. nor the names of other
50 * contributors may be used to endorse or promote products derived from
51 * this software without specific prior written permission.
52 *
53 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
54 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
57 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
58 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
62 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
63 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
64 * POSSIBILITY OF SUCH DAMAGE.
65 */
66
67 #include <sys/cdefs.h>
68 __RCSID("$NetBSD: deroff.c,v 1.12 2019/02/03 03:19:29 mrg Exp $");
69
70 #include <err.h>
71 #include <limits.h>
72 #include <stddef.h>
73 #include <stdio.h>
74 #include <stdlib.h>
75 #include <string.h>
76 #include <unistd.h>
77
/*
 * Deroff command -- strip troff, eqn, and tbl sequences from a file.
 *
 * Flag arguments:
 *	-w causes the output to be one word per line rather than in the
 *	   original format.
 *	-mm (or -ms) causes the corresponding macros to be interpreted
 *	   so that just sentences are output.
 *	-ml also gets rid of lists.
 *
 * Deroff follows .so and .nx commands, removes contents of macro
 * definitions, equations (both .EQ ... .EN and $...$),
 * tbl command sequences, and troff backslash constructions.
 *
 * All input is through the Cget macro;
 * the most recently read character is in c.
 *
 * Modified by Robert Henry to process -me and -man macros.
 */
94
/*
 * Character input.  Each macro reads one character from infile into the
 * global c and yields a value:
 *   Cget   hands end-of-file to eof(); when the character equals ldelim
 *          and no file is nested (filesp == files) it yields skeqn().
 *   C1get  the same without the ldelim check.
 */
#define Cget \
	( (c=getc(infile)) == EOF ? eof() : \
	    ((c==ldelim)&&(filesp==files) ? skeqn() : c) )
#define C1get \
	( (c=getc(infile)) == EOF ? eof() : c)

/* With DEBUG the readers become real functions so they can be traced. */
#ifndef DEBUG
# define C Cget
# define C1 C1get
#else /* DEBUG */
# define C _C()
# define C1 _C1()
#endif /* DEBUG */

/* Loop header: read through the next newline. */
#define SKIP while (C != '\n')
/*
 * Read through two newlines, then keep reading until a '.' that follows
 * a newline is itself followed by a character not greater than 'Z'.
 */
#define SKIP_TO_COM \
	SKIP; SKIP; pc=c; \
	while (C != '.' || pc != '\n' || C > 'Z')pc=c
108
/* Truth values used for the option and state flags. */
#define YES 1
#define NO 0

/* Macro package selected with -m; the value is stored in mac. */
#define MS 0	/* -ms */
#define MM 1	/* -mm */
#define ME 2	/* -me */
#define MA 3	/* -man */

#ifdef DEBUG
/* Package names indexed by MS/MM/ME/MA; only used for debug output. */
static const char *const mactab[] = { "-ms", "-mm", "-me", "-ma" };
#endif /* DEBUG */

/* Values handed to regline() as the word-length cut-off "constant". */
#define ONE 1
#define TWO 2

/*
 * "No delimiter" marker for ldelim/rdelim.  Parenthesized so the
 * negative constant is safe inside any expression.
 */
#define NOCHAR (-2)

/*
 * Character classes stored in chars[].  The numeric order matters:
 * msputwords() tests "class < DIGIT".
 */
#define SPECIAL 0
#define APOS 1
#define PUNCT 2
#define DIGIT 3
#define LETTER 4

/* Capacity of the files[] stack of open input files. */
#define MAXFILES 20
131
132 static int iflag;
133 static int wordflag;
134 static int msflag; /* processing a source written using a mac package */
135 static int mac; /* which package */
136 static int disp;
137 static int parag;
138 static int inmacro;
139 static int intable;
140 static int keepblock; /* keep blocks of text; normally false when msflag */
141
142 static char chars[128]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */
143
144 static char line[LINE_MAX];
145 static char *lp;
146
147 static int c;
148 static int pc;
149 static int ldelim;
150 static int rdelim;
151
152 static char fname[PATH_MAX];
153 static FILE *files[MAXFILES];
154 static FILE **filesp;
155 static FILE *infile;
156
157 static int argc;
158 static char **argv;
159
/*
 * Macro processing
 *
 * Macro table definitions
 */

/* Two-character macro name packed into one int (see tomac/frommac). */
typedef int pacmac;

/*
 * When non-zero, meputmac() omits the separating blank in front of the
 * argument with this index (-me only).
 */
static int argconcat = 0;

/* Pack two characters into a pacmac, first character in the high byte. */
#define tomac(c1, c2) \
	((((c1) & 0xFF) << 8) | ((c2) & 0xFF))
/* Unpack a pacmac into its two characters. */
#define frommac(src, c1, c2) \
	(((c1)=((src)>>8)&0xFF),((c2) =(src)&0xFF), __USE(c1), __USE(c2))

/* One entry of a macro dispatch table. */
struct mactab {
	int condition;		/* NONE, FNEST, ... (see below) */
	pacmac macname;		/* packed name; 0 ends a table */
	int (*func)(pacmac);	/* handler; returns 0, COMX or COM */
};

static const struct mactab troffmactab[];
static const struct mactab ppmactab[];
static const struct mactab msmactab[];
static const struct mactab mmmactab[];
static const struct mactab memactab[];
static const struct mactab manmactab[];

/*
 * Macro table initialization
 */
#define M(cond, c1, c2, func) {cond, tomac(c1, c2), func}

/*
 * Flags for matching conditions other than
 * the macro name
 */
#define NONE 0
#define FNEST 1		/* no nested files */
#define NOMAC 2		/* no macro */
#define MAC 3		/* macro */
#define PARAG 4		/* in a paragraph */
#define MSF 5		/* msflag is on */
#define NBLK 6		/* set if no blocks to be kept */

/*
 * Return codes from macro minions, determine where to jump,
 * how to repeat/reprocess text
 */
#define COMX 1		/* goto comx */
#define COM 2		/* goto com */
207
208 static int skeqn(void);
209 static int eof(void);
210 #ifdef DEBUG
211 static int _C1(void);
212 static int _C(void);
213 #endif
214 static int EQ(pacmac);
215 static int domacro(pacmac);
216 static int PS(pacmac);
217 static int skip(pacmac);
218 static int intbl(pacmac);
219 static int outtbl(pacmac);
220 static int so(pacmac);
221 static int nx(pacmac);
222 static int skiptocom(pacmac);
223 static int PP(pacmac);
224 static int AU(pacmac);
225 static int SH(pacmac);
226 static int UX(pacmac);
227 static int MMHU(pacmac);
228 static int mesnblock(pacmac);
229 static int mssnblock(pacmac);
230 static int nf(pacmac);
231 static int ce(pacmac);
232 static int meip(pacmac);
233 static int mepp(pacmac);
234 static int mesh(pacmac);
235 static int mefont(pacmac);
236 static int manfont(pacmac);
237 static int manpp(pacmac);
238 static int macsort(const void *, const void *);
239 static int sizetab(const struct mactab *);
240 static void getfname(void);
241 static void textline(char *, int);
242 static void work(void) __dead;
243 static void regline(void (*)(char *, int), int);
244 static void macro(void);
245 static void tbl(void);
246 static void stbl(void);
247 static void eqn(void);
248 static void backsl(void);
249 static void sce(void);
250 static void refer(int);
251 static void inpic(void);
252 static void msputmac(char *, int);
253 static void msputwords(int);
254 static void meputmac(char *, int);
255 static void meputwords(int);
256 static void noblock(char, char);
257 static void defcomline(pacmac);
258 static void comline(void);
259 static void buildtab(const struct mactab **, int *);
260 static FILE *opn(char *);
261 static struct mactab *macfill(struct mactab *, const struct mactab *);
262 static void usage(void) __dead;
263
264 int
main(int ac,char ** av)265 main(int ac, char **av)
266 {
267 int i, ch;
268 int errflg = 0;
269 int kflag = NO;
270
271 iflag = NO;
272 wordflag = NO;
273 msflag = NO;
274 mac = ME;
275 disp = NO;
276 parag = NO;
277 inmacro = NO;
278 intable = NO;
279 ldelim = NOCHAR;
280 rdelim = NOCHAR;
281 keepblock = YES;
282
283 while ((ch = getopt(ac, av, "ikpwm:")) != -1) {
284 switch (ch) {
285 case 'i':
286 iflag = YES;
287 break;
288 case 'k':
289 kflag = YES;
290 break;
291 case 'm':
292 msflag = YES;
293 keepblock = NO;
294 switch (optarg[0]) {
295 case 'm':
296 mac = MM;
297 break;
298 case 's':
299 mac = MS;
300 break;
301 case 'e':
302 mac = ME;
303 break;
304 case 'a':
305 mac = MA;
306 break;
307 case 'l':
308 disp = YES;
309 break;
310 default:
311 errflg++;
312 break;
313 }
314 if (errflg == 0 && optarg[1] != '\0')
315 errflg++;
316 break;
317 case 'p':
318 parag = YES;
319 break;
320 case 'w':
321 wordflag = YES;
322 kflag = YES;
323 break;
324 default:
325 errflg++;
326 }
327 }
328 argc = ac - optind;
329 argv = av + optind;
330
331 if (kflag)
332 keepblock = YES;
333 if (errflg)
334 usage();
335
336 #ifdef DEBUG
337 printf("msflag = %d, mac = %s, keepblock = %d, disp = %d\n",
338 msflag, mactab[mac], keepblock, disp);
339 #endif /* DEBUG */
340 if (argc == 0) {
341 infile = stdin;
342 } else {
343 infile = opn(argv[0]);
344 --argc;
345 ++argv;
346 }
347 files[0] = infile;
348 filesp = &files[0];
349
350 for (i = 'a'; i <= 'z' ; ++i)
351 chars[i] = LETTER;
352 for (i = 'A'; i <= 'Z'; ++i)
353 chars[i] = LETTER;
354 for (i = '0'; i <= '9'; ++i)
355 chars[i] = DIGIT;
356 chars['\''] = APOS;
357 chars['&'] = APOS;
358 chars['.'] = PUNCT;
359 chars[','] = PUNCT;
360 chars[';'] = PUNCT;
361 chars['?'] = PUNCT;
362 chars[':'] = PUNCT;
363 work();
364 return 0;
365 }
366
367 static int
skeqn(void)368 skeqn(void)
369 {
370
371 while ((c = getc(infile)) != rdelim) {
372 if (c == EOF)
373 c = eof();
374 else if (c == '"') {
375 while ((c = getc(infile)) != '"') {
376 if (c == EOF ||
377 (c == '\\' && (c = getc(infile)) == EOF))
378 c = eof();
379 }
380 }
381 }
382 if (msflag)
383 return c == 'x';
384 return c == ' ';
385 }
386
/*
 * Open the file named p for reading.  Exits through err() with status 1
 * when the file cannot be opened, so the result is never NULL.
 */
static FILE *
opn(char *p)
{
	FILE *fp = fopen(p, "r");

	if (fp == NULL)
		err(1, "fopen %s", p);
	return fp;
}
397
398 static int
eof(void)399 eof(void)
400 {
401
402 if (infile != stdin)
403 fclose(infile);
404 if (filesp > files)
405 infile = *--filesp;
406 else if (argc > 0) {
407 infile = opn(argv[0]);
408 --argc;
409 ++argv;
410 } else
411 exit(0);
412 return C;
413 }
414
415 static void
getfname(void)416 getfname(void)
417 {
418 char *p;
419 struct chain {
420 struct chain *nextp;
421 char *datap;
422 } *q;
423 static struct chain *namechain= NULL;
424
425 while (C == ' ')
426 ; /* nothing */
427
428 for (p = fname ; p - fname < (ptrdiff_t)sizeof(fname) &&
429 (*p = c) != '\n' &&
430 c != ' ' && c != '\t' && c != '\\'; ++p)
431 C;
432 *p = '\0';
433 while (c != '\n')
434 C;
435
436 /* see if this name has already been used */
437 for (q = namechain ; q; q = q->nextp)
438 if (strcmp(fname, q->datap) == 0) {
439 fname[0] = '\0';
440 return;
441 }
442
443 q = (struct chain *) malloc(sizeof(struct chain));
444 if (q == NULL)
445 err(1, NULL);
446 q->nextp = namechain;
447 q->datap = strdup(fname);
448 if (q->datap == NULL)
449 err(1, NULL);
450 namechain = q;
451 }
452
453 /*ARGSUSED*/
454 static void
textline(char * str,int constant)455 textline(char *str, int constant)
456 {
457
458 if (wordflag) {
459 msputwords(0);
460 return;
461 }
462 puts(str);
463 }
464
465 static void
work(void)466 work(void)
467 {
468
469 for (;;) {
470 C;
471 #ifdef FULLDEBUG
472 printf("Starting work with `%c'\n", c);
473 #endif /* FULLDEBUG */
474 if (c == '.' || c == '\'')
475 comline();
476 else
477 regline(textline, TWO);
478 }
479 }
480
481 static void
regline(void (* pfunc)(char *,int),int constant)482 regline(void (*pfunc)(char *, int), int constant)
483 {
484
485 line[0] = c;
486 lp = line;
487 while (lp - line < (ptrdiff_t)sizeof(line)) {
488 if (c == '\\') {
489 *lp = ' ';
490 backsl();
491 }
492 if (c == '\n')
493 break;
494 if (intable && c == 'T') {
495 *++lp = C;
496 if (c == '{' || c == '}') {
497 lp[-1] = ' ';
498 *lp = C;
499 }
500 } else {
501 *++lp = C;
502 }
503 }
504 *lp = '\0';
505
506 if (line[0] != '\0')
507 (*pfunc)(line, constant);
508 }
509
510 static void
macro(void)511 macro(void)
512 {
513
514 if (msflag) {
515 do {
516 SKIP;
517 } while (C!='.' || C!='.' || C=='.'); /* look for .. */
518 if (c != '\n')
519 SKIP;
520 return;
521 }
522 SKIP;
523 inmacro = YES;
524 }
525
526 static void
tbl(void)527 tbl(void)
528 {
529
530 while (C != '.')
531 ; /* nothing */
532 SKIP;
533 intable = YES;
534 }
535
536 static void
stbl(void)537 stbl(void)
538 {
539
540 while (C != '.')
541 ; /* nothing */
542 SKIP_TO_COM;
543 if (c != 'T' || C != 'E') {
544 SKIP;
545 pc = c;
546 while (C != '.' || pc != '\n' || C != 'T' || C != 'E')
547 pc = c;
548 }
549 }
550
551 static void
eqn(void)552 eqn(void)
553 {
554 int c1, c2;
555 int dflg;
556 char last;
557
558 last=0;
559 dflg = 1;
560 SKIP;
561
562 for (;;) {
563 if (C1 == '.' || c == '\'') {
564 while (C1 == ' ' || c == '\t')
565 ;
566 if (c == 'E' && C1 == 'N') {
567 SKIP;
568 if (msflag && dflg) {
569 putchar('x');
570 putchar(' ');
571 if (last) {
572 putchar(last);
573 putchar('\n');
574 }
575 }
576 return;
577 }
578 } else if (c == 'd') {
579 /* look for delim */
580 if (C1 == 'e' && C1 == 'l')
581 if (C1 == 'i' && C1 == 'm') {
582 while (C1 == ' ')
583 ; /* nothing */
584
585 if ((c1 = c) == '\n' ||
586 (c2 = C1) == '\n' ||
587 (c1 == 'o' && c2 == 'f' && C1=='f')) {
588 ldelim = NOCHAR;
589 rdelim = NOCHAR;
590 } else {
591 ldelim = c1;
592 rdelim = c2;
593 }
594 }
595 dflg = 0;
596 }
597
598 if (c != '\n')
599 while (C1 != '\n') {
600 if (chars[c] == PUNCT)
601 last = c;
602 else if (c != ' ')
603 last = 0;
604 }
605 }
606 }
607
608 /* skip over a complete backslash construction */
609 static void
backsl(void)610 backsl(void)
611 {
612 int bdelim;
613
614 sw:
615 switch (C) {
616 case '"':
617 SKIP;
618 return;
619
620 case 's':
621 if (C == '\\')
622 backsl();
623 else {
624 while (C >= '0' && c <= '9')
625 ; /* nothing */
626 ungetc(c, infile);
627 c = '0';
628 }
629 --lp;
630 return;
631
632 case 'f':
633 case 'n':
634 case '*':
635 if (C != '(')
636 return;
637
638 /* FALLTHROUGH */
639 case '(':
640 if (msflag) {
641 if (C == 'e') {
642 if (C == 'm') {
643 *lp = '-';
644 return;
645 }
646 }
647 else if (c != '\n')
648 C;
649 return;
650 }
651 if (C != '\n')
652 C;
653 return;
654
655 case '$':
656 C; /* discard argument number */
657 return;
658
659 case 'b':
660 case 'x':
661 case 'v':
662 case 'h':
663 case 'w':
664 case 'o':
665 case 'l':
666 case 'L':
667 if ((bdelim = C) == '\n')
668 return;
669 while (C != '\n' && c != bdelim)
670 if (c == '\\')
671 backsl();
672 return;
673
674 case '\\':
675 if (inmacro)
676 goto sw;
677
678 default:
679 return;
680 }
681 }
682
683 static void
sce(void)684 sce(void)
685 {
686 char *ap;
687 int n, i;
688 char a[10];
689
690 for (ap = a; C != '\n'; ap++) {
691 *ap = c;
692 if (ap == &a[9]) {
693 SKIP;
694 ap = a;
695 break;
696 }
697 }
698 if (ap != a)
699 n = atoi(a);
700 else
701 n = 1;
702 for (i = 0; i < n;) {
703 if (C == '.') {
704 if (C == 'c') {
705 if (C == 'e') {
706 while (C == ' ')
707 ; /* nothing */
708 if (c == '0') {
709 SKIP;
710 break;
711 } else
712 SKIP;
713 }
714 else
715 SKIP;
716 } else if (c == 'P' || C == 'P') {
717 if (c != '\n')
718 SKIP;
719 break;
720 } else if (c != '\n')
721 SKIP;
722 } else {
723 SKIP;
724 i++;
725 }
726 }
727 }
728
729 static void
refer(int c1)730 refer(int c1)
731 {
732 int c2;
733
734 if (c1 != '\n')
735 SKIP;
736
737 for (c2 = -1;;) {
738 if (C != '.')
739 SKIP;
740 else {
741 if (C != ']')
742 SKIP;
743 else {
744 while (C != '\n')
745 c2 = c;
746 if (c2 != -1 && chars[c2] == PUNCT)
747 putchar(c2);
748 return;
749 }
750 }
751 }
752 }
753
754 static void
inpic(void)755 inpic(void)
756 {
757 int c1;
758 char *p1;
759
760 SKIP;
761 p1 = line;
762 c = '\n';
763 for (;;) {
764 c1 = c;
765 if (C == '.' && c1 == '\n') {
766 if (C != 'P') {
767 if (c == '\n')
768 continue;
769 else {
770 SKIP;
771 c = '\n';
772 continue;
773 }
774 }
775 if (C != 'E') {
776 if (c == '\n')
777 continue;
778 else {
779 SKIP;
780 c = '\n';
781 continue;
782 }
783 }
784 SKIP;
785 return;
786 }
787 else if (c == '\"') {
788 while (C != '\"') {
789 if (c == '\\') {
790 if (C == '\"')
791 continue;
792 ungetc(c, infile);
793 backsl();
794 } else
795 *p1++ = c;
796 }
797 *p1++ = ' ';
798 }
799 else if (c == '\n' && p1 != line) {
800 *p1 = '\0';
801 if (wordflag)
802 msputwords(NO);
803 else {
804 puts(line);
805 putchar('\n');
806 }
807 p1 = line;
808 }
809 }
810 }
811
#ifdef DEBUG
/*
 * Function forms of the C1get and Cget macros, used for C1 and C when
 * DEBUG is defined.
 */
static int
_C1(void)
{
	int ch;

	ch = C1get;
	return ch;
}

static int
_C(void)
{
	int ch;

	ch = Cget;
	return ch;
}
#endif /* DEBUG */
827
828 /*
829 * Put out a macro line, using ms and mm conventions.
830 */
831 static void
msputmac(char * s,int constant)832 msputmac(char *s, int constant)
833 {
834 char *t;
835 int found;
836 int last;
837
838 last = 0;
839 found = 0;
840 if (wordflag) {
841 msputwords(YES);
842 return;
843 }
844 while (*s) {
845 while (*s == ' ' || *s == '\t')
846 putchar(*s++);
847 for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t)
848 ; /* nothing */
849 if (*s == '\"')
850 s++;
851 if (t > s + constant && chars[(unsigned char)s[0]] == LETTER &&
852 chars[(unsigned char)s[1]] == LETTER) {
853 while (s < t)
854 if (*s == '\"')
855 s++;
856 else
857 putchar(*s++);
858 last = *(t-1);
859 found++;
860 } else if (found && chars[(unsigned char)s[0]] == PUNCT &&
861 s[1] == '\0') {
862 putchar(*s++);
863 } else {
864 last = *(t - 1);
865 s = t;
866 }
867 }
868 putchar('\n');
869 if (msflag && chars[last] == PUNCT) {
870 putchar(last);
871 putchar('\n');
872 }
873 }
874
875 /*
876 * put out words (for the -w option) with ms and mm conventions
877 */
878 static void
msputwords(int macline)879 msputwords(int macline)
880 {
881 char *p, *p1;
882 int i, nlet;
883
884 for (p1 = line;;) {
885 /*
886 * skip initial specials ampersands and apostrophes
887 */
888 while (chars[(unsigned char)*p1] < DIGIT)
889 if (*p1++ == '\0')
890 return;
891 nlet = 0;
892 for (p = p1 ; (i = chars[(unsigned char)*p]) != SPECIAL ; ++p)
893 if (i == LETTER)
894 ++nlet;
895
896 if (nlet > 1 && chars[(unsigned char)p1[0]] == LETTER) {
897 /*
898 * delete trailing ampersands and apostrophes
899 */
900 while ((i = chars[(unsigned char)p[-1]]) == PUNCT ||
901 i == APOS )
902 --p;
903 while (p1 < p)
904 putchar(*p1++);
905 putchar('\n');
906 } else {
907 p1 = p;
908 }
909 }
910 }
911
912 /*
913 * put out a macro using the me conventions
914 */
/* Advance cp over blanks and tabs. */
#define SKIPBLANK(cp) while (*(cp) == ' ' || *(cp) == '\t') { (cp)++; }
/*
 * Advance cp to the next blank, tab or end of string.  The tab test used
 * to read '\cp', a non-existent escape left behind by a rename.
 */
#define SKIPNONBLANK(cp) while (*(cp) != ' ' && *(cp) != '\t' && *(cp) != '\0') { (cp)++; }
917
918 static void
meputmac(char * cp,int constant)919 meputmac(char *cp, int constant)
920 {
921 char *np;
922 int found;
923 int argno;
924 int last;
925 int inquote;
926
927 last = 0;
928 found = 0;
929 if (wordflag) {
930 meputwords(YES);
931 return;
932 }
933 for (argno = 0; *cp; argno++) {
934 SKIPBLANK(cp);
935 inquote = (*cp == '"');
936 if (inquote)
937 cp++;
938 for (np = cp; *np; np++) {
939 switch (*np) {
940 case '\n':
941 case '\0':
942 break;
943
944 case '\t':
945 case ' ':
946 if (inquote)
947 continue;
948 else
949 goto endarg;
950
951 case '"':
952 if (inquote && np[1] == '"') {
953 memmove(np, np + 1, strlen(np));
954 np++;
955 continue;
956 } else {
957 *np = ' '; /* bye bye " */
958 goto endarg;
959 }
960
961 default:
962 continue;
963 }
964 }
965 endarg: ;
966 /*
967 * cp points at the first char in the arg
968 * np points one beyond the last char in the arg
969 */
970 if ((argconcat == 0) || (argconcat != argno))
971 putchar(' ');
972 #ifdef FULLDEBUG
973 {
974 char *p;
975 printf("[%d,%d: ", argno, np - cp);
976 for (p = cp; p < np; p++) {
977 putchar(*p);
978 }
979 printf("]");
980 }
981 #endif /* FULLDEBUG */
982 /*
983 * Determine if the argument merits being printed
984 *
985 * constant is the cut off point below which something
986 * is not a word.
987 */
988 if (((np - cp) > constant) &&
989 (inquote || (chars[(unsigned char)cp[0]] == LETTER))) {
990 for (; cp < np; cp++)
991 putchar(*cp);
992 last = np[-1];
993 found++;
994 } else if (found && (np - cp == 1) &&
995 chars[(unsigned char)*cp] == PUNCT) {
996 putchar(*cp);
997 } else {
998 last = np[-1];
999 }
1000 cp = np;
1001 }
1002 if (msflag && chars[last] == PUNCT)
1003 putchar(last);
1004 putchar('\n');
1005 }
1006
/*
 * Put out words (for the -w option) for the me package.  There is no
 * separate me implementation; the work is done by msputwords().
 */
static void
meputwords(int macline)
{

	msputwords(macline);
	return;
}
1016
1017 /*
1018 *
1019 * Skip over a nested set of macros
1020 *
1021 * Possible arguments to noblock are:
1022 *
1023 * fi end of unfilled text
1024 * PE pic ending
1025 * DE display ending
1026 *
1027 * for ms and mm only:
1028 * KE keep ending
1029 *
1030 * NE undocumented match to NS (for mm?)
1031 * LE mm only: matches RL or *L (for lists)
1032 *
1033 * for me:
1034 * ([lqbzcdf]
1035 */
1036 static void
noblock(char a1,char a2)1037 noblock(char a1, char a2)
1038 {
1039 int c1,c2;
1040 int eqnf;
1041 int lct;
1042
1043 lct = 0;
1044 eqnf = 1;
1045 SKIP;
1046 for (;;) {
1047 while (C != '.')
1048 if (c == '\n')
1049 continue;
1050 else
1051 SKIP;
1052 if ((c1 = C) == '\n')
1053 continue;
1054 if ((c2 = C) == '\n')
1055 continue;
1056 if (c1 == a1 && c2 == a2) {
1057 SKIP;
1058 if (lct != 0) {
1059 lct--;
1060 continue;
1061 }
1062 if (eqnf)
1063 putchar('.');
1064 putchar('\n');
1065 return;
1066 } else if (a1 == 'L' && c2 == 'L') {
1067 lct++;
1068 SKIP;
1069 }
1070 /*
1071 * equations (EQ) nested within a display
1072 */
1073 else if (c1 == 'E' && c2 == 'Q') {
1074 if ((mac == ME && a1 == ')')
1075 || (mac != ME && a1 == 'D')) {
1076 eqn();
1077 eqnf=0;
1078 }
1079 }
1080 /*
1081 * turning on filling is done by the paragraphing
1082 * macros
1083 */
1084 else if (a1 == 'f') { /* .fi */
1085 if ((mac == ME && (c2 == 'h' || c2 == 'p'))
1086 || (mac != ME && (c1 == 'P' || c2 == 'P'))) {
1087 SKIP;
1088 return;
1089 }
1090 } else {
1091 SKIP;
1092 }
1093 }
1094 }
1095
1096 static int
1097 /*ARGSUSED*/
EQ(pacmac unused)1098 EQ(pacmac unused)
1099 {
1100
1101 eqn();
1102 return 0;
1103 }
1104
1105 static int
1106 /*ARGSUSED*/
domacro(pacmac unused)1107 domacro(pacmac unused)
1108 {
1109
1110 macro();
1111 return 0;
1112 }
1113
1114 static int
1115 /*ARGSUSED*/
PS(pacmac unused)1116 PS(pacmac unused)
1117 {
1118
1119 for (C; c == ' ' || c == '\t'; C)
1120 ; /* nothing */
1121
1122 if (c == '<') { /* ".PS < file" -- don't expect a .PE */
1123 SKIP;
1124 return 0;
1125 }
1126 if (!msflag)
1127 inpic();
1128 else
1129 noblock('P', 'E');
1130 return 0;
1131 }
1132
1133 static int
1134 /*ARGSUSED*/
skip(pacmac unused)1135 skip(pacmac unused)
1136 {
1137
1138 SKIP;
1139 return 0;
1140 }
1141
1142 static int
1143 /*ARGSUSED*/
intbl(pacmac unused)1144 intbl(pacmac unused)
1145 {
1146
1147 if (msflag)
1148 stbl();
1149 else
1150 tbl();
1151 return 0;
1152 }
1153
1154 static int
1155 /*ARGSUSED*/
outtbl(pacmac unused)1156 outtbl(pacmac unused)
1157 {
1158
1159 intable = NO;
1160 return 0;
1161 }
1162
1163 static int
1164 /*ARGSUSED*/
so(pacmac unused)1165 so(pacmac unused)
1166 {
1167
1168 if (!iflag) {
1169 getfname();
1170 if (fname[0]) {
1171 if (++filesp - &files[0] > MAXFILES)
1172 err(1, "too many nested files (max %d)",
1173 MAXFILES);
1174 infile = *filesp = opn(fname);
1175 }
1176 }
1177 return 0;
1178 }
1179
1180 static int
1181 /*ARGSUSED*/
nx(pacmac unused)1182 nx(pacmac unused)
1183 {
1184
1185 if (!iflag) {
1186 getfname();
1187 if (fname[0] == '\0')
1188 exit(0);
1189 if (infile != stdin)
1190 fclose(infile);
1191 infile = *filesp = opn(fname);
1192 }
1193 return 0;
1194 }
1195
1196 static int
1197 /*ARGSUSED*/
skiptocom(pacmac unused)1198 skiptocom(pacmac unused)
1199 {
1200
1201 SKIP_TO_COM;
1202 return COMX;
1203 }
1204
1205 static int
PP(pacmac c12)1206 PP(pacmac c12)
1207 {
1208 int c1, c2;
1209
1210 frommac(c12, c1, c2);
1211 printf(".%c%c", c1, c2);
1212 while (C != '\n')
1213 putchar(c);
1214 putchar('\n');
1215 return 0;
1216 }
1217
1218 static int
1219 /*ARGSUSED*/
AU(pacmac unused)1220 AU(pacmac unused)
1221 {
1222
1223 if (mac == MM)
1224 return 0;
1225 SKIP_TO_COM;
1226 return COMX;
1227 }
1228
1229 static int
SH(pacmac c12)1230 SH(pacmac c12)
1231 {
1232 int c1, c2;
1233
1234 frommac(c12, c1, c2);
1235
1236 if (parag) {
1237 printf(".%c%c", c1, c2);
1238 while (C != '\n')
1239 putchar(c);
1240 putchar(c);
1241 putchar('!');
1242 for (;;) {
1243 while (C != '\n')
1244 putchar(c);
1245 putchar('\n');
1246 if (C == '.')
1247 return COM;
1248 putchar('!');
1249 putchar(c);
1250 }
1251 /*NOTREACHED*/
1252 } else {
1253 SKIP_TO_COM;
1254 return COMX;
1255 }
1256 }
1257
1258 static int
1259 /*ARGSUSED*/
UX(pacmac unused)1260 UX(pacmac unused)
1261 {
1262
1263 if (wordflag)
1264 printf("UNIX\n");
1265 else
1266 printf("UNIX ");
1267 return 0;
1268 }
1269
1270 static int
MMHU(pacmac c12)1271 MMHU(pacmac c12)
1272 {
1273 int c1, c2;
1274
1275 frommac(c12, c1, c2);
1276 if (parag) {
1277 printf(".%c%c", c1, c2);
1278 while (C != '\n')
1279 putchar(c);
1280 putchar('\n');
1281 } else {
1282 SKIP;
1283 }
1284 return 0;
1285 }
1286
1287 static int
mesnblock(pacmac c12)1288 mesnblock(pacmac c12)
1289 {
1290 int c1, c2;
1291
1292 frommac(c12, c1, c2);
1293 noblock(')', c2);
1294 return 0;
1295 }
1296
1297 static int
mssnblock(pacmac c12)1298 mssnblock(pacmac c12)
1299 {
1300 int c1, c2;
1301
1302 frommac(c12, c1, c2);
1303 noblock(c1, 'E');
1304 return 0;
1305 }
1306
1307 static int
1308 /*ARGUSED*/
nf(pacmac unused)1309 nf(pacmac unused)
1310 {
1311
1312 noblock('f', 'i');
1313 return 0;
1314 }
1315
1316 static int
1317 /*ARGUSED*/
ce(pacmac unused)1318 ce(pacmac unused)
1319 {
1320
1321 sce();
1322 return 0;
1323 }
1324
1325 static int
meip(pacmac c12)1326 meip(pacmac c12)
1327 {
1328
1329 if (parag)
1330 mepp(c12);
1331 else if (wordflag) /* save the tag */
1332 regline(meputmac, ONE);
1333 else
1334 SKIP;
1335 return 0;
1336 }
1337
1338 /*
1339 * only called for -me .pp or .sh, when parag is on
1340 */
1341 static int
mepp(pacmac c12)1342 mepp(pacmac c12)
1343 {
1344
1345 PP(c12); /* eats the line */
1346 return 0;
1347 }
1348
1349 /*
1350 * Start of a section heading; output the section name if doing words
1351 */
1352 static int
mesh(pacmac c12)1353 mesh(pacmac c12)
1354 {
1355
1356 if (parag)
1357 mepp(c12);
1358 else if (wordflag)
1359 defcomline(c12);
1360 else
1361 SKIP;
1362 return 0;
1363 }
1364
1365 /*
1366 * process a font setting
1367 */
1368 static int
mefont(pacmac c12)1369 mefont(pacmac c12)
1370 {
1371
1372 argconcat = 1;
1373 defcomline(c12);
1374 argconcat = 0;
1375 return 0;
1376 }
1377
1378 static int
manfont(pacmac c12)1379 manfont(pacmac c12)
1380 {
1381
1382 return mefont(c12);
1383 }
1384
1385 static int
manpp(pacmac c12)1386 manpp(pacmac c12)
1387 {
1388
1389 return mepp(c12);
1390 }
1391
1392 static void
defcomline(pacmac c12)1393 defcomline(pacmac c12)
1394 {
1395 int c1, c2;
1396
1397 frommac(c12, c1, c2);
1398 if (msflag && mac == MM && c2 == 'L') {
1399 if (disp || c1 == 'R') {
1400 noblock('L', 'E');
1401 } else {
1402 SKIP;
1403 putchar('.');
1404 }
1405 }
1406 else if (c1 == '.' && c2 == '.') {
1407 if (msflag) {
1408 SKIP;
1409 return;
1410 }
1411 while (C == '.')
1412 /*VOID*/;
1413 }
1414 ++inmacro;
1415 /*
1416 * Process the arguments to the macro
1417 */
1418 switch (mac) {
1419 default:
1420 case MM:
1421 case MS:
1422 if (c1 <= 'Z' && msflag)
1423 regline(msputmac, ONE);
1424 else
1425 regline(msputmac, TWO);
1426 break;
1427 case ME:
1428 regline(meputmac, ONE);
1429 break;
1430 }
1431 --inmacro;
1432 }
1433
1434 static void
comline(void)1435 comline(void)
1436 {
1437 int c1;
1438 int c2;
1439 pacmac c12;
1440 int mid;
1441 int lb, ub;
1442 int hit;
1443 static int tabsize = 0;
1444 static const struct mactab *mactab = NULL;
1445 const struct mactab *mp;
1446
1447 if (mactab == 0)
1448 buildtab(&mactab, &tabsize);
1449 com:
1450 while (C == ' ' || c == '\t')
1451 ;
1452 comx:
1453 if ((c1 = c) == '\n')
1454 return;
1455 c2 = C;
1456 if (c1 == '.' && c2 != '.')
1457 inmacro = NO;
1458 if (msflag && c1 == '[') {
1459 refer(c2);
1460 return;
1461 }
1462 if (parag && mac==MM && c1 == 'P' && c2 == '\n') {
1463 printf(".P\n");
1464 return;
1465 }
1466 if (c2 == '\n')
1467 return;
1468 /*
1469 * Single letter macro
1470 */
1471 if (mac == ME && (c2 == ' ' || c2 == '\t') )
1472 c2 = ' ';
1473 c12 = tomac(c1, c2);
1474 /*
1475 * binary search through the table of macros
1476 */
1477 lb = 0;
1478 ub = tabsize - 1;
1479 while (lb <= ub) {
1480 mid = (ub + lb) / 2;
1481 mp = &mactab[mid];
1482 if (mp->macname < c12)
1483 lb = mid + 1;
1484 else if (mp->macname > c12)
1485 ub = mid - 1;
1486 else {
1487 hit = 1;
1488 #ifdef FULLDEBUG
1489 printf("preliminary hit macro %c%c ", c1, c2);
1490 #endif /* FULLDEBUG */
1491 switch (mp->condition) {
1492 case NONE:
1493 hit = YES;
1494 break;
1495 case FNEST:
1496 hit = (filesp == files);
1497 break;
1498 case NOMAC:
1499 hit = !inmacro;
1500 break;
1501 case MAC:
1502 hit = inmacro;
1503 break;
1504 case PARAG:
1505 hit = parag;
1506 break;
1507 case NBLK:
1508 hit = !keepblock;
1509 break;
1510 default:
1511 hit = 0;
1512 }
1513
1514 if (hit) {
1515 #ifdef FULLDEBUG
1516 printf("MATCH\n");
1517 #endif /* FULLDEBUG */
1518 switch ((*(mp->func))(c12)) {
1519 default:
1520 return;
1521 case COMX:
1522 goto comx;
1523 case COM:
1524 goto com;
1525 }
1526 }
1527 #ifdef FULLDEBUG
1528 printf("FAIL\n");
1529 #endif /* FULLDEBUG */
1530 break;
1531 }
1532 }
1533 defcomline(c12);
1534 }
1535
1536 static int
macsort(const void * p1,const void * p2)1537 macsort(const void *p1, const void *p2)
1538 {
1539 const struct mactab *t1 = p1;
1540 const struct mactab *t2 = p2;
1541
1542 return t1->macname - t2->macname;
1543 }
1544
1545 static int
sizetab(const struct mactab * mp)1546 sizetab(const struct mactab *mp)
1547 {
1548 int i;
1549
1550 i = 0;
1551 if (mp) {
1552 for (; mp->macname; mp++, i++)
1553 /*VOID*/ ;
1554 }
1555 return i;
1556 }
1557
1558 static struct mactab *
macfill(struct mactab * dst,const struct mactab * src)1559 macfill(struct mactab *dst, const struct mactab *src)
1560 {
1561
1562 if (src) {
1563 while (src->macname)
1564 *dst++ = *src++;
1565 }
1566 return dst;
1567 }
1568
/*
 * Write the command synopsis to stderr and terminate the program with
 * exit status 1.  Does not return.
 */
static void
usage(void)
{
	extern char *__progname;

	(void)fprintf(stderr,
	    "usage: %s [-ikpw ] [ -m a | e | l | m | s] [file ...]\n",
	    __progname);
	exit(1);
}
1577
1578 static void
buildtab(const struct mactab ** r_back,int * r_size)1579 buildtab(const struct mactab **r_back, int *r_size)
1580 {
1581 size_t size;
1582 const struct mactab *p1, *p2;
1583 struct mactab *back, *p;
1584
1585 size = sizetab(troffmactab) + sizetab(ppmactab);
1586 p1 = p2 = NULL;
1587 if (msflag) {
1588 switch (mac) {
1589 case ME:
1590 p1 = memactab;
1591 break;
1592 case MM:
1593 p1 = msmactab;
1594 p2 = mmmactab;
1595 break;
1596 case MS:
1597 p1 = msmactab;
1598 break;
1599 case MA:
1600 p1 = manmactab;
1601 break;
1602 default:
1603 break;
1604 }
1605 }
1606 size += sizetab(p1);
1607 size += sizetab(p2);
1608 back = calloc(size + 2, sizeof(struct mactab));
1609 if (back == NULL)
1610 err(1, NULL);
1611
1612 p = macfill(back, troffmactab);
1613 p = macfill(p, ppmactab);
1614 p = macfill(p, p1);
1615 p = macfill(p, p2);
1616
1617 qsort(back, size, sizeof(struct mactab), macsort);
1618 *r_size = size;
1619 *r_back = back;
1620 }
1621
1622 /*
1623 * troff commands
1624 */
1625 static const struct mactab troffmactab[] = {
1626 M(NONE, '\\','"', skip), /* comment */
1627 M(NOMAC, 'd','e', domacro), /* define */
1628 M(NOMAC, 'i','g', domacro), /* ignore till .. */
1629 M(NOMAC, 'a','m', domacro), /* append macro */
1630 M(NBLK, 'n','f', nf), /* filled */
1631 M(NBLK, 'c','e', ce), /* centered */
1632
1633 M(NONE, 's','o', so), /* source a file */
1634 M(NONE, 'n','x', nx), /* go to next file */
1635
1636 M(NONE, 't','m', skip), /* print string on tty */
1637 M(NONE, 'h','w', skip), /* exception hyphen words */
1638 M(NONE, 0,0, 0)
1639 };
1640
1641 /*
1642 * Preprocessor output
1643 */
1644 static const struct mactab ppmactab[] = {
1645 M(FNEST, 'E','Q', EQ), /* equation starting */
1646 M(FNEST, 'T','S', intbl), /* table starting */
1647 M(FNEST, 'T','C', intbl), /* alternative table? */
1648 M(FNEST, 'T','&', intbl), /* table reformatting */
1649 M(NONE, 'T','E', outtbl),/* table ending */
1650 M(NONE, 'P','S', PS), /* picture starting */
1651 M(NONE, 0,0, 0)
1652 };
1653
1654 /*
1655 * Particular to ms and mm
1656 */
1657 static const struct mactab msmactab[] = {
1658 M(NONE, 'T','L', skiptocom), /* title follows */
1659 M(NONE, 'F','S', skiptocom), /* start footnote */
1660 M(NONE, 'O','K', skiptocom), /* Other kws */
1661
1662 M(NONE, 'N','R', skip), /* undocumented */
1663 M(NONE, 'N','D', skip), /* use supplied date */
1664
1665 M(PARAG, 'P','P', PP), /* begin parag */
1666 M(PARAG, 'I','P', PP), /* begin indent parag, tag x */
1667 M(PARAG, 'L','P', PP), /* left blocked parag */
1668
1669 M(NONE, 'A','U', AU), /* author */
1670 M(NONE, 'A','I', AU), /* authors institution */
1671
1672 M(NONE, 'S','H', SH), /* section heading */
1673 M(NONE, 'S','N', SH), /* undocumented */
1674 M(NONE, 'U','X', UX), /* unix */
1675
1676 M(NBLK, 'D','S', mssnblock), /* start display text */
1677 M(NBLK, 'K','S', mssnblock), /* start keep */
1678 M(NBLK, 'K','F', mssnblock), /* start float keep */
1679 M(NONE, 0,0, 0)
1680 };
1681
1682 static const struct mactab mmmactab[] = {
1683 M(NONE, 'H',' ', MMHU), /* -mm ? */
1684 M(NONE, 'H','U', MMHU), /* -mm ? */
1685 M(PARAG, 'P',' ', PP), /* paragraph for -mm */
1686 M(NBLK, 'N','S', mssnblock), /* undocumented */
1687 M(NONE, 0,0, 0)
1688 };
1689
1690 static const struct mactab memactab[] = {
1691 M(PARAG, 'p','p', mepp),
1692 M(PARAG, 'l','p', mepp),
1693 M(PARAG, 'n','p', mepp),
1694 M(NONE, 'i','p', meip),
1695
1696 M(NONE, 's','h', mesh),
1697 M(NONE, 'u','h', mesh),
1698
1699 M(NBLK, '(','l', mesnblock),
1700 M(NBLK, '(','q', mesnblock),
1701 M(NBLK, '(','b', mesnblock),
1702 M(NBLK, '(','z', mesnblock),
1703 M(NBLK, '(','c', mesnblock),
1704
1705 M(NBLK, '(','d', mesnblock),
1706 M(NBLK, '(','f', mesnblock),
1707 M(NBLK, '(','x', mesnblock),
1708
1709 M(NONE, 'r',' ', mefont),
1710 M(NONE, 'i',' ', mefont),
1711 M(NONE, 'b',' ', mefont),
1712 M(NONE, 'u',' ', mefont),
1713 M(NONE, 'q',' ', mefont),
1714 M(NONE, 'r','b', mefont),
1715 M(NONE, 'b','i', mefont),
1716 M(NONE, 'b','x', mefont),
1717 M(NONE, 0,0, 0)
1718 };
1719
1720 static const struct mactab manmactab[] = {
1721 M(PARAG, 'B','I', manfont),
1722 M(PARAG, 'B','R', manfont),
1723 M(PARAG, 'I','B', manfont),
1724 M(PARAG, 'I','R', manfont),
1725 M(PARAG, 'R','B', manfont),
1726 M(PARAG, 'R','I', manfont),
1727
1728 M(PARAG, 'P','P', manpp),
1729 M(PARAG, 'L','P', manpp),
1730 M(PARAG, 'H','P', manpp),
1731 M(NONE, 0,0, 0)
1732 };
1733