1 /* $OpenBSD: deroff.c,v 1.18 2023/09/27 21:06:33 millert Exp $ */
2
3 /*-
4 * Copyright (c) 1988, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31 /*
32 * Copyright (C) Caldera International Inc. 2001-2002.
33 * All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code and documentation must retain the above
39 * copyright notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed or owned by Caldera
46 * International, Inc.
47 * 4. Neither the name of Caldera International, Inc. nor the names of other
48 * contributors may be used to endorse or promote products derived from
49 * this software without specific prior written permission.
50 *
51 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
52 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
53 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
54 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
55 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
56 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
57 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
58 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
60 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
61 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62 * POSSIBILITY OF SUCH DAMAGE.
63 */
64
65 #include <err.h>
66 #include <limits.h>
67 #include <stdio.h>
68 #include <stdlib.h>
69 #include <string.h>
70 #include <unistd.h>
71
/*
 * Deroff command -- strip troff, eqn, and Tbl sequences from
 * a file.  Takes the flag argument -w, to cause output of one word
 * per line rather than in the original format.
 * -mm (or -ms) causes the corresponding macros to be interpreted
 * so that just sentences are output.
 * -ml also gets rid of lists.
 * Deroff follows .so and .nx commands, removes contents of macro
 * definitions, equations (both .EQ ... .EN and $...$),
 * Tbl command sequences, and Troff backslash constructions.
 *
 * All input is through the Cget macro;
 * the most recently read character is in c.
 *
 * Modified by Robert Henry to process -me and -man macros.
 */
88
/*
 * Cget reads the next character from infile into c.  At EOF it yields
 * the value of eof(); when the character equals ldelim and filesp ==
 * files it yields the value of skeqn(); otherwise it yields c.
 * C1get is the same without the ldelim test.
 */
#define Cget ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() : c) )
#define C1get ( (c=getc(infile)) == EOF ? eof() : c)

/* With DEBUG defined, C and C1 are routed through the functions _C()/_C1(). */
#ifdef DEBUG
# define C _C()
# define C1 _C1()
#else /* not DEBUG */
# define C Cget
# define C1 C1get
#endif /* not DEBUG */

/*
 * SKIP expands to a loop header only: the statement that follows it
 * (normally the empty statement ';') is the loop body, repeated until
 * C yields a newline.
 *
 * SKIP_TO_COM expands to several statements: two SKIPs, then a loop
 * that keeps reading until a '.' is read while pc (the previously read
 * character) is a newline and the character read after the '.' is not
 * greater than 'Z'.
 */
#define SKIP while (C != '\n')
#define SKIP_TO_COM SKIP; SKIP; pc=c; while (C != '.' || pc != '\n' || C > 'Z')pc=c
102
/* Boolean values used for the flag variables. */
#define YES 1
#define NO 0
/* Values of mac: which macro package was selected with -m. */
#define MS 0 /* -ms */
#define MM 1 /* -mm */
#define ME 2 /* -me */
#define MA 3 /* -man */

#ifdef DEBUG
/* Package names indexed by mac, used by the DEBUG printf in main(). */
char *mactab[] = { "-ms", "-mm", "-me", "-ma" };
#endif /* DEBUG */

/* Values passed as the "constant" argument of regline(). */
#define ONE 1
#define TWO 2

/* Value of ldelim/rdelim while no eqn delimiter is in effect. */
#define NOCHAR -2
/* Character classes stored in chars[]. */
#define SPECIAL 0
#define APOS 1
#define PUNCT 2
#define DIGIT 3
#define LETTER 4

/* Number of entries in files[]. */
#define MAXFILES 20
125
int iflag;		/* -i: so() and nx() do not open the named file */
int wordflag;		/* -w: output one word per line */
int msflag; /* processing a source written using a mac package */
int mac; /* which package */
int disp;		/* set by -ml */
int parag;		/* set by -p */
int inmacro;
int intable;
int keepblock; /* keep blocks of text; normally false when msflag */

/*
 * Character class table: SPECIAL, PUNCT, APOS, DIGIT, or LETTER.
 * It is indexed with unsigned char values and with characters returned
 * by getc(), so it needs one entry for every possible byte value; a
 * smaller table is read out of bounds for bytes >= 128.  Entries that
 * main() does not set stay 0, i.e. SPECIAL.
 */
char chars[UCHAR_MAX + 1];

size_t linesz;		/* allocated size of line */
char *line;		/* line buffer filled by regline() */
char *lp;		/* current position in line */

int c;			/* most recently read character */
int pc;			/* previous character, maintained by SKIP_TO_COM users */
int ldelim;		/* eqn left delimiter, or NOCHAR */
int rdelim;		/* eqn right delimiter, or NOCHAR */
146
char fname[PATH_MAX];	/* file name most recently read by getfname() */
FILE *files[MAXFILES];	/* stack of open input files; files[0] is the top-level input */
FILE **filesp;		/* points at the files[] entry of the current input */
FILE *infile;		/* stream the C/C1 macros read from */

/* Remaining file operands; consumed by main() and eof(). */
int argc;
char **argv;
154
/*
 * Macro processing
 *
 * Macro table definitions
 */
typedef int pacmac; /* compressed macro name */
int argconcat = 0; /* concat arguments together (-me only) */

/*
 * tomac packs the low 8 bits of two characters into one pacmac value
 * (c1 in bits 8-15, c2 in bits 0-7); frommac unpacks such a value into
 * the two lvalues c1 and c2.
 */
#define tomac(c1, c2) ((((c1) & 0xFF) << 8) | ((c2) & 0xFF))
#define frommac(src, c1, c2) (((c1)=((src)>>8)&0xFF),((c2) =(src)&0xFF))

/*
 * One macro table entry: comline() looks the entry up by macname,
 * evaluates condition (one of the NONE..NBLK flags) and, if it holds,
 * calls func with the packed macro name.
 */
struct mactab{
	int condition;
	pacmac macname;
	int (*func)(); /* XXX - args */
};

/* The tables themselves are defined at the end of the file. */
struct mactab troffmactab[];
struct mactab ppmactab[];
struct mactab msmactab[];
struct mactab mmmactab[];
struct mactab memactab[];
struct mactab manmactab[];
178
/*
 * Macro table initialization
 *
 * M() builds one struct mactab initializer from a condition flag, the
 * two characters of the macro name and the handler function.
 */
#define M(cond, c1, c2, func) {cond, tomac(c1, c2), func}

/*
 * Flags for matching conditions other than
 * the macro name
 */
#define NONE 0
#define FNEST 1 /* no nested files */
#define NOMAC 2 /* no macro */
#define MAC 3 /* macro */
#define PARAG 4 /* in a paragraph */
#define MSF 5 /* msflag is on */
#define NBLK 6 /* set if no blocks to be kept */

/*
 * Return codes from macro minions, determine where to jump,
 * how to repeat/reprocess text
 * (any other return value makes comline() return)
 */
#define COMX 1 /* goto comx */
#define COM 2 /* goto com */
202
/*
 * Forward declarations.  The int-returning functions from EQ() through
 * manpp() are the handlers referenced from the macro tables.
 */
int skeqn(void);
int eof(void);
int _C1(void);
int _C(void);
int EQ(void);
int domacro(void);
int PS(void);
int skip(void);
int intbl(void);
int outtbl(void);
int so(void);
int nx(void);
int skiptocom(void);
int PP(pacmac);
int AU(void);
int SH(pacmac);
int UX(void);
int MMHU(pacmac);
int mesnblock(pacmac);
int mssnblock(pacmac);
int nf(void);
int ce(void);
int meip(pacmac);
int mepp(pacmac);
int mesh(pacmac);
int mefont(pacmac);
int manfont(pacmac);
int manpp(pacmac);
int macsort(const void *, const void *);
int sizetab(struct mactab *);
void getfname(void);
void textline(char *, int);
void work(void);
void regline(void (*)(char *, int), int);
void macro(void);
void tbl(void);
void stbl(void);
void eqn(void);
void backsl(void);
void sce(void);
void refer(int);
void inpic(void);
void msputmac(char *, int);
void msputwords(int);
void meputmac(char *, int);
void meputwords(int);
void noblock(char, char);
void defcomline(pacmac);
void comline(void);
void buildtab(struct mactab **, int *);
FILE *opn(char *);
struct mactab *macfill(struct mactab *, struct mactab *);
__dead void usage(void);
256
257 int
main(int ac,char ** av)258 main(int ac, char **av)
259 {
260 int i, ch;
261 int errflg = 0;
262 int kflag = NO;
263
264 if (pledge("stdio rpath", NULL) == -1)
265 err(1, "pledge");
266
267 iflag = NO;
268 wordflag = NO;
269 msflag = NO;
270 mac = ME;
271 disp = NO;
272 parag = NO;
273 inmacro = NO;
274 intable = NO;
275 ldelim = NOCHAR;
276 rdelim = NOCHAR;
277 keepblock = YES;
278
279 while ((ch = getopt(ac, av, "ikpwm:")) != -1) {
280 switch (ch) {
281 case 'i':
282 iflag = YES;
283 break;
284 case 'k':
285 kflag = YES;
286 break;
287 case 'm':
288 msflag = YES;
289 keepblock = NO;
290 switch (optarg[0]) {
291 case 'm':
292 mac = MM;
293 break;
294 case 's':
295 mac = MS;
296 break;
297 case 'e':
298 mac = ME;
299 break;
300 case 'a':
301 mac = MA;
302 break;
303 case 'l':
304 disp = YES;
305 break;
306 default:
307 errflg = 1;
308 break;
309 }
310 if (optarg[1] != '\0')
311 errflg = 1;
312 break;
313 case 'p':
314 parag = YES;
315 break;
316 case 'w':
317 wordflag = YES;
318 kflag = YES;
319 break;
320 default:
321 errflg = 1;
322 }
323 }
324 argc = ac - optind;
325 argv = av + optind;
326
327 if (kflag)
328 keepblock = YES;
329 if (errflg)
330 usage();
331
332 #ifdef DEBUG
333 printf("msflag = %d, mac = %s, keepblock = %d, disp = %d\n",
334 msflag, mactab[mac], keepblock, disp);
335 #endif /* DEBUG */
336 if (argc == 0) {
337 infile = stdin;
338 } else {
339 infile = opn(argv[0]);
340 --argc;
341 ++argv;
342 }
343 files[0] = infile;
344 filesp = &files[0];
345
346 linesz = LINE_MAX;
347 if ((line = malloc(linesz)) == NULL)
348 err(1, NULL);
349
350 for (i = 'a'; i <= 'z'; ++i)
351 chars[i] = LETTER;
352 for (i = 'A'; i <= 'Z'; ++i)
353 chars[i] = LETTER;
354 for (i = '0'; i <= '9'; ++i)
355 chars[i] = DIGIT;
356 chars['\''] = APOS;
357 chars['&'] = APOS;
358 chars['.'] = PUNCT;
359 chars[','] = PUNCT;
360 chars[';'] = PUNCT;
361 chars['?'] = PUNCT;
362 chars[':'] = PUNCT;
363 work();
364 exit(0);
365 }
366
367 int
skeqn(void)368 skeqn(void)
369 {
370
371 while ((c = getc(infile)) != rdelim) {
372 if (c == EOF)
373 c = eof();
374 else if (c == '"') {
375 while ((c = getc(infile)) != '"') {
376 if (c == EOF ||
377 (c == '\\' && (c = getc(infile)) == EOF))
378 c = eof();
379 }
380 }
381 }
382 if (msflag)
383 return((c = 'x'));
384 return((c = ' '));
385 }
386
/*
 * Open the file named p for reading and return the stream.
 * Exits through err() if the file cannot be opened.
 */
FILE *
opn(char *p)
{
	FILE *fp = fopen(p, "r");

	if (fp == NULL)
		err(1, "fopen %s", p);
	return (fp);
}
397
398 int
eof(void)399 eof(void)
400 {
401
402 if (infile != stdin)
403 fclose(infile);
404 if (filesp > files)
405 infile = *--filesp;
406 else if (argc > 0) {
407 infile = opn(argv[0]);
408 --argc;
409 ++argv;
410 } else
411 exit(0);
412 return(C);
413 }
414
415 void
getfname(void)416 getfname(void)
417 {
418 char *p;
419 struct chain {
420 struct chain *nextp;
421 char *datap;
422 } *q;
423 static struct chain *namechain= NULL;
424
425 while (C == ' ')
426 ; /* nothing */
427
428 for (p = fname ; p - fname < sizeof(fname) && (*p = c) != '\n' &&
429 c != ' ' && c != '\t' && c != '\\'; ++p)
430 C;
431 *p = '\0';
432 while (c != '\n')
433 C;
434
435 /* see if this name has already been used */
436 for (q = namechain ; q; q = q->nextp)
437 if (strcmp(fname, q->datap) == 0) {
438 fname[0] = '\0';
439 return;
440 }
441
442 q = malloc(sizeof(struct chain));
443 if (q == NULL)
444 err(1, NULL);
445 q->nextp = namechain;
446 q->datap = strdup(fname);
447 if (q->datap == NULL)
448 err(1, NULL);
449 namechain = q;
450 }
451
452 void
textline(char * str,int constant)453 textline(char *str, int constant)
454 {
455
456 if (wordflag) {
457 msputwords(0);
458 return;
459 }
460 puts(str);
461 }
462
463 void
work(void)464 work(void)
465 {
466
467 for (;;) {
468 C;
469 #ifdef FULLDEBUG
470 printf("Starting work with `%c'\n", c);
471 #endif /* FULLDEBUG */
472 if (c == '.' || c == '\'')
473 comline();
474 else
475 regline(textline, TWO);
476 }
477 }
478
/*
 * Collect the rest of the current input line into the global line
 * buffer, starting with the character already in c, and pass the
 * NUL-terminated result to pfunc together with constant.  Nothing is
 * passed on when the collected line is empty.
 */
void
regline(void (*pfunc)(char *, int), int constant)
{

	line[0] = c;
	lp = line;
	for (;;) {
		/* double the buffer before lp can reach its last byte */
		if (lp - line == linesz - 1) {
			char *newline = reallocarray(line, linesz, 2);
			if (newline == NULL)
				err(1, NULL);
			lp = newline + (lp - line);
			line = newline;
			linesz *= 2;
		}
		/* a backslash is replaced by a blank; backsl() eats the rest */
		if (c == '\\') {
			*lp = ' ';
			backsl();
		}
		if (c == '\n')
			break;
		if (intable && c == 'T') {
			*++lp = C;
			/*
			 * "T{" or "T}" inside a table (presumably tbl text
			 * block markers): blank out the 'T' and overwrite the
			 * brace with the following character.
			 */
			if (c == '{' || c == '}') {
				lp[-1] = ' ';
				*lp = C;
			}
		} else {
			*++lp = C;
		}
	}
	/* lp is at the position where the newline was stored */
	*lp = '\0';

	if (line[0] != '\0')
		(*pfunc)(line, constant);
}
515
/*
 * Handle the start of a macro definition (called from domacro()).
 * With msflag on the definition is skipped: lines are discarded until
 * two consecutive '.' characters followed by a character other than
 * '.' have been read, and the rest of that line is skipped as well.
 * Without msflag only the rest of the current line is skipped and
 * inmacro is set.
 */
void
macro(void)
{

	if (msflag) {
		do {
			SKIP;
		} while (C!='.' || C!='.' || C=='.'); /* look for .. */
		if (c != '\n')
			SKIP;
		return;
	}
	SKIP;
	inmacro = YES;
}
531
532 void
tbl(void)533 tbl(void)
534 {
535
536 while (C != '.')
537 ; /* nothing */
538 SKIP;
539 intable = YES;
540 }
541
/*
 * Skip a table when a macro package is being interpreted (called from
 * intbl() when msflag is set).  Input is discarded up to the first '.',
 * then SKIP_TO_COM advances to a '.' at the start of a line; unless the
 * two characters after it are "TE", reading continues until ".TE" is
 * found at the start of a line.
 */
void
stbl(void)
{

	while (C != '.')
		; /* nothing */
	SKIP_TO_COM;
	if (c != 'T' || C != 'E') {
		SKIP;
		pc = c;
		/* pc tracks the previous character so '.' only counts after a newline */
		while (C != '.' || pc != '\n' || C != 'T' || C != 'E')
			pc = c;
	}
}
556
/*
 * Skip an equation: discard input up to the line on which "EN" follows
 * a leading '.' or '\'' (optionally separated by blanks or tabs).
 * Lines starting with "delim" set or clear the in-line equation
 * delimiters ldelim/rdelim.  With msflag on, and provided no line
 * starting with 'd' was seen, "x " is written in place of the equation,
 * followed by the trailing punctuation character of the last skipped
 * line (if any) and a newline.  Input is read with C1, so in-line
 * delimiters are not interpreted here.
 */
void
eqn(void)
{
	int c1, c2;
	int dflg;
	char last;

	last=0;
	dflg = 1;
	SKIP;

	for (;;) {
		if (C1 == '.' || c == '\'') {
			while (C1 == ' ' || c == '\t')
				;
			if (c == 'E' && C1 == 'N') {
				SKIP;
				if (msflag && dflg) {
					putchar('x');
					putchar(' ');
					if (last) {
						putchar(last);
						putchar('\n');
					}
				}
				return;
			}
		} else if (c == 'd') {
			/* look for delim */
			if (C1 == 'e' && C1 == 'l')
				if (C1 == 'i' && C1 == 'm') {
					while (C1 == ' ')
						; /* nothing */

					/*
					 * no argument, a single character, or
					 * "off" turns the delimiters off
					 */
					if ((c1 = c) == '\n' ||
					    (c2 = C1) == '\n' ||
					    (c1 == 'o' && c2 == 'f' && C1=='f')) {
						ldelim = NOCHAR;
						rdelim = NOCHAR;
					} else {
						ldelim = c1;
						rdelim = c2;
					}
				}
			/* any line starting with 'd' suppresses the "x " output */
			dflg = 0;
		}

		/*
		 * consume the rest of the line; last ends up holding the
		 * line's final character if it is punctuation (trailing
		 * spaces are ignored), else 0
		 */
		if (c != '\n')
			while (C1 != '\n') {
				if (chars[c] == PUNCT)
					last = c;
				else if (c != ' ')
					last = 0;
			}
	}
}
613
/*
 * skip over a complete backslash construction
 *
 * Called after a backslash has been read; dispatches on the character
 * that follows it.  May modify the global lp / *lp: "\s" steps lp back
 * by one, and with msflag on "\(em" stores '-' at *lp.
 */
void
backsl(void)
{
	int bdelim;

sw:
	switch (C) {
	case '"':
		/* comment: discard the rest of the line */
		SKIP;
		return;

	case 's':
		if (C == '\\')
			backsl();
		else {
			/* skip the digits, push back the first non-digit */
			while (C >= '0' && c <= '9')
				; /* nothing */
			ungetc(c, infile);
			c = '0';
		}
		--lp;
		return;

	case 'f':
	case 'n':
	case '*':
		/* one-character name unless followed by '(' */
		if (C != '(')
			return;
		/* FALLTHROUGH */

	case '(':
		/* two-character name */
		if (msflag) {
			if (C == 'e') {
				if (C == 'm') {
					*lp = '-';
					return;
				}
			}
			else if (c != '\n')
				C;
			return;
		}
		if (C != '\n')
			C;
		return;

	case '$':
		C; /* discard argument number */
		return;

	case 'b':
	case 'x':
	case 'v':
	case 'h':
	case 'w':
	case 'o':
	case 'l':
	case 'L':
		/* delimited argument: skip to the matching delimiter or newline */
		if ((bdelim = C) == '\n')
			return;
		while (C != '\n' && c != bdelim)
			if (c == '\\')
				backsl();
		return;

	case '\\':
		/* doubled backslash: inside a macro, process what follows */
		if (inmacro)
			goto sw;
		/* FALLTHROUGH */

	default:
		return;
	}
}
687
688 void
sce(void)689 sce(void)
690 {
691 char *ap;
692 int n, i;
693 char a[10];
694
695 for (ap = a; C != '\n'; ap++) {
696 *ap = c;
697 if (ap == &a[9]) {
698 SKIP;
699 ap = a;
700 break;
701 }
702 }
703 if (ap != a)
704 n = atoi(a);
705 else
706 n = 1;
707 for (i = 0; i < n;) {
708 if (C == '.') {
709 if (C == 'c') {
710 if (C == 'e') {
711 while (C == ' ')
712 ; /* nothing */
713 if (c == '0') {
714 SKIP;
715 break;
716 } else
717 SKIP;
718 }
719 else
720 SKIP;
721 } else if (c == 'P' || C == 'P') {
722 if (c != '\n')
723 SKIP;
724 break;
725 } else if (c != '\n')
726 SKIP;
727 } else {
728 SKIP;
729 i++;
730 }
731 }
732 }
733
734 void
refer(int c1)735 refer(int c1)
736 {
737 int c2;
738
739 if (c1 != '\n')
740 SKIP;
741
742 for (c2 = -1;;) {
743 if (C != '.')
744 SKIP;
745 else {
746 if (C != ']')
747 SKIP;
748 else {
749 while (C != '\n')
750 c2 = c;
751 if (c2 != -1 && chars[c2] == PUNCT)
752 putchar(c2);
753 return;
754 }
755 }
756 }
757 }
758
759 void
inpic(void)760 inpic(void)
761 {
762 int c1;
763 char *p1, *ep;
764
765 SKIP;
766 p1 = line;
767 ep = line + sizeof(line) - 1;
768 c = '\n';
769 for (;;) {
770 c1 = c;
771 if (C == '.' && c1 == '\n') {
772 if (C != 'P') {
773 if (c == '\n')
774 continue;
775 else {
776 SKIP;
777 c = '\n';
778 continue;
779 }
780 }
781 if (C != 'E') {
782 if (c == '\n')
783 continue;
784 else {
785 SKIP;
786 c = '\n';
787 continue;
788 }
789 }
790 SKIP;
791 return;
792 }
793 else if (c == '\"') {
794 while (C != '\"') {
795 if (c == '\\') {
796 if (C == '\"')
797 continue;
798 ungetc(c, infile);
799 backsl();
800 } else if (p1 + 1 >= ep) {
801 errx(1, ".PS length exceeds limit");
802 } else {
803 *p1++ = c;
804 }
805 }
806 *p1++ = ' ';
807 }
808 else if (c == '\n' && p1 != line) {
809 *p1 = '\0';
810 if (wordflag)
811 msputwords(NO);
812 else {
813 puts(line);
814 putchar('\n');
815 }
816 p1 = line;
817 }
818 }
819 }
820
#ifdef DEBUG
/* Function form of the C1get macro; used as C1 when DEBUG is defined. */
int
_C1(void)
{

	return(C1get);
}

/* Function form of the Cget macro; used as C when DEBUG is defined. */
int
_C(void)
{

	return(Cget);
}
#endif /* DEBUG */
836
837 /*
838 * Put out a macro line, using ms and mm conventions.
839 */
840 void
msputmac(char * s,int constant)841 msputmac(char *s, int constant)
842 {
843 char *t;
844 int found;
845 int last;
846
847 last = 0;
848 found = 0;
849 if (wordflag) {
850 msputwords(YES);
851 return;
852 }
853 while (*s) {
854 while (*s == ' ' || *s == '\t')
855 putchar(*s++);
856 for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t)
857 ; /* nothing */
858 if (*s == '\"')
859 s++;
860 if (t > s + constant && chars[(unsigned char)s[0]] == LETTER &&
861 chars[(unsigned char)s[1]] == LETTER) {
862 while (s < t)
863 if (*s == '\"')
864 s++;
865 else
866 putchar(*s++);
867 last = *(t-1);
868 found++;
869 } else if (found && chars[(unsigned char)s[0]] == PUNCT &&
870 s[1] == '\0') {
871 putchar(*s++);
872 } else {
873 last = *(t - 1);
874 s = t;
875 }
876 }
877 putchar('\n');
878 if (msflag && chars[last] == PUNCT) {
879 putchar(last);
880 putchar('\n');
881 }
882 }
883
884 /*
885 * put out words (for the -w option) with ms and mm conventions
886 */
887 void
msputwords(int macline)888 msputwords(int macline)
889 {
890 char *p, *p1;
891 int i, nlet;
892
893 for (p1 = line;;) {
894 /*
895 * skip initial specials ampersands and apostrophes
896 */
897 while (chars[(unsigned char)*p1] < DIGIT)
898 if (*p1++ == '\0')
899 return;
900 nlet = 0;
901 for (p = p1 ; (i = chars[(unsigned char)*p]) != SPECIAL ; ++p)
902 if (i == LETTER)
903 ++nlet;
904
905 if (nlet > 1 && chars[(unsigned char)p1[0]] == LETTER) {
906 /*
907 * delete trailing ampersands and apostrophes
908 */
909 while ((i = chars[(unsigned char)p[-1]]) == PUNCT ||
910 i == APOS )
911 --p;
912 while (p1 < p)
913 putchar(*p1++);
914 putchar('\n');
915 } else {
916 p1 = p;
917 }
918 }
919 }
920
921 /*
922 * put out a macro using the me conventions
923 */
/*
 * Advance the pointer cp over blanks and tabs (SKIPBLANK), or up to the
 * next blank, tab or terminating NUL (SKIPNONBLANK).  cp must be a
 * modifiable lvalue; it is evaluated several times.
 */
#define SKIPBLANK(cp) while (*(cp) == ' ' || *(cp) == '\t') { (cp)++; }
#define SKIPNONBLANK(cp) while (*(cp) != ' ' && *(cp) != '\t' && *(cp) != '\0') { (cp)++; }
926
927 void
meputmac(char * cp,int constant)928 meputmac(char *cp, int constant)
929 {
930 char *np;
931 int found;
932 int argno;
933 int last;
934 int inquote;
935
936 last = 0;
937 found = 0;
938 if (wordflag) {
939 meputwords(YES);
940 return;
941 }
942 for (argno = 0; *cp; argno++) {
943 SKIPBLANK(cp);
944 inquote = (*cp == '"');
945 if (inquote)
946 cp++;
947 for (np = cp; *np; np++) {
948 switch (*np) {
949 case '\n':
950 case '\0':
951 break;
952
953 case '\t':
954 case ' ':
955 if (inquote)
956 continue;
957 else
958 goto endarg;
959
960 case '"':
961 if (inquote && np[1] == '"') {
962 memmove(np, np + 1, strlen(np));
963 np++;
964 continue;
965 } else {
966 *np = ' '; /* bye bye " */
967 goto endarg;
968 }
969
970 default:
971 continue;
972 }
973 }
974 endarg: ;
975 /*
976 * cp points at the first char in the arg
977 * np points one beyond the last char in the arg
978 */
979 if ((argconcat == 0) || (argconcat != argno))
980 putchar(' ');
981 #ifdef FULLDEBUG
982 {
983 char *p;
984 printf("[%d,%d: ", argno, np - cp);
985 for (p = cp; p < np; p++) {
986 putchar(*p);
987 }
988 printf("]");
989 }
990 #endif /* FULLDEBUG */
991 /*
992 * Determine if the argument merits being printed
993 *
994 * constant is the cut off point below which something
995 * is not a word.
996 */
997 if (((np - cp) > constant) &&
998 (inquote || (chars[(unsigned char)cp[0]] == LETTER))) {
999 for (; cp < np; cp++)
1000 putchar(*cp);
1001 last = np[-1];
1002 found++;
1003 } else if (found && (np - cp == 1) &&
1004 chars[(unsigned char)*cp] == PUNCT) {
1005 putchar(*cp);
1006 } else {
1007 last = np[-1];
1008 }
1009 cp = np;
1010 }
1011 if (msflag && chars[last] == PUNCT)
1012 putchar(last);
1013 putchar('\n');
1014 }
1015
/*
 * put out words (for the -w option) with me conventions;
 * this simply hands macline on to msputwords()
 */
void
meputwords(int macline)
{

	msputwords(macline);
}
1025
/*
 *
 * Skip over a nested set of macros
 *
 * Possible arguments to noblock are:
 *
 * fi end of unfilled text
 * PE pic ending
 * DE display ending
 *
 * for ms and mm only:
 * KE keep ending
 *
 * NE undocumented match to NS (for mm?)
 * LE mm only: matches RL or *L (for lists)
 *
 * for me:
 * ([lqbzcdf]
 *
 * a1 and a2 are the two characters of the request that ends the block.
 * When the block ends at that request, a newline is written, preceded
 * by a '.' unless an equation was processed inside the block.
 */
void
noblock(char a1, char a2)
{
	int c1,c2;
	int eqnf;
	int lct;

	lct = 0;	/* nesting depth of lists opened inside the block */
	eqnf = 1;	/* cleared once eqn() has been called */
	SKIP;
	for (;;) {
		/* discard text lines until a line starts with '.' */
		while (C != '.')
			if (c == '\n')
				continue;
			else
				SKIP;
		if ((c1 = C) == '\n')
			continue;
		if ((c2 = C) == '\n')
			continue;
		if (c1 == a1 && c2 == a2) {
			SKIP;
			/* an end request inside a nested list closes that list only */
			if (lct != 0) {
				lct--;
				continue;
			}
			if (eqnf)
				putchar('.');
			putchar('\n');
			return;
		} else if (a1 == 'L' && c2 == 'L') {
			/* another ?L request while skipping a list: one level deeper */
			lct++;
			SKIP;
		}
		/*
		 * equations (EQ) nested within a display
		 */
		else if (c1 == 'E' && c2 == 'Q') {
			if ((mac == ME && a1 == ')')
			    || (mac != ME && a1 == 'D')) {
				eqn();
				eqnf=0;
			}
		}
		/*
		 * turning on filling is done by the paragraphing
		 * macros
		 */
		else if (a1 == 'f') { /* .fi */
			if ((mac == ME && (c2 == 'h' || c2 == 'p'))
			    || (mac != ME && (c1 == 'P' || c2 == 'P'))) {
				SKIP;
				return;
			}
		} else {
			SKIP;
		}
	}
}
1104
/* Macro table handler for EQ: skip the equation with eqn(); returns 0. */
int
EQ(void)
{

	eqn();
	return(0);
}
1112
/* Macro table handler for de, ig and am: calls macro(); returns 0. */
int
domacro(void)
{

	macro();
	return(0);
}
1120
1121 int
PS(void)1122 PS(void)
1123 {
1124
1125 for (C; c == ' ' || c == '\t'; C)
1126 ; /* nothing */
1127
1128 if (c == '<') { /* ".PS < file" -- don't expect a .PE */
1129 SKIP;
1130 return(0);
1131 }
1132 if (!msflag)
1133 inpic();
1134 else
1135 noblock('P', 'E');
1136 return(0);
1137 }
1138
/* Macro table handler that discards the rest of the line; returns 0. */
int
skip(void)
{

	SKIP;
	return(0);
}
1146
1147 int
intbl(void)1148 intbl(void)
1149 {
1150
1151 if (msflag)
1152 stbl();
1153 else
1154 tbl();
1155 return(0);
1156 }
1157
/* Macro table handler for TE: clears intable; returns 0. */
int
outtbl(void)
{

	intable = NO;
	return(0);
}
1165
1166 int
so(void)1167 so(void)
1168 {
1169
1170 if (!iflag) {
1171 getfname();
1172 if (fname[0]) {
1173 if (++filesp - &files[0] > MAXFILES)
1174 err(1, "too many nested files (max %d)",
1175 MAXFILES);
1176 infile = *filesp = opn(fname);
1177 }
1178 }
1179 return(0);
1180 }
1181
1182 int
nx(void)1183 nx(void)
1184 {
1185
1186 if (!iflag) {
1187 getfname();
1188 if (fname[0] == '\0')
1189 exit(0);
1190 if (infile != stdin)
1191 fclose(infile);
1192 infile = *filesp = opn(fname);
1193 }
1194 return(0);
1195 }
1196
/*
 * Macro table handler: skip ahead with SKIP_TO_COM and return COMX so
 * that comline() continues with the request that was found.
 */
int
skiptocom(void)
{

	SKIP_TO_COM;
	return(COMX);
}
1204
1205 int
PP(pacmac c12)1206 PP(pacmac c12)
1207 {
1208 int c1, c2;
1209
1210 frommac(c12, c1, c2);
1211 printf(".%c%c", c1, c2);
1212 while (C != '\n')
1213 putchar(c);
1214 putchar('\n');
1215 return(0);
1216 }
1217
1218 int
AU(void)1219 AU(void)
1220 {
1221
1222 if (mac == MM)
1223 return(0);
1224 SKIP_TO_COM;
1225 return(COMX);
1226 }
1227
/*
 * Macro table handler for SH and SN (section headings).
 * With -p the request line is echoed ('.', the two name characters and
 * the rest of the line), and every following line up to the next line
 * starting with '.' is echoed with a '!' in front; COM is then
 * returned.  Without -p the heading is skipped with SKIP_TO_COM and
 * COMX is returned.
 */
int
SH(pacmac c12)
{
	int c1, c2;

	frommac(c12, c1, c2);

	if (parag) {
		printf(".%c%c", c1, c2);
		while (C != '\n')
			putchar(c);
		putchar(c);	/* c is the newline here */
		putchar('!');
		for (;;) {
			while (C != '\n')
				putchar(c);
			putchar('\n');
			/* a '.' at the start of the next line ends the heading */
			if (C == '.')
				return(COM);
			putchar('!');
			putchar(c);
		}
		/*NOTREACHED*/
	} else {
		SKIP_TO_COM;
		return(COMX);
	}
}
1256
1257 int
UX(void)1258 UX(void)
1259 {
1260
1261 if (wordflag)
1262 printf("UNIX\n");
1263 else
1264 printf("UNIX ");
1265 return(0);
1266 }
1267
1268 int
MMHU(pacmac c12)1269 MMHU(pacmac c12)
1270 {
1271 int c1, c2;
1272
1273 frommac(c12, c1, c2);
1274 if (parag) {
1275 printf(".%c%c", c1, c2);
1276 while (C != '\n')
1277 putchar(c);
1278 putchar('\n');
1279 } else {
1280 SKIP;
1281 }
1282 return(0);
1283 }
1284
1285 int
mesnblock(pacmac c12)1286 mesnblock(pacmac c12)
1287 {
1288 int c1, c2;
1289
1290 frommac(c12, c1, c2);
1291 noblock(')', c2);
1292 return(0);
1293 }
1294
1295 int
mssnblock(pacmac c12)1296 mssnblock(pacmac c12)
1297 {
1298 int c1, c2;
1299
1300 frommac(c12, c1, c2);
1301 noblock(c1, 'E');
1302 return(0);
1303 }
1304
/* Macro table handler for nf: skip the block up to "fi"; returns 0. */
int
nf(void)
{

	noblock('f', 'i');
	return(0);
}
1312
/* Macro table handler for ce: skip centered lines with sce(); returns 0. */
int
ce(void)
{

	sce();
	return(0);
}
1320
1321 int
meip(pacmac c12)1322 meip(pacmac c12)
1323 {
1324
1325 if (parag)
1326 mepp(c12);
1327 else if (wordflag) /* save the tag */
1328 regline(meputmac, ONE);
1329 else
1330 SKIP;
1331 return(0);
1332 }
1333
/*
 * only called for -me .pp or .sh, when parag is on
 *
 * Echoes the request line through PP() and returns 0.
 */
int
mepp(pacmac c12)
{

	PP(c12); /* eats the line */
	return(0);
}
1344
1345 /*
1346 * Start of a section heading; output the section name if doing words
1347 */
1348 int
mesh(pacmac c12)1349 mesh(pacmac c12)
1350 {
1351
1352 if (parag)
1353 mepp(c12);
1354 else if (wordflag)
1355 defcomline(c12);
1356 else
1357 SKIP;
1358 return(0);
1359 }
1360
/*
 * process a font setting
 *
 * The request line is handled by defcomline() with argconcat set to 1
 * for the duration of the call, so that meputmac() writes no blank
 * before the argument with index 1.  Returns 0.
 */
int
mefont(pacmac c12)
{

	argconcat = 1;
	defcomline(c12);
	argconcat = 0;
	return(0);
}
1373
/* -man font requests are handled exactly like the -me ones. */
int
manfont(pacmac c12)
{

	return(mefont(c12));
}
1380
/* -man paragraph requests are handled exactly like the -me ones. */
int
manpp(pacmac c12)
{

	return(mepp(c12));
}
1387
/*
 * Default handling of a request line, used by comline() when no macro
 * table entry applies and by some of the table handlers.  c12 is the
 * packed two-character request name.  After the special cases for -mm
 * list requests and for "..", the rest of the line is collected by
 * regline() and written with the output routine of the selected
 * package; inmacro is raised for the duration of that call.
 */
void
defcomline(pacmac c12)
{
	int c1, c2;

	frommac(c12, c1, c2);
	/* -mm requests whose second character is 'L' */
	if (msflag && mac == MM && c2 == 'L') {
		if (disp || c1 == 'R') {
			noblock('L', 'E');
		} else {
			SKIP;
			putchar('.');
		}
	}
	else if (c1 == '.' && c2 == '.') {
		if (msflag) {
			SKIP;
			return;
		}
		while (C == '.')
			/*VOID*/;
	}
	++inmacro;
	/*
	 * Process the arguments to the macro
	 */
	switch (mac) {
	default:
	case MM:
	case MS:
		/* the cut-off length is ONE when c1 <= 'Z' and msflag is on */
		if (c1 <= 'Z' && msflag)
			regline(msputmac, ONE);
		else
			regline(msputmac, TWO);
		break;
	case ME:
		regline(meputmac, ONE);
		break;
	}
	--inmacro;
}
1429
1430 void
comline(void)1431 comline(void)
1432 {
1433 int c1;
1434 int c2;
1435 pacmac c12;
1436 int mid;
1437 int lb, ub;
1438 int hit;
1439 static int tabsize = 0;
1440 static struct mactab *mactab = NULL;
1441 struct mactab *mp;
1442
1443 if (mactab == 0)
1444 buildtab(&mactab, &tabsize);
1445 com:
1446 while (C == ' ' || c == '\t')
1447 ;
1448 comx:
1449 if ((c1 = c) == '\n')
1450 return;
1451 c2 = C;
1452 if (c1 == '.' && c2 != '.')
1453 inmacro = NO;
1454 if (msflag && c1 == '[') {
1455 refer(c2);
1456 return;
1457 }
1458 if (parag && mac==MM && c1 == 'P' && c2 == '\n') {
1459 printf(".P\n");
1460 return;
1461 }
1462 if (c2 == '\n')
1463 return;
1464 /*
1465 * Single letter macro
1466 */
1467 if (mac == ME && (c2 == ' ' || c2 == '\t') )
1468 c2 = ' ';
1469 c12 = tomac(c1, c2);
1470 /*
1471 * binary search through the table of macros
1472 */
1473 lb = 0;
1474 ub = tabsize - 1;
1475 while (lb <= ub) {
1476 mid = (ub + lb) / 2;
1477 mp = &mactab[mid];
1478 if (mp->macname < c12)
1479 lb = mid + 1;
1480 else if (mp->macname > c12)
1481 ub = mid - 1;
1482 else {
1483 hit = 1;
1484 #ifdef FULLDEBUG
1485 printf("preliminary hit macro %c%c ", c1, c2);
1486 #endif /* FULLDEBUG */
1487 switch (mp->condition) {
1488 case NONE:
1489 hit = YES;
1490 break;
1491 case FNEST:
1492 hit = (filesp == files);
1493 break;
1494 case NOMAC:
1495 hit = !inmacro;
1496 break;
1497 case MAC:
1498 hit = inmacro;
1499 break;
1500 case PARAG:
1501 hit = parag;
1502 break;
1503 case NBLK:
1504 hit = !keepblock;
1505 break;
1506 default:
1507 hit = 0;
1508 }
1509
1510 if (hit) {
1511 #ifdef FULLDEBUG
1512 printf("MATCH\n");
1513 #endif /* FULLDEBUG */
1514 switch ((*(mp->func))(c12)) {
1515 default:
1516 return;
1517 case COMX:
1518 goto comx;
1519 case COM:
1520 goto com;
1521 }
1522 }
1523 #ifdef FULLDEBUG
1524 printf("FAIL\n");
1525 #endif /* FULLDEBUG */
1526 break;
1527 }
1528 }
1529 defcomline(c12);
1530 }
1531
1532 int
macsort(const void * p1,const void * p2)1533 macsort(const void *p1, const void *p2)
1534 {
1535 struct mactab *t1 = (struct mactab *)p1;
1536 struct mactab *t2 = (struct mactab *)p2;
1537
1538 return(t1->macname - t2->macname);
1539 }
1540
1541 int
sizetab(struct mactab * mp)1542 sizetab(struct mactab *mp)
1543 {
1544 int i;
1545
1546 i = 0;
1547 if (mp) {
1548 for (; mp->macname; mp++, i++)
1549 /*VOID*/ ;
1550 }
1551 return(i);
1552 }
1553
1554 struct mactab *
macfill(struct mactab * dst,struct mactab * src)1555 macfill(struct mactab *dst, struct mactab *src)
1556 {
1557
1558 if (src) {
1559 while (src->macname)
1560 *dst++ = *src++;
1561 }
1562 return(dst);
1563 }
1564
/* Print the usage message to stderr and exit with status 1. */
__dead void
usage(void)
{
	extern char *__progname;

	fprintf(stderr, "usage: %s [-ikpw] [-m a | e | l | m | s] [file ...]\n", __progname);
	exit(1);
}
1573
1574 void
buildtab(struct mactab ** r_back,int * r_size)1575 buildtab(struct mactab **r_back, int *r_size)
1576 {
1577 int size;
1578 struct mactab *p, *p1, *p2;
1579 struct mactab *back;
1580
1581 size = sizetab(troffmactab) + sizetab(ppmactab);
1582 p1 = p2 = NULL;
1583 if (msflag) {
1584 switch (mac) {
1585 case ME:
1586 p1 = memactab;
1587 break;
1588 case MM:
1589 p1 = msmactab;
1590 p2 = mmmactab;
1591 break;
1592 case MS:
1593 p1 = msmactab;
1594 break;
1595 case MA:
1596 p1 = manmactab;
1597 break;
1598 default:
1599 break;
1600 }
1601 }
1602 size += sizetab(p1);
1603 size += sizetab(p2);
1604 back = calloc(size+2, sizeof(struct mactab));
1605 if (back == NULL)
1606 err(1, NULL);
1607
1608 p = macfill(back, troffmactab);
1609 p = macfill(p, ppmactab);
1610 p = macfill(p, p1);
1611 p = macfill(p, p2);
1612
1613 qsort(back, size, sizeof(struct mactab), macsort);
1614 *r_size = size;
1615 *r_back = back;
1616 }
1617
/*
 * troff commands
 *
 * Each entry is: condition flag, the two characters of the request
 * name, handler.  The table ends with an all-zero entry.
 */
struct mactab troffmactab[] = {
	M(NONE, '\\','"', skip), /* comment */
	M(NOMAC, 'd','e', domacro), /* define */
	M(NOMAC, 'i','g', domacro), /* ignore till .. */
	M(NOMAC, 'a','m', domacro), /* append macro */
	M(NBLK, 'n','f', nf), /* no-fill text; skipped up to .fi */
	M(NBLK, 'c','e', ce), /* centered */

	M(NONE, 's','o', so), /* source a file */
	M(NONE, 'n','x', nx), /* go to next file */

	M(NONE, 't','m', skip), /* print string on tty */
	M(NONE, 'h','w', skip), /* exception hyphen words */
	M(NONE, 0,0, 0)
};
1636
/*
 * Preprocessor output
 *
 * Requests of eqn, tbl and pic.  The FNEST entries only apply while
 * reading a top-level file (filesp == files).
 */
struct mactab ppmactab[] = {
	M(FNEST, 'E','Q', EQ), /* equation starting */
	M(FNEST, 'T','S', intbl), /* table starting */
	M(FNEST, 'T','C', intbl), /* alternative table? */
	M(FNEST, 'T','&', intbl), /* table reformatting */
	M(NONE, 'T','E', outtbl),/* table ending */
	M(NONE, 'P','S', PS), /* picture starting */
	M(NONE, 0,0, 0)
};
1649
/*
 * Particular to ms and mm
 *
 * buildtab() loads this table for both -ms and -mm.
 */
struct mactab msmactab[] = {
	M(NONE, 'T','L', skiptocom), /* title follows */
	M(NONE, 'F','S', skiptocom), /* start footnote */
	M(NONE, 'O','K', skiptocom), /* Other kws */

	M(NONE, 'N','R', skip), /* undocumented */
	M(NONE, 'N','D', skip), /* use supplied date */

	M(PARAG, 'P','P', PP), /* begin parag */
	M(PARAG, 'I','P', PP), /* begin indent parag, tag x */
	M(PARAG, 'L','P', PP), /* left blocked parag */

	M(NONE, 'A','U', AU), /* author */
	M(NONE, 'A','I', AU), /* authors institution */

	M(NONE, 'S','H', SH), /* section heading */
	M(NONE, 'S','N', SH), /* undocumented */
	M(NONE, 'U','X', UX), /* unix */

	M(NBLK, 'D','S', mssnblock), /* start display text */
	M(NBLK, 'K','S', mssnblock), /* start keep */
	M(NBLK, 'K','F', mssnblock), /* start float keep */
	M(NONE, 0,0, 0)
};
1677
/*
 * Additional requests for -mm; buildtab() loads them together with
 * msmactab.
 */
struct mactab mmmactab[] = {
	M(NONE, 'H',' ', MMHU), /* -mm ? */
	M(NONE, 'H','U', MMHU), /* -mm ? */
	M(PARAG, 'P',' ', PP), /* paragraph for -mm */
	M(NBLK, 'N','S', mssnblock), /* undocumented */
	M(NONE, 0,0, 0)
};
1685
/*
 * Requests for -me.  For single-letter requests the second name
 * character is a blank: comline() maps a blank or tab after the first
 * letter to ' ' when the package is -me.
 */
struct mactab memactab[] = {
	/* paragraphs: echoed when -p is given */
	M(PARAG, 'p','p', mepp),
	M(PARAG, 'l','p', mepp),
	M(PARAG, 'n','p', mepp),
	M(NONE, 'i','p', meip),

	/* section headings */
	M(NONE, 's','h', mesh),
	M(NONE, 'u','h', mesh),

	/* blocks, skipped up to the matching ")x" unless blocks are kept */
	M(NBLK, '(','l', mesnblock),
	M(NBLK, '(','q', mesnblock),
	M(NBLK, '(','b', mesnblock),
	M(NBLK, '(','z', mesnblock),
	M(NBLK, '(','c', mesnblock),

	M(NBLK, '(','d', mesnblock),
	M(NBLK, '(','f', mesnblock),
	M(NBLK, '(','x', mesnblock),

	/* font settings */
	M(NONE, 'r',' ', mefont),
	M(NONE, 'i',' ', mefont),
	M(NONE, 'b',' ', mefont),
	M(NONE, 'u',' ', mefont),
	M(NONE, 'q',' ', mefont),
	M(NONE, 'r','b', mefont),
	M(NONE, 'b','i', mefont),
	M(NONE, 'b','x', mefont),
	M(NONE, 0,0, 0)
};
1715
/*
 * Requests for -man.  All entries carry the PARAG condition, so they
 * only apply when -p is given.
 */
struct mactab manmactab[] = {
	/* font settings */
	M(PARAG, 'B','I', manfont),
	M(PARAG, 'B','R', manfont),
	M(PARAG, 'I','B', manfont),
	M(PARAG, 'I','R', manfont),
	M(PARAG, 'R','B', manfont),
	M(PARAG, 'R','I', manfont),

	/* paragraphs */
	M(PARAG, 'P','P', manpp),
	M(PARAG, 'L','P', manpp),
	M(PARAG, 'H','P', manpp),
	M(NONE, 0,0, 0)
};
1729