1 /* $NetBSD: checknr.c,v 1.24 2013/08/12 14:03:18 joerg Exp $ */
2
3 /*
4 * Copyright (c) 1980, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\
35 The Regents of the University of California. All rights reserved.");
36 #endif /* not lint */
37
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93";
41 #else
42 __RCSID("$NetBSD: checknr.c,v 1.24 2013/08/12 14:03:18 joerg Exp $");
43 #endif
44 #endif /* not lint */
45
46 /*
47 * checknr: check an nroff/troff input file for matching macro calls.
48 * we also attempt to match size and font changes, but only the embedded
49 * kind. These must end in \s0 and \fP resp. Maybe more sophistication
50 * later but for now think of these restrictions as contributions to
51 * structured typesetting.
52 */
53 #include <ctype.h>
54 #include <err.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58
59 #define MAXSTK 100 /* Stack size */
60 #define MAXBR 100 /* Max number of bracket pairs known */
61 #define MAXCMDS 500 /* Max number of commands known */
62
63 /*
64 * The stack on which we remember what we've seen so far.
65 */
66 static struct stkstr {
67 int opno; /* number of opening bracket */
68 int pl; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
69 int parm; /* parm to size, font, etc */
70 int lno; /* line number the thing came in in */
71 } stk[MAXSTK];
72 static int stktop;
73
74 /*
75 * The kinds of opening and closing brackets.
76 */
77 static struct brstr {
78 const char *opbr;
79 const char *clbr;
80 } br[MAXBR] = {
81 /* A few bare bones troff commands */
82 #define SZ 0
83 { "sz", "sz"}, /* also \s */
84 #define FT 1
85 { "ft", "ft"}, /* also \f */
86 /* the -mm package */
87 {"AL", "LE"},
88 {"AS", "AE"},
89 {"BL", "LE"},
90 {"BS", "BE"},
91 {"DF", "DE"},
92 {"DL", "LE"},
93 {"DS", "DE"},
94 {"FS", "FE"},
95 {"ML", "LE"},
96 {"NS", "NE"},
97 {"RL", "LE"},
98 {"VL", "LE"},
99 /* the -ms package */
100 {"AB", "AE"},
101 {"BD", "DE"},
102 {"CD", "DE"},
103 {"DS", "DE"},
104 {"FS", "FE"},
105 {"ID", "DE"},
106 {"KF", "KE"},
107 {"KS", "KE"},
108 {"LD", "DE"},
109 {"LG", "NL"},
110 {"QS", "QE"},
111 {"RS", "RE"},
112 {"SM", "NL"},
113 {"XA", "XE"},
114 {"XS", "XE"},
115 /* The -me package */
116 {"(b", ")b"},
117 {"(c", ")c"},
118 {"(d", ")d"},
119 {"(f", ")f"},
120 {"(l", ")l"},
121 {"(q", ")q"},
122 {"(x", ")x"},
123 {"(z", ")z"},
124 /* The -mdoc package */
125 {"Ao", "Ac"},
126 {"Bd", "Ed"},
127 {"Bk", "Ek"},
128 {"Bo", "Bc"},
129 {"Do", "Dc"},
130 {"Fo", "Fc"},
131 {"Oo", "Oc"},
132 {"Po", "Pc"},
133 {"Qo", "Qc"},
134 {"Rs", "Re"},
135 {"So", "Sc"},
136 {"Xo", "Xc"},
137 /* Things needed by preprocessors */
138 {"EQ", "EN"},
139 {"TS", "TE"},
140 /* Refer */
141 {"[", "]"},
142 {0, 0}
143 };
144
145 /*
146 * All commands known to nroff, plus macro packages.
147 * Used so we can complain about unrecognized commands.
148 */
149 static const char *knowncmds[MAXCMDS] = {
150 "$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N",
151 "%O", "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q",
152 "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x",
153 ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D",
154 "@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p",
155 "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT",
156 "AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", "B" , "B1",
157 "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", "Bf",
158 "Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT",
159 "Cd", "Cm", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc",
160 "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM",
161 "EN", "EQ", "EX", "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er",
162 "Ev", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ",
163 "FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Ft", "Fx",
164 "H" , "HC", "HD", "HM", "HO", "HU", "I" , "ID", "IE", "IH", "IM",
165 "IP", "IX", "IZ", "Ic", "In", "It", "KD", "KE", "KF", "KQ", "KS", "LB",
166 "LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", "MF",
167 "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd",
168 "Nm", "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op",
169 "Os", "Ot", "Ox", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY",
170 "Pa", "Pc", "Pf", "Po", "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql",
171 "Qo", "Qq", "R" , "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT",
172 "Re", "Rs", "S" , "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM",
173 "SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
174 "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ",
175 "TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt",
176 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo",
177 "Xr", "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>",
178 "[]", "\\{", "\\}", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am",
179 "ar", "as", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx",
180 "c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da", "de",
181 "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el",
182 "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp", "ft",
183 "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i" , "ie",
184 "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
185 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo",
186 "n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr",
187 "ns", "nx", "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn",
188 "po", "pp", "ps", "q" , "r" , "rb", "rd", "re", "rm", "rn", "ro",
189 "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so", "sp", "ss", "st",
190 "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "u",
191 "uf", "uh", "ul", "vs", "wh", "xp", "yr", 0
192 };
193
194 static int lineno; /* current line number in input file */
195 static const char *cfilename; /* name of current file */
196 static int nfiles; /* number of files to process */
197 static int fflag; /* -f: ignore \f */
198 static int sflag; /* -s: ignore \s */
199 static int ncmds; /* size of knowncmds */
200 static int slot; /* slot in knowncmds found by binsrch */
201
202 static void addcmd(char *);
203 static void addmac(const char *);
204 static int binsrch(const char *);
205 static void checkknown(const char *);
206 static void chkcmd(const char *);
207 static void complain(int);
208 static int eq(const char *, const char *);
209 static void nomatch(const char *);
210 static void pe(int);
211 static void process(FILE *);
212 static void prop(int);
213 static void usage(void) __dead;
214
215 int
main(int argc,char ** argv)216 main(int argc, char **argv)
217 {
218 FILE *f;
219 int i;
220 char *cp;
221 char b1[4];
222
223 /* Figure out how many known commands there are */
224 while (knowncmds[ncmds])
225 ncmds++;
226 while (argc > 1 && argv[1][0] == '-') {
227 switch(argv[1][1]) {
228
229 /* -a: add pairs of macros */
230 case 'a':
231 i = strlen(argv[1]) - 2;
232 if (i % 6 != 0)
233 usage();
234 /* look for empty macro slots */
235 for (i=0; br[i].opbr; i++)
236 ;
237 for (cp=argv[1]+3; cp[-1]; cp += 6) {
238 char *tmp;
239
240 if (i >= MAXBR)
241 errx(1, "too many pairs");
242 if ((tmp = malloc(3)) == NULL)
243 err(1, "malloc");
244 strlcpy(tmp, cp, 3);
245 br[i].opbr = tmp;
246 if ((tmp = malloc(3)) == NULL)
247 err(1, "malloc");
248 strlcpy(tmp, cp+3, 3);
249 br[i].clbr = tmp;
250 addmac(br[i].opbr); /* knows pairs are also known cmds */
251 addmac(br[i].clbr);
252 i++;
253 }
254 break;
255
256 /* -c: add known commands */
257 case 'c':
258 i = strlen(argv[1]) - 2;
259 if (i % 3 != 0)
260 usage();
261 for (cp=argv[1]+3; cp[-1]; cp += 3) {
262 if (cp[2] && cp[2] != '.')
263 usage();
264 strncpy(b1, cp, 2);
265 addmac(b1);
266 }
267 break;
268
269 /* -f: ignore font changes */
270 case 'f':
271 fflag = 1;
272 break;
273
274 /* -s: ignore size changes */
275 case 's':
276 sflag = 1;
277 break;
278 default:
279 usage();
280 }
281 argc--; argv++;
282 }
283
284 nfiles = argc - 1;
285
286 if (nfiles > 0) {
287 for (i=1; i<argc; i++) {
288 cfilename = argv[i];
289 f = fopen(cfilename, "r");
290 if (f == NULL)
291 perror(cfilename);
292 else {
293 process(f);
294 fclose(f);
295 }
296 }
297 } else {
298 cfilename = "stdin";
299 process(stdin);
300 }
301 exit(0);
302 }
303
304 static void
usage(void)305 usage(void)
306 {
307 (void)fprintf(stderr,
308 "usage: %s [-fs] [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] file\n",
309 getprogname());
310 exit(1);
311 }
312
313 static void
process(FILE * f)314 process(FILE *f)
315 {
316 int i, n;
317 char line[256]; /* the current line */
318 char mac[5]; /* The current macro or nroff command */
319 int pl;
320
321 stktop = -1;
322 for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
323 if (line[0] == '.') {
324 /*
325 * find and isolate the macro/command name.
326 */
327 strncpy(mac, line+1, 4);
328 if (isspace((unsigned char)mac[0])) {
329 pe(lineno);
330 printf("Empty command\n");
331 } else if (isspace((unsigned char)mac[1])) {
332 mac[1] = 0;
333 } else if (isspace((unsigned char)mac[2])) {
334 mac[2] = 0;
335 } else if (mac[0] != '\\' || mac[1] != '\"') {
336 pe(lineno);
337 printf("Command too long\n");
338 }
339
340 /*
341 * Is it a known command?
342 */
343 checkknown(mac);
344
345 /*
346 * Should we add it?
347 */
348 if (eq(mac, "de"))
349 addcmd(line);
350
351 chkcmd(mac);
352 }
353
354 /*
355 * At this point we process the line looking
356 * for \s and \f.
357 */
358 for (i=0; line[i]; i++)
359 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
360 if (!sflag && line[++i]=='s') {
361 pl = line[++i];
362 if (isdigit((unsigned char)pl)) {
363 n = pl - '0';
364 pl = ' ';
365 } else
366 n = 0;
367 while (isdigit((unsigned char)line[++i]))
368 n = 10 * n + line[i] - '0';
369 i--;
370 if (n == 0) {
371 if (stktop >= 0 &&
372 stk[stktop].opno == SZ) {
373 stktop--;
374 } else {
375 pe(lineno);
376 printf("unmatched \\s0\n");
377 }
378 } else {
379 stk[++stktop].opno = SZ;
380 stk[stktop].pl = pl;
381 stk[stktop].parm = n;
382 stk[stktop].lno = lineno;
383 }
384 } else if (!fflag && line[i]=='f') {
385 n = line[++i];
386 if (n == 'P') {
387 if (stktop >= 0 &&
388 stk[stktop].opno == FT) {
389 stktop--;
390 } else {
391 pe(lineno);
392 printf("unmatched \\fP\n");
393 }
394 } else {
395 stk[++stktop].opno = FT;
396 stk[stktop].pl = 1;
397 stk[stktop].parm = n;
398 stk[stktop].lno = lineno;
399 }
400 }
401 }
402 }
403 /*
404 * We've hit the end and look at all this stuff that hasn't been
405 * matched yet! Complain, complain.
406 */
407 for (i=stktop; i>=0; i--) {
408 complain(i);
409 }
410 }
411
412 static void
complain(int i)413 complain(int i)
414 {
415 pe(stk[i].lno);
416 printf("Unmatched ");
417 prop(i);
418 printf("\n");
419 }
420
421 static void
prop(int i)422 prop(int i)
423 {
424 if (stk[i].pl == 0)
425 printf(".%s", br[stk[i].opno].opbr);
426 else switch(stk[i].opno) {
427 case SZ:
428 printf("\\s%c%d", stk[i].pl, stk[i].parm);
429 break;
430 case FT:
431 printf("\\f%c", stk[i].parm);
432 break;
433 default:
434 printf("Bug: stk[%d].opno = %d = .%s, .%s",
435 i, stk[i].opno, br[stk[i].opno].opbr,
436 br[stk[i].opno].clbr);
437 }
438 }
439
440 static void
chkcmd(const char * mac)441 chkcmd(const char *mac)
442 {
443 int i;
444
445 /*
446 * Check to see if it matches top of stack.
447 */
448 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
449 stktop--; /* OK. Pop & forget */
450 else {
451 /* No. Maybe it's an opener */
452 for (i=0; br[i].opbr; i++) {
453 if (eq(mac, br[i].opbr)) {
454 /* Found. Push it. */
455 stktop++;
456 stk[stktop].opno = i;
457 stk[stktop].pl = 0;
458 stk[stktop].parm = 0;
459 stk[stktop].lno = lineno;
460 break;
461 }
462 /*
463 * Maybe it's an unmatched closer.
464 * NOTE: this depends on the fact
465 * that none of the closers can be
466 * openers too.
467 */
468 if (eq(mac, br[i].clbr)) {
469 nomatch(mac);
470 break;
471 }
472 }
473 }
474 }
475
476 static void
nomatch(const char * mac)477 nomatch(const char *mac)
478 {
479 int i, j;
480
481 /*
482 * Look for a match further down on stack
483 * If we find one, it suggests that the stuff in
484 * between is supposed to match itself.
485 */
486 for (j=stktop; j>=0; j--)
487 if (eq(mac,br[stk[j].opno].clbr)) {
488 /* Found. Make a good diagnostic. */
489 if (j == stktop-2) {
490 /*
491 * Check for special case \fx..\fR and don't
492 * complain.
493 */
494 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
495 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
496 stktop = j -1;
497 return;
498 }
499 /*
500 * We have two unmatched frobs. Chances are
501 * they were intended to match, so we mention
502 * them together.
503 */
504 pe(stk[j+1].lno);
505 prop(j+1);
506 printf(" does not match %d: ", stk[j+2].lno);
507 prop(j+2);
508 printf("\n");
509 } else for (i=j+1; i <= stktop; i++) {
510 complain(i);
511 }
512 stktop = j-1;
513 return;
514 }
515 /* Didn't find one. Throw this away. */
516 pe(lineno);
517 printf("Unmatched .%s\n", mac);
518 }
519
520 /* eq: are two strings equal? */
521 static int
eq(const char * s1,const char * s2)522 eq(const char *s1, const char *s2)
523 {
524 return strcmp(s1, s2) == 0;
525 }
526
527 /* print the first part of an error message, given the line number */
528 static void
pe(int pelineno)529 pe(int pelineno)
530 {
531 if (nfiles > 1)
532 printf("%s: ", cfilename);
533 printf("%d: ", pelineno);
534 }
535
536 static void
checkknown(const char * mac)537 checkknown(const char *mac)
538 {
539
540 if (eq(mac, "."))
541 return;
542 if (binsrch(mac) >= 0)
543 return;
544 if (mac[0] == '\\' && mac[1] == '"') /* comments */
545 return;
546
547 pe(lineno);
548 printf("Unknown command: .%s\n", mac);
549 }
550
551 /*
552 * We have a .de xx line in "line". Add xx to the list of known commands.
553 */
554 static void
addcmd(char * line)555 addcmd(char *line)
556 {
557 char *mac;
558
559 /* grab the macro being defined */
560 mac = line+4;
561 while (isspace((unsigned char)*mac))
562 mac++;
563 if (*mac == 0) {
564 pe(lineno);
565 printf("illegal define: %s\n", line);
566 return;
567 }
568 mac[2] = 0;
569 if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
570 mac[1] = 0;
571 if (ncmds >= MAXCMDS) {
572 printf("Only %d known commands allowed\n", MAXCMDS);
573 exit(1);
574 }
575 addmac(mac);
576 }
577
578 /*
579 * Add mac to the list. We should really have some kind of tree
580 * structure here but this is a quick-and-dirty job and I just don't
581 * have time to mess with it. (I wonder if this will come back to haunt
582 * me someday?) Anyway, I claim that .de is fairly rare in user
583 * nroff programs, and the register loop below is pretty fast.
584 */
585 static void
addmac(const char * mac)586 addmac(const char *mac)
587 {
588 const char **src, **dest, **loc;
589
590 if (binsrch(mac) >= 0){ /* it's OK to redefine something */
591 #ifdef DEBUG
592 printf("binsrch(%s) -> already in table\n", mac);
593 #endif /* DEBUG */
594 return;
595 }
596 /* binsrch sets slot as a side effect */
597 #ifdef DEBUG
598 printf("binsrch(%s) -> %d\n", mac, slot);
599 #endif
600 loc = &knowncmds[slot];
601 src = &knowncmds[ncmds-1];
602 dest = src+1;
603 while (dest > loc)
604 *dest-- = *src--;
605 if ((*loc = strdup(mac)) == NULL)
606 err(1, "strdup");
607 ncmds++;
608 #ifdef DEBUG
609 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2],
610 knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1],
611 knowncmds[slot+2], ncmds);
612 #endif
613 }
614
615 /*
616 * Do a binary search in knowncmds for mac.
617 * If found, return the index. If not, return -1.
618 */
619 static int
binsrch(const char * mac)620 binsrch(const char *mac)
621 {
622 const char *p; /* pointer to current cmd in list */
623 int d; /* difference if any */
624 int mid; /* mid point in binary search */
625 int top, bot; /* boundaries of bin search, inclusive */
626
627 top = ncmds-1;
628 bot = 0;
629 while (top >= bot) {
630 mid = (top+bot)/2;
631 p = knowncmds[mid];
632 d = p[0] - mac[0];
633 if (d == 0)
634 d = p[1] - mac[1];
635 if (d == 0)
636 return mid;
637 if (d < 0)
638 bot = mid + 1;
639 else
640 top = mid - 1;
641 }
642 slot = bot; /* place it would have gone */
643 return -1;
644 }
645