1 /*
2 * Copyright (c) 1980, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * %sccs.include.redist.c%
6 */
7
8 #ifndef lint
9 static char copyright[] =
10 "@(#) Copyright (c) 1980, 1993\n\
11 The Regents of the University of California. All rights reserved.\n";
12 #endif /* not lint */
13
14 #ifndef lint
15 static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 06/06/93";
16 #endif /* not lint */
17
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
#include <ctype.h>
#include <stdio.h>
#include <string.h>
27
28 #define MAXSTK 100 /* Stack size */
29 #define MAXBR 100 /* Max number of bracket pairs known */
30 #define MAXCMDS 500 /* Max number of commands known */
31
/*
 * One remembered opener.  The stack of these records what we've seen
 * so far that has not been closed yet.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro opener */
	int parm;	/* parm to size, font, etc */
	int lno;	/* input line number on which the opener appeared */
};

struct stkstr stk[MAXSTK];	/* the stack itself */
int stktop;			/* index of the top entry; process() starts it at -1 */
42
/*
 * The kinds of opening and closing brackets.  Each entry pairs an opener
 * with the closer that ends it; an entry with a null opbr ends the table.
 */
#define SZ 0	/* index of the "sz" entry, also used for \s */
#define FT 1	/* index of the "ft" entry, also used for \f */

struct brstr {
	char *opbr;	/* opening macro */
	char *clbr;	/* closing macro */
};

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* SZ */
	{ "ft", "ft" },		/* FT */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	{ 0, 0 }
};
100
101 /*
102 * All commands known to nroff, plus macro packages.
103 * Used so we can complain about unrecognized commands.
104 */
105 char *knowncmds[MAXCMDS] = {
106 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
107 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
108 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
109 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
110 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
111 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
112 "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
113 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
114 "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
115 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
116 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
117 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
118 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
119 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
120 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
121 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
122 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
123 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
124 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
125 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
126 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
127 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
128 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
129 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
130 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
131 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
132 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
133 "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
134 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
135 "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
136 "yr", 0
137 };
138
/* State for the file currently being scanned. */
char *cfilename;	/* name of current file */
int lineno;		/* current line number in input file */
char line[256];		/* the current line */

/* Settings taken from the command line. */
int nfiles;		/* number of files to process */
int fflag;		/* -f: ignore \f */
int sflag;		/* -s: ignore \s */

/* Bookkeeping for the knowncmds table. */
int ncmds;		/* size of knowncmds */
int slot;		/* slot in knowncmds found by binsrch */

char *malloc();
149
main(argc,argv)150 main(argc, argv)
151 int argc;
152 char **argv;
153 {
154 FILE *f;
155 int i;
156 char *cp;
157 char b1[4];
158
159 /* Figure out how many known commands there are */
160 while (knowncmds[ncmds])
161 ncmds++;
162 while (argc > 1 && argv[1][0] == '-') {
163 switch(argv[1][1]) {
164
165 /* -a: add pairs of macros */
166 case 'a':
167 i = strlen(argv[1]) - 2;
168 if (i % 6 != 0)
169 usage();
170 /* look for empty macro slots */
171 for (i=0; br[i].opbr; i++)
172 ;
173 for (cp=argv[1]+3; cp[-1]; cp += 6) {
174 br[i].opbr = malloc(3);
175 strncpy(br[i].opbr, cp, 2);
176 br[i].clbr = malloc(3);
177 strncpy(br[i].clbr, cp+3, 2);
178 addmac(br[i].opbr); /* knows pairs are also known cmds */
179 addmac(br[i].clbr);
180 i++;
181 }
182 break;
183
184 /* -c: add known commands */
185 case 'c':
186 i = strlen(argv[1]) - 2;
187 if (i % 3 != 0)
188 usage();
189 for (cp=argv[1]+3; cp[-1]; cp += 3) {
190 if (cp[2] && cp[2] != '.')
191 usage();
192 strncpy(b1, cp, 2);
193 addmac(b1);
194 }
195 break;
196
197 /* -f: ignore font changes */
198 case 'f':
199 fflag = 1;
200 break;
201
202 /* -s: ignore size changes */
203 case 's':
204 sflag = 1;
205 break;
206 default:
207 usage();
208 }
209 argc--; argv++;
210 }
211
212 nfiles = argc - 1;
213
214 if (nfiles > 0) {
215 for (i=1; i<argc; i++) {
216 cfilename = argv[i];
217 f = fopen(cfilename, "r");
218 if (f == NULL)
219 perror(cfilename);
220 else
221 process(f);
222 }
223 } else {
224 cfilename = "stdin";
225 process(stdin);
226 }
227 exit(0);
228 }
229
/*
 * usage: print the command synopsis on standard output and exit with
 * status 1.
 */
int
usage()
{
	fputs("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n",
	    stdout);
	exit(1);
}
235
process(f)236 process(f)
237 FILE *f;
238 {
239 register int i, n;
240 char mac[5]; /* The current macro or nroff command */
241 int pl;
242
243 stktop = -1;
244 for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
245 if (line[0] == '.') {
246 /*
247 * find and isolate the macro/command name.
248 */
249 strncpy(mac, line+1, 4);
250 if (isspace(mac[0])) {
251 pe(lineno);
252 printf("Empty command\n");
253 } else if (isspace(mac[1])) {
254 mac[1] = 0;
255 } else if (isspace(mac[2])) {
256 mac[2] = 0;
257 } else if (mac[0] != '\\' || mac[1] != '\"') {
258 pe(lineno);
259 printf("Command too long\n");
260 }
261
262 /*
263 * Is it a known command?
264 */
265 checkknown(mac);
266
267 /*
268 * Should we add it?
269 */
270 if (eq(mac, "de"))
271 addcmd(line);
272
273 chkcmd(line, mac);
274 }
275
276 /*
277 * At this point we process the line looking
278 * for \s and \f.
279 */
280 for (i=0; line[i]; i++)
281 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
282 if (!sflag && line[++i]=='s') {
283 pl = line[++i];
284 if (isdigit(pl)) {
285 n = pl - '0';
286 pl = ' ';
287 } else
288 n = 0;
289 while (isdigit(line[++i]))
290 n = 10 * n + line[i] - '0';
291 i--;
292 if (n == 0) {
293 if (stk[stktop].opno == SZ) {
294 stktop--;
295 } else {
296 pe(lineno);
297 printf("unmatched \\s0\n");
298 }
299 } else {
300 stk[++stktop].opno = SZ;
301 stk[stktop].pl = pl;
302 stk[stktop].parm = n;
303 stk[stktop].lno = lineno;
304 }
305 } else if (!fflag && line[i]=='f') {
306 n = line[++i];
307 if (n == 'P') {
308 if (stk[stktop].opno == FT) {
309 stktop--;
310 } else {
311 pe(lineno);
312 printf("unmatched \\fP\n");
313 }
314 } else {
315 stk[++stktop].opno = FT;
316 stk[stktop].pl = 1;
317 stk[stktop].parm = n;
318 stk[stktop].lno = lineno;
319 }
320 }
321 }
322 }
323 /*
324 * We've hit the end and look at all this stuff that hasn't been
325 * matched yet! Complain, complain.
326 */
327 for (i=stktop; i>=0; i--) {
328 complain(i);
329 }
330 }
331
complain(i)332 complain(i)
333 {
334 pe(stk[i].lno);
335 printf("Unmatched ");
336 prop(i);
337 printf("\n");
338 }
339
prop(i)340 prop(i)
341 {
342 if (stk[i].pl == 0)
343 printf(".%s", br[stk[i].opno].opbr);
344 else switch(stk[i].opno) {
345 case SZ:
346 printf("\\s%c%d", stk[i].pl, stk[i].parm);
347 break;
348 case FT:
349 printf("\\f%c", stk[i].parm);
350 break;
351 default:
352 printf("Bug: stk[%d].opno = %d = .%s, .%s",
353 i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
354 }
355 }
356
chkcmd(line,mac)357 chkcmd(line, mac)
358 char *line;
359 char *mac;
360 {
361 register int i, n;
362
363 /*
364 * Check to see if it matches top of stack.
365 */
366 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
367 stktop--; /* OK. Pop & forget */
368 else {
369 /* No. Maybe it's an opener */
370 for (i=0; br[i].opbr; i++) {
371 if (eq(mac, br[i].opbr)) {
372 /* Found. Push it. */
373 stktop++;
374 stk[stktop].opno = i;
375 stk[stktop].pl = 0;
376 stk[stktop].parm = 0;
377 stk[stktop].lno = lineno;
378 break;
379 }
380 /*
381 * Maybe it's an unmatched closer.
382 * NOTE: this depends on the fact
383 * that none of the closers can be
384 * openers too.
385 */
386 if (eq(mac, br[i].clbr)) {
387 nomatch(mac);
388 break;
389 }
390 }
391 }
392 }
393
nomatch(mac)394 nomatch(mac)
395 char *mac;
396 {
397 register int i, j;
398
399 /*
400 * Look for a match further down on stack
401 * If we find one, it suggests that the stuff in
402 * between is supposed to match itself.
403 */
404 for (j=stktop; j>=0; j--)
405 if (eq(mac,br[stk[j].opno].clbr)) {
406 /* Found. Make a good diagnostic. */
407 if (j == stktop-2) {
408 /*
409 * Check for special case \fx..\fR and don't
410 * complain.
411 */
412 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
413 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
414 stktop = j -1;
415 return;
416 }
417 /*
418 * We have two unmatched frobs. Chances are
419 * they were intended to match, so we mention
420 * them together.
421 */
422 pe(stk[j+1].lno);
423 prop(j+1);
424 printf(" does not match %d: ", stk[j+2].lno);
425 prop(j+2);
426 printf("\n");
427 } else for (i=j+1; i <= stktop; i++) {
428 complain(i);
429 }
430 stktop = j-1;
431 return;
432 }
433 /* Didn't find one. Throw this away. */
434 pe(lineno);
435 printf("Unmatched .%s\n", mac);
436 }
437
/*
 * eq: are two strings equal?
 * Returns 1 if s1 and s2 hold the same characters, 0 otherwise.
 * Neither string is modified.
 */
int
eq(const char *s1, const char *s2)
{
	return (strcmp(s1, s2) == 0);
}
444
445 /* print the first part of an error message, given the line number */
pe(lineno)446 pe(lineno)
447 int lineno;
448 {
449 if (nfiles > 1)
450 printf("%s: ", cfilename);
451 printf("%d: ", lineno);
452 }
453
checkknown(mac)454 checkknown(mac)
455 char *mac;
456 {
457
458 if (eq(mac, "."))
459 return;
460 if (binsrch(mac) >= 0)
461 return;
462 if (mac[0] == '\\' && mac[1] == '"') /* comments */
463 return;
464
465 pe(lineno);
466 printf("Unknown command: .%s\n", mac);
467 }
468
469 /*
470 * We have a .de xx line in "line". Add xx to the list of known commands.
471 */
addcmd(line)472 addcmd(line)
473 char *line;
474 {
475 char *mac;
476
477 /* grab the macro being defined */
478 mac = line+4;
479 while (isspace(*mac))
480 mac++;
481 if (*mac == 0) {
482 pe(lineno);
483 printf("illegal define: %s\n", line);
484 return;
485 }
486 mac[2] = 0;
487 if (isspace(mac[1]) || mac[1] == '\\')
488 mac[1] = 0;
489 if (ncmds >= MAXCMDS) {
490 printf("Only %d known commands allowed\n", MAXCMDS);
491 exit(1);
492 }
493 addmac(mac);
494 }
495
496 /*
497 * Add mac to the list. We should really have some kind of tree
498 * structure here but this is a quick-and-dirty job and I just don't
499 * have time to mess with it. (I wonder if this will come back to haunt
500 * me someday?) Anyway, I claim that .de is fairly rare in user
501 * nroff programs, and the register loop below is pretty fast.
502 */
addmac(mac)503 addmac(mac)
504 char *mac;
505 {
506 register char **src, **dest, **loc;
507
508 if (binsrch(mac) >= 0){ /* it's OK to redefine something */
509 #ifdef DEBUG
510 printf("binsrch(%s) -> already in table\n", mac);
511 #endif DEBUG
512 return;
513 }
514 /* binsrch sets slot as a side effect */
515 #ifdef DEBUG
516 printf("binsrch(%s) -> %d\n", mac, slot);
517 #endif
518 loc = &knowncmds[slot];
519 src = &knowncmds[ncmds-1];
520 dest = src+1;
521 while (dest > loc)
522 *dest-- = *src--;
523 *loc = malloc(3);
524 strcpy(*loc, mac);
525 ncmds++;
526 #ifdef DEBUG
527 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
528 #endif
529 }
530
531 /*
532 * Do a binary search in knowncmds for mac.
533 * If found, return the index. If not, return -1.
534 */
binsrch(mac)535 binsrch(mac)
536 char *mac;
537 {
538 register char *p; /* pointer to current cmd in list */
539 register int d; /* difference if any */
540 register int mid; /* mid point in binary search */
541 register int top, bot; /* boundaries of bin search, inclusive */
542
543 top = ncmds-1;
544 bot = 0;
545 while (top >= bot) {
546 mid = (top+bot)/2;
547 p = knowncmds[mid];
548 d = p[0] - mac[0];
549 if (d == 0)
550 d = p[1] - mac[1];
551 if (d == 0)
552 return mid;
553 if (d < 0)
554 bot = mid + 1;
555 else
556 top = mid - 1;
557 }
558 slot = bot; /* place it would have gone */
559 return -1;
560 }
561