xref: /original-bsd/usr.bin/checknr/checknr.c (revision 0999a820)
1 /*
2  * Copyright (c) 1980, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  */
7 
8 #ifndef lint
9 static char copyright[] =
10 "@(#) Copyright (c) 1980, 1993\n\
11 	The Regents of the University of California.  All rights reserved.\n";
12 #endif /* not lint */
13 
14 #ifndef lint
15 static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 06/06/93";
16 #endif /* not lint */
17 
18 /*
19  * checknr: check an nroff/troff input file for matching macro calls.
20  * we also attempt to match size and font changes, but only the embedded
21  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
22  * later but for now think of these restrictions as contributions to
23  * structured typesetting.
24  */
25 #include <stdio.h>
26 #include <ctype.h>
27 
28 #define MAXSTK	100	/* Stack size */
29 #define MAXBR	100	/* Max number of bracket pairs known */
30 #define MAXCMDS	500	/* Max number of commands known */
31 
/*
 * The stack on which we remember what we've seen so far.
 * Each entry records one opener (a bracket macro, or an in-line \s or \f
 * escape) that has not yet been closed.
 */
struct stkstr {
	int opno;	/* number of opening bracket (index into br[]) */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro command */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;	/* index of the top entry in stk[]; process() resets it to -1 */
42 
/*
 * The kinds of opening and closing brackets.
 *
 * Each entry pairs an opening macro with the macro that closes it.
 * The list ends at the first entry whose opbr is a null pointer; the
 * -a option in main() appends further pairs after the built-in ones.
 * SZ and FT name the two entries that the in-line \s and \f escapes
 * share with the .sz and .ft commands.
 */
struct brstr {
	char *opbr;	/* name of the opening macro */
	char *clbr;	/* name of the macro that closes it */
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define SZ	0
	{ "sz",	"sz" },	/* also \s */
#define FT	1
	{ "ft",	"ft" },	/* also \f */
	/* the -mm package */
	{ "AL",	"LE" },
	{ "AS",	"AE" },
	{ "BL",	"LE" },
	{ "BS",	"BE" },
	{ "DF",	"DE" },
	{ "DL",	"LE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ML",	"LE" },
	{ "NS",	"NE" },
	{ "RL",	"LE" },
	{ "VL",	"LE" },
	/* the -ms package */
	{ "AB",	"AE" },
	{ "BD",	"DE" },
	{ "CD",	"DE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ID",	"DE" },
	{ "KF",	"KE" },
	{ "KS",	"KE" },
	{ "LD",	"DE" },
	{ "LG",	"NL" },
	{ "QS",	"QE" },
	{ "RS",	"RE" },
	{ "SM",	"NL" },
	{ "XA",	"XE" },
	{ "XS",	"XE" },
	/* The -me package */
	{ "(b",	")b" },
	{ "(c",	")c" },
	{ "(d",	")d" },
	{ "(f",	")f" },
	{ "(l",	")l" },
	{ "(q",	")q" },
	{ "(x",	")x" },
	{ "(z",	")z" },
	/* Things needed by preprocessors */
	{ "EQ",	"EN" },
	{ "TS",	"TE" },
	/* Refer */
	{ "[",	"]" },
	/* end marker */
	{ NULL,	NULL }
};
100 
/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 *
 * The list is terminated by a null pointer, which main() uses to count
 * the initial entries into ncmds.  binsrch() does a binary search on the
 * first two characters of each name, so the entries must stay in
 * ascending character-code order; addmac() inserts new names at the
 * position binsrch() reports so that the order is kept.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};
138 
/* State shared by the routines below. */
int	lineno;		/* current line number in input file (process() counts from 1) */
char	line[256];	/* the current line, as read by fgets() in process() */
char	*cfilename;	/* name of current file ("stdin" when no file is named) */
int	nfiles;		/* number of files to process; pe() prints the file name when > 1 */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* number of entries currently in knowncmds */
int	slot;		/* set by a failed binsrch: index in knowncmds where the name would go */

/* Old-style declaration (no prototype); this file includes no header that declares malloc. */
char	*malloc();
149 
150 main(argc, argv)
151 int argc;
152 char **argv;
153 {
154 	FILE *f;
155 	int i;
156 	char *cp;
157 	char b1[4];
158 
159 	/* Figure out how many known commands there are */
160 	while (knowncmds[ncmds])
161 		ncmds++;
162 	while (argc > 1 && argv[1][0] == '-') {
163 		switch(argv[1][1]) {
164 
165 		/* -a: add pairs of macros */
166 		case 'a':
167 			i = strlen(argv[1]) - 2;
168 			if (i % 6 != 0)
169 				usage();
170 			/* look for empty macro slots */
171 			for (i=0; br[i].opbr; i++)
172 				;
173 			for (cp=argv[1]+3; cp[-1]; cp += 6) {
174 				br[i].opbr = malloc(3);
175 				strncpy(br[i].opbr, cp, 2);
176 				br[i].clbr = malloc(3);
177 				strncpy(br[i].clbr, cp+3, 2);
178 				addmac(br[i].opbr);	/* knows pairs are also known cmds */
179 				addmac(br[i].clbr);
180 				i++;
181 			}
182 			break;
183 
184 		/* -c: add known commands */
185 		case 'c':
186 			i = strlen(argv[1]) - 2;
187 			if (i % 3 != 0)
188 				usage();
189 			for (cp=argv[1]+3; cp[-1]; cp += 3) {
190 				if (cp[2] && cp[2] != '.')
191 					usage();
192 				strncpy(b1, cp, 2);
193 				addmac(b1);
194 			}
195 			break;
196 
197 		/* -f: ignore font changes */
198 		case 'f':
199 			fflag = 1;
200 			break;
201 
202 		/* -s: ignore size changes */
203 		case 's':
204 			sflag = 1;
205 			break;
206 		default:
207 			usage();
208 		}
209 		argc--; argv++;
210 	}
211 
212 	nfiles = argc - 1;
213 
214 	if (nfiles > 0) {
215 		for (i=1; i<argc; i++) {
216 			cfilename = argv[i];
217 			f = fopen(cfilename, "r");
218 			if (f == NULL)
219 				perror(cfilename);
220 			else
221 				process(f);
222 		}
223 	} else {
224 		cfilename = "stdin";
225 		process(stdin);
226 	}
227 	exit(0);
228 }
229 
/*
 * usage: print the command synopsis on the standard output and
 * terminate with exit status 1.
 */
usage()
{
	fputs("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n", stdout);
	exit(1);
}
235 
236 process(f)
237 FILE *f;
238 {
239 	register int i, n;
240 	char mac[5];	/* The current macro or nroff command */
241 	int pl;
242 
243 	stktop = -1;
244 	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
245 		if (line[0] == '.') {
246 			/*
247 			 * find and isolate the macro/command name.
248 			 */
249 			strncpy(mac, line+1, 4);
250 			if (isspace(mac[0])) {
251 				pe(lineno);
252 				printf("Empty command\n");
253 			} else if (isspace(mac[1])) {
254 				mac[1] = 0;
255 			} else if (isspace(mac[2])) {
256 				mac[2] = 0;
257 			} else if (mac[0] != '\\' || mac[1] != '\"') {
258 				pe(lineno);
259 				printf("Command too long\n");
260 			}
261 
262 			/*
263 			 * Is it a known command?
264 			 */
265 			checkknown(mac);
266 
267 			/*
268 			 * Should we add it?
269 			 */
270 			if (eq(mac, "de"))
271 				addcmd(line);
272 
273 			chkcmd(line, mac);
274 		}
275 
276 		/*
277 		 * At this point we process the line looking
278 		 * for \s and \f.
279 		 */
280 		for (i=0; line[i]; i++)
281 			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
282 				if (!sflag && line[++i]=='s') {
283 					pl = line[++i];
284 					if (isdigit(pl)) {
285 						n = pl - '0';
286 						pl = ' ';
287 					} else
288 						n = 0;
289 					while (isdigit(line[++i]))
290 						n = 10 * n + line[i] - '0';
291 					i--;
292 					if (n == 0) {
293 						if (stk[stktop].opno == SZ) {
294 							stktop--;
295 						} else {
296 							pe(lineno);
297 							printf("unmatched \\s0\n");
298 						}
299 					} else {
300 						stk[++stktop].opno = SZ;
301 						stk[stktop].pl = pl;
302 						stk[stktop].parm = n;
303 						stk[stktop].lno = lineno;
304 					}
305 				} else if (!fflag && line[i]=='f') {
306 					n = line[++i];
307 					if (n == 'P') {
308 						if (stk[stktop].opno == FT) {
309 							stktop--;
310 						} else {
311 							pe(lineno);
312 							printf("unmatched \\fP\n");
313 						}
314 					} else {
315 						stk[++stktop].opno = FT;
316 						stk[stktop].pl = 1;
317 						stk[stktop].parm = n;
318 						stk[stktop].lno = lineno;
319 					}
320 				}
321 			}
322 	}
323 	/*
324 	 * We've hit the end and look at all this stuff that hasn't been
325 	 * matched yet!  Complain, complain.
326 	 */
327 	for (i=stktop; i>=0; i--) {
328 		complain(i);
329 	}
330 }
331 
332 complain(i)
333 {
334 	pe(stk[i].lno);
335 	printf("Unmatched ");
336 	prop(i);
337 	printf("\n");
338 }
339 
340 prop(i)
341 {
342 	if (stk[i].pl == 0)
343 		printf(".%s", br[stk[i].opno].opbr);
344 	else switch(stk[i].opno) {
345 	case SZ:
346 		printf("\\s%c%d", stk[i].pl, stk[i].parm);
347 		break;
348 	case FT:
349 		printf("\\f%c", stk[i].parm);
350 		break;
351 	default:
352 		printf("Bug: stk[%d].opno = %d = .%s, .%s",
353 			i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
354 	}
355 }
356 
357 chkcmd(line, mac)
358 char *line;
359 char *mac;
360 {
361 	register int i, n;
362 
363 	/*
364 	 * Check to see if it matches top of stack.
365 	 */
366 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
367 		stktop--;	/* OK. Pop & forget */
368 	else {
369 		/* No. Maybe it's an opener */
370 		for (i=0; br[i].opbr; i++) {
371 			if (eq(mac, br[i].opbr)) {
372 				/* Found. Push it. */
373 				stktop++;
374 				stk[stktop].opno = i;
375 				stk[stktop].pl = 0;
376 				stk[stktop].parm = 0;
377 				stk[stktop].lno = lineno;
378 				break;
379 			}
380 			/*
381 			 * Maybe it's an unmatched closer.
382 			 * NOTE: this depends on the fact
383 			 * that none of the closers can be
384 			 * openers too.
385 			 */
386 			if (eq(mac, br[i].clbr)) {
387 				nomatch(mac);
388 				break;
389 			}
390 		}
391 	}
392 }
393 
394 nomatch(mac)
395 char *mac;
396 {
397 	register int i, j;
398 
399 	/*
400 	 * Look for a match further down on stack
401 	 * If we find one, it suggests that the stuff in
402 	 * between is supposed to match itself.
403 	 */
404 	for (j=stktop; j>=0; j--)
405 		if (eq(mac,br[stk[j].opno].clbr)) {
406 			/* Found.  Make a good diagnostic. */
407 			if (j == stktop-2) {
408 				/*
409 				 * Check for special case \fx..\fR and don't
410 				 * complain.
411 				 */
412 				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
413 				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
414 					stktop = j -1;
415 					return;
416 				}
417 				/*
418 				 * We have two unmatched frobs.  Chances are
419 				 * they were intended to match, so we mention
420 				 * them together.
421 				 */
422 				pe(stk[j+1].lno);
423 				prop(j+1);
424 				printf(" does not match %d: ", stk[j+2].lno);
425 				prop(j+2);
426 				printf("\n");
427 			} else for (i=j+1; i <= stktop; i++) {
428 				complain(i);
429 			}
430 			stktop = j-1;
431 			return;
432 		}
433 	/* Didn't find one.  Throw this away. */
434 	pe(lineno);
435 	printf("Unmatched .%s\n", mac);
436 }
437 
/*
 * eq: are two strings equal?
 * Returns 1 when s1 and s2 hold the same characters, 0 otherwise.
 * Neither string is modified.
 */
int
eq(s1, s2)
const char *s1, *s2;
{
	return (strcmp(s1, s2) == 0);
}
444 
445 /* print the first part of an error message, given the line number */
446 pe(lineno)
447 int lineno;
448 {
449 	if (nfiles > 1)
450 		printf("%s: ", cfilename);
451 	printf("%d: ", lineno);
452 }
453 
454 checkknown(mac)
455 char *mac;
456 {
457 
458 	if (eq(mac, "."))
459 		return;
460 	if (binsrch(mac) >= 0)
461 		return;
462 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
463 		return;
464 
465 	pe(lineno);
466 	printf("Unknown command: .%s\n", mac);
467 }
468 
469 /*
470  * We have a .de xx line in "line".  Add xx to the list of known commands.
471  */
472 addcmd(line)
473 char *line;
474 {
475 	char *mac;
476 
477 	/* grab the macro being defined */
478 	mac = line+4;
479 	while (isspace(*mac))
480 		mac++;
481 	if (*mac == 0) {
482 		pe(lineno);
483 		printf("illegal define: %s\n", line);
484 		return;
485 	}
486 	mac[2] = 0;
487 	if (isspace(mac[1]) || mac[1] == '\\')
488 		mac[1] = 0;
489 	if (ncmds >= MAXCMDS) {
490 		printf("Only %d known commands allowed\n", MAXCMDS);
491 		exit(1);
492 	}
493 	addmac(mac);
494 }
495 
496 /*
497  * Add mac to the list.  We should really have some kind of tree
498  * structure here but this is a quick-and-dirty job and I just don't
499  * have time to mess with it.  (I wonder if this will come back to haunt
500  * me someday?)  Anyway, I claim that .de is fairly rare in user
501  * nroff programs, and the register loop below is pretty fast.
502  */
503 addmac(mac)
504 char *mac;
505 {
506 	register char **src, **dest, **loc;
507 
508 	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
509 #ifdef DEBUG
510 		printf("binsrch(%s) -> already in table\n", mac);
511 #endif DEBUG
512 		return;
513 	}
514 	/* binsrch sets slot as a side effect */
515 #ifdef DEBUG
516 printf("binsrch(%s) -> %d\n", mac, slot);
517 #endif
518 	loc = &knowncmds[slot];
519 	src = &knowncmds[ncmds-1];
520 	dest = src+1;
521 	while (dest > loc)
522 		*dest-- = *src--;
523 	*loc = malloc(3);
524 	strcpy(*loc, mac);
525 	ncmds++;
526 #ifdef DEBUG
527 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
528 #endif
529 }
530 
531 /*
532  * Do a binary search in knowncmds for mac.
533  * If found, return the index.  If not, return -1.
534  */
535 binsrch(mac)
536 char *mac;
537 {
538 	register char *p;	/* pointer to current cmd in list */
539 	register int d;		/* difference if any */
540 	register int mid;	/* mid point in binary search */
541 	register int top, bot;	/* boundaries of bin search, inclusive */
542 
543 	top = ncmds-1;
544 	bot = 0;
545 	while (top >= bot) {
546 		mid = (top+bot)/2;
547 		p = knowncmds[mid];
548 		d = p[0] - mac[0];
549 		if (d == 0)
550 			d = p[1] - mac[1];
551 		if (d == 0)
552 			return mid;
553 		if (d < 0)
554 			bot = mid + 1;
555 		else
556 			top = mid - 1;
557 	}
558 	slot = bot;	/* place it would have gone */
559 	return -1;
560 }
561