xref: /original-bsd/usr.bin/checknr/checknr.c (revision f0fd5f8a)
1 static char *sccsid = "@(#)checknr.c	4.5 (Berkeley) 11/07/82";
2 /*
3  * checknr: check an nroff/troff input file for matching macro calls.
4  * we also attempt to match size and font changes, but only the embedded
5  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
6  * later but for now think of these restrictions as contributions to
7  * structured typesetting.
8  */
9 #include <stdio.h>
10 #include <ctype.h>
11 
/*
 * Capacities of the statically allocated tables.
 */
#define MAXSTK	100	/* entries in the stack stk[] */
#define MAXBR	100	/* entries in the bracket-pair table br[] */
#define MAXCMDS	500	/* entries in the command table knowncmds[] */

/*
 * One construct that has been opened but not yet closed.
 */
struct stkstr {
	int opno;	/* index in br[] of the opening bracket */
	int pl;		/* '+', '-' or ' ' for \s; 1 for \f; 0 for a macro */
	int parm;	/* size for \s, font letter for \f, 0 for a macro */
	int lno;	/* input line on which it was opened */
};

/*
 * The stack of such constructs.  stktop indexes the newest entry;
 * process() sets it to -1 to mean "empty" before reading a file.
 */
struct stkstr stk[MAXSTK];
int stktop;
26 
/*
 * Table of bracketing requests.  Each entry names an opener and the
 * closer that must answer it.  An entry whose opbr is null ends the
 * list; main() appends the pairs given with -a after the built-in ones.
 */
#define SZ	0	/* index of the size entry, shared with \s */
#define FT	1	/* index of the font entry, shared with \f */

struct brstr {
	char *opbr;	/* opening request */
	char *clbr;	/* closing request */
};

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz",	"sz" },		/* SZ: also \s */
	{ "ft",	"ft" },		/* FT: also \f */
	/* the -mm package */
	{ "AL",	"LE" },
	{ "AS",	"AE" },
	{ "BL",	"LE" },
	{ "BS",	"BE" },
	{ "DF",	"DE" },
	{ "DL",	"LE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ML",	"LE" },
	{ "NS",	"NE" },
	{ "RL",	"LE" },
	{ "VL",	"LE" },
	/* the -ms package */
	{ "AB",	"AE" },
	{ "CD",	"DE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ID",	"DE" },
	{ "KF",	"KE" },
	{ "KS",	"KE" },
	{ "LD",	"DE" },
	{ "LG",	"NL" },
	{ "QS",	"QE" },
	{ "RS",	"RE" },
	{ "SM",	"NL" },
	/* The -me package */
	{ "(b",	")b" },
	{ "(c",	")c" },
	{ "(d",	")d" },
	{ "(f",	")f" },
	{ "(l",	")l" },
	{ "(q",	")q" },
	{ "(x",	")x" },
	{ "(z",	")z" },
	/* Things needed by preprocessors */
	{ "EQ",	"EN" },
	{ "TS",	"TE" },
	/* Refer */
	{ "[",	"]" },
	/* end of list */
	{ 0,	0 }
};
81 
82 /*
83  * All commands known to nroff, plus macro packages.
84  * Used so we can complain about unrecognized commands.
85  */
86 char *knowncmds[MAXCMDS] = {
87 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
88 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
89 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
90 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
91 "AE", "AF", "AI", "AL", "AS", "AT", "AU", "AX", "B",  "B1", "B2", "BD",
92 "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT", "D",
93 "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM", "EN", "EQ",
94 "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ", "FS",
95 "FV", "FX", "H",  "HC", "HM", "HO", "HU", "I",  "ID", "IE", "IH", "IM",
96 "IP", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB", "LC", "LD", "LE", "LG",
97 "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR", "MT", "ND", "NE", "NH",
98 "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",  "PF", "PH", "PP", "PT",
99 "PY", "QE", "QP", "QS", "R",  "RA", "RC", "RE", "RL", "RP", "RQ", "RS",
100 "RT", "S",  "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM", "SP", "SY",
101 "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ", "TR", "TS",
102 "TX", "UL", "US", "UX", "VL", "WC", "WH", "XD", "XF", "XK", "XP", "[",  "[-",
103 "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
104 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
105 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
106 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
107 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
108 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
109 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
110 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
111 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
112 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
113 "q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
114 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
115 "ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp", "yr",
116 0
117 };
118 
/*
 * State shared by the routines below.
 */
char	*cfilename;	/* name of the file being checked, used in messages */
int	nfiles;		/* how many files were named on the command line */
int	lineno;		/* number of the line now held in line[] */
char	line[256];	/* text of the current input line */

int	fflag;		/* set by -f: do not check \f */
int	sflag;		/* set by -s: do not check \s */

int	ncmds;		/* number of entries in use in knowncmds[] */
int	slot;		/* set by a failed binsrch(): where the name belongs */

char	*malloc();
129 
130 main(argc, argv)
131 int argc;
132 char **argv;
133 {
134 	FILE *f;
135 	int i;
136 	char *cp;
137 	char b1[4];
138 
139 	/* Figure out how many known commands there are */
140 	while (knowncmds[ncmds])
141 		ncmds++;
142 	while (argc > 1 && argv[1][0] == '-') {
143 		switch(argv[1][1]) {
144 
145 		/* -a: add pairs of macros */
146 		case 'a':
147 			i = strlen(argv[1]) - 2;
148 			if (i % 6 != 0)
149 				usage();
150 			/* look for empty macro slots */
151 			for (i=0; br[i].opbr; i++)
152 				;
153 			for (cp=argv[1]+3; cp[-1]; cp += 6) {
154 				br[i].opbr = malloc(3);
155 				strncpy(br[i].opbr, cp, 2);
156 				br[i].clbr = malloc(3);
157 				strncpy(br[i].clbr, cp+3, 2);
158 				addmac(br[i].opbr);	/* knows pairs are also known cmds */
159 				addmac(br[i].clbr);
160 				i++;
161 			}
162 			break;
163 
164 		/* -c: add known commands */
165 		case 'c':
166 			i = strlen(argv[1]) - 2;
167 			if (i % 3 != 0)
168 				usage();
169 			for (cp=argv[1]+3; cp[-1]; cp += 3) {
170 				if (cp[2] && cp[2] != '.')
171 					usage();
172 				strncpy(b1, cp, 2);
173 				addmac(b1);
174 			}
175 			break;
176 
177 		/* -f: ignore font changes */
178 		case 'f':
179 			fflag = 1;
180 			break;
181 
182 		/* -s: ignore size changes */
183 		case 's':
184 			sflag = 1;
185 			break;
186 		default:
187 			usage();
188 		}
189 		argc--; argv++;
190 	}
191 
192 	nfiles = argc - 1;
193 
194 	if (nfiles > 0) {
195 		for (i=1; i<argc; i++) {
196 			cfilename = argv[i];
197 			f = fopen(cfilename, "r");
198 			if (f == NULL)
199 				perror(cfilename);
200 			else
201 				process(f);
202 		}
203 	} else {
204 		cfilename = "stdin";
205 		process(stdin);
206 	}
207 	exit(0);
208 }
209 
/*
 * usage: write the synopsis to the standard output, then exit with
 * status 1.  Does not return.
 */
int
usage()
{
	fputs("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n", stdout);
	exit(1);
}
215 
216 process(f)
217 FILE *f;
218 {
219 	register int i, n;
220 	char mac[5];	/* The current macro or nroff command */
221 	int pl;
222 
223 	stktop = -1;
224 	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
225 		if (line[0] == '.') {
226 			/*
227 			 * find and isolate the macro/command name.
228 			 */
229 			strncpy(mac, line+1, 4);
230 			if (isspace(mac[0])) {
231 				pe(lineno);
232 				printf("Empty command\n");
233 			} else if (isspace(mac[1])) {
234 				mac[1] = 0;
235 			} else if (isspace(mac[2])) {
236 				mac[2] = 0;
237 			} else if (mac[0] != '\\' || mac[1] != '\"') {
238 				pe(lineno);
239 				printf("Command too long\n");
240 			}
241 
242 			/*
243 			 * Is it a known command?
244 			 */
245 			checkknown(mac);
246 
247 			/*
248 			 * Should we add it?
249 			 */
250 			if (eq(mac, "de"))
251 				addcmd(line);
252 
253 			chkcmd(line, mac);
254 		}
255 
256 		/*
257 		 * At this point we process the line looking
258 		 * for \s and \f.
259 		 */
260 		for (i=0; line[i]; i++)
261 			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
262 				if (!sflag && line[++i]=='s') {
263 					pl = line[++i];
264 					if (isdigit(pl)) {
265 						n = pl - '0';
266 						pl = ' ';
267 					} else
268 						n = 0;
269 					while (isdigit(line[++i]))
270 						n = 10 * n + line[i] - '0';
271 					i--;
272 					if (n == 0) {
273 						if (stk[stktop].opno == SZ) {
274 							stktop--;
275 						} else {
276 							pe(lineno);
277 							printf("unmatched \\s0\n");
278 						}
279 					} else {
280 						stk[++stktop].opno = SZ;
281 						stk[stktop].pl = pl;
282 						stk[stktop].parm = n;
283 						stk[stktop].lno = lineno;
284 					}
285 				} else if (!fflag && line[i]=='f') {
286 					n = line[++i];
287 					if (n == 'P') {
288 						if (stk[stktop].opno == FT) {
289 							stktop--;
290 						} else {
291 							pe(lineno);
292 							printf("unmatched \\fP\n");
293 						}
294 					} else {
295 						stk[++stktop].opno = FT;
296 						stk[stktop].pl = 1;
297 						stk[stktop].parm = n;
298 						stk[stktop].lno = lineno;
299 					}
300 				}
301 			}
302 	}
303 	/*
304 	 * We've hit the end and look at all this stuff that hasn't been
305 	 * matched yet!  Complain, complain.
306 	 */
307 	for (i=stktop; i>=0; i--) {
308 		complain(i);
309 	}
310 }
311 
312 complain(i)
313 {
314 	pe(stk[i].lno);
315 	printf("Unmatched ");
316 	prop(i);
317 	printf("\n");
318 }
319 
320 prop(i)
321 {
322 	if (stk[i].pl == 0)
323 		printf(".%s", br[stk[i].opno].opbr);
324 	else switch(stk[i].opno) {
325 	case SZ:
326 		printf("\\s%c%d", stk[i].pl, stk[i].parm);
327 		break;
328 	case FT:
329 		printf("\\f%c", stk[i].parm);
330 		break;
331 	default:
332 		printf("Bug: stk[%d].opno = %d = .%s, .%s",
333 			i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
334 	}
335 }
336 
337 chkcmd(line, mac)
338 char *line;
339 char *mac;
340 {
341 	register int i, n;
342 
343 	/*
344 	 * Check to see if it matches top of stack.
345 	 */
346 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
347 		stktop--;	/* OK. Pop & forget */
348 	else {
349 		/* No. Maybe it's an opener */
350 		for (i=0; br[i].opbr; i++) {
351 			if (eq(mac, br[i].opbr)) {
352 				/* Found. Push it. */
353 				stktop++;
354 				stk[stktop].opno = i;
355 				stk[stktop].pl = 0;
356 				stk[stktop].parm = 0;
357 				stk[stktop].lno = lineno;
358 				break;
359 			}
360 			/*
361 			 * Maybe it's an unmatched closer.
362 			 * NOTE: this depends on the fact
363 			 * that none of the closers can be
364 			 * openers too.
365 			 */
366 			if (eq(mac, br[i].clbr)) {
367 				nomatch(mac);
368 				break;
369 			}
370 		}
371 	}
372 }
373 
374 nomatch(mac)
375 char *mac;
376 {
377 	register int i, j;
378 
379 	/*
380 	 * Look for a match further down on stack
381 	 * If we find one, it suggests that the stuff in
382 	 * between is supposed to match itself.
383 	 */
384 	for (j=stktop; j>=0; j--)
385 		if (eq(mac,br[stk[j].opno].clbr)) {
386 			/* Found.  Make a good diagnostic. */
387 			if (j == stktop-2) {
388 				/*
389 				 * Check for special case \fx..\fR and don't
390 				 * complain.
391 				 */
392 				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
393 				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
394 					stktop = j -1;
395 					return;
396 				}
397 				/*
398 				 * We have two unmatched frobs.  Chances are
399 				 * they were intended to match, so we mention
400 				 * them together.
401 				 */
402 				pe(stk[j+1].lno);
403 				prop(j+1);
404 				printf(" does not match %d: ", stk[j+2].lno);
405 				prop(j+2);
406 				printf("\n");
407 			} else for (i=j+1; i <= stktop; i++) {
408 				complain(i);
409 			}
410 			stktop = j-1;
411 			return;
412 		}
413 	/* Didn't find one.  Throw this away. */
414 	pe(lineno);
415 	printf("Unmatched .%s\n", mac);
416 }
417 
/* eq: return 1 when strings s1 and s2 are identical, 0 otherwise */
int
eq(s1, s2)
char *s1, *s2;
{
	return (!strcmp(s1, s2));
}
424 
425 /* print the first part of an error message, given the line number */
426 pe(lineno)
427 int lineno;
428 {
429 	if (nfiles > 1)
430 		printf("%s: ", cfilename);
431 	printf("%d: ", lineno);
432 }
433 
434 checkknown(mac)
435 char *mac;
436 {
437 
438 	if (eq(mac, "."))
439 		return;
440 	if (binsrch(mac) >= 0)
441 		return;
442 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
443 		return;
444 
445 	pe(lineno);
446 	printf("Unknown command: .%s\n", mac);
447 }
448 
449 /*
450  * We have a .de xx line in "line".  Add xx to the list of known commands.
451  */
452 addcmd(line)
453 char *line;
454 {
455 	char *mac;
456 
457 	/* grab the macro being defined */
458 	mac = line+4;
459 	while (isspace(*mac))
460 		mac++;
461 	if (*mac == 0) {
462 		pe(lineno);
463 		printf("illegal define: %s\n", line);
464 		return;
465 	}
466 	mac[2] = 0;
467 	if (isspace(mac[1]) || mac[1] == '\\')
468 		mac[1] = 0;
469 	if (ncmds >= MAXCMDS) {
470 		printf("Only %d known commands allowed\n", MAXCMDS);
471 		exit(1);
472 	}
473 	addmac(mac);
474 }
475 
476 /*
477  * Add mac to the list.  We should really have some kind of tree
478  * structure here but this is a quick-and-dirty job and I just don't
479  * have time to mess with it.  (I wonder if this will come back to haunt
480  * me someday?)  Anyway, I claim that .de is fairly rare in user
481  * nroff programs, and the register loop below is pretty fast.
482  */
483 addmac(mac)
484 char *mac;
485 {
486 	register char **src, **dest, **loc;
487 
488 	binsrch(mac);	/* it's OK to redefine something */
489 	/* binsrch sets slot as a side effect */
490 #ifdef DEBUG
491 printf("binsrch(%s) -> %d\n", mac, slot);
492 #endif
493 	loc = &knowncmds[slot];
494 	src = &knowncmds[ncmds-1];
495 	dest = src+1;
496 	while (dest > loc)
497 		*dest-- = *src--;
498 	*loc = malloc(3);
499 	strcpy(*loc, mac);
500 	ncmds++;
501 #ifdef DEBUG
502 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
503 #endif
504 }
505 
506 /*
507  * Do a binary search in knowncmds for mac.
508  * If found, return the index.  If not, return -1.
509  */
510 binsrch(mac)
511 char *mac;
512 {
513 	register char *p;	/* pointer to current cmd in list */
514 	register int d;		/* difference if any */
515 	register int mid;	/* mid point in binary search */
516 	register int top, bot;	/* boundaries of bin search, inclusive */
517 
518 	top = ncmds-1;
519 	bot = 0;
520 	while (top >= bot) {
521 		mid = (top+bot)/2;
522 		p = knowncmds[mid];
523 		d = p[0] - mac[0];
524 		if (d == 0)
525 			d = p[1] - mac[1];
526 		if (d == 0)
527 			return mid;
528 		if (d < 0)
529 			bot = mid + 1;
530 		else
531 			top = mid - 1;
532 	}
533 	slot = bot;	/* place it would have gone */
534 	return -1;
535 }
536 
537 
538