xref: /original-bsd/usr.bin/checknr/checknr.c (revision 957a0273)
1 static char *sccsid = "@(#)checknr.c	4.4 (Berkeley) 05/13/81";
2 /*
3  * checknr: check an nroff/troff input file for matching macro calls.
4  * we also attempt to match size and font changes, but only the embedded
5  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
6  * later but for now think of these restrictions as contributions to
7  * structured typesetting.
8  */
9 #include <stdio.h>
10 #include <ctype.h>
11 
#define MAXSTK	100	/* number of entries in stk[] */
#define MAXBR	100	/* number of entries in br[] (opener/closer pairs) */
#define MAXCMDS	500	/* number of entries in knowncmds[] */
15 
/*
 * The stack of things seen so far that have not been closed yet.
 * Entries 0..stktop are in use; stktop is -1 when the stack is empty.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a command */
	int parm;	/* parm to size, font, etc */
	int lno;	/* input line number where it was opened */
};

struct stkstr stk[MAXSTK];
int stktop;
26 
/*
 * The kinds of opening and closing brackets.  The table ends at the
 * first entry whose opbr is 0; main() appends pairs given with -a.
 */
struct brstr {
	char *opbr;	/* name of the opener */
	char *clbr;	/* name of the matching closer */
};

#define SZ	0	/* br[] index standing for size changes */
#define FT	1	/* br[] index standing for font changes */

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz",	"sz" },	/* also \s */
	{ "ft",	"ft" },	/* also \f */
	/* the -mm package */
	{ "AL",	"LE" },
	{ "AS",	"AE" },
	{ "BL",	"LE" },
	{ "BS",	"BE" },
	{ "DF",	"DE" },
	{ "DL",	"LE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ML",	"LE" },
	{ "NS",	"NE" },
	{ "RL",	"LE" },
	{ "VL",	"LE" },
	/* the -ms package */
	{ "AB",	"AE" },
	{ "CD",	"DE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ID",	"DE" },
	{ "KF",	"KE" },
	{ "KS",	"KE" },
	{ "LD",	"DE" },
	{ "LG",	"NL" },
	{ "QS",	"QE" },
	{ "RS",	"RE" },
	{ "SM",	"NL" },
	/* The -me package */
	{ "(b",	")b" },
	{ "(c",	")c" },
	{ "(d",	")d" },
	{ "(f",	")f" },
	{ "(l",	")l" },
	{ "(q",	")q" },
	{ "(x",	")x" },
	{ "(z",	")z" },
	/* Things needed by preprocessors */
	{ "EQ",	"EN" },
	{ "TS",	"TE" },
	/* Refer */
	{ "[",	"]" },
	{ 0,	0 }
};
81 
82 /*
83  * All commands known to nroff, plus macro packages.
84  * Used so we can complain about unrecognized commands.
85  */
86 char *knowncmds[MAXCMDS] = {
87 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
88 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
89 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
90 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
91 "AE", "AF", "AI", "AL", "AS", "AT", "AU", "AX", "B",  "B1", "B2", "BD",
92 "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT", "D",
93 "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM", "EN", "EQ",
94 "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ", "FS",
95 "FV", "FX", "H",  "HC", "HM", "HO", "HU", "I",  "ID", "IE", "IH", "IM",
96 "IP", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB", "LC", "LD", "LE", "LG",
97 "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR", "MT", "ND", "NE", "NH",
98 "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",  "PF", "PH", "PP", "PT",
99 "PY", "QE", "QP", "QS", "R",  "RA", "RC", "RE", "RL", "RP", "RQ", "RS",
100 "RT", "S",  "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM", "SP", "SY",
101 "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ", "TR", "TS",
102 "TX", "UL", "US", "UX", "VL", "WC", "WH", "XD", "XF", "XK", "XP", "[",  "[-",
103 "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
104 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
105 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
106 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
107 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
108 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
109 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
110 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
111 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
112 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
113 "q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
114 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
115 "ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp", "yr",
116 0
117 };
118 
/* Where we are in the input. */
char	*cfilename;	/* name of the file being checked */
int	lineno;		/* number of the line now held in line[] */
char	line[256];	/* text of the current input line */

/* Set by main() from the command line. */
int	nfiles;		/* how many files were named */
int	fflag;		/* -f given: leave \f alone */
int	sflag;		/* -s given: leave \s alone */

/* Bookkeeping for knowncmds[]. */
int	ncmds;		/* entries currently in knowncmds */
int	slot;		/* index recorded by binsrch as a side effect */

char	*malloc();
129 
130 main(argc, argv)
131 int argc;
132 char **argv;
133 {
134 	FILE *f;
135 	int i;
136 	char *cp;
137 	char b1[4];
138 
139 	if (argc <= 1)
140 		usage();
141 	/* Figure out how many known commands there are */
142 	while (knowncmds[ncmds])
143 		ncmds++;
144 	while (argc > 1 && argv[1][0] == '-') {
145 		switch(argv[1][1]) {
146 
147 		/* -a: add pairs of macros */
148 		case 'a':
149 			i = strlen(argv[1]) - 2;
150 			if (i % 6 != 0)
151 				usage();
152 			/* look for empty macro slots */
153 			for (i=0; br[i].opbr; i++)
154 				;
155 			for (cp=argv[1]+3; cp[-1]; cp += 6) {
156 				br[i].opbr = malloc(3);
157 				strncpy(br[i].opbr, cp, 2);
158 				br[i].clbr = malloc(3);
159 				strncpy(br[i].clbr, cp+3, 2);
160 				addmac(br[i].opbr);	/* knows pairs are also known cmds */
161 				addmac(br[i].clbr);
162 				i++;
163 			}
164 			break;
165 
166 		/* -c: add known commands */
167 		case 'c':
168 			i = strlen(argv[1]) - 2;
169 			if (i % 3 != 0)
170 				usage();
171 			for (cp=argv[1]+3; cp[-1]; cp += 3) {
172 				if (cp[2] && cp[2] != '.')
173 					usage();
174 				strncpy(b1, cp, 2);
175 				addmac(b1);
176 			}
177 			break;
178 
179 		/* -f: ignore font changes */
180 		case 'f':
181 			fflag = 1;
182 			break;
183 
184 		/* -s: ignore size changes */
185 		case 's':
186 			sflag = 1;
187 			break;
188 		default:
189 			usage();
190 		}
191 		argc--; argv++;
192 	}
193 
194 	nfiles = argc - 1;
195 
196 	if (nfiles > 0) {
197 		for (i=1; i<argc; i++) {
198 			cfilename = argv[i];
199 			f = fopen(cfilename, "r");
200 			if (f == NULL)
201 				perror(cfilename);
202 			else
203 				process(f);
204 		}
205 	} else {
206 		cfilename = "stdin";
207 		process(stdin);
208 	}
209 	exit(0);
210 }
211 
/* usage: print the synopsis on the standard output and exit with status 1 */
int
usage()
{
	fputs("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n", stdout);
	exit(1);
}
217 
218 process(f)
219 FILE *f;
220 {
221 	register int i, n;
222 	char mac[5];	/* The current macro or nroff command */
223 	int pl;
224 
225 	stktop = -1;
226 	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
227 		if (line[0] == '.') {
228 			/*
229 			 * find and isolate the macro/command name.
230 			 */
231 			strncpy(mac, line+1, 4);
232 			if (isspace(mac[0])) {
233 				pe(lineno);
234 				printf("Empty command\n");
235 			} else if (isspace(mac[1])) {
236 				mac[1] = 0;
237 			} else if (isspace(mac[2])) {
238 				mac[2] = 0;
239 			} else if (mac[0] != '\\' || mac[1] != '\"') {
240 				pe(lineno);
241 				printf("Command too long\n");
242 			}
243 
244 			/*
245 			 * Is it a known command?
246 			 */
247 			checkknown(mac);
248 
249 			/*
250 			 * Should we add it?
251 			 */
252 			if (eq(mac, "de"))
253 				addcmd(line);
254 
255 			chkcmd(line, mac);
256 		}
257 
258 		/*
259 		 * At this point we process the line looking
260 		 * for \s and \f.
261 		 */
262 		for (i=0; line[i]; i++)
263 			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
264 				if (!sflag && line[++i]=='s') {
265 					pl = line[++i];
266 					if (isdigit(pl)) {
267 						n = pl - '0';
268 						pl = ' ';
269 					} else
270 						n = 0;
271 					while (isdigit(line[++i]))
272 						n = 10 * n + line[i] - '0';
273 					i--;
274 					if (n == 0) {
275 						if (stk[stktop].opno == SZ) {
276 							stktop--;
277 						} else {
278 							pe(lineno);
279 							printf("unmatched \\s0\n");
280 						}
281 					} else {
282 						stk[++stktop].opno = SZ;
283 						stk[stktop].pl = pl;
284 						stk[stktop].parm = n;
285 						stk[stktop].lno = lineno;
286 					}
287 				} else if (!fflag && line[i]=='f') {
288 					n = line[++i];
289 					if (n == 'P') {
290 						if (stk[stktop].opno == FT) {
291 							stktop--;
292 						} else {
293 							pe(lineno);
294 							printf("unmatched \\fP\n");
295 						}
296 					} else {
297 						stk[++stktop].opno = FT;
298 						stk[stktop].pl = 1;
299 						stk[stktop].parm = n;
300 						stk[stktop].lno = lineno;
301 					}
302 				}
303 			}
304 	}
305 	/*
306 	 * We've hit the end and look at all this stuff that hasn't been
307 	 * matched yet!  Complain, complain.
308 	 */
309 	for (i=stktop; i>=0; i--) {
310 		complain(i);
311 	}
312 }
313 
314 complain(i)
315 {
316 	pe(stk[i].lno);
317 	printf("Unmatched ");
318 	prop(i);
319 	printf("\n");
320 }
321 
322 prop(i)
323 {
324 	if (stk[i].pl == 0)
325 		printf(".%s", br[stk[i].opno].opbr);
326 	else switch(stk[i].opno) {
327 	case SZ:
328 		printf("\\s%c%d", stk[i].pl, stk[i].parm);
329 		break;
330 	case FT:
331 		printf("\\f%c", stk[i].parm);
332 		break;
333 	default:
334 		printf("Bug: stk[%d].opno = %d = .%s, .%s",
335 			i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
336 	}
337 }
338 
339 chkcmd(line, mac)
340 char *line;
341 char *mac;
342 {
343 	register int i, n;
344 
345 	/*
346 	 * Check to see if it matches top of stack.
347 	 */
348 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
349 		stktop--;	/* OK. Pop & forget */
350 	else {
351 		/* No. Maybe it's an opener */
352 		for (i=0; br[i].opbr; i++) {
353 			if (eq(mac, br[i].opbr)) {
354 				/* Found. Push it. */
355 				stktop++;
356 				stk[stktop].opno = i;
357 				stk[stktop].pl = 0;
358 				stk[stktop].parm = 0;
359 				stk[stktop].lno = lineno;
360 				break;
361 			}
362 			/*
363 			 * Maybe it's an unmatched closer.
364 			 * NOTE: this depends on the fact
365 			 * that none of the closers can be
366 			 * openers too.
367 			 */
368 			if (eq(mac, br[i].clbr)) {
369 				nomatch(mac);
370 				break;
371 			}
372 		}
373 	}
374 }
375 
376 nomatch(mac)
377 char *mac;
378 {
379 	register int i, j;
380 
381 	/*
382 	 * Look for a match further down on stack
383 	 * If we find one, it suggests that the stuff in
384 	 * between is supposed to match itself.
385 	 */
386 	for (j=stktop; j>=0; j--)
387 		if (eq(mac,br[stk[j].opno].clbr)) {
388 			/* Found.  Make a good diagnostic. */
389 			if (j == stktop-2) {
390 				/*
391 				 * Check for special case \fx..\fR and don't
392 				 * complain.
393 				 */
394 				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
395 				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
396 					stktop = j -1;
397 					return;
398 				}
399 				/*
400 				 * We have two unmatched frobs.  Chances are
401 				 * they were intended to match, so we mention
402 				 * them together.
403 				 */
404 				pe(stk[j+1].lno);
405 				prop(j+1);
406 				printf(" does not match %d: ", stk[j+2].lno);
407 				prop(j+2);
408 				printf("\n");
409 			} else for (i=j+1; i <= stktop; i++) {
410 				complain(i);
411 			}
412 			stktop = j-1;
413 			return;
414 		}
415 	/* Didn't find one.  Throw this away. */
416 	pe(lineno);
417 	printf("Unmatched .%s\n", mac);
418 }
419 
/* eq: return 1 if s1 and s2 hold the same string, 0 if they differ */
int
eq(s1, s2)
char *s1, *s2;
{
	if (strcmp(s1, s2) != 0)
		return 0;
	return 1;
}
426 
427 /* print the first part of an error message, given the line number */
428 pe(lineno)
429 int lineno;
430 {
431 	if (nfiles > 1)
432 		printf("%s: ", cfilename);
433 	printf("%d: ", lineno);
434 }
435 
436 checkknown(mac)
437 char *mac;
438 {
439 
440 	if (eq(mac, "."))
441 		return;
442 	if (binsrch(mac) >= 0)
443 		return;
444 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
445 		return;
446 
447 	pe(lineno);
448 	printf("Unknown command: .%s\n", mac);
449 }
450 
451 /*
452  * We have a .de xx line in "line".  Add xx to the list of known commands.
453  */
454 addcmd(line)
455 char *line;
456 {
457 	char *mac;
458 
459 	/* grab the macro being defined */
460 	mac = line+4;
461 	while (isspace(*mac))
462 		mac++;
463 	if (*mac == 0) {
464 		pe(lineno);
465 		printf("illegal define: %s\n", line);
466 		return;
467 	}
468 	mac[2] = 0;
469 	if (isspace(mac[1]) || mac[1] == '\\')
470 		mac[1] = 0;
471 	if (ncmds >= MAXCMDS) {
472 		printf("Only %d known commands allowed\n", MAXCMDS);
473 		exit(1);
474 	}
475 	addmac(mac);
476 }
477 
478 /*
479  * Add mac to the list.  We should really have some kind of tree
480  * structure here but this is a quick-and-dirty job and I just don't
481  * have time to mess with it.  (I wonder if this will come back to haunt
482  * me someday?)  Anyway, I claim that .de is fairly rare in user
483  * nroff programs, and the register loop below is pretty fast.
484  */
485 addmac(mac)
486 char *mac;
487 {
488 	register char **src, **dest, **loc;
489 
490 	binsrch(mac);	/* it's OK to redefine something */
491 	/* binsrch sets slot as a side effect */
492 #ifdef DEBUG
493 printf("binsrch(%s) -> %d\n", mac, slot);
494 #endif
495 	loc = &knowncmds[slot];
496 	src = &knowncmds[ncmds-1];
497 	dest = src+1;
498 	while (dest > loc)
499 		*dest-- = *src--;
500 	*loc = malloc(3);
501 	strcpy(*loc, mac);
502 	ncmds++;
503 #ifdef DEBUG
504 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
505 #endif
506 }
507 
508 /*
509  * Do a binary search in knowncmds for mac.
510  * If found, return the index.  If not, return -1.
511  */
512 binsrch(mac)
513 char *mac;
514 {
515 	register char *p;	/* pointer to current cmd in list */
516 	register int d;		/* difference if any */
517 	register int mid;	/* mid point in binary search */
518 	register int top, bot;	/* boundaries of bin search, inclusive */
519 
520 	top = ncmds-1;
521 	bot = 0;
522 	while (top >= bot) {
523 		mid = (top+bot)/2;
524 		p = knowncmds[mid];
525 		d = p[0] - mac[0];
526 		if (d == 0)
527 			d = p[1] - mac[1];
528 		if (d == 0)
529 			return mid;
530 		if (d < 0)
531 			bot = mid + 1;
532 		else
533 			top = mid - 1;
534 	}
535 	slot = bot;	/* place it would have gone */
536 	return -1;
537 }
538 
539 
540