xref: /original-bsd/usr.bin/checknr/checknr.c (revision a76afa45)
1 /*
2  * Copyright (c) 1980 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms are permitted
6  * provided that the above copyright notice and this paragraph are
7  * duplicated in all such forms and that any documentation,
8  * advertising materials, and other materials related to such
9  * distribution and use acknowledge that the software was developed
10  * by the University of California, Berkeley.  The name of the
11  * University may not be used to endorse or promote products derived
12  * from this software without specific prior written permission.
13  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16  */
17 
#ifndef lint
/*
 * Copyright notice and SCCS version string.  Both are compiled out
 * when "lint" is defined.
 */
char copyright[] =
"@(#) Copyright (c) 1980 The Regents of the University of California.\n\
 All rights reserved.\n";

static char sccsid[] = "@(#)checknr.c	5.3 (Berkeley) 10/30/88";
#endif /* not lint */
27 
28 /*
29  * checknr: check an nroff/troff input file for matching macro calls.
30  * we also attempt to match size and font changes, but only the embedded
31  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
32  * later but for now think of these restrictions as contributions to
33  * structured typesetting.
34  */
35 #include <stdio.h>
36 #include <ctype.h>
37 
38 #define MAXSTK	100	/* Stack size */
39 #define MAXBR	100	/* Max number of bracket pairs known */
40 #define MAXCMDS	500	/* Max number of commands known */
41 
/*
 * The stack of constructs that have been opened but not yet closed.
 * stk[0..stktop] are the live entries; process() resets stktop to -1
 * at the start of every file.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-' or ' ' for \s, 1 for \f, 0 for a macro */
	int parm;	/* parameter: the size for \s, the font letter for \f */
	int lno;	/* input line on which the construct was opened */
};

struct stkstr stk[MAXSTK];
int stktop;
52 
/*
 * The known (opener, closer) macro pairs.  The table ends at the first
 * entry whose opbr is null; the -a option appends pairs after the
 * built-in ones.  Slots SZ and FT double as the stack tags for the
 * in-line \s and \f escapes.
 */
#define SZ	0	/* br[] slot for size changes (.sz and \s) */
#define FT	1	/* br[] slot for font changes (.ft and \f) */

struct brstr {
	char *opbr;	/* name of the opening macro */
	char *clbr;	/* name of the macro that closes it */
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz",	"sz" },		/* also \s */
	{ "ft",	"ft" },		/* also \f */
	/* the -mm package */
	{ "AL",	"LE" },
	{ "AS",	"AE" },
	{ "BL",	"LE" },
	{ "BS",	"BE" },
	{ "DF",	"DE" },
	{ "DL",	"LE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ML",	"LE" },
	{ "NS",	"NE" },
	{ "RL",	"LE" },
	{ "VL",	"LE" },
	/* the -ms package */
	{ "AB",	"AE" },
	{ "BD",	"DE" },
	{ "CD",	"DE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ID",	"DE" },
	{ "KF",	"KE" },
	{ "KS",	"KE" },
	{ "LD",	"DE" },
	{ "LG",	"NL" },
	{ "QS",	"QE" },
	{ "RS",	"RE" },
	{ "SM",	"NL" },
	{ "XA",	"XE" },
	{ "XS",	"XE" },
	/* The -me package */
	{ "(b",	")b" },
	{ "(c",	")c" },
	{ "(d",	")d" },
	{ "(f",	")f" },
	{ "(l",	")l" },
	{ "(q",	")q" },
	{ "(x",	")x" },
	{ "(z",	")z" },
	/* Things needed by preprocessors */
	{ "EQ",	"EN" },
	{ "TS",	"TE" },
	/* Refer */
	{ "[",	"]" },
	/* end of table */
	{ 0,	0 }
};
110 
/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 *
 * The entries must stay in ascending character-code order of their
 * first two characters: binsrch() does a binary search on exactly
 * those two characters, and addmac() inserts new names at the position
 * binsrch() reports.  The trailing 0 ends the built-in list; main()
 * counts up to it to initialize ncmds.  The remaining slots, up to
 * MAXCMDS, are room for names added at run time.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0		/* 0 marks the end of the built-in list */
};
148 
/* Where we are in the input. */
char	*cfilename;	/* name of the file being checked */
int	nfiles;		/* how many files were named on the command line */
int	lineno;		/* number of the line now held in line[] */
char	line[256];	/* text of the current input line */

/* Option flags. */
int	fflag;		/* -f given: do not check \f */
int	sflag;		/* -s given: do not check \s */

/* State of the knowncmds[] table. */
int	ncmds;		/* number of entries in use */
int	slot;		/* set by binsrch on a miss: where the name would go */

char	*malloc();
159 
160 main(argc, argv)
161 int argc;
162 char **argv;
163 {
164 	FILE *f;
165 	int i;
166 	char *cp;
167 	char b1[4];
168 
169 	/* Figure out how many known commands there are */
170 	while (knowncmds[ncmds])
171 		ncmds++;
172 	while (argc > 1 && argv[1][0] == '-') {
173 		switch(argv[1][1]) {
174 
175 		/* -a: add pairs of macros */
176 		case 'a':
177 			i = strlen(argv[1]) - 2;
178 			if (i % 6 != 0)
179 				usage();
180 			/* look for empty macro slots */
181 			for (i=0; br[i].opbr; i++)
182 				;
183 			for (cp=argv[1]+3; cp[-1]; cp += 6) {
184 				br[i].opbr = malloc(3);
185 				strncpy(br[i].opbr, cp, 2);
186 				br[i].clbr = malloc(3);
187 				strncpy(br[i].clbr, cp+3, 2);
188 				addmac(br[i].opbr);	/* knows pairs are also known cmds */
189 				addmac(br[i].clbr);
190 				i++;
191 			}
192 			break;
193 
194 		/* -c: add known commands */
195 		case 'c':
196 			i = strlen(argv[1]) - 2;
197 			if (i % 3 != 0)
198 				usage();
199 			for (cp=argv[1]+3; cp[-1]; cp += 3) {
200 				if (cp[2] && cp[2] != '.')
201 					usage();
202 				strncpy(b1, cp, 2);
203 				addmac(b1);
204 			}
205 			break;
206 
207 		/* -f: ignore font changes */
208 		case 'f':
209 			fflag = 1;
210 			break;
211 
212 		/* -s: ignore size changes */
213 		case 's':
214 			sflag = 1;
215 			break;
216 		default:
217 			usage();
218 		}
219 		argc--; argv++;
220 	}
221 
222 	nfiles = argc - 1;
223 
224 	if (nfiles > 0) {
225 		for (i=1; i<argc; i++) {
226 			cfilename = argv[i];
227 			f = fopen(cfilename, "r");
228 			if (f == NULL)
229 				perror(cfilename);
230 			else
231 				process(f);
232 		}
233 	} else {
234 		cfilename = "stdin";
235 		process(stdin);
236 	}
237 	exit(0);
238 }
239 
/*
 * usage: print the option summary on the standard output and exit
 * with status 1.
 */
usage()
{
	fputs("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n", stdout);
	exit(1);
}
245 
246 process(f)
247 FILE *f;
248 {
249 	register int i, n;
250 	char mac[5];	/* The current macro or nroff command */
251 	int pl;
252 
253 	stktop = -1;
254 	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
255 		if (line[0] == '.') {
256 			/*
257 			 * find and isolate the macro/command name.
258 			 */
259 			strncpy(mac, line+1, 4);
260 			if (isspace(mac[0])) {
261 				pe(lineno);
262 				printf("Empty command\n");
263 			} else if (isspace(mac[1])) {
264 				mac[1] = 0;
265 			} else if (isspace(mac[2])) {
266 				mac[2] = 0;
267 			} else if (mac[0] != '\\' || mac[1] != '\"') {
268 				pe(lineno);
269 				printf("Command too long\n");
270 			}
271 
272 			/*
273 			 * Is it a known command?
274 			 */
275 			checkknown(mac);
276 
277 			/*
278 			 * Should we add it?
279 			 */
280 			if (eq(mac, "de"))
281 				addcmd(line);
282 
283 			chkcmd(line, mac);
284 		}
285 
286 		/*
287 		 * At this point we process the line looking
288 		 * for \s and \f.
289 		 */
290 		for (i=0; line[i]; i++)
291 			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
292 				if (!sflag && line[++i]=='s') {
293 					pl = line[++i];
294 					if (isdigit(pl)) {
295 						n = pl - '0';
296 						pl = ' ';
297 					} else
298 						n = 0;
299 					while (isdigit(line[++i]))
300 						n = 10 * n + line[i] - '0';
301 					i--;
302 					if (n == 0) {
303 						if (stk[stktop].opno == SZ) {
304 							stktop--;
305 						} else {
306 							pe(lineno);
307 							printf("unmatched \\s0\n");
308 						}
309 					} else {
310 						stk[++stktop].opno = SZ;
311 						stk[stktop].pl = pl;
312 						stk[stktop].parm = n;
313 						stk[stktop].lno = lineno;
314 					}
315 				} else if (!fflag && line[i]=='f') {
316 					n = line[++i];
317 					if (n == 'P') {
318 						if (stk[stktop].opno == FT) {
319 							stktop--;
320 						} else {
321 							pe(lineno);
322 							printf("unmatched \\fP\n");
323 						}
324 					} else {
325 						stk[++stktop].opno = FT;
326 						stk[stktop].pl = 1;
327 						stk[stktop].parm = n;
328 						stk[stktop].lno = lineno;
329 					}
330 				}
331 			}
332 	}
333 	/*
334 	 * We've hit the end and look at all this stuff that hasn't been
335 	 * matched yet!  Complain, complain.
336 	 */
337 	for (i=stktop; i>=0; i--) {
338 		complain(i);
339 	}
340 }
341 
342 complain(i)
343 {
344 	pe(stk[i].lno);
345 	printf("Unmatched ");
346 	prop(i);
347 	printf("\n");
348 }
349 
350 prop(i)
351 {
352 	if (stk[i].pl == 0)
353 		printf(".%s", br[stk[i].opno].opbr);
354 	else switch(stk[i].opno) {
355 	case SZ:
356 		printf("\\s%c%d", stk[i].pl, stk[i].parm);
357 		break;
358 	case FT:
359 		printf("\\f%c", stk[i].parm);
360 		break;
361 	default:
362 		printf("Bug: stk[%d].opno = %d = .%s, .%s",
363 			i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
364 	}
365 }
366 
367 chkcmd(line, mac)
368 char *line;
369 char *mac;
370 {
371 	register int i, n;
372 
373 	/*
374 	 * Check to see if it matches top of stack.
375 	 */
376 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
377 		stktop--;	/* OK. Pop & forget */
378 	else {
379 		/* No. Maybe it's an opener */
380 		for (i=0; br[i].opbr; i++) {
381 			if (eq(mac, br[i].opbr)) {
382 				/* Found. Push it. */
383 				stktop++;
384 				stk[stktop].opno = i;
385 				stk[stktop].pl = 0;
386 				stk[stktop].parm = 0;
387 				stk[stktop].lno = lineno;
388 				break;
389 			}
390 			/*
391 			 * Maybe it's an unmatched closer.
392 			 * NOTE: this depends on the fact
393 			 * that none of the closers can be
394 			 * openers too.
395 			 */
396 			if (eq(mac, br[i].clbr)) {
397 				nomatch(mac);
398 				break;
399 			}
400 		}
401 	}
402 }
403 
404 nomatch(mac)
405 char *mac;
406 {
407 	register int i, j;
408 
409 	/*
410 	 * Look for a match further down on stack
411 	 * If we find one, it suggests that the stuff in
412 	 * between is supposed to match itself.
413 	 */
414 	for (j=stktop; j>=0; j--)
415 		if (eq(mac,br[stk[j].opno].clbr)) {
416 			/* Found.  Make a good diagnostic. */
417 			if (j == stktop-2) {
418 				/*
419 				 * Check for special case \fx..\fR and don't
420 				 * complain.
421 				 */
422 				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
423 				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
424 					stktop = j -1;
425 					return;
426 				}
427 				/*
428 				 * We have two unmatched frobs.  Chances are
429 				 * they were intended to match, so we mention
430 				 * them together.
431 				 */
432 				pe(stk[j+1].lno);
433 				prop(j+1);
434 				printf(" does not match %d: ", stk[j+2].lno);
435 				prop(j+2);
436 				printf("\n");
437 			} else for (i=j+1; i <= stktop; i++) {
438 				complain(i);
439 			}
440 			stktop = j-1;
441 			return;
442 		}
443 	/* Didn't find one.  Throw this away. */
444 	pe(lineno);
445 	printf("Unmatched .%s\n", mac);
446 }
447 
/* eq: return 1 if s1 and s2 hold the same string, 0 otherwise */
eq(s1, s2)
char *s1, *s2;
{
	return !strcmp(s1, s2);
}
454 
455 /* print the first part of an error message, given the line number */
456 pe(lineno)
457 int lineno;
458 {
459 	if (nfiles > 1)
460 		printf("%s: ", cfilename);
461 	printf("%d: ", lineno);
462 }
463 
464 checkknown(mac)
465 char *mac;
466 {
467 
468 	if (eq(mac, "."))
469 		return;
470 	if (binsrch(mac) >= 0)
471 		return;
472 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
473 		return;
474 
475 	pe(lineno);
476 	printf("Unknown command: .%s\n", mac);
477 }
478 
479 /*
480  * We have a .de xx line in "line".  Add xx to the list of known commands.
481  */
482 addcmd(line)
483 char *line;
484 {
485 	char *mac;
486 
487 	/* grab the macro being defined */
488 	mac = line+4;
489 	while (isspace(*mac))
490 		mac++;
491 	if (*mac == 0) {
492 		pe(lineno);
493 		printf("illegal define: %s\n", line);
494 		return;
495 	}
496 	mac[2] = 0;
497 	if (isspace(mac[1]) || mac[1] == '\\')
498 		mac[1] = 0;
499 	if (ncmds >= MAXCMDS) {
500 		printf("Only %d known commands allowed\n", MAXCMDS);
501 		exit(1);
502 	}
503 	addmac(mac);
504 }
505 
506 /*
507  * Add mac to the list.  We should really have some kind of tree
508  * structure here but this is a quick-and-dirty job and I just don't
509  * have time to mess with it.  (I wonder if this will come back to haunt
510  * me someday?)  Anyway, I claim that .de is fairly rare in user
511  * nroff programs, and the register loop below is pretty fast.
512  */
513 addmac(mac)
514 char *mac;
515 {
516 	register char **src, **dest, **loc;
517 
518 	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
519 #ifdef DEBUG
520 		printf("binsrch(%s) -> already in table\n", mac);
521 #endif DEBUG
522 		return;
523 	}
524 	/* binsrch sets slot as a side effect */
525 #ifdef DEBUG
526 printf("binsrch(%s) -> %d\n", mac, slot);
527 #endif
528 	loc = &knowncmds[slot];
529 	src = &knowncmds[ncmds-1];
530 	dest = src+1;
531 	while (dest > loc)
532 		*dest-- = *src--;
533 	*loc = malloc(3);
534 	strcpy(*loc, mac);
535 	ncmds++;
536 #ifdef DEBUG
537 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
538 #endif
539 }
540 
541 /*
542  * Do a binary search in knowncmds for mac.
543  * If found, return the index.  If not, return -1.
544  */
545 binsrch(mac)
546 char *mac;
547 {
548 	register char *p;	/* pointer to current cmd in list */
549 	register int d;		/* difference if any */
550 	register int mid;	/* mid point in binary search */
551 	register int top, bot;	/* boundaries of bin search, inclusive */
552 
553 	top = ncmds-1;
554 	bot = 0;
555 	while (top >= bot) {
556 		mid = (top+bot)/2;
557 		p = knowncmds[mid];
558 		d = p[0] - mac[0];
559 		if (d == 0)
560 			d = p[1] - mac[1];
561 		if (d == 0)
562 			return mid;
563 		if (d < 0)
564 			bot = mid + 1;
565 		else
566 			top = mid - 1;
567 	}
568 	slot = bot;	/* place it would have gone */
569 	return -1;
570 }
571