xref: /original-bsd/usr.bin/checknr/checknr.c (revision d25e1985)
/* SCCS version identification string for this source file. */
static char *sccsid =
	"@(#)checknr.c	4.1 (Berkeley) 10/01/80";
2 /*
3  * checknr: check an nroff/troff input file for matching macro calls.
4  * we also attempt to match size and font changes, but only the embedded
5  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
6  * later but for now think of these restrictions as contributions to
7  * structured typesetting.
8  */
9 #include <stdio.h>
10 #include <ctype.h>
11 
/* Capacities of the fixed-size tables declared below. */
#define MAXSTK		100	/* entries in the stack of open items */
#define MAXBR		100	/* slots in the table of bracket pairs */
#define MAXCMDS		500	/* slots in the table of known commands */
15 
/*
 * One entry of the stack that records every opener seen so far which
 * has not yet been closed.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-' or ' ' for \s; 1 for \f; 0 for a request line */
	int parm;	/* argument given: the size, the font letter, ... */
	int lno;	/* input line number on which it appeared */
};

struct stkstr stk[MAXSTK];	/* the stack itself */
int stktop;			/* index of the top entry; process() starts it at -1 */
26 
/*
 * Table of bracket pairs: each entry names a request or macro that
 * opens something (opbr) and the one that closes it (clbr).  The list
 * ends at the first entry whose opbr is 0; the unused slots after it
 * are available for pairs added with -a.
 */
#define SZ	0	/* index of the size entry, shared with \s */
#define FT	1	/* index of the font entry, shared with \f */

struct brstr {
	char *opbr;	/* name of the opener */
	char *clbr;	/* name of the matching closer */
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz",	"sz" },
	{ "ft",	"ft" },
	/* the -ms package */
	{ "AB",	"AE" },
	{ "RS",	"RE" },
	{ "LG",	"NL" },
	{ "SM",	"NL" },
	{ "FS",	"FE" },
	{ "DS",	"DE" },
	{ "CD",	"DE" },
	{ "LD",	"DE" },
	{ "ID",	"DE" },
	{ "KS",	"KE" },
	{ "KF",	"KE" },
	{ "QS",	"QE" },
	/* Things needed by preprocessors */
	{ "TS",	"TE" },
	{ "EQ",	"EN" },
	/* The -me package */
	{ "(l",	")l" },
	{ "(q",	")q" },
	{ "(b",	")b" },
	{ "(z",	")z" },
	{ "(c",	")c" },
	{ "(d",	")d" },
	{ "(f",	")f" },
	{ "(x",	")x" },
	/* end marker */
	{ 0,	0 }
};
66 
67 /*
68  * All commands known to nroff, plus ms and me.
69  * Used so we can complain about unrecognized commands.
70  */
71 char *knowncmds[MAXCMDS] = {
72 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l",
73 "(q", "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q",
74 ")t", ")x", ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(",
75 "@)", "@C", "@D", "@F", "@I", "@M", "@c", "@e", "@f", "@h",
76 "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB", "AB", "AE",
77 "AE", "AI", "AI", "AT", "AU", "AU", "AX", "B",  "B1", "B2",
78 "BD", "BG", "BT", "BX", "C1", "C2", "CD", "CM", "CT", "D",
79 "DA", "DE", "DF", "DS", "EG", "EM", "EN", "EQ", "EQ", "FA",
80 "FE", "FJ", "FK", "FL", "FN", "FO", "FQ", "FS", "FV", "FX",
81 "HO", "I",  "ID", "IE", "IH", "IM", "IP", "IZ", "KD", "KE",
82 "KF", "KQ", "KS", "LB", "LD", "LG", "LP", "MC", "ME", "MF",
83 "MH", "MR", "ND", "NH", "NL", "NP", "OK", "PP", "PT", "PY",
84 "QE", "QP", "QS", "R",  "RA", "RC", "RE", "RP", "RQ", "RS",
85 "RT", "S0", "S2", "S3", "SG", "SH", "SM", "SY", "TA", "TC",
86 "TD", "TE", "TH", "TL", "TL", "TM", "TQ", "TR", "TS", "TS",
87 "TX", "UL", "US", "UX", "WH", "XD", "XF", "XK", "XP", "[-",
88 "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]-",
89 "]<", "]>", "][", "ab", "ac", "ad", "af", "am", "ar", "as",
90 "b",  "ba", "bc", "bd", "bi", "bl", "bp", "bp", "br", "bx",
91 "c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da",
92 "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef",
93 "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl",
94 "fo", "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw",
95 "hx", "hy", "i",  "ie", "if", "ig", "in", "ip", "it", "ix",
96 "lc", "lg", "li", "ll", "ll", "ln", "lo", "lp", "ls", "lt",
97 "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1", "n2", "na",
98 "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
99 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po",
100 "po", "pp", "ps", "q",  "r",  "rb", "rd", "re", "re", "rm",
101 "rn", "ro", "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so",
102 "sp", "ss", "st", "sv", "sz", "ta", "tc", "th", "ti", "tl",
103 "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "yr",
104 0
105 };
106 
/* The input currently being read. */
char	*cfilename;	/* name of the file, used in diagnostics */
int	lineno;		/* number of the line held in line[] */
char	line[256];	/* text of that line */

/* Settings taken from the command line. */
int	nfiles;		/* how many files were named */
int	fflag;		/* set by -f: leave \f alone */
int	sflag;		/* set by -s: leave \s alone */

/* Bookkeeping for the knowncmds table. */
int	ncmds;		/* number of names currently in knowncmds */
int	slot;		/* index reported by binsrch as a side effect */

char	*malloc();
117 
118 main(argc, argv)
119 int argc;
120 char **argv;
121 {
122 	FILE *f;
123 	int i;
124 	char *cp;
125 
126 	if (argc <= 1)
127 		goto usage;
128 	while (argc > 1 && argv[1][0] == '-') {
129 		switch(argv[1][1]) {
130 		case 'a':
131 			/* -a: add pairs of macros */
132 			i = strlen(argv[1]) - 2;
133 			if (i % 6 != 0) {
134 usage:
135 				printf("Usage: nrc -s -f -a.xx.yy.xx.yy... (.xx, .yy)\n");
136 				break;
137 			}
138 			/* look for empty macro slots */
139 			for (i=0; br[i].opbr; i++)
140 				;
141 			for (cp=argv[1]+3; cp[-1]; cp += 6) {
142 				br[i].opbr = cp;
143 				br[i].clbr = cp+3;
144 				cp[2] = cp[5] = 0;
145 				i++;
146 			}
147 			break;
148 		case 'f':
149 			fflag = 1;
150 			break;
151 		case 's':
152 			sflag = 1;
153 			break;
154 		default:
155 			printf("Illegal flag: %s\n", argv[1]);
156 			break;
157 		}
158 		argc--; argv++;
159 	}
160 
161 	nfiles = argc - 1;
162 
163 	if (nfiles > 0) {
164 		for (i=1; i<argc; i++) {
165 			cfilename = argv[i];
166 			f = fopen(cfilename, "r");
167 			if (f == NULL)
168 				perror(cfilename);
169 			else
170 				process(f);
171 		}
172 	} else {
173 		cfilename = "stdin";
174 		process(stdin);
175 	}
176 	exit(0);
177 }
178 
179 process(f)
180 FILE *f;
181 {
182 	register int i, n;
183 	char mac[5];	/* The current macro or nroff command */
184 	int pl;
185 
186 	stktop = -1;
187 	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
188 		if (line[0] == '.') {
189 			/*
190 			 * find and isolate the macro/command name.
191 			 */
192 			strncpy(mac, line+1, 4);
193 			if (isspace(mac[0])) {
194 				pe(lineno);
195 				printf("Empty command\n");
196 			} else if (isspace(mac[1])) {
197 				mac[1] = 0;
198 			} else if (isspace(mac[2])) {
199 				mac[2] = 0;
200 			} else if (mac[2] != '\\' || mac[3] != '\"') {
201 				pe(lineno);
202 				printf("Command too long\n");
203 			}
204 
205 			/*
206 			 * Is it a known command?
207 			 */
208 			checkknown(mac);
209 
210 			/*
211 			 * Should we add it?
212 			 */
213 			if (eq(mac, "de"))
214 				addcmd(line);
215 
216 			chkcmd(line, mac);
217 		}
218 
219 		/*
220 		 * At this point we process the line looking
221 		 * for \s and \f.
222 		 */
223 		for (i=0; line[i]; i++)
224 			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
225 				if (!sflag && line[++i]=='s') {
226 					pl = line[++i];
227 					if (isdigit(pl)) {
228 						n = pl - '0';
229 						pl = ' ';
230 					} else
231 						n = 0;
232 					while (isdigit(line[++i]))
233 						n = 10 * n + line[i] - '0';
234 					i--;
235 					if (n == 0) {
236 						if (stk[stktop].opno == SZ) {
237 							stktop--;
238 						} else {
239 							pe(lineno);
240 							printf("unmatched \\s0\n");
241 						}
242 					} else {
243 						stk[++stktop].opno = SZ;
244 						stk[stktop].pl = pl;
245 						stk[stktop].parm = n;
246 						stk[stktop].lno = lineno;
247 					}
248 				} else if (!fflag && line[i]=='f') {
249 					n = line[++i];
250 					if (n == 'P') {
251 						if (stk[stktop].opno == FT) {
252 							stktop--;
253 						} else {
254 							pe(lineno);
255 							printf("unmatched \\fP\n");
256 						}
257 					} else {
258 						stk[++stktop].opno = FT;
259 						stk[stktop].pl = 1;
260 						stk[stktop].parm = n;
261 						stk[stktop].lno = lineno;
262 					}
263 				}
264 			}
265 	}
266 	/*
267 	 * We've hit the end and look at all this stuff that hasn't been
268 	 * matched yet!  Complain, complain.
269 	 */
270 	for (i=stktop; i>=0; i--) {
271 		complain(i);
272 	}
273 }
274 
275 complain(i)
276 {
277 	pe(stk[i].lno);
278 	printf("Unmatched ");
279 	prop(i);
280 	printf("\n");
281 }
282 
283 prop(i)
284 {
285 	if (stk[i].pl == 0)
286 		printf(".%s", br[stk[i].opno].opbr);
287 	else switch(stk[i].opno) {
288 	case SZ:
289 		printf("\\s%c%d", stk[i].pl, stk[i].parm);
290 		break;
291 	case FT:
292 		printf("\\f%c", stk[i].parm);
293 		break;
294 	default:
295 		printf("Bug: stk[%d].opno = %d = .%s, .%s",
296 			i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
297 	}
298 }
299 
300 chkcmd(line, mac)
301 char *line;
302 char *mac;
303 {
304 	register int i, n;
305 
306 	/*
307 	 * Check to see if it matches top of stack.
308 	 */
309 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
310 		stktop--;	/* OK. Pop & forget */
311 	else {
312 		/* No. Maybe it's an opener */
313 		for (i=0; br[i].opbr; i++) {
314 			if (eq(mac, br[i].opbr)) {
315 				/* Found. Push it. */
316 				stktop++;
317 				stk[stktop].opno = i;
318 				stk[stktop].pl = 0;
319 				stk[stktop].parm = 0;
320 				stk[stktop].lno = lineno;
321 				break;
322 			}
323 			/*
324 			 * Maybe it's an unmatched closer.
325 			 * NOTE: this depends on the fact
326 			 * that none of the closers can be
327 			 * openers too.
328 			 */
329 			if (eq(mac, br[i].clbr)) {
330 				nomatch(mac);
331 				break;
332 			}
333 		}
334 	}
335 }
336 
337 nomatch(mac)
338 char *mac;
339 {
340 	register int i, j;
341 
342 	/*
343 	 * Look for a match further down on stack
344 	 * If we find one, it suggests that the stuff in
345 	 * between is supposed to match itself.
346 	 */
347 	for (j=stktop; j>=0; j--)
348 		if (eq(mac,br[stk[j].opno].clbr)) {
349 			/* Found.  Make a good diagnostic. */
350 			if (j == stktop-2) {
351 				/*
352 				 * Check for special case \fx..\fR and don't
353 				 * complain.
354 				 */
355 				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
356 				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
357 					stktop = j -1;
358 					return;
359 				}
360 				/*
361 				 * We have two unmatched frobs.  Chances are
362 				 * they were intended to match, so we mention
363 				 * them together.
364 				 */
365 				pe(stk[j+1].lno);
366 				prop(j+1);
367 				printf(" does not match %d: ", stk[j+2].lno);
368 				prop(j+2);
369 				printf("\n");
370 			} else for (i=j+1; i <= stktop; i++) {
371 				complain(i);
372 			}
373 			stktop = j-1;
374 			return;
375 		}
376 	/* Didn't find one.  Throw this away. */
377 	pe(lineno);
378 	printf("Unmatched .%s\n", mac);
379 }
380 
/* eq: return 1 when the strings s1 and s2 are identical, else 0 */
int
eq(s1, s2)
char *s1, *s2;
{
	if (strcmp(s1, s2) != 0)
		return 0;
	return 1;
}
387 
388 /* print the first part of an error message, given the line number */
389 pe(lineno)
390 int lineno;
391 {
392 	if (nfiles > 1)
393 		printf("%s: ", cfilename);
394 	printf("%d: ", lineno);
395 }
396 
397 checkknown(mac)
398 char *mac;
399 {
400 	/* First time figure out ncmds. */
401 	if (ncmds == 0) {
402 		while (knowncmds[ncmds])
403 			ncmds++;
404 	}
405 
406 	if (eq(mac, "."))
407 		return;
408 	if (binsrch(mac) >= 0)
409 		return;
410 
411 	pe(lineno);
412 	printf("Unknown command: .%s\n", mac);
413 }
414 
415 /*
416  * We have a .de xx line in "line".  Add xx to the list of known commands.
417  */
418 addcmd(line)
419 char *line;
420 {
421 	char *mac;
422 	register char **src, **dest, **loc;
423 
424 	/* grab the macro being defined */
425 	mac = line+4;
426 	while (isspace(*mac))
427 		mac++;
428 	if (*mac == 0) {
429 		pe(lineno);
430 		printf("illegal define: %s\n", line);
431 		return;
432 	}
433 	mac[2] = 0;
434 	if (isspace(mac[1]) || mac[1] == '\\')
435 		mac[1] = 0;
436 	if (ncmds >= MAXCMDS) {
437 		printf("Only %d known commands allowed\n", MAXCMDS);
438 		exit(1);
439 	}
440 
441 	/*
442 	 * Add mac to the list.  We should really have some kind of tree
443 	 * structure here but this is a quick-and-dirty job and I just don't
444 	 * have time to mess with it.  (I wonder if this will come back to haunt
445 	 * me someday?)  Anyway, I claim that .de is fairly rare in user
446 	 * nroff programs, and the register loop below is pretty fast.
447 	 */
448 	binsrch(mac);	/* it's OK to redefine something */
449 	/* binsrch sets slot as a side effect */
450 	loc = &knowncmds[slot];
451 	src = &knowncmds[ncmds-1];
452 	dest = src+1;
453 	while (dest > loc)
454 		*dest-- = *src--;
455 	*loc = malloc(3);
456 	strcpy(*loc, mac);
457 	ncmds++;
458 }
459 
460 /*
461  * Do a binary search in knowncmds for mac.
462  * If found, return the index.  If not, return -1.
463  */
464 binsrch(mac)
465 char *mac;
466 {
467 	register char *p;	/* pointer to current cmd in list */
468 	register int d;		/* difference if any */
469 	register int mid;	/* mid point in binary search */
470 	register int top, bot;	/* boundaries of bin search, inclusive */
471 
472 	top = ncmds-1;
473 	bot = 0;
474 	while (top >= bot) {
475 		mid = (top+bot)/2;
476 		p = knowncmds[mid];
477 		d = p[0] - mac[0];
478 		if (d == 0)
479 			d = p[1] - mac[1];
480 		if (d == 0)
481 			return mid;
482 		if (d < 0)
483 			bot = mid + 1;
484 		else
485 			top = mid - 1;
486 	}
487 	slot = bot;	/* place it would have gone */
488 	return -1;
489 }
490