xref: /netbsd/usr.bin/checknr/checknr.c (revision bf9ec67e)
1 /*	$NetBSD: checknr.c,v 1.10 2002/01/21 18:28:00 wiz Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 #ifndef lint
38 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
39 	The Regents of the University of California.  All rights reserved.\n");
40 #endif /* not lint */
41 
42 #ifndef lint
43 #if 0
44 static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
45 #else
46 __RCSID("$NetBSD: checknr.c,v 1.10 2002/01/21 18:28:00 wiz Exp $");
47 #endif
48 #endif /* not lint */
49 
50 /*
51  * checknr: check an nroff/troff input file for matching macro calls.
52  * we also attempt to match size and font changes, but only the embedded
53  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
54  * later but for now think of these restrictions as contributions to
55  * structured typesetting.
56  */
57 #include <ctype.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 
/*
 * Fixed capacities of the static tables declared in this file.
 */
enum {
	MAXSTK = 100,	/* entries in stk[], the stack of open brackets */
	MAXBR = 100,	/* entries in br[], the table of bracket pairs */
	MAXCMDS = 500	/* entries in knowncmds[], the known command names */
};
65 
/*
 * One remembered opener: a macro pushed by chkcmd(), or an embedded
 * \s / \f escape pushed by process().
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* 0 for a macro, 1 for \f; for \s the character */
			/* that followed it, or ' ' when that was a digit */
	int parm;	/* point size for \s, font character for \f */
	int lno;	/* input line on which the opener was seen */
};

/* The stack itself; stktop indexes the top entry and is -1 when empty. */
struct stkstr stk[MAXSTK];
int stktop;
76 
/*
 * Indices of the two entries that double as the embedded \s and \f
 * escapes; they must stay first in br[].
 */
#define SZ	0
#define FT	1

/*
 * The kinds of opening and closing brackets.  The table is scanned
 * front to back until an entry whose opbr is null, so the unused tail
 * of the array serves as the terminator and as room for pairs added
 * with -a.
 */
struct brstr {
	char *opbr;	/* name of the opening request or macro */
	char *clbr;	/* name of the request or macro that closes it */
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* SZ: also \s */
	{ "ft", "ft" },		/* FT: also \f */

	/* the -mm package */
	{ "AL", "LE" }, { "AS", "AE" }, { "BL", "LE" }, { "BS", "BE" },
	{ "DF", "DE" }, { "DL", "LE" }, { "DS", "DE" }, { "FS", "FE" },
	{ "ML", "LE" }, { "NS", "NE" }, { "RL", "LE" }, { "VL", "LE" },

	/* the -ms package */
	{ "AB", "AE" }, { "BD", "DE" }, { "CD", "DE" }, { "DS", "DE" },
	{ "FS", "FE" }, { "ID", "DE" }, { "KF", "KE" }, { "KS", "KE" },
	{ "LD", "DE" }, { "LG", "NL" }, { "QS", "QE" }, { "RS", "RE" },
	{ "SM", "NL" }, { "XA", "XE" }, { "XS", "XE" },

	/* The -me package */
	{ "(b", ")b" }, { "(c", ")c" }, { "(d", ")d" }, { "(f", ")f" },
	{ "(l", ")l" }, { "(q", ")q" }, { "(x", ")x" }, { "(z", ")z" },

	/* The -mdoc package */
	{ "Ao", "Ac" }, { "Bd", "Ed" }, { "Bk", "Ek" }, { "Bo", "Bc" },
	{ "Do", "Dc" }, { "Fo", "Fc" }, { "Oo", "Oc" }, { "Po", "Pc" },
	{ "Qo", "Qc" }, { "Rs", "Re" }, { "So", "Sc" }, { "Xo", "Xc" },

	/* Things needed by preprocessors */
	{ "EQ", "EN" }, { "TS", "TE" },

	/* Refer */
	{ "[", "]" },

	/* end of the predefined pairs */
	{ NULL, NULL },
};
147 
148 /*
149  * All commands known to nroff, plus macro packages.
150  * Used so we can complain about unrecognized commands.
151  */
152 char *knowncmds[MAXCMDS] = {
153 "$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N",
154 "%O", "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q",
155 "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x",
156 ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D",
157 "@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p",
158 "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT",
159 "AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", "B" ,  "B1",
160 "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", "Bf",
161 "Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT",
162 "Cd", "Cm", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc",
163 "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM",
164 "EN", "EQ", "EX", "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er",
165 "Ev", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ",
166 "FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Ft", "Fx",
167 "H" , "HC", "HD", "HM", "HO", "HU", "I" , "ID", "IE", "IH", "IM",
168 "IP", "IX", "IZ", "Ic", "It", "KD", "KE", "KF", "KQ", "KS", "LB",
169 "LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", "MF",
170 "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd",
171 "Nm", "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op",
172 "Os", "Ot", "Ox", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY",
173 "Pa", "Pc", "Pf", "Po", "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql",
174 "Qo", "Qq", "R" , "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT",
175 "Re", "Rs", "S" , "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM",
176 "SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
177 "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ",
178 "TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt",
179 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo",
180 "Xr", "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>",
181 "[]", "\\{", "\\}", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am",
182 "ar", "as", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx",
183 "c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da", "de",
184 "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el",
185 "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp", "ft",
186 "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i" , "ie",
187 "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
188 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo",
189 "n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr",
190 "ns", "nx", "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn",
191 "po", "pp", "ps", "q" , "r" , "rb", "rd", "re", "rm", "rn", "ro",
192 "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so", "sp", "ss", "st",
193 "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "u",
194 "uf", "uh", "ul", "vs", "wh", "xp", "yr", 0
195 };
196 
/* Where we are in the input; pe() uses these to prefix diagnostics. */
char	*cfilename;	/* name of the file being checked */
int	lineno;		/* line number currently being checked */
int	nfiles;		/* how many files were named on the command line */

/* Command line switches. */
int	fflag;		/* set by -f: do not check \f font changes */
int	sflag;		/* set by -s: do not check \s size changes */

/* State of the knowncmds table. */
int	ncmds;		/* number of entries currently in knowncmds */
int	slot;		/* insertion index left behind by a failed binsrch */
204 
int	main(int, char **);
void	usage(void);

/* per-file checking */
void	process(FILE *f);
void	chkcmd(char *line, char *mac);
void	nomatch(char *mac);
void	checkknown(char *mac);

/* diagnostics */
void	pe(int pelineno);
void	complain(int i);
void	prop(int i);

/* table of known commands */
void	addcmd(char *line);
void	addmac(char *mac);
int	binsrch(char *mac);

/* string helper */
int	eq(const void *s1, const void *s2);
218 
219 int
220 main(int argc, char **argv)
221 {
222 	FILE *f;
223 	int i;
224 	char *cp;
225 	char b1[4];
226 
227 	/* Figure out how many known commands there are */
228 	while (knowncmds[ncmds])
229 		ncmds++;
230 	while (argc > 1 && argv[1][0] == '-') {
231 		switch(argv[1][1]) {
232 
233 		/* -a: add pairs of macros */
234 		case 'a':
235 			i = strlen(argv[1]) - 2;
236 			if (i % 6 != 0)
237 				usage();
238 			/* look for empty macro slots */
239 			for (i=0; br[i].opbr; i++)
240 				;
241 			for (cp=argv[1]+3; cp[-1]; cp += 6) {
242 				br[i].opbr = malloc(3);
243 				strncpy(br[i].opbr, cp, 2);
244 				br[i].clbr = malloc(3);
245 				strncpy(br[i].clbr, cp+3, 2);
246 				addmac(br[i].opbr);	/* knows pairs are also known cmds */
247 				addmac(br[i].clbr);
248 				i++;
249 			}
250 			break;
251 
252 		/* -c: add known commands */
253 		case 'c':
254 			i = strlen(argv[1]) - 2;
255 			if (i % 3 != 0)
256 				usage();
257 			for (cp=argv[1]+3; cp[-1]; cp += 3) {
258 				if (cp[2] && cp[2] != '.')
259 					usage();
260 				strncpy(b1, cp, 2);
261 				addmac(b1);
262 			}
263 			break;
264 
265 		/* -f: ignore font changes */
266 		case 'f':
267 			fflag = 1;
268 			break;
269 
270 		/* -s: ignore size changes */
271 		case 's':
272 			sflag = 1;
273 			break;
274 		default:
275 			usage();
276 		}
277 		argc--; argv++;
278 	}
279 
280 	nfiles = argc - 1;
281 
282 	if (nfiles > 0) {
283 		for (i=1; i<argc; i++) {
284 			cfilename = argv[i];
285 			f = fopen(cfilename, "r");
286 			if (f == NULL)
287 				perror(cfilename);
288 			else
289 				process(f);
290 			fclose(f);
291 		}
292 	} else {
293 		cfilename = "stdin";
294 		process(stdin);
295 	}
296 	exit(0);
297 }
298 
/*
 * Print the command synopsis on standard output and exit with status 1.
 */
void
usage(void)
{
	fputs("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n",
	    stdout);
	exit(1);
}
305 
306 void
307 process(FILE *f)
308 {
309 	int i, n;
310 	char line[256];	/* the current line */
311 	char mac[5];	/* The current macro or nroff command */
312 	int pl;
313 
314 	stktop = -1;
315 	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
316 		if (line[0] == '.') {
317 			/*
318 			 * find and isolate the macro/command name.
319 			 */
320 			strncpy(mac, line+1, 4);
321 			if (isspace((unsigned char)mac[0])) {
322 				pe(lineno);
323 				printf("Empty command\n");
324 			} else if (isspace((unsigned char)mac[1])) {
325 				mac[1] = 0;
326 			} else if (isspace((unsigned char)mac[2])) {
327 				mac[2] = 0;
328 			} else if (mac[0] != '\\' || mac[1] != '\"') {
329 				pe(lineno);
330 				printf("Command too long\n");
331 			}
332 
333 			/*
334 			 * Is it a known command?
335 			 */
336 			checkknown(mac);
337 
338 			/*
339 			 * Should we add it?
340 			 */
341 			if (eq(mac, "de"))
342 				addcmd(line);
343 
344 			chkcmd(line, mac);
345 		}
346 
347 		/*
348 		 * At this point we process the line looking
349 		 * for \s and \f.
350 		 */
351 		for (i=0; line[i]; i++)
352 			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
353 				if (!sflag && line[++i]=='s') {
354 					pl = line[++i];
355 					if (isdigit((unsigned char)pl)) {
356 						n = pl - '0';
357 						pl = ' ';
358 					} else
359 						n = 0;
360 					while (isdigit((unsigned char)line[++i]))
361 						n = 10 * n + line[i] - '0';
362 					i--;
363 					if (n == 0) {
364 						if (stk[stktop].opno == SZ) {
365 							stktop--;
366 						} else {
367 							pe(lineno);
368 							printf("unmatched \\s0\n");
369 						}
370 					} else {
371 						stk[++stktop].opno = SZ;
372 						stk[stktop].pl = pl;
373 						stk[stktop].parm = n;
374 						stk[stktop].lno = lineno;
375 					}
376 				} else if (!fflag && line[i]=='f') {
377 					n = line[++i];
378 					if (n == 'P') {
379 						if (stk[stktop].opno == FT) {
380 							stktop--;
381 						} else {
382 							pe(lineno);
383 							printf("unmatched \\fP\n");
384 						}
385 					} else {
386 						stk[++stktop].opno = FT;
387 						stk[stktop].pl = 1;
388 						stk[stktop].parm = n;
389 						stk[stktop].lno = lineno;
390 					}
391 				}
392 			}
393 	}
394 	/*
395 	 * We've hit the end and look at all this stuff that hasn't been
396 	 * matched yet!  Complain, complain.
397 	 */
398 	for (i=stktop; i>=0; i--) {
399 		complain(i);
400 	}
401 }
402 
403 void
404 complain(int i)
405 {
406 	pe(stk[i].lno);
407 	printf("Unmatched ");
408 	prop(i);
409 	printf("\n");
410 }
411 
412 void
413 prop(int i)
414 {
415 	if (stk[i].pl == 0)
416 		printf(".%s", br[stk[i].opno].opbr);
417 	else switch(stk[i].opno) {
418 	case SZ:
419 		printf("\\s%c%d", stk[i].pl, stk[i].parm);
420 		break;
421 	case FT:
422 		printf("\\f%c", stk[i].parm);
423 		break;
424 	default:
425 		printf("Bug: stk[%d].opno = %d = .%s, .%s",
426 			i, stk[i].opno, br[stk[i].opno].opbr,
427 			br[stk[i].opno].clbr);
428 	}
429 }
430 
431 void
432 chkcmd(char *line, char *mac)
433 {
434 	int i;
435 
436 	/*
437 	 * Check to see if it matches top of stack.
438 	 */
439 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
440 		stktop--;	/* OK. Pop & forget */
441 	else {
442 		/* No. Maybe it's an opener */
443 		for (i=0; br[i].opbr; i++) {
444 			if (eq(mac, br[i].opbr)) {
445 				/* Found. Push it. */
446 				stktop++;
447 				stk[stktop].opno = i;
448 				stk[stktop].pl = 0;
449 				stk[stktop].parm = 0;
450 				stk[stktop].lno = lineno;
451 				break;
452 			}
453 			/*
454 			 * Maybe it's an unmatched closer.
455 			 * NOTE: this depends on the fact
456 			 * that none of the closers can be
457 			 * openers too.
458 			 */
459 			if (eq(mac, br[i].clbr)) {
460 				nomatch(mac);
461 				break;
462 			}
463 		}
464 	}
465 }
466 
467 void
468 nomatch(char *mac)
469 {
470 	int i, j;
471 
472 	/*
473 	 * Look for a match further down on stack
474 	 * If we find one, it suggests that the stuff in
475 	 * between is supposed to match itself.
476 	 */
477 	for (j=stktop; j>=0; j--)
478 		if (eq(mac,br[stk[j].opno].clbr)) {
479 			/* Found.  Make a good diagnostic. */
480 			if (j == stktop-2) {
481 				/*
482 				 * Check for special case \fx..\fR and don't
483 				 * complain.
484 				 */
485 				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
486 				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
487 					stktop = j -1;
488 					return;
489 				}
490 				/*
491 				 * We have two unmatched frobs.  Chances are
492 				 * they were intended to match, so we mention
493 				 * them together.
494 				 */
495 				pe(stk[j+1].lno);
496 				prop(j+1);
497 				printf(" does not match %d: ", stk[j+2].lno);
498 				prop(j+2);
499 				printf("\n");
500 			} else for (i=j+1; i <= stktop; i++) {
501 				complain(i);
502 			}
503 			stktop = j-1;
504 			return;
505 		}
506 	/* Didn't find one.  Throw this away. */
507 	pe(lineno);
508 	printf("Unmatched .%s\n", mac);
509 }
510 
/*
 * eq: compare two NUL-terminated strings.
 * Returns 1 when they are identical and 0 otherwise.
 */
int
eq(const void *s1, const void *s2)
{
	const char *lhs = s1;
	const char *rhs = s2;

	return !strcmp(lhs, rhs);
}
517 
518 /* print the first part of an error message, given the line number */
519 void
520 pe(int pelineno)
521 {
522 	if (nfiles > 1)
523 		printf("%s: ", cfilename);
524 	printf("%d: ", pelineno);
525 }
526 
527 void
528 checkknown(char *mac)
529 {
530 
531 	if (eq(mac, "."))
532 		return;
533 	if (binsrch(mac) >= 0)
534 		return;
535 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
536 		return;
537 
538 	pe(lineno);
539 	printf("Unknown command: .%s\n", mac);
540 }
541 
542 /*
543  * We have a .de xx line in "line".  Add xx to the list of known commands.
544  */
545 void
546 addcmd(char *line)
547 {
548 	char *mac;
549 
550 	/* grab the macro being defined */
551 	mac = line+4;
552 	while (isspace((unsigned char)*mac))
553 		mac++;
554 	if (*mac == 0) {
555 		pe(lineno);
556 		printf("illegal define: %s\n", line);
557 		return;
558 	}
559 	mac[2] = 0;
560 	if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
561 		mac[1] = 0;
562 	if (ncmds >= MAXCMDS) {
563 		printf("Only %d known commands allowed\n", MAXCMDS);
564 		exit(1);
565 	}
566 	addmac(mac);
567 }
568 
569 /*
570  * Add mac to the list.  We should really have some kind of tree
571  * structure here but this is a quick-and-dirty job and I just don't
572  * have time to mess with it.  (I wonder if this will come back to haunt
573  * me someday?)  Anyway, I claim that .de is fairly rare in user
574  * nroff programs, and the register loop below is pretty fast.
575  */
576 void
577 addmac(char *mac)
578 {
579 	char **src, **dest, **loc;
580 
581 	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
582 #ifdef DEBUG
583 		printf("binsrch(%s) -> already in table\n", mac);
584 #endif /* DEBUG */
585 		return;
586 	}
587 	/* binsrch sets slot as a side effect */
588 #ifdef DEBUG
589 	printf("binsrch(%s) -> %d\n", mac, slot);
590 #endif
591 	loc = &knowncmds[slot];
592 	src = &knowncmds[ncmds-1];
593 	dest = src+1;
594 	while (dest > loc)
595 		*dest-- = *src--;
596 	*loc = malloc(3);
597 	strcpy(*loc, mac);
598 	ncmds++;
599 #ifdef DEBUG
600 	printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2],
601 	    knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1],
602 	    knowncmds[slot+2], ncmds);
603 #endif
604 }
605 
606 /*
607  * Do a binary search in knowncmds for mac.
608  * If found, return the index.  If not, return -1.
609  */
610 int
611 binsrch(char *mac)
612 {
613 	char *p;	/* pointer to current cmd in list */
614 	int d;		/* difference if any */
615 	int mid;	/* mid point in binary search */
616 	int top, bot;	/* boundaries of bin search, inclusive */
617 
618 	top = ncmds-1;
619 	bot = 0;
620 	while (top >= bot) {
621 		mid = (top+bot)/2;
622 		p = knowncmds[mid];
623 		d = p[0] - mac[0];
624 		if (d == 0)
625 			d = p[1] - mac[1];
626 		if (d == 0)
627 			return mid;
628 		if (d < 0)
629 			bot = mid + 1;
630 		else
631 			top = mid - 1;
632 	}
633 	slot = bot;	/* place it would have gone */
634 	return -1;
635 }
636