static char *sccsid = "@(#)checknr.c 4.5 (Berkeley) 11/07/82";
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 *
 * Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx... [file ...]
 * With no file arguments, standard input is checked.
 * All diagnostics are written to standard output.
 */
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

/*
 * The stack on which we remember what we've seen so far.
 * stktop is the index of the top entry, or -1 when the stack is empty.
 */
struct stkstr {
	int opno;	/* number of opening bracket (index into br[]) */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;

/* Indices of the two entries of br[] that the \s and \f escapes share. */
#define SZ	0
#define FT	1

/*
 * The kinds of opening and closing brackets.
 * The list is terminated by an entry whose opbr is a null pointer;
 * -a appends new pairs in front of that terminator.
 */
struct brstr {
	char *opbr;
	char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },	/* also \s; must stay at index SZ */
	{ "ft", "ft" },	/* also \f; must stay at index FT */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	{ 0, 0 }
};

/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 * binsrch() does a binary search on the first two characters of each
 * entry, so this list must stay sorted in ascending character order.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AS", "AT", "AU", "AX", "B", "B1", "B2", "BD",
"BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT", "D",
"DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM", "EN", "EQ",
"EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ", "FS",
"FV", "FX", "H", "HC", "HM", "HO", "HU", "I", "ID", "IE", "IH", "IM",
"IP", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB", "LC", "LD", "LE", "LG",
"LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR", "MT", "ND", "NE", "NH",
"NL", "NP", "NS", "OF", "OH", "OK", "OP", "P", "PF", "PH", "PP", "PT",
"PY", "QE", "QP", "QS", "R", "RA", "RC", "RE", "RL", "RP", "RQ", "RS",
"RT", "S", "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM", "SP", "SY",
"TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ", "TR", "TS",
"TX", "UL", "US", "UX", "VL", "WC", "WH", "XD", "XF", "XK", "XP", "[", "[-",
"[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp", "yr",
0
};

int	lineno;		/* current line number in input file */
char	line[256];	/* the current line */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* size of knowncmds */
int	slot;		/* slot in knowncmds found by binsrch */

void	usage(void);
void	process(FILE *f);
void	complain(int i);
void	prop(int i);
void	chkcmd(char *line, const char *mac);
void	nomatch(const char *mac);
int	eq(const char *s1, const char *s2);
void	pe(int lno);
void	checkknown(const char *mac);
void	addcmd(char *line);
void	addmac(const char *mac);
int	binsrch(const char *mac);

/*
 * newname: return a freshly allocated copy of at most the first two
 * characters of src, always NUL terminated.  Exits if memory runs out.
 */
static char *
newname(const char *src)
{
	char *p;

	p = calloc(3, 1);	/* zero filled, so the copy is terminated */
	if (p == NULL) {
		fprintf(stderr, "checknr: out of memory\n");
		exit(1);
	}
	if (src[0] != '\0') {
		p[0] = src[0];
		p[1] = src[1];
	}
	return p;
}

/*
 * push: remember an opener on the stack, stamped with the current line
 * number.  If the stack is already full the opener is reported and
 * dropped rather than written past the end of stk[].
 */
static void
push(int opno, int pl, int parm)
{
	if (stktop + 1 >= MAXSTK) {
		pe(lineno);
		printf("Too many unmatched items (limit %d); ignoring this one\n",
		    MAXSTK);
		return;
	}
	stktop++;
	stk[stktop].opno = opno;
	stk[stktop].pl = pl;
	stk[stktop].parm = parm;
	stk[stktop].lno = lineno;
}

/*
 * endsname: true if c cannot be part of a command name, i.e. it is
 * white space or the end of the line buffer.
 */
static int
endsname(int c)
{
	return c == '\0' || isspace((unsigned char)c);
}

/*
 * main: parse the options, then check every named file (or standard
 * input when none is named).  Always exits 0 once the files have been
 * processed; a bad option exits 1 via usage().
 */
int
main(int argc, char **argv)
{
	FILE *f;
	int i;
	char *cp;
	char b1[4];

	/* Figure out how many known commands there are */
	while (knowncmds[ncmds])
		ncmds++;
	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {

		/* -a: add pairs of macros */
		case 'a':
			i = (int)strlen(argv[1]) - 2;
			if (i % 6 != 0)
				usage();
			/* look for empty macro slots */
			for (i = 0; br[i].opbr; i++)
				;
			/* each pair is ".xx.yy"; cp[-1] is its leading '.' */
			for (cp = argv[1] + 3; cp[-1]; cp += 6) {
				/* the last slot must stay free as terminator */
				if (i >= MAXBR - 1) {
					printf("Only %d bracket pairs allowed\n",
					    MAXBR - 1);
					exit(1);
				}
				br[i].opbr = newname(cp);
				br[i].clbr = newname(cp + 3);
				/* pairs are also known commands */
				addmac(br[i].opbr);
				addmac(br[i].clbr);
				i++;
			}
			break;

		/* -c: add known commands */
		case 'c':
			i = (int)strlen(argv[1]) - 2;
			if (i % 3 != 0)
				usage();
			for (cp = argv[1] + 3; cp[-1]; cp += 3) {
				if (cp[2] && cp[2] != '.')
					usage();
				b1[0] = cp[0];
				b1[1] = cp[1];
				b1[2] = '\0';
				addmac(b1);
			}
			break;

		/* -f: ignore font changes */
		case 'f':
			fflag = 1;
			break;

		/* -s: ignore size changes */
		case 's':
			sflag = 1;
			break;
		default:
			usage();
		}
		argc--;
		argv++;
	}

	nfiles = argc - 1;

	if (nfiles > 0) {
		for (i = 1; i < argc; i++) {
			cfilename = argv[i];
			f = fopen(cfilename, "r");
			if (f == NULL) {
				perror(cfilename);
				continue;
			}
			process(f);
			fclose(f);
		}
	} else {
		cfilename = "stdin";
		process(stdin);
	}
	return 0;
}

/* usage: print the synopsis and exit with status 1. */
void
usage(void)
{
	printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}

/*
 * process: check one input stream.  The stack is emptied and line
 * numbering restarts at 1.  Each line beginning with '.' is checked as
 * a command; every line is then scanned for \s and \f escapes.  Anything
 * still on the stack at end of input is reported as unmatched.
 */
void
process(FILE *f)
{
	int i, n;
	char mac[5];	/* The current macro or nroff command */
	int pl;

	stktop = -1;
	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
		if (line[0] == '.') {
			/*
			 * find and isolate the macro/command name.
			 * strncpy does not terminate when the source is
			 * four or more characters long, so do it by hand.
			 */
			strncpy(mac, line + 1, 4);
			mac[4] = '\0';
			if (endsname(mac[0])) {
				pe(lineno);
				printf("Empty command\n");
			} else if (endsname(mac[1])) {
				mac[1] = 0;
			} else if (endsname(mac[2])) {
				mac[2] = 0;
			} else if (mac[0] != '\\' || mac[1] != '\"') {
				pe(lineno);
				printf("Command too long\n");
			}

			/*
			 * Is it a known command?
			 */
			checkknown(mac);

			/*
			 * Should we add it?
			 */
			if (eq(mac, "de"))
				addcmd(line);

			chkcmd(line, mac);
		}

		/*
		 * At this point we process the line looking
		 * for \s and \f.
		 */
		for (i = 0; line[i]; i++) {
			/* only a backslash not preceded by another one */
			if (line[i] != '\\' || (i > 0 && line[i - 1] == '\\'))
				continue;
			/*
			 * Step onto the escape letter whatever the flags
			 * are, so that -s does not also hide \f.
			 */
			i++;
			if (line[i] == '\0')
				break;		/* lone trailing backslash */
			if (line[i] == 's') {
				if (sflag)
					continue;
				pl = line[++i];
				if (pl == '\0')
					break;	/* escape cut off by end of buffer */
				if (isdigit((unsigned char)pl)) {
					n = pl - '0';
					pl = ' ';
				} else
					n = 0;
				while (isdigit((unsigned char)line[i + 1])) {
					i++;
					/* stop accumulating before int overflows */
					if (n <= (INT_MAX - 9) / 10)
						n = 10 * n + (line[i] - '0');
				}
				if (n == 0) {
					if (stktop >= 0 && stk[stktop].opno == SZ) {
						stktop--;
					} else {
						pe(lineno);
						printf("unmatched \\s0\n");
					}
				} else {
					push(SZ, pl, n);
				}
			} else if (line[i] == 'f') {
				if (fflag)
					continue;
				n = line[++i];
				if (n == '\0')
					break;	/* escape cut off by end of buffer */
				if (n == 'P') {
					if (stktop >= 0 && stk[stktop].opno == FT) {
						stktop--;
					} else {
						pe(lineno);
						printf("unmatched \\fP\n");
					}
				} else {
					push(FT, 1, n);
				}
			}
		}
	}
	/*
	 * We've hit the end and look at all this stuff that hasn't been
	 * matched yet!  Complain, complain.
	 */
	for (i = stktop; i >= 0; i--) {
		complain(i);
	}
}

/* complain: report stack entry i as unmatched. */
void
complain(int i)
{
	pe(stk[i].lno);
	printf("Unmatched ");
	prop(i);
	printf("\n");
}

/*
 * prop: print stack entry i the way it was written in the input:
 * ".xx" for a macro, "\s..." for a size escape, "\fx" for a font escape.
 */
void
prop(int i)
{
	if (stk[i].pl == 0)
		printf(".%s", br[stk[i].opno].opbr);
	else switch (stk[i].opno) {
	case SZ:
		printf("\\s%c%d", stk[i].pl, stk[i].parm);
		break;
	case FT:
		printf("\\f%c", stk[i].parm);
		break;
	default:
		printf("Bug: stk[%d].opno = %d = .%s, .%s",
		    i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
	}
}

/*
 * chkcmd: match command mac against the bracket table.  A closer for
 * the top of the stack pops it; an opener is pushed; any other closer
 * is handed to nomatch().  The line argument is not used.
 */
void
chkcmd(char *line, const char *mac)
{
	int i;

	(void)line;

	/*
	 * Check to see if it matches top of stack.
	 */
	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) {
		stktop--;	/* OK. Pop & forget */
		return;
	}

	/* No. Maybe it's an opener */
	for (i = 0; br[i].opbr; i++) {
		if (eq(mac, br[i].opbr)) {
			/* Found. Push it. */
			push(i, 0, 0);
			break;
		}
		/*
		 * Maybe it's an unmatched closer.
		 * NOTE: the opener test above runs first, so a name
		 * that is both (sz, ft) is always taken as an opener.
		 */
		if (eq(mac, br[i].clbr)) {
			nomatch(mac);
			break;
		}
	}
}

/*
 * nomatch: mac is a closer that does not match the top of the stack.
 * Report what is wrong and pop the stack down past the entry it does
 * match, if there is one.
 */
void
nomatch(const char *mac)
{
	int i, j;

	/*
	 * Look for a match further down on stack
	 * If we find one, it suggests that the stuff in
	 * between is supposed to match itself.
	 */
	for (j = stktop; j >= 0; j--) {
		if (!eq(mac, br[stk[j].opno].clbr))
			continue;
		/* Found. Make a good diagnostic. */
		if (j == stktop - 2) {
			/*
			 * Check for special case \fx..\fR and don't
			 * complain.
			 */
			if (stk[j + 1].opno == FT && stk[j + 1].parm != 'R'
			    && stk[j + 2].opno == FT && stk[j + 2].parm == 'R') {
				stktop = j - 1;
				return;
			}
			/*
			 * We have two unmatched frobs.  Chances are
			 * they were intended to match, so we mention
			 * them together.
			 */
			pe(stk[j + 1].lno);
			prop(j + 1);
			printf(" does not match %d: ", stk[j + 2].lno);
			prop(j + 2);
			printf("\n");
		} else {
			for (i = j + 1; i <= stktop; i++)
				complain(i);
		}
		stktop = j - 1;
		return;
	}
	/* Didn't find one.  Throw this away. */
	pe(lineno);
	printf("Unmatched .%s\n", mac);
}

/* eq: are two strings equal? */
int
eq(const char *s1, const char *s2)
{
	return (strcmp(s1, s2) == 0);
}

/*
 * pe: print the first part of an error message, given the line number.
 * The file name is included only when more than one file was named.
 */
void
pe(int lno)
{
	if (nfiles > 1)
		printf("%s: ", cfilename);
	printf("%d: ", lno);
}

/*
 * checkknown: complain if mac is not a known command.  "." (the second
 * dot of a ".." line) and \" comments are accepted silently.
 */
void
checkknown(const char *mac)
{
	if (eq(mac, "."))
		return;
	if (binsrch(mac) >= 0)
		return;
	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
		return;

	pe(lineno);
	printf("Unknown command: .%s\n", mac);
}

/*
 * We have a .de xx line in "line".  Add xx to the list of known commands.
 * The name is copied out, so the caller's line is left untouched and no
 * byte beyond its terminator is read or written.
 */
void
addcmd(char *line)
{
	char *mac;
	char name[3];

	/* grab the macro being defined: skip ".de" and the blanks after it */
	mac = line + 3;
	while (isspace((unsigned char)*mac))
		mac++;
	if (*mac == 0) {
		pe(lineno);
		printf("illegal define: %s\n", line);
		return;
	}
	name[0] = mac[0];
	name[1] = mac[1];
	/* a one character name is followed by white space or a backslash */
	if (isspace((unsigned char)name[1]) || name[1] == '\\')
		name[1] = 0;
	name[2] = 0;
	addmac(name);
}

/*
 * Add mac to the list.  We should really have some kind of tree
 * structure here but this is a quick-and-dirty job and I just don't
 * have time to mess with it.  (I wonder if this will come back to haunt
 * me someday?)  Anyway, I claim that .de is fairly rare in user
 * nroff programs, and the loop below is pretty fast.
 *
 * A name that is already in the list is left alone: binsrch() only
 * yields a valid insertion point for names it did not find, and a
 * second copy would break the ordering the search relies on.
 * Exits with status 1 if the list is full.
 */
void
addmac(const char *mac)
{
	char **dest, **loc;

	if (binsrch(mac) >= 0)	/* it's OK to redefine something */
		return;
	/* binsrch sets slot as a side effect */
#ifdef DEBUG
	printf("binsrch(%s) -> %d\n", mac, slot);
#endif
	if (ncmds >= MAXCMDS) {
		printf("Only %d known commands allowed\n", MAXCMDS);
		exit(1);
	}
	/* open a hole at slot by moving the tail up one place */
	loc = &knowncmds[slot];
	for (dest = &knowncmds[ncmds]; dest > loc; dest--)
		*dest = dest[-1];
	*loc = newname(mac);
	ncmds++;
#ifdef DEBUG
	printf("after: inserted %s at %d, %d cmds\n", *loc, slot, ncmds);
#endif
}

/*
 * Do a binary search in knowncmds for mac, comparing only the first
 * two characters.  If found, return the index.  If not, return -1.
 * Either way slot is set: to the index found, or to the place the
 * name would have gone.
 */
int
binsrch(const char *mac)
{
	const char *p;	/* pointer to current cmd in list */
	int d;		/* difference if any */
	int mid;	/* mid point in binary search */
	int top, bot;	/* boundaries of bin search, inclusive */

	top = ncmds - 1;
	bot = 0;
	while (top >= bot) {
		mid = bot + (top - bot) / 2;
		p = knowncmds[mid];
		d = p[0] - mac[0];
		if (d == 0)
			d = p[1] - mac[1];
		if (d == 0) {
			slot = mid;
			return mid;
		}
		if (d < 0)
			bot = mid + 1;
		else
			top = mid - 1;
	}
	slot = bot;	/* place it would have gone */
	return -1;
}