static char *sccsid = "@(#)checknr.c 4.4 (Berkeley) 05/13/81";
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

/*
 * The stack on which we remember what we've seen so far.
 */
struct stkstr {
	int opno;	/* number of opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;

/*
 * The kinds of opening and closing brackets.
 * The table is terminated by an entry whose opbr is null; the -a option
 * appends new pairs in front of that terminator.
 */
struct brstr {
	char *opbr;
	char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define SZ	0
	{"sz", "sz"},	/* also \s */
#define FT	1
	{"ft", "ft"},	/* also \f */
	/* the -mm package */
	{"AL", "LE"},
	{"AS", "AE"},
	{"BL", "LE"},
	{"BS", "BE"},
	{"DF", "DE"},
	{"DL", "LE"},
	{"DS", "DE"},
	{"FS", "FE"},
	{"ML", "LE"},
	{"NS", "NE"},
	{"RL", "LE"},
	{"VL", "LE"},
	/* the -ms package */
	{"AB", "AE"},
	{"CD", "DE"},
	{"DS", "DE"},
	{"FS", "FE"},
	{"ID", "DE"},
	{"KF", "KE"},
	{"KS", "KE"},
	{"LD", "DE"},
	{"LG", "NL"},
	{"QS", "QE"},
	{"RS", "RE"},
	{"SM", "NL"},
	/* The -me package */
	{"(b", ")b"},
	{"(c", ")c"},
	{"(d", ")d"},
	{"(f", ")f"},
	{"(l", ")l"},
	{"(q", ")q"},
	{"(x", ")x"},
	{"(z", ")z"},
	/* Things needed by preprocessors */
	{"EQ", "EN"},
	{"TS", "TE"},
	/* Refer */
	{"[", "]"},
	{0, 0}
};

/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 * The list must stay sorted on its first two characters because
 * binsrch() does a binary search over it.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AS", "AT", "AU", "AX", "B", "B1", "B2", "BD",
"BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT", "D",
"DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM", "EN", "EQ",
"EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ", "FS",
"FV", "FX", "H", "HC", "HM", "HO", "HU", "I", "ID", "IE", "IH", "IM",
"IP", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB", "LC", "LD", "LE", "LG",
"LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR", "MT", "ND", "NE", "NH",
"NL", "NP", "NS", "OF", "OH", "OK", "OP", "P", "PF", "PH", "PP", "PT",
"PY", "QE", "QP", "QS", "R", "RA", "RC", "RE", "RL", "RP", "RQ", "RS",
"RT", "S", "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM", "SP", "SY",
"TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ", "TR", "TS",
"TX", "UL", "US", "UX", "VL", "WC", "WH", "XD", "XF", "XK", "XP", "[", "[-",
"[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp", "yr",
0
};

int	lineno;		/* current line number in input file */
char	line[256];	/* the current line */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* size of knowncmds */
int	slot;		/* slot in knowncmds found by binsrch */

void	usage(void);
void	process(FILE *f);
void	complain(int i);
void	prop(int i);
void	chkcmd(char *line, char *mac);
void	nomatch(char *mac);
int	eq(const char *s1, const char *s2);
void	pe(int lineno);
void	checkknown(char *mac);
void	addcmd(char *line);
void	addmac(const char *mac);
int	binsrch(const char *mac);

/* xmalloc: malloc that reports failure and exits instead of returning null. */
static void *
xmalloc(size_t n)
{
	void *p = malloc(n);

	if (p == NULL) {
		fprintf(stderr, "checknr: out of memory\n");
		exit(1);
	}
	return p;
}

/*
 * savemac: return a freshly allocated, NUL-terminated copy of the
 * two-character macro name starting at s.  The caller guarantees that
 * s[0] and s[1] exist.
 */
static char *
savemac(const char *s)
{
	char *p = xmalloc(3);

	p[0] = s[0];
	p[1] = s[1];
	p[2] = '\0';
	return p;
}

/* endsname: true if c terminates a command name (white space or NUL). */
static int
endsname(int c)
{
	return c == '\0' || isspace((unsigned char)c);
}

/*
 * push: push an entry for the current input line onto stk.
 * Gives up with a diagnostic rather than writing past the end of stk.
 */
static void
push(int opno, int pl, int parm)
{
	if (stktop + 1 >= MAXSTK) {
		pe(lineno);
		printf("Nesting deeper than %d levels\n", MAXSTK);
		exit(1);
	}
	stktop++;
	stk[stktop].opno = opno;
	stk[stktop].pl = pl;
	stk[stktop].parm = parm;
	stk[stktop].lno = lineno;
}

/*
 * main: parse the options, then check each named file, or the standard
 * input if options were given but no file names.
 *
 *	-a.xx.yy...	add bracket pairs (opener xx, closer yy)
 *	-c.xx.xx...	add known commands
 *	-f		ignore font changes
 *	-s		ignore size changes
 */
int
main(int argc, char **argv)
{
	FILE *f;
	int i;
	char *cp;
	char b1[4];

	if (argc <= 1)
		usage();
	/* Figure out how many known commands there are */
	while (knowncmds[ncmds])
		ncmds++;
	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {

		/* -a: add pairs of macros */
		case 'a':
			i = (int)strlen(argv[1]) - 2;
			if (i % 6 != 0)
				usage();
			/* look for empty macro slots */
			for (i = 0; br[i].opbr; i++)
				;
			for (cp = argv[1] + 3; cp[-1]; cp += 6) {
				/* the last slot must stay empty: it ends the table */
				if (i >= MAXBR - 1) {
					printf("Only %d bracket pairs allowed\n",
					    MAXBR - 1);
					exit(1);
				}
				br[i].opbr = savemac(cp);
				br[i].clbr = savemac(cp + 3);
				addmac(br[i].opbr);	/* knows pairs are also known cmds */
				addmac(br[i].clbr);
				i++;
			}
			break;

		/* -c: add known commands */
		case 'c':
			i = (int)strlen(argv[1]) - 2;
			if (i % 3 != 0)
				usage();
			for (cp = argv[1] + 3; cp[-1]; cp += 3) {
				if (cp[2] && cp[2] != '.')
					usage();
				b1[0] = cp[0];
				b1[1] = cp[1];
				b1[2] = '\0';
				addmac(b1);
			}
			break;

		/* -f: ignore font changes */
		case 'f':
			fflag = 1;
			break;

		/* -s: ignore size changes */
		case 's':
			sflag = 1;
			break;
		default:
			usage();
		}
		argc--;
		argv++;
	}

	nfiles = argc - 1;

	if (nfiles > 0) {
		for (i = 1; i < argc; i++) {
			cfilename = argv[i];
			f = fopen(cfilename, "r");
			if (f == NULL) {
				perror(cfilename);
			} else {
				process(f);
				fclose(f);
			}
		}
	} else {
		cfilename = "stdin";
		process(stdin);
	}
	exit(0);
}

/* usage: print the synopsis and exit with status 1. */
void
usage(void)
{
	printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}

/*
 * scanescapes: look through the current line for embedded \s and \f
 * escapes and match them up on the stack.  \s0 closes a size change and
 * \fP closes a font change; anything else opens one.
 */
static void
scanescapes(void)
{
	int i, n, pl;

	for (i = 0; line[i] != '\0'; i++) {
		/* a backslash preceded by a backslash does not start an escape */
		if (line[i] != '\\' || (i > 0 && line[i - 1] == '\\'))
			continue;
		i++;			/* step onto the escape letter */
		if (line[i] == '\0')
			break;		/* lone backslash ends the buffer */
		if (line[i] == 's' && !sflag) {
			if (line[i + 1] == '\0')
				break;	/* \s with nothing after it */
			pl = line[++i];
			if (isdigit((unsigned char)pl)) {
				n = pl - '0';
				pl = ' ';
			} else
				n = 0;
			/* leave i on the last character consumed */
			while (isdigit((unsigned char)line[i + 1]))
				n = 10 * n + line[++i] - '0';
			if (n == 0) {
				if (stktop >= 0 && stk[stktop].opno == SZ) {
					stktop--;
				} else {
					pe(lineno);
					printf("unmatched \\s0\n");
				}
			} else {
				push(SZ, pl, n);
			}
		} else if (line[i] == 'f' && !fflag) {
			if (line[i + 1] == '\0')
				break;	/* \f with nothing after it */
			n = line[++i];
			if (n == 'P') {
				if (stktop >= 0 && stk[stktop].opno == FT) {
					stktop--;
				} else {
					pe(lineno);
					printf("unmatched \\fP\n");
				}
			} else {
				push(FT, 1, n);
			}
		}
	}
}

/*
 * process: check one input stream.  Each line that starts with '.' has
 * its command name isolated, looked up and matched against the bracket
 * table; every line is then scanned for \s and \f.  Whatever is still on
 * the stack at end of file is reported as unmatched.
 */
void
process(FILE *f)
{
	int i;
	char mac[5];	/* The current macro or nroff command */

	stktop = -1;
	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
		if (line[0] == '.') {
			/*
			 * find and isolate the macro/command name.
			 * strncpy does not terminate a full-length copy,
			 * so do it by hand.
			 */
			strncpy(mac, line + 1, 4);
			mac[4] = '\0';
			if (endsname(mac[0])) {
				pe(lineno);
				printf("Empty command\n");
			} else if (endsname(mac[1])) {
				mac[1] = 0;
			} else if (endsname(mac[2])) {
				mac[2] = 0;
			} else if (mac[0] != '\\' || mac[1] != '\"') {
				pe(lineno);
				printf("Command too long\n");
			}

			/*
			 * Is it a known command?
			 */
			checkknown(mac);

			/*
			 * Should we add it?
			 */
			if (eq(mac, "de"))
				addcmd(line);

			chkcmd(line, mac);
		}

		/*
		 * At this point we process the line looking
		 * for \s and \f.
		 */
		scanescapes();
	}
	/*
	 * We've hit the end and look at all this stuff that hasn't been
	 * matched yet!  Complain, complain.
	 */
	for (i = stktop; i >= 0; i--)
		complain(i);
}

/* complain: report stack entry i as unmatched. */
void
complain(int i)
{
	pe(stk[i].lno);
	printf("Unmatched ");
	prop(i);
	printf("\n");
}

/* prop: print stack entry i the way it would have appeared in the input. */
void
prop(int i)
{
	if (stk[i].pl == 0) {
		printf(".%s", br[stk[i].opno].opbr);
		return;
	}
	switch (stk[i].opno) {
	case SZ:
		printf("\\s%c%d", stk[i].pl, stk[i].parm);
		break;
	case FT:
		printf("\\f%c", stk[i].parm);
		break;
	default:
		printf("Bug: stk[%d].opno = %d = .%s, .%s",
		    i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
	}
}

/*
 * chkcmd: match command mac against the bracket table.  A closer for the
 * top of the stack pops it, an opener is pushed, and any other closer is
 * handed to nomatch().  The line argument is not used.
 */
void
chkcmd(char *line, char *mac)
{
	int i;

	(void)line;

	/*
	 * Check to see if it matches top of stack.
	 */
	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) {
		stktop--;	/* OK. Pop & forget */
		return;
	}
	/* No. Maybe it's an opener */
	for (i = 0; br[i].opbr; i++) {
		if (eq(mac, br[i].opbr)) {
			/* Found. Push it. */
			push(i, 0, 0);
			break;
		}
		/*
		 * Maybe it's an unmatched closer.
		 * NOTE: this depends on the fact
		 * that none of the closers can be
		 * openers too.
		 */
		if (eq(mac, br[i].clbr)) {
			nomatch(mac);
			break;
		}
	}
}

/*
 * nomatch: mac is a closer that does not match the top of the stack.
 * Report it, discarding the stack entries it skips over if its opener
 * is found further down.
 */
void
nomatch(char *mac)
{
	int i, j;

	/*
	 * Look for a match further down on stack
	 * If we find one, it suggests that the stuff in
	 * between is supposed to match itself.
	 */
	for (j = stktop; j >= 0; j--) {
		if (!eq(mac, br[stk[j].opno].clbr))
			continue;
		/* Found. Make a good diagnostic. */
		if (j == stktop - 2) {
			/*
			 * Check for special case \fx..\fR and don't
			 * complain.
			 */
			if (stk[j+1].opno == FT && stk[j+1].parm != 'R'
			    && stk[j+2].opno == FT && stk[j+2].parm == 'R') {
				stktop = j - 1;
				return;
			}
			/*
			 * We have two unmatched frobs. Chances are
			 * they were intended to match, so we mention
			 * them together.
			 */
			pe(stk[j+1].lno);
			prop(j + 1);
			printf(" does not match %d: ", stk[j+2].lno);
			prop(j + 2);
			printf("\n");
		} else {
			for (i = j + 1; i <= stktop; i++)
				complain(i);
		}
		stktop = j - 1;
		return;
	}
	/* Didn't find one. Throw this away. */
	pe(lineno);
	printf("Unmatched .%s\n", mac);
}

/* eq: are two strings equal? */
int
eq(const char *s1, const char *s2)
{
	return (strcmp(s1, s2) == 0);
}

/*
 * pe: print the first part of an error message, given the line number.
 * The file name is included only when more than one file is being checked.
 */
void
pe(int lineno)
{
	if (nfiles > 1)
		printf("%s: ", cfilename);
	printf("%d: ", lineno);
}

/*
 * checkknown: complain if mac is not "." (the line was ".."), not in
 * knowncmds, and not the start of a \" comment.
 */
void
checkknown(char *mac)
{

	if (eq(mac, "."))
		return;
	if (binsrch(mac) >= 0)
		return;
	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
		return;

	pe(lineno);
	printf("Unknown command: .%s\n", mac);
}

/*
 * We have a .de xx line in "line".  Add xx to the list of known commands.
 * The name is copied out rather than cut short in place, so the line is
 * left intact for the \s and \f scan that follows.
 */
void
addcmd(char *line)
{
	char *mac;
	char name[3];

	/*
	 * grab the macro being defined; line[3] is the white space (or
	 * NUL) that ended "de", so start there rather than beyond it
	 */
	mac = line + 3;
	while (isspace((unsigned char)*mac))
		mac++;
	if (*mac == 0) {
		pe(lineno);
		printf("illegal define: %s\n", line);
		return;
	}
	name[0] = mac[0];
	/* a one-character name ends at white space, a backslash or NUL */
	if (endsname(mac[1]) || mac[1] == '\\')
		name[1] = '\0';
	else
		name[1] = mac[1];
	name[2] = '\0';
	addmac(name);
}

/*
 * Add mac to the list.  We should really have some kind of tree
 * structure here but this is a quick-and-dirty job and I just don't
 * have time to mess with it.  (I wonder if this will come back to haunt
 * me someday?)  Anyway, I claim that .de is fairly rare in user
 * nroff programs, and the shift below is pretty fast.
 *
 * A name that is already known is left alone: binsrch() only records an
 * insertion slot when the search fails.
 */
void
addmac(const char *mac)
{
	char **loc;

	if (binsrch(mac) >= 0)	/* it's OK to redefine something */
		return;
	if (ncmds >= MAXCMDS) {
		printf("Only %d known commands allowed\n", MAXCMDS);
		exit(1);
	}
	/* binsrch sets slot as a side effect */
#ifdef DEBUG
	printf("binsrch(%s) -> %d\n", mac, slot);
#endif
	loc = &knowncmds[slot];
	/* open a gap at slot by moving the tail up one place */
	memmove(loc + 1, loc, (size_t)(ncmds - slot) * sizeof *loc);
	*loc = xmalloc(strlen(mac) + 1);
	strcpy(*loc, mac);
	ncmds++;
#ifdef DEBUG
	/* the neighbours are not printed: slot may be at either end */
	printf("after: %s, %d cmds\n", knowncmds[slot], ncmds);
#endif
}

/*
 * Do a binary search in knowncmds for mac, comparing the first two
 * characters only.  If found, return the index.  If not, return -1.
 * Either way slot is set: to the index found, or to the place where mac
 * would have to be inserted to keep the list sorted.
 */
int
binsrch(const char *mac)
{
	const char *p;	/* pointer to current cmd in list */
	int d;		/* difference if any */
	int mid;	/* mid point in binary search */
	int top, bot;	/* boundaries of bin search, inclusive */

	top = ncmds - 1;
	bot = 0;
	while (top >= bot) {
		mid = (top + bot) / 2;
		p = knowncmds[mid];
		d = p[0] - mac[0];
		if (d == 0)
			d = p[1] - mac[1];
		if (d == 0) {
			slot = mid;
			return mid;
		}
		if (d < 0)
			bot = mid + 1;
		else
			top = mid - 1;
	}
	slot = bot;	/* place it would have gone */
	return -1;
}