static char *sccsid = "@(#)checknr.c 4.1 (Berkeley) 10/01/80";
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 *
 * All diagnostics are written to standard output, one per line, prefixed
 * by the line number (and by the file name when more than one file is
 * being checked).
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

/*
 * The stack on which we remember what we've seen so far.
 */
struct stkstr {
	int opno;	/* number of opening bracket (index into br[]) */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;		/* index of the top entry, -1 when the stack is empty */

/*
 * The kinds of opening and closing brackets.
 * The table is terminated by an entry whose opbr is a null pointer; the
 * -a option appends user supplied pairs after the built-in ones.
 */
#define SZ	0	/* br[] index used for size changes */
#define FT	1	/* br[] index used for font changes */
struct brstr {
	char *opbr;
	char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{"sz", "sz"},	/* also \s */
	{"ft", "ft"},	/* also \f */
	/* the -ms package */
	{"AB", "AE"},
	{"RS", "RE"},
	{"LG", "NL"},
	{"SM", "NL"},
	{"FS", "FE"},
	{"DS", "DE"},
	{"CD", "DE"},
	{"LD", "DE"},
	{"ID", "DE"},
	{"KS", "KE"},
	{"KF", "KE"},
	{"QS", "QE"},
	/* Things needed by preprocessors */
	{"TS", "TE"},
	{"EQ", "EN"},
	/* The -me package */
	{"(l", ")l"},
	{"(q", ")q"},
	{"(b", ")b"},
	{"(z", ")z"},
	{"(c", ")c"},
	{"(d", ")d"},
	{"(f", ")f"},
	{"(x", ")x"},
	{NULL, NULL}
};

/*
 * All commands known to nroff, plus ms and me.
 * Used so we can complain about unrecognized commands.
 * The list must stay sorted on its first two characters because
 * binsrch() does a binary search over it.
 */
char *knowncmds[MAXCMDS] = {
	"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l",
	"(q", "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q",
	")t", ")x", ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(",
	"@)", "@C", "@D", "@F", "@I", "@M", "@c", "@e", "@f", "@h",
	"@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB", "AB", "AE",
	"AE", "AI", "AI", "AT", "AU", "AU", "AX", "B", "B1", "B2",
	"BD", "BG", "BT", "BX", "C1", "C2", "CD", "CM", "CT", "D",
	"DA", "DE", "DF", "DS", "EG", "EM", "EN", "EQ", "EQ", "FA",
	"FE", "FJ", "FK", "FL", "FN", "FO", "FQ", "FS", "FV", "FX",
	"HO", "I", "ID", "IE", "IH", "IM", "IP", "IZ", "KD", "KE",
	"KF", "KQ", "KS", "LB", "LD", "LG", "LP", "MC", "ME", "MF",
	"MH", "MR", "ND", "NH", "NL", "NP", "OK", "PP", "PT", "PY",
	"QE", "QP", "QS", "R", "RA", "RC", "RE", "RP", "RQ", "RS",
	"RT", "S0", "S2", "S3", "SG", "SH", "SM", "SY", "TA", "TC",
	"TD", "TE", "TH", "TL", "TL", "TM", "TQ", "TR", "TS", "TS",
	"TX", "UL", "US", "UX", "WH", "XD", "XF", "XK", "XP", "[-",
	"[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]-",
	"]<", "]>", "][", "ab", "ac", "ad", "af", "am", "ar", "as",
	"b", "ba", "bc", "bd", "bi", "bl", "bp", "bp", "br", "bx",
	"c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da",
	"de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef",
	"eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl",
	"fo", "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw",
	"hx", "hy", "i", "ie", "if", "ig", "in", "ip", "it", "ix",
	"lc", "lg", "li", "ll", "ll", "ln", "lo", "lp", "ls", "lt",
	"m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1", "n2", "na",
	"ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
	"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po",
	"po", "pp", "ps", "q", "r", "rb", "rd", "re", "re", "rm",
	"rn", "ro", "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so",
	"sp", "ss", "st", "sv", "sz", "ta", "tc", "th", "ti", "tl",
	"tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "yr",
	0
};

int	lineno;		/* current line number in input file */
char	line[256];	/* the current line */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* size of knowncmds */
int	slot;		/* slot in knowncmds found by binsrch */

void	process(FILE *f);
void	complain(int i);
void	prop(int i);
void	chkcmd(const char *line, const char *mac);
void	nomatch(const char *mac);
int	eq(const char *s1, const char *s2);
void	pe(int lineno);
void	checkknown(const char *mac);
void	addcmd(char *line);
int	binsrch(const char *mac);

static void	usage(void);
static void	push(int opno, int pl, int parm);
static void	chkescapes(void);
static int	endofname(int c);

/* usage: print the one-line synopsis (does not exit). */
static void
usage(void)
{
	printf("Usage: nrc -s -f -a.xx.yy.xx.yy... (.xx, .yy)\n");
}

/*
 * main: parse the flags, then check every named file, or standard input
 * when no file is named.
 *
 *	-a.xx.yy...	add (.xx, .yy) as extra opening/closing macro pairs
 *	-f		ignore \f font changes
 *	-s		ignore \s size changes
 *
 * When invoked with no arguments at all the usage line is printed and
 * standard input is still processed.  The exit status is always 0 unless
 * an internal table overflows.
 */
int
main(int argc, char **argv)
{
	FILE *f;
	int i, len;
	char *cp;

	if (argc <= 1)
		usage();
	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {
		case 'a':
			/* -a: add pairs of macros; each pair is ".xx.yy" */
			len = (int)strlen(argv[1]) - 2;
			if (len % 6 != 0) {
				usage();
				break;
			}
			/* look for empty macro slots */
			for (i = 0; br[i].opbr; i++)
				;
			/*
			 * Walk the argument by length.  The names are
			 * terminated in place, which overwrites the '.'
			 * that starts the next pair, so that byte cannot
			 * be used as the loop sentinel.
			 */
			for (cp = argv[1] + 3; len > 0; len -= 6, cp += 6) {
				/* always keep one null entry as terminator */
				if (i >= MAXBR - 1) {
					printf("Only %d bracket pairs allowed\n",
					    MAXBR - 1);
					exit(1);
				}
				br[i].opbr = cp;
				br[i].clbr = cp + 3;
				cp[2] = cp[5] = 0;
				i++;
			}
			break;
		case 'f':
			fflag = 1;
			break;
		case 's':
			sflag = 1;
			break;
		default:
			printf("Illegal flag: %s\n", argv[1]);
			break;
		}
		argc--;
		argv++;
	}

	nfiles = argc - 1;

	if (nfiles > 0) {
		for (i = 1; i < argc; i++) {
			cfilename = argv[i];
			f = fopen(cfilename, "r");
			if (f == NULL) {
				perror(cfilename);
			} else {
				process(f);
				fclose(f);
			}
		}
	} else {
		cfilename = "stdin";
		process(stdin);
	}
	exit(0);
}

/* endofname: true if c ends a command name (white space or end of string). */
static int
endofname(int c)
{
	return c == '\0' || isspace((unsigned char)c);
}

/*
 * push: remember an opener on the stack, stamped with the current line
 * number.  Running out of stack is fatal: it is reported and the program
 * exits with status 1 rather than writing past the end of stk[].
 */
static void
push(int opno, int pl, int parm)
{
	if (stktop >= MAXSTK - 1) {
		pe(lineno);
		printf("Too many unmatched items (limit %d)\n", MAXSTK);
		exit(1);
	}
	stktop++;
	stk[stktop].opno = opno;
	stk[stktop].pl = pl;
	stk[stktop].parm = parm;
	stk[stktop].lno = lineno;
}

/*
 * chkescapes: scan the current line for embedded \s and \f escapes.
 * \s0 (or a \s with no number) and \fP close the most recent size/font
 * change; any other \sN or \fX opens a new one.  A backslash preceded by
 * another backslash is not treated as the start of an escape.
 */
static void
chkescapes(void)
{
	int i, n, pl;

	for (i = 0; line[i]; i++) {
		if (line[i] != '\\' || (i > 0 && line[i - 1] == '\\'))
			continue;
		/*
		 * Step onto the escape name unconditionally, so that -s
		 * does not prevent \f from being recognized.
		 */
		i++;
		if (line[i] == '\0')
			break;		/* lone backslash at end of line */

		if (line[i] == 's') {
			if (sflag)
				continue;
			if (line[i + 1] == '\0')
				break;	/* truncated escape */
			pl = line[++i];	/* sign, or first digit */
			if (isdigit((unsigned char)pl)) {
				n = pl - '0';
				pl = ' ';
			} else
				n = 0;
			while (isdigit((unsigned char)line[i + 1])) {
				i++;
				/* saturate instead of overflowing an int */
				if (n <= 9999)
					n = 10 * n + (line[i] - '0');
			}
			if (n == 0) {
				if (stktop >= 0 && stk[stktop].opno == SZ) {
					stktop--;
				} else {
					pe(lineno);
					printf("unmatched \\s0\n");
				}
			} else {
				push(SZ, pl, n);
			}
		} else if (line[i] == 'f') {
			if (fflag)
				continue;
			if (line[i + 1] == '\0')
				break;	/* truncated escape */
			n = line[++i];	/* the font name character */
			if (n == 'P') {
				if (stktop >= 0 && stk[stktop].opno == FT) {
					stktop--;
				} else {
					pe(lineno);
					printf("unmatched \\fP\n");
				}
			} else {
				push(FT, 1, n);
			}
		}
	}
}

/*
 * process: check one input stream.  Every line starting with '.' is
 * treated as a command: its name is isolated, looked up in the known
 * command list, registered if it is being defined with .de, and matched
 * against the bracket table.  Every line is then scanned for \s and \f.
 * Whatever is still on the stack at end of file is reported as unmatched.
 */
void
process(FILE *f)
{
	int i;
	char mac[5];	/* The current macro or nroff command */

	stktop = -1;
	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
		if (line[0] == '.') {
			/*
			 * Find and isolate the macro/command name.
			 * strncpy zero-pads a short source but does not
			 * terminate a long one, so terminate explicitly.
			 */
			strncpy(mac, line + 1, 4);
			mac[4] = '\0';
			if (mac[0] == '\\' && mac[1] == '"') {
				/* the whole line is a comment */
				mac[2] = '\0';
			} else if (endofname(mac[0])) {
				pe(lineno);
				printf("Empty command\n");
			} else if (endofname(mac[1])) {
				mac[1] = '\0';
			} else if (endofname(mac[2])) {
				mac[2] = '\0';
			} else if (mac[2] != '\\' || mac[3] != '"') {
				/* not a two-letter name followed by \" */
				pe(lineno);
				printf("Command too long\n");
			}

			/*
			 * Is it a known command?
			 */
			checkknown(mac);

			/*
			 * Should we add it?
			 */
			if (eq(mac, "de"))
				addcmd(line);

			chkcmd(line, mac);
		}

		/*
		 * At this point we process the line looking
		 * for \s and \f.
		 */
		chkescapes();
	}
	/*
	 * We've hit the end and look at all this stuff that hasn't been
	 * matched yet!  Complain, complain.
	 */
	for (i = stktop; i >= 0; i--)
		complain(i);
}

/* complain: report stack entry i as unmatched. */
void
complain(int i)
{
	pe(stk[i].lno);
	printf("Unmatched ");
	prop(i);
	printf("\n");
}

/*
 * prop: print stack entry i the way it appeared in the input: as a
 * .macro when pl is 0, otherwise as a \s or \f escape.
 */
void
prop(int i)
{
	if (stk[i].pl == 0)
		printf(".%s", br[stk[i].opno].opbr);
	else switch (stk[i].opno) {
	case SZ:
		printf("\\s%c%d", stk[i].pl, stk[i].parm);
		break;
	case FT:
		printf("\\f%c", stk[i].parm);
		break;
	default:
		printf("Bug: stk[%d].opno = %d = .%s, .%s",
		    i, stk[i].opno, br[stk[i].opno].opbr,
		    br[stk[i].opno].clbr);
	}
}

/*
 * chkcmd: match command mac against the bracket table.  A closer for the
 * top of the stack pops it; an opener is pushed; any other closer is
 * handed to nomatch().  The line argument is not used.
 */
void
chkcmd(const char *line, const char *mac)
{
	int i;

	(void)line;

	/*
	 * Check to see if it matches top of stack.
	 */
	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) {
		stktop--;	/* OK. Pop & forget */
		return;
	}

	/* No. Maybe it's an opener */
	for (i = 0; br[i].opbr; i++) {
		if (eq(mac, br[i].opbr)) {
			/* Found. Push it. */
			push(i, 0, 0);
			break;
		}
		/*
		 * Maybe it's an unmatched closer.
		 * NOTE: this depends on the fact
		 * that none of the closers can be
		 * openers too.
		 */
		if (eq(mac, br[i].clbr)) {
			nomatch(mac);
			break;
		}
	}
}

/*
 * nomatch: mac is a closer that does not match the top of the stack.
 * Look for its opener further down; if found, complain about everything
 * above it and pop down to below the opener.  Otherwise report the
 * closer itself as unmatched.
 */
void
nomatch(const char *mac)
{
	int i, j;

	/*
	 * Look for a match further down on stack.
	 * If we find one, it suggests that the stuff in
	 * between is supposed to match itself.
	 */
	for (j = stktop; j >= 0; j--) {
		if (!eq(mac, br[stk[j].opno].clbr))
			continue;
		/* Found. Make a good diagnostic. */
		if (j == stktop - 2) {
			/*
			 * Check for special case \fx..\fR and don't
			 * complain.
			 */
			if (stk[j + 1].opno == FT && stk[j + 1].parm != 'R' &&
			    stk[j + 2].opno == FT && stk[j + 2].parm == 'R') {
				stktop = j - 1;
				return;
			}
			/*
			 * We have two unmatched frobs.  Chances are
			 * they were intended to match, so we mention
			 * them together.
			 */
			pe(stk[j + 1].lno);
			prop(j + 1);
			printf(" does not match %d: ", stk[j + 2].lno);
			prop(j + 2);
			printf("\n");
		} else {
			for (i = j + 1; i <= stktop; i++)
				complain(i);
		}
		stktop = j - 1;
		return;
	}
	/* Didn't find one. Throw this away. */
	pe(lineno);
	printf("Unmatched .%s\n", mac);
}

/* eq: are two strings equal? */
int
eq(const char *s1, const char *s2)
{
	return (strcmp(s1, s2) == 0);
}

/*
 * pe: print the first part of an error message, given the line number.
 * The file name is included only when several files are being checked.
 * (The parameter deliberately shadows the global of the same name.)
 */
void
pe(int lineno)
{
	if (nfiles > 1)
		printf("%s: ", cfilename);
	printf("%d: ", lineno);
}

/*
 * checkknown: complain if mac is not in the list of known commands.
 * "." (from a ".." line) and comments (names starting with \") are
 * accepted silently.
 */
void
checkknown(const char *mac)
{
	/* First time figure out ncmds. */
	if (ncmds == 0) {
		while (knowncmds[ncmds])
			ncmds++;
	}

	if (eq(mac, "."))
		return;
	if (mac[0] == '\\' && mac[1] == '"')
		return;		/* comment, not a command */
	if (binsrch(mac) >= 0)
		return;

	pe(lineno);
	printf("Unknown command: .%s\n", mac);
}

/*
 * addcmd: we have a .de xx line in "line".  Add xx to the list of known
 * commands.  The macro name is terminated in place inside line.
 * Redefining a command that is already known is accepted and changes
 * nothing.  Overflowing the table or running out of memory is fatal
 * (exit status 1).
 */
void
addcmd(char *line)
{
	char *mac, *copy;
	int i;

	/* Make sure ncmds is valid even if checkknown() never ran. */
	if (ncmds == 0) {
		while (knowncmds[ncmds])
			ncmds++;
	}

	/* grab the macro being defined; don't step over the terminator */
	mac = line + 3;
	if (*mac != '\0')
		mac++;
	while (isspace((unsigned char)*mac))
		mac++;
	if (*mac == 0) {
		pe(lineno);
		printf("illegal define: %s\n", line);
		return;
	}
	/* names are at most two characters; a one-letter name may end early */
	if (mac[1] != '\0') {
		mac[2] = 0;
		if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
			mac[1] = 0;
	}

	/*
	 * It's OK to redefine something.  binsrch only sets slot when the
	 * name is NOT found, so an existing name must not be inserted again.
	 */
	if (binsrch(mac) >= 0)
		return;

	if (ncmds >= MAXCMDS) {
		printf("Only %d known commands allowed\n", MAXCMDS);
		exit(1);
	}
	copy = malloc(3);
	if (copy == NULL) {
		printf("Out of memory\n");
		exit(1);
	}
	strcpy(copy, mac);

	/*
	 * Add mac to the list, keeping it sorted: open a hole at slot by
	 * shifting the tail up one place.  A tree would be nicer, but .de
	 * is fairly rare in user nroff programs and the list is short.
	 */
	for (i = ncmds; i > slot; i--)
		knowncmds[i] = knowncmds[i - 1];
	knowncmds[slot] = copy;
	ncmds++;
}

/*
 * binsrch: do a binary search in knowncmds for mac, comparing the first
 * two characters only.
 * If found, return the index.  If not, return -1 and set the global
 * slot to the index at which mac would have to be inserted.
 */
int
binsrch(const char *mac)
{
	const char *p;	/* pointer to current cmd in list */
	int d;		/* difference if any */
	int mid;	/* mid point in binary search */
	int top, bot;	/* boundaries of bin search, inclusive */

	top = ncmds - 1;
	bot = 0;
	while (top >= bot) {
		mid = (top + bot) / 2;
		p = knowncmds[mid];
		d = p[0] - mac[0];
		if (d == 0)
			d = p[1] - mac[1];
		if (d == 0)
			return mid;
		if (d < 0)
			bot = mid + 1;
		else
			top = mid - 1;
	}
	slot = bot;	/* place it would have gone */
	return -1;
}