1 /* $NetBSD: checknr.c,v 1.10 2002/01/21 18:28:00 wiz Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <sys/cdefs.h> 37 #ifndef lint 38 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\ 39 The Regents of the University of California. All rights reserved.\n"); 40 #endif /* not lint */ 41 42 #ifndef lint 43 #if 0 44 static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93"; 45 #else 46 __RCSID("$NetBSD: checknr.c,v 1.10 2002/01/21 18:28:00 wiz Exp $"); 47 #endif 48 #endif /* not lint */ 49 50 /* 51 * checknr: check an nroff/troff input file for matching macro calls. 52 * we also attempt to match size and font changes, but only the embedded 53 * kind. These must end in \s0 and \fP resp. Maybe more sophistication 54 * later but for now think of these restrictions as contributions to 55 * structured typesetting. 56 */ 57 #include <ctype.h> 58 #include <stdio.h> 59 #include <stdlib.h> 60 #include <string.h> 61 62 #define MAXSTK 100 /* Stack size */ 63 #define MAXBR 100 /* Max number of bracket pairs known */ 64 #define MAXCMDS 500 /* Max number of commands known */ 65 66 /* 67 * The stack on which we remember what we've seen so far. 68 */ 69 struct stkstr { 70 int opno; /* number of opening bracket */ 71 int pl; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */ 72 int parm; /* parm to size, font, etc */ 73 int lno; /* line number the thing came in in */ 74 } stk[MAXSTK]; 75 int stktop; 76 77 /* 78 * The kinds of opening and closing brackets. 79 */ 80 struct brstr { 81 char *opbr; 82 char *clbr; 83 } br[MAXBR] = { 84 /* A few bare bones troff commands */ 85 #define SZ 0 86 { "sz", "sz"}, /* also \s */ 87 #define FT 1 88 { "ft", "ft"}, /* also \f */ 89 /* the -mm package */ 90 {"AL", "LE"}, 91 {"AS", "AE"}, 92 {"BL", "LE"}, 93 {"BS", "BE"}, 94 {"DF", "DE"}, 95 {"DL", "LE"}, 96 {"DS", "DE"}, 97 {"FS", "FE"}, 98 {"ML", "LE"}, 99 {"NS", "NE"}, 100 {"RL", "LE"}, 101 {"VL", "LE"}, 102 /* the -ms package */ 103 {"AB", "AE"}, 104 {"BD", "DE"}, 105 {"CD", "DE"}, 106 {"DS", "DE"}, 107 {"FS", "FE"}, 108 {"ID", "DE"}, 109 {"KF", "KE"}, 110 {"KS", "KE"}, 111 {"LD", "DE"}, 112 {"LG", "NL"}, 113 {"QS", "QE"}, 114 {"RS", "RE"}, 115 {"SM", "NL"}, 116 {"XA", "XE"}, 117 {"XS", "XE"}, 118 /* The -me package */ 119 {"(b", ")b"}, 120 {"(c", ")c"}, 121 {"(d", ")d"}, 122 {"(f", ")f"}, 123 {"(l", ")l"}, 124 {"(q", ")q"}, 125 {"(x", ")x"}, 126 {"(z", ")z"}, 127 /* The -mdoc package */ 128 {"Ao", "Ac"}, 129 {"Bd", "Ed"}, 130 {"Bk", "Ek"}, 131 {"Bo", "Bc"}, 132 {"Do", "Dc"}, 133 {"Fo", "Fc"}, 134 {"Oo", "Oc"}, 135 {"Po", "Pc"}, 136 {"Qo", "Qc"}, 137 {"Rs", "Re"}, 138 {"So", "Sc"}, 139 {"Xo", "Xc"}, 140 /* Things needed by preprocessors */ 141 {"EQ", "EN"}, 142 {"TS", "TE"}, 143 /* Refer */ 144 {"[", "]"}, 145 {0, 0}, 146 }; 147 148 /* 149 * All commands known to nroff, plus macro packages. 150 * Used so we can complain about unrecognized commands. 151 */ 152 char *knowncmds[MAXCMDS] = { 153 "$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N", 154 "%O", "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q", 155 "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", 156 ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", 157 "@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", 158 "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT", 159 "AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", "B" , "B1", 160 "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", "Bf", 161 "Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT", 162 "Cd", "Cm", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc", 163 "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM", 164 "EN", "EQ", "EX", "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er", 165 "Ev", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ", 166 "FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Ft", "Fx", 167 "H" , "HC", "HD", "HM", "HO", "HU", "I" , "ID", "IE", "IH", "IM", 168 "IP", "IX", "IZ", "Ic", "It", "KD", "KE", "KF", "KQ", "KS", "LB", 169 "LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", "MF", 170 "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd", 171 "Nm", "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op", 172 "Os", "Ot", "Ox", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY", 173 "Pa", "Pc", "Pf", "Po", "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql", 174 "Qo", "Qq", "R" , "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT", 175 "Re", "Rs", "S" , "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM", 176 "SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy", 177 "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ", 178 "TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt", 179 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo", 180 "Xr", "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", 181 "[]", "\\{", "\\}", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am", 182 "ar", "as", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx", 183 "c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da", "de", 184 "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el", 185 "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp", "ft", 186 "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i" , "ie", 187 "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln", 188 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", 189 "n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", 190 "ns", "nx", "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", 191 "po", "pp", "ps", "q" , "r" , "rb", "rd", "re", "rm", "rn", "ro", 192 "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so", "sp", "ss", "st", 193 "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "u", 194 "uf", "uh", "ul", "vs", "wh", "xp", "yr", 0 195 }; 196 197 int lineno; /* current line number in input file */ 198 char *cfilename; /* name of current file */ 199 int nfiles; /* number of files to process */ 200 int fflag; /* -f: ignore \f */ 201 int sflag; /* -s: ignore \s */ 202 int ncmds; /* size of knowncmds */ 203 int slot; /* slot in knowncmds found by binsrch */ 204 205 void addcmd(char *); 206 void addmac(char *); 207 int binsrch(char *); 208 void checkknown(char *); 209 void chkcmd(char *, char *); 210 void complain(int); 211 int eq(const void *, const void *); 212 int main(int, char **); 213 void nomatch(char *); 214 void pe(int); 215 void process(FILE *); 216 void prop(int); 217 void usage(void); 218 219 int 220 main(int argc, char **argv) 221 { 222 FILE *f; 223 int i; 224 char *cp; 225 char b1[4]; 226 227 /* Figure out how many known commands there are */ 228 while (knowncmds[ncmds]) 229 ncmds++; 230 while (argc > 1 && argv[1][0] == '-') { 231 switch(argv[1][1]) { 232 233 /* -a: add pairs of macros */ 234 case 'a': 235 i = strlen(argv[1]) - 2; 236 if (i % 6 != 0) 237 usage(); 238 /* look for empty macro slots */ 239 for (i=0; br[i].opbr; i++) 240 ; 241 for (cp=argv[1]+3; cp[-1]; cp += 6) { 242 br[i].opbr = malloc(3); 243 strncpy(br[i].opbr, cp, 2); 244 br[i].clbr = malloc(3); 245 strncpy(br[i].clbr, cp+3, 2); 246 addmac(br[i].opbr); /* knows pairs are also known cmds */ 247 addmac(br[i].clbr); 248 i++; 249 } 250 break; 251 252 /* -c: add known commands */ 253 case 'c': 254 i = strlen(argv[1]) - 2; 255 if (i % 3 != 0) 256 usage(); 257 for (cp=argv[1]+3; cp[-1]; cp += 3) { 258 if (cp[2] && cp[2] != '.') 259 usage(); 260 strncpy(b1, cp, 2); 261 addmac(b1); 262 } 263 break; 264 265 /* -f: ignore font changes */ 266 case 'f': 267 fflag = 1; 268 break; 269 270 /* -s: ignore size changes */ 271 case 's': 272 sflag = 1; 273 break; 274 default: 275 usage(); 276 } 277 argc--; argv++; 278 } 279 280 nfiles = argc - 1; 281 282 if (nfiles > 0) { 283 for (i=1; i<argc; i++) { 284 cfilename = argv[i]; 285 f = fopen(cfilename, "r"); 286 if (f == NULL) 287 perror(cfilename); 288 else 289 process(f); 290 fclose(f); 291 } 292 } else { 293 cfilename = "stdin"; 294 process(stdin); 295 } 296 exit(0); 297 } 298 299 void 300 usage(void) 301 { 302 printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n"); 303 exit(1); 304 } 305 306 void 307 process(FILE *f) 308 { 309 int i, n; 310 char line[256]; /* the current line */ 311 char mac[5]; /* The current macro or nroff command */ 312 int pl; 313 314 stktop = -1; 315 for (lineno = 1; fgets(line, sizeof line, f); lineno++) { 316 if (line[0] == '.') { 317 /* 318 * find and isolate the macro/command name. 319 */ 320 strncpy(mac, line+1, 4); 321 if (isspace((unsigned char)mac[0])) { 322 pe(lineno); 323 printf("Empty command\n"); 324 } else if (isspace((unsigned char)mac[1])) { 325 mac[1] = 0; 326 } else if (isspace((unsigned char)mac[2])) { 327 mac[2] = 0; 328 } else if (mac[0] != '\\' || mac[1] != '\"') { 329 pe(lineno); 330 printf("Command too long\n"); 331 } 332 333 /* 334 * Is it a known command? 335 */ 336 checkknown(mac); 337 338 /* 339 * Should we add it? 340 */ 341 if (eq(mac, "de")) 342 addcmd(line); 343 344 chkcmd(line, mac); 345 } 346 347 /* 348 * At this point we process the line looking 349 * for \s and \f. 350 */ 351 for (i=0; line[i]; i++) 352 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) { 353 if (!sflag && line[++i]=='s') { 354 pl = line[++i]; 355 if (isdigit((unsigned char)pl)) { 356 n = pl - '0'; 357 pl = ' '; 358 } else 359 n = 0; 360 while (isdigit((unsigned char)line[++i])) 361 n = 10 * n + line[i] - '0'; 362 i--; 363 if (n == 0) { 364 if (stk[stktop].opno == SZ) { 365 stktop--; 366 } else { 367 pe(lineno); 368 printf("unmatched \\s0\n"); 369 } 370 } else { 371 stk[++stktop].opno = SZ; 372 stk[stktop].pl = pl; 373 stk[stktop].parm = n; 374 stk[stktop].lno = lineno; 375 } 376 } else if (!fflag && line[i]=='f') { 377 n = line[++i]; 378 if (n == 'P') { 379 if (stk[stktop].opno == FT) { 380 stktop--; 381 } else { 382 pe(lineno); 383 printf("unmatched \\fP\n"); 384 } 385 } else { 386 stk[++stktop].opno = FT; 387 stk[stktop].pl = 1; 388 stk[stktop].parm = n; 389 stk[stktop].lno = lineno; 390 } 391 } 392 } 393 } 394 /* 395 * We've hit the end and look at all this stuff that hasn't been 396 * matched yet! Complain, complain. 397 */ 398 for (i=stktop; i>=0; i--) { 399 complain(i); 400 } 401 } 402 403 void 404 complain(int i) 405 { 406 pe(stk[i].lno); 407 printf("Unmatched "); 408 prop(i); 409 printf("\n"); 410 } 411 412 void 413 prop(int i) 414 { 415 if (stk[i].pl == 0) 416 printf(".%s", br[stk[i].opno].opbr); 417 else switch(stk[i].opno) { 418 case SZ: 419 printf("\\s%c%d", stk[i].pl, stk[i].parm); 420 break; 421 case FT: 422 printf("\\f%c", stk[i].parm); 423 break; 424 default: 425 printf("Bug: stk[%d].opno = %d = .%s, .%s", 426 i, stk[i].opno, br[stk[i].opno].opbr, 427 br[stk[i].opno].clbr); 428 } 429 } 430 431 void 432 chkcmd(char *line, char *mac) 433 { 434 int i; 435 436 /* 437 * Check to see if it matches top of stack. 438 */ 439 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) 440 stktop--; /* OK. Pop & forget */ 441 else { 442 /* No. Maybe it's an opener */ 443 for (i=0; br[i].opbr; i++) { 444 if (eq(mac, br[i].opbr)) { 445 /* Found. Push it. */ 446 stktop++; 447 stk[stktop].opno = i; 448 stk[stktop].pl = 0; 449 stk[stktop].parm = 0; 450 stk[stktop].lno = lineno; 451 break; 452 } 453 /* 454 * Maybe it's an unmatched closer. 455 * NOTE: this depends on the fact 456 * that none of the closers can be 457 * openers too. 458 */ 459 if (eq(mac, br[i].clbr)) { 460 nomatch(mac); 461 break; 462 } 463 } 464 } 465 } 466 467 void 468 nomatch(char *mac) 469 { 470 int i, j; 471 472 /* 473 * Look for a match further down on stack 474 * If we find one, it suggests that the stuff in 475 * between is supposed to match itself. 476 */ 477 for (j=stktop; j>=0; j--) 478 if (eq(mac,br[stk[j].opno].clbr)) { 479 /* Found. Make a good diagnostic. */ 480 if (j == stktop-2) { 481 /* 482 * Check for special case \fx..\fR and don't 483 * complain. 484 */ 485 if (stk[j+1].opno==FT && stk[j+1].parm!='R' 486 && stk[j+2].opno==FT && stk[j+2].parm=='R') { 487 stktop = j -1; 488 return; 489 } 490 /* 491 * We have two unmatched frobs. Chances are 492 * they were intended to match, so we mention 493 * them together. 494 */ 495 pe(stk[j+1].lno); 496 prop(j+1); 497 printf(" does not match %d: ", stk[j+2].lno); 498 prop(j+2); 499 printf("\n"); 500 } else for (i=j+1; i <= stktop; i++) { 501 complain(i); 502 } 503 stktop = j-1; 504 return; 505 } 506 /* Didn't find one. Throw this away. */ 507 pe(lineno); 508 printf("Unmatched .%s\n", mac); 509 } 510 511 /* eq: are two strings equal? */ 512 int 513 eq(const void *s1, const void *s2) 514 { 515 return (strcmp((char *)s1, (char *)s2) == 0); 516 } 517 518 /* print the first part of an error message, given the line number */ 519 void 520 pe(int pelineno) 521 { 522 if (nfiles > 1) 523 printf("%s: ", cfilename); 524 printf("%d: ", pelineno); 525 } 526 527 void 528 checkknown(char *mac) 529 { 530 531 if (eq(mac, ".")) 532 return; 533 if (binsrch(mac) >= 0) 534 return; 535 if (mac[0] == '\\' && mac[1] == '"') /* comments */ 536 return; 537 538 pe(lineno); 539 printf("Unknown command: .%s\n", mac); 540 } 541 542 /* 543 * We have a .de xx line in "line". Add xx to the list of known commands. 544 */ 545 void 546 addcmd(char *line) 547 { 548 char *mac; 549 550 /* grab the macro being defined */ 551 mac = line+4; 552 while (isspace((unsigned char)*mac)) 553 mac++; 554 if (*mac == 0) { 555 pe(lineno); 556 printf("illegal define: %s\n", line); 557 return; 558 } 559 mac[2] = 0; 560 if (isspace((unsigned char)mac[1]) || mac[1] == '\\') 561 mac[1] = 0; 562 if (ncmds >= MAXCMDS) { 563 printf("Only %d known commands allowed\n", MAXCMDS); 564 exit(1); 565 } 566 addmac(mac); 567 } 568 569 /* 570 * Add mac to the list. We should really have some kind of tree 571 * structure here but this is a quick-and-dirty job and I just don't 572 * have time to mess with it. (I wonder if this will come back to haunt 573 * me someday?) Anyway, I claim that .de is fairly rare in user 574 * nroff programs, and the register loop below is pretty fast. 575 */ 576 void 577 addmac(char *mac) 578 { 579 char **src, **dest, **loc; 580 581 if (binsrch(mac) >= 0){ /* it's OK to redefine something */ 582 #ifdef DEBUG 583 printf("binsrch(%s) -> already in table\n", mac); 584 #endif /* DEBUG */ 585 return; 586 } 587 /* binsrch sets slot as a side effect */ 588 #ifdef DEBUG 589 printf("binsrch(%s) -> %d\n", mac, slot); 590 #endif 591 loc = &knowncmds[slot]; 592 src = &knowncmds[ncmds-1]; 593 dest = src+1; 594 while (dest > loc) 595 *dest-- = *src--; 596 *loc = malloc(3); 597 strcpy(*loc, mac); 598 ncmds++; 599 #ifdef DEBUG 600 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], 601 knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], 602 knowncmds[slot+2], ncmds); 603 #endif 604 } 605 606 /* 607 * Do a binary search in knowncmds for mac. 608 * If found, return the index. If not, return -1. 609 */ 610 int 611 binsrch(char *mac) 612 { 613 char *p; /* pointer to current cmd in list */ 614 int d; /* difference if any */ 615 int mid; /* mid point in binary search */ 616 int top, bot; /* boundaries of bin search, inclusive */ 617 618 top = ncmds-1; 619 bot = 0; 620 while (top >= bot) { 621 mid = (top+bot)/2; 622 p = knowncmds[mid]; 623 d = p[0] - mac[0]; 624 if (d == 0) 625 d = p[1] - mac[1]; 626 if (d == 0) 627 return mid; 628 if (d < 0) 629 bot = mid + 1; 630 else 631 top = mid - 1; 632 } 633 slot = bot; /* place it would have gone */ 634 return -1; 635 } 636