1 /* $NetBSD: parse.c,v 1.12 2001/12/07 13:37:39 bjh21 Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <sys/cdefs.h> 37 #ifndef lint 38 #if 0 39 static char sccsid[] = "@(#)parse.c 8.1 (Berkeley) 6/6/93"; 40 #else 41 __RCSID("$NetBSD: parse.c,v 1.12 2001/12/07 13:37:39 bjh21 Exp $"); 42 #endif 43 #endif /* not lint */ 44 45 #include <sys/types.h> 46 #include <sys/file.h> 47 48 #include <ctype.h> 49 #include <err.h> 50 #include <errno.h> 51 #include <fcntl.h> 52 #include <stdio.h> 53 #include <stdlib.h> 54 #include <string.h> 55 56 #include "hexdump.h" 57 58 FU *endfu; /* format at end-of-data */ 59 60 void 61 addfile(name) 62 char *name; 63 { 64 char *p; 65 FILE *fp; 66 int ch; 67 char buf[2048 + 1]; 68 69 if ((fp = fopen(name, "r")) == NULL) 70 err(1, "fopen %s", name); 71 while (fgets(buf, sizeof(buf), fp)) { 72 if (!(p = strchr(buf, '\n'))) { 73 warnx("line too long."); 74 while ((ch = getchar()) != '\n' && ch != EOF); 75 continue; 76 } 77 *p = '\0'; 78 for (p = buf; *p && isspace((unsigned char)*p); ++p); 79 if (!*p || *p == '#') 80 continue; 81 add(p); 82 } 83 (void)fclose(fp); 84 } 85 86 void 87 add(fmt) 88 const char *fmt; 89 { 90 const char *p; 91 static FS **nextfs; 92 FS *tfs; 93 FU *tfu, **nextfu; 94 const char *savep; 95 96 /* start new linked list of format units */ 97 tfs = emalloc(sizeof(FS)); 98 if (!fshead) 99 fshead = tfs; 100 else 101 *nextfs = tfs; 102 nextfs = &tfs->nextfs; 103 nextfu = &tfs->nextfu; 104 105 /* take the format string and break it up into format units */ 106 for (p = fmt;;) { 107 /* skip leading white space */ 108 for (; isspace((unsigned char)*p); ++p); 109 if (!*p) 110 break; 111 112 /* allocate a new format unit and link it in */ 113 tfu = emalloc(sizeof(FU)); 114 *nextfu = tfu; 115 nextfu = &tfu->nextfu; 116 tfu->reps = 1; 117 118 /* if leading digit, repetition count */ 119 if (isdigit((unsigned char)*p)) { 120 for (savep = p; isdigit((unsigned char)*p); ++p); 121 if (!isspace((unsigned char)*p) && *p != '/') 122 badfmt(fmt); 123 /* may overwrite either white space or slash */ 124 tfu->reps = atoi(savep); 125 tfu->flags = F_SETREP; 126 /* skip trailing white space */ 127 for (++p; isspace((unsigned char)*p); ++p); 128 } 129 130 /* skip slash and trailing white space */ 131 if (*p == '/') 132 while (isspace((unsigned char)*++p)); 133 134 /* byte count */ 135 if (isdigit((unsigned char)*p)) { 136 for (savep = p; isdigit((unsigned char)*p); ++p); 137 if (!isspace((unsigned char)*p)) 138 badfmt(fmt); 139 tfu->bcnt = atoi(savep); 140 /* skip trailing white space */ 141 for (++p; isspace((unsigned char)*p); ++p); 142 } 143 144 /* format */ 145 if (*p != '"') 146 badfmt(fmt); 147 for (savep = ++p; *p != '"';) 148 if (*p++ == 0) 149 badfmt(fmt); 150 if (!(tfu->fmt = malloc(p - savep + 1))) 151 nomem(); 152 (void) strncpy(tfu->fmt, savep, p - savep); 153 tfu->fmt[p - savep] = '\0'; 154 escape(tfu->fmt); 155 p++; 156 } 157 } 158 159 static const char *spec = ".#-+ 0123456789"; 160 161 int 162 size(fs) 163 FS *fs; 164 { 165 FU *fu; 166 int bcnt, cursize; 167 char *fmt; 168 int prec; 169 170 /* figure out the data block size needed for each format unit */ 171 for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) { 172 if (fu->bcnt) { 173 cursize += fu->bcnt * fu->reps; 174 continue; 175 } 176 for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) { 177 if (*fmt != '%') 178 continue; 179 /* 180 * skip any special chars -- save precision in 181 * case it's a %s format. 182 */ 183 while (strchr(spec + 1, *++fmt)); 184 if (*fmt == '.' && isdigit((unsigned char)*++fmt)) { 185 prec = atoi(fmt); 186 while (isdigit((unsigned char)*++fmt)); 187 } 188 switch(*fmt) { 189 case 'c': 190 bcnt += 1; 191 break; 192 case 'd': case 'i': case 'o': case 'u': 193 case 'x': case 'X': 194 bcnt += 4; 195 break; 196 case 'e': case 'E': case 'f': case 'g': case 'G': 197 bcnt += 8; 198 break; 199 case 's': 200 bcnt += prec; 201 break; 202 case '_': 203 switch(*++fmt) { 204 case 'c': case 'p': case 'u': 205 bcnt += 1; 206 break; 207 } 208 } 209 } 210 cursize += bcnt * fu->reps; 211 } 212 return (cursize); 213 } 214 215 void 216 rewrite(fs) 217 FS *fs; 218 { 219 enum { NOTOKAY, USEBCNT, USEPREC } sokay; 220 PR *pr, **nextpr; 221 FU *fu; 222 char *p1, *p2; 223 char savech, *fmtp, cs[3]; 224 int nconv, prec; 225 226 nextpr = NULL; 227 prec = 0; 228 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 229 /* 230 * Break each format unit into print units; each conversion 231 * character gets its own. 232 */ 233 for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) { 234 pr = emalloc(sizeof(PR)); 235 if (!fu->nextpr) 236 fu->nextpr = pr; 237 else 238 *nextpr = pr; 239 240 /* Skip preceding text and up to the next % sign. */ 241 for (p1 = fmtp; *p1 && *p1 != '%'; ++p1); 242 243 /* Only text in the string. */ 244 if (!*p1) { 245 pr->fmt = fmtp; 246 pr->flags = F_TEXT; 247 break; 248 } 249 250 /* 251 * Get precision for %s -- if have a byte count, don't 252 * need it. 253 */ 254 if (fu->bcnt) { 255 sokay = USEBCNT; 256 /* Skip to conversion character. */ 257 for (++p1; strchr(spec, *p1); ++p1); 258 } else { 259 /* Skip any special chars, field width. */ 260 while (strchr(spec + 1, *++p1)); 261 if (*p1 == '.' && 262 isdigit((unsigned char)*++p1)) { 263 sokay = USEPREC; 264 prec = atoi(p1); 265 while (isdigit((unsigned char)*++p1)) 266 continue; 267 } else 268 sokay = NOTOKAY; 269 } 270 271 p2 = p1 + 1; /* Set end pointer. */ 272 cs[0] = *p1; /* Set conversion string. */ 273 cs[1] = '\0'; 274 275 /* 276 * Figure out the byte count for each conversion; 277 * rewrite the format as necessary, set up blank- 278 * padding for end of data. 279 */ 280 switch(cs[0]) { 281 case 'c': 282 pr->flags = F_CHAR; 283 switch(fu->bcnt) { 284 case 0: case 1: 285 pr->bcnt = 1; 286 break; 287 default: 288 p1[1] = '\0'; 289 badcnt(p1); 290 } 291 break; 292 case 'd': case 'i': 293 pr->flags = F_INT; 294 goto isint; 295 case 'o': case 'u': case 'x': case 'X': 296 pr->flags = F_UINT; 297 isint: cs[2] = '\0'; 298 cs[1] = cs[0]; 299 cs[0] = 'q'; 300 switch(fu->bcnt) { 301 case 0: case 4: 302 pr->bcnt = 4; 303 break; 304 case 1: 305 pr->bcnt = 1; 306 break; 307 case 2: 308 pr->bcnt = 2; 309 break; 310 case 8: 311 pr->bcnt = 8; 312 break; 313 default: 314 p1[1] = '\0'; 315 badcnt(p1); 316 } 317 break; 318 case 'e': case 'E': case 'f': case 'g': case 'G': 319 pr->flags = F_DBL; 320 switch(fu->bcnt) { 321 case 0: case 8: 322 pr->bcnt = 8; 323 break; 324 case 4: 325 pr->bcnt = 4; 326 break; 327 default: 328 p1[1] = '\0'; 329 badcnt(p1); 330 } 331 break; 332 case 's': 333 pr->flags = F_STR; 334 switch(sokay) { 335 case NOTOKAY: 336 badsfmt(); 337 case USEBCNT: 338 pr->bcnt = fu->bcnt; 339 break; 340 case USEPREC: 341 pr->bcnt = prec; 342 break; 343 } 344 break; 345 case '_': 346 ++p2; 347 switch(p1[1]) { 348 case 'A': 349 endfu = fu; 350 fu->flags |= F_IGNORE; 351 /* FALLTHROUGH */ 352 case 'a': 353 pr->flags = F_ADDRESS; 354 ++p2; 355 switch(p1[2]) { 356 case 'd': case 'o': case'x': 357 cs[0] = 'q'; 358 cs[1] = p1[2]; 359 cs[2] = '\0'; 360 break; 361 default: 362 p1[3] = '\0'; 363 badconv(p1); 364 } 365 break; 366 case 'c': 367 pr->flags = F_C; 368 /* cs[0] = 'c'; set in conv_c */ 369 goto isint2; 370 case 'p': 371 pr->flags = F_P; 372 cs[0] = 'c'; 373 goto isint2; 374 case 'u': 375 pr->flags = F_U; 376 /* cs[0] = 'c'; set in conv_u */ 377 isint2: switch(fu->bcnt) { 378 case 0: case 1: 379 pr->bcnt = 1; 380 break; 381 default: 382 p1[2] = '\0'; 383 badcnt(p1); 384 } 385 break; 386 default: 387 p1[2] = '\0'; 388 badconv(p1); 389 } 390 break; 391 default: 392 p1[1] = '\0'; 393 badconv(p1); 394 } 395 396 /* 397 * Copy to PR format string, set conversion character 398 * pointer, update original. 399 */ 400 savech = *p2; 401 p1[0] = '\0'; 402 pr->fmt = emalloc(strlen(fmtp) + strlen(cs) + 1); 403 (void)strcpy(pr->fmt, fmtp); 404 (void)strcat(pr->fmt, cs); 405 *p2 = savech; 406 pr->cchar = pr->fmt + (p1 - fmtp); 407 fmtp = p2; 408 409 /* Only one conversion character if byte count. */ 410 if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++) 411 errx(1, 412 "byte count with multiple conversion characters"); 413 } 414 /* 415 * If format unit byte count not specified, figure it out 416 * so can adjust rep count later. 417 */ 418 if (!fu->bcnt) 419 for (pr = fu->nextpr; pr; pr = pr->nextpr) 420 fu->bcnt += pr->bcnt; 421 } 422 /* 423 * If the format string interprets any data at all, and it's 424 * not the same as the blocksize, and its last format unit 425 * interprets any data at all, and has no iteration count, 426 * repeat it as necessary. 427 * 428 * If, rep count is greater than 1, no trailing whitespace 429 * gets output from the last iteration of the format unit. 430 */ 431 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 432 if (!fu->nextfu && fs->bcnt < blocksize && 433 !(fu->flags&F_SETREP) && fu->bcnt) 434 fu->reps += (blocksize - fs->bcnt) / fu->bcnt; 435 if (fu->reps > 1) { 436 for (pr = fu->nextpr;; pr = pr->nextpr) 437 if (!pr->nextpr) 438 break; 439 for (p1 = pr->fmt, p2 = NULL; *p1; ++p1) 440 p2 = isspace((unsigned char)*p1) ? p1 : NULL; 441 if (p2) 442 pr->nospace = p2; 443 } 444 } 445 #ifdef DEBUG 446 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 447 (void)printf("fmt:"); 448 for (pr = fu->nextpr; pr; pr = pr->nextpr) 449 (void)printf(" {%s}", pr->fmt); 450 (void)printf("\n"); 451 } 452 #endif 453 } 454 455 void 456 escape(p1) 457 char *p1; 458 { 459 char *p2; 460 461 /* alphabetic escape sequences have to be done in place */ 462 for (p2 = p1;; ++p1, ++p2) { 463 if (!*p1) { 464 *p2 = *p1; 465 break; 466 } 467 if (*p1 == '\\') 468 switch(*++p1) { 469 case 'a': 470 /* *p2 = '\a'; */ 471 *p2 = '\007'; 472 break; 473 case 'b': 474 *p2 = '\b'; 475 break; 476 case 'f': 477 *p2 = '\f'; 478 break; 479 case 'n': 480 *p2 = '\n'; 481 break; 482 case 'r': 483 *p2 = '\r'; 484 break; 485 case 't': 486 *p2 = '\t'; 487 break; 488 case 'v': 489 *p2 = '\v'; 490 break; 491 default: 492 *p2 = *p1; 493 break; 494 } 495 } 496 } 497 498 void 499 badcnt(s) 500 char *s; 501 { 502 errx(1, "%s: bad byte count", s); 503 } 504 505 void 506 badsfmt() 507 { 508 errx(1, "%%s: requires a precision or a byte count\n"); 509 } 510 511 void 512 badfmt(fmt) 513 const char *fmt; 514 { 515 errx(1, "\"%s\": bad format\n", fmt); 516 } 517 518 void 519 badconv(ch) 520 char *ch; 521 { 522 errx(1, "%%%s: bad conversion character\n", ch); 523 } 524