1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)parse.c 8.1 (Berkeley) 6/6/93 30 * $FreeBSD: src/usr.bin/hexdump/parse.c,v 1.4.2.1 2002/07/23 14:27:06 tjr Exp $ 31 */ 32 33 #include <sys/types.h> 34 35 #include <err.h> 36 #include <fcntl.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <ctype.h> 40 #include <string.h> 41 #include "hexdump.h" 42 43 FU *endfu; /* format at end-of-data */ 44 45 void 46 addfile(char *name) 47 { 48 unsigned char *p; 49 FILE *fp; 50 int ch; 51 char buf[2048 + 1]; 52 53 if ((fp = fopen(name, "r")) == NULL) 54 err(1, "%s", name); 55 while (fgets(buf, sizeof(buf), fp)) { 56 if (!(p = strchr(buf, '\n'))) { 57 warnx("line too long"); 58 while ((ch = getchar()) != '\n' && ch != EOF); 59 continue; 60 } 61 *p = '\0'; 62 for (p = buf; *p && isspace(*p); ++p); 63 if (!*p || *p == '#') 64 continue; 65 add(p); 66 } 67 (void)fclose(fp); 68 } 69 70 void 71 add(const char *fmt) 72 { 73 unsigned const char *p, *savep; 74 static FS **nextfs; 75 FS *tfs; 76 FU *tfu, **nextfu; 77 78 /* start new linked list of format units */ 79 if ((tfs = calloc(1, sizeof(FS))) == NULL) 80 err(1, NULL); 81 if (!fshead) 82 fshead = tfs; 83 else 84 *nextfs = tfs; 85 nextfs = &tfs->nextfs; 86 nextfu = &tfs->nextfu; 87 88 /* take the format string and break it up into format units */ 89 for (p = fmt;;) { 90 /* skip leading white space */ 91 for (; isspace(*p); ++p); 92 if (!*p) 93 break; 94 95 /* allocate a new format unit and link it in */ 96 if ((tfu = calloc(1, sizeof(FU))) == NULL) 97 err(1, NULL); 98 *nextfu = tfu; 99 nextfu = &tfu->nextfu; 100 tfu->reps = 1; 101 102 /* if leading digit, repetition count */ 103 if (isdigit(*p)) { 104 for (savep = p; isdigit(*p); ++p); 105 if (!isspace(*p) && *p != '/') 106 badfmt(fmt); 107 /* may overwrite either white space or slash */ 108 tfu->reps = atoi(savep); 109 tfu->flags = F_SETREP; 110 /* skip trailing white space */ 111 for (++p; isspace(*p); ++p); 112 } 113 114 /* skip slash and trailing white space */ 115 if (*p == '/') 116 while (isspace(*++p)); 117 118 /* byte count */ 119 if (isdigit(*p)) { 120 for (savep = p; isdigit(*p); ++p); 121 if (!isspace(*p)) 122 badfmt(fmt); 123 tfu->bcnt = atoi(savep); 124 /* skip trailing white space */ 125 for (++p; isspace(*p); ++p); 126 } 127 128 /* format */ 129 if (*p != '"') 130 badfmt(fmt); 131 for (savep = ++p; *p != '"';) 132 if (*p++ == 0) 133 badfmt(fmt); 134 if (!(tfu->fmt = malloc(p - savep + 1))) 135 err(1, NULL); 136 (void) strncpy(tfu->fmt, savep, p - savep); 137 tfu->fmt[p - savep] = '\0'; 138 escape(tfu->fmt); 139 p++; 140 } 141 } 142 143 static const char *spec = ".#-+ 0123456789"; 144 145 int 146 size(FS *fs) 147 { 148 FU *fu; 149 int bcnt, cursize; 150 unsigned char *fmt; 151 int prec; 152 153 /* figure out the data block size needed for each format unit */ 154 for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) { 155 if (fu->bcnt) { 156 cursize += fu->bcnt * fu->reps; 157 continue; 158 } 159 for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) { 160 if (*fmt != '%') 161 continue; 162 /* 163 * skip any special chars -- save precision in 164 * case it's a %s format. 165 */ 166 while (strchr(spec + 1, *++fmt)); 167 if (*fmt == '.' && isdigit(*++fmt)) { 168 prec = atoi(fmt); 169 while (isdigit(*++fmt)); 170 } 171 switch(*fmt) { 172 case 'c': 173 bcnt += 1; 174 break; 175 case 'd': case 'i': case 'o': case 'u': 176 case 'x': case 'X': 177 bcnt += 4; 178 break; 179 case 'e': case 'E': case 'f': case 'g': case 'G': 180 bcnt += 8; 181 break; 182 case 's': 183 bcnt += prec; 184 break; 185 case '_': 186 switch(*++fmt) { 187 case 'c': case 'p': case 'u': 188 bcnt += 1; 189 break; 190 } 191 } 192 } 193 cursize += bcnt * fu->reps; 194 } 195 return (cursize); 196 } 197 198 void 199 rewrite(FS *fs) 200 { 201 enum { NOTOKAY, USEBCNT, USEPREC } sokay; 202 PR *pr, **nextpr = NULL; 203 FU *fu; 204 unsigned char *p1, *p2, *fmtp; 205 char savech, cs[3]; 206 int nconv, prec = 0; 207 208 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 209 /* 210 * Break each format unit into print units; each conversion 211 * character gets its own. 212 */ 213 for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) { 214 if ((pr = calloc(1, sizeof(PR))) == NULL) 215 err(1, NULL); 216 if (!fu->nextpr) 217 fu->nextpr = pr; 218 else 219 *nextpr = pr; 220 221 /* Skip preceding text and up to the next % sign. */ 222 for (p1 = fmtp; *p1 && *p1 != '%'; ++p1); 223 224 /* Only text in the string. */ 225 if (!*p1) { 226 pr->fmt = fmtp; 227 pr->flags = F_TEXT; 228 break; 229 } 230 231 /* 232 * Get precision for %s -- if have a byte count, don't 233 * need it. 234 */ 235 if (fu->bcnt) { 236 sokay = USEBCNT; 237 /* Skip to conversion character. */ 238 for (++p1; strchr(spec, *p1); ++p1); 239 } else { 240 /* Skip any special chars, field width. */ 241 while (strchr(spec + 1, *++p1)); 242 if (*p1 == '.' && isdigit(*++p1)) { 243 sokay = USEPREC; 244 prec = atoi(p1); 245 while (isdigit(*++p1)); 246 } else 247 sokay = NOTOKAY; 248 } 249 250 p2 = p1 + 1; /* Set end pointer. */ 251 cs[0] = *p1; /* Set conversion string. */ 252 cs[1] = '\0'; 253 254 /* 255 * Figure out the byte count for each conversion; 256 * rewrite the format as necessary, set up blank- 257 * padding for end of data. 258 */ 259 switch(cs[0]) { 260 case 'c': 261 pr->flags = F_CHAR; 262 switch(fu->bcnt) { 263 case 0: case 1: 264 pr->bcnt = 1; 265 break; 266 default: 267 p1[1] = '\0'; 268 badcnt(p1); 269 } 270 break; 271 case 'd': case 'i': 272 pr->flags = F_INT; 273 goto isint; 274 case 'o': case 'u': case 'x': case 'X': 275 pr->flags = F_UINT; 276 isint: cs[2] = '\0'; 277 cs[1] = cs[0]; 278 cs[0] = 'q'; 279 switch(fu->bcnt) { 280 case 0: 281 pr->bcnt = 4; 282 break; 283 case 1: 284 case 2: 285 case 4: 286 case 8: 287 pr->bcnt = fu->bcnt; 288 break; 289 default: 290 p1[1] = '\0'; 291 badcnt(p1); 292 } 293 break; 294 case 'e': case 'E': case 'f': case 'g': case 'G': 295 pr->flags = F_DBL; 296 switch(fu->bcnt) { 297 case 0: case 8: 298 pr->bcnt = 8; 299 break; 300 case 4: 301 pr->bcnt = 4; 302 break; 303 default: 304 if (fu->bcnt == sizeof(long double)) { 305 cs[2] = '\0'; 306 cs[1] = cs[0]; 307 cs[0] = 'L'; 308 pr->bcnt = sizeof(long double); 309 } else { 310 p1[1] = '\0'; 311 badcnt(p1); 312 } 313 } 314 break; 315 case 's': 316 pr->flags = F_STR; 317 switch(sokay) { 318 case NOTOKAY: 319 badsfmt(); 320 case USEBCNT: 321 pr->bcnt = fu->bcnt; 322 break; 323 case USEPREC: 324 pr->bcnt = prec; 325 break; 326 } 327 break; 328 case '_': 329 ++p2; 330 switch(p1[1]) { 331 case 'A': 332 endfu = fu; 333 fu->flags |= F_IGNORE; 334 /* FALLTHROUGH */ 335 case 'a': 336 pr->flags = F_ADDRESS; 337 ++p2; 338 switch(p1[2]) { 339 case 'd': case 'o': case'x': 340 cs[0] = 'q'; 341 cs[1] = p1[2]; 342 cs[2] = '\0'; 343 break; 344 default: 345 p1[3] = '\0'; 346 badconv(p1); 347 } 348 break; 349 case 'c': 350 pr->flags = F_C; 351 /* cs[0] = 'c'; set in conv_c */ 352 goto isint2; 353 case 'p': 354 pr->flags = F_P; 355 cs[0] = 'c'; 356 goto isint2; 357 case 'u': 358 pr->flags = F_U; 359 /* cs[0] = 'c'; set in conv_u */ 360 isint2: switch(fu->bcnt) { 361 case 0: case 1: 362 pr->bcnt = 1; 363 break; 364 default: 365 p1[2] = '\0'; 366 badcnt(p1); 367 } 368 break; 369 default: 370 p1[2] = '\0'; 371 badconv(p1); 372 } 373 break; 374 default: 375 p1[1] = '\0'; 376 badconv(p1); 377 } 378 379 /* 380 * Copy to PR format string, set conversion character 381 * pointer, update original. 382 */ 383 savech = *p2; 384 p1[0] = '\0'; 385 if ((pr->fmt = calloc(1, strlen(fmtp) + 2)) == NULL) 386 err(1, NULL); 387 (void)strcpy(pr->fmt, fmtp); 388 (void)strcat(pr->fmt, cs); 389 *p2 = savech; 390 pr->cchar = pr->fmt + (p1 - fmtp); 391 fmtp = p2; 392 393 /* Only one conversion character if byte count. */ 394 if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++) { 395 errx(1, "byte count with multiple " 396 "conversion characters"); 397 } 398 } 399 /* 400 * If format unit byte count not specified, figure it out 401 * so can adjust rep count later. 402 */ 403 if (!fu->bcnt) 404 for (pr = fu->nextpr; pr; pr = pr->nextpr) 405 fu->bcnt += pr->bcnt; 406 } 407 /* 408 * If the format string interprets any data at all, and it's 409 * not the same as the blocksize, and its last format unit 410 * interprets any data at all, and has no iteration count, 411 * repeat it as necessary. 412 * 413 * If, rep count is greater than 1, no trailing whitespace 414 * gets output from the last iteration of the format unit. 415 */ 416 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 417 if (!fu->nextfu && fs->bcnt < blocksize && 418 !(fu->flags&F_SETREP) && fu->bcnt) 419 fu->reps += (blocksize - fs->bcnt) / fu->bcnt; 420 if (fu->reps > 1) { 421 for (pr = fu->nextpr;; pr = pr->nextpr) 422 if (!pr->nextpr) 423 break; 424 for (p1 = pr->fmt, p2 = NULL; *p1; ++p1) 425 p2 = isspace(*p1) ? p1 : NULL; 426 if (p2) 427 pr->nospace = p2; 428 } 429 } 430 #ifdef DEBUG 431 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 432 (void)printf("fmt:"); 433 for (pr = fu->nextpr; pr; pr = pr->nextpr) 434 (void)printf(" {%s}", pr->fmt); 435 (void)printf("\n"); 436 } 437 #endif 438 } 439 440 void 441 escape(char *p1) 442 { 443 char *p2; 444 445 /* alphabetic escape sequences have to be done in place */ 446 for (p2 = p1;; ++p1, ++p2) { 447 if (!*p1) { 448 *p2 = *p1; 449 break; 450 } 451 if (*p1 == '\\') 452 switch(*++p1) { 453 case 'a': 454 /* *p2 = '\a'; */ 455 *p2 = '\007'; 456 break; 457 case 'b': 458 *p2 = '\b'; 459 break; 460 case 'f': 461 *p2 = '\f'; 462 break; 463 case 'n': 464 *p2 = '\n'; 465 break; 466 case 'r': 467 *p2 = '\r'; 468 break; 469 case 't': 470 *p2 = '\t'; 471 break; 472 case 'v': 473 *p2 = '\v'; 474 break; 475 default: 476 *p2 = *p1; 477 break; 478 } 479 } 480 } 481 482 void 483 badcnt(char *s) 484 { 485 errx(1, "%s: bad byte count", s); 486 } 487 488 void 489 badsfmt(void) 490 { 491 errx(1, "%%s: requires a precision or a byte count"); 492 } 493 494 void 495 badfmt(const char *fmt) 496 { 497 errx(1, "\"%s\": bad format", fmt); 498 } 499 500 void 501 badconv(char *ch) 502 { 503 errx(1, "%%%s: bad conversion character", ch); 504 } 505