1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)parse.c 8.1 (Berkeley) 6/6/93 30 * $FreeBSD: src/usr.bin/hexdump/parse.c,v 1.4.2.1 2002/07/23 14:27:06 tjr Exp $ 31 * $DragonFly: src/usr.bin/hexdump/parse.c,v 1.6 2005/04/10 20:55:38 drhodus Exp $ 32 */ 33 34 #include <sys/types.h> 35 36 #include <err.h> 37 #include <fcntl.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <ctype.h> 41 #include <string.h> 42 #include "hexdump.h" 43 44 FU *endfu; /* format at end-of-data */ 45 46 void 47 addfile(char *name) 48 { 49 unsigned char *p; 50 FILE *fp; 51 int ch; 52 char buf[2048 + 1]; 53 54 if ((fp = fopen(name, "r")) == NULL) 55 err(1, "%s", name); 56 while (fgets(buf, sizeof(buf), fp)) { 57 if (!(p = strchr(buf, '\n'))) { 58 warnx("line too long"); 59 while ((ch = getchar()) != '\n' && ch != EOF); 60 continue; 61 } 62 *p = '\0'; 63 for (p = buf; *p && isspace(*p); ++p); 64 if (!*p || *p == '#') 65 continue; 66 add(p); 67 } 68 (void)fclose(fp); 69 } 70 71 void 72 add(const char *fmt) 73 { 74 unsigned const char *p, *savep; 75 static FS **nextfs; 76 FS *tfs; 77 FU *tfu, **nextfu; 78 79 /* start new linked list of format units */ 80 if ((tfs = calloc(1, sizeof(FS))) == NULL) 81 err(1, NULL); 82 if (!fshead) 83 fshead = tfs; 84 else 85 *nextfs = tfs; 86 nextfs = &tfs->nextfs; 87 nextfu = &tfs->nextfu; 88 89 /* take the format string and break it up into format units */ 90 for (p = fmt;;) { 91 /* skip leading white space */ 92 for (; isspace(*p); ++p); 93 if (!*p) 94 break; 95 96 /* allocate a new format unit and link it in */ 97 if ((tfu = calloc(1, sizeof(FU))) == NULL) 98 err(1, NULL); 99 *nextfu = tfu; 100 nextfu = &tfu->nextfu; 101 tfu->reps = 1; 102 103 /* if leading digit, repetition count */ 104 if (isdigit(*p)) { 105 for (savep = p; isdigit(*p); ++p); 106 if (!isspace(*p) && *p != '/') 107 badfmt(fmt); 108 /* may overwrite either white space or slash */ 109 tfu->reps = atoi(savep); 110 tfu->flags = F_SETREP; 111 /* skip trailing white space */ 112 for (++p; isspace(*p); ++p); 113 } 114 115 /* skip slash and trailing white space */ 116 if (*p == '/') 117 while (isspace(*++p)); 118 119 /* byte count */ 120 if (isdigit(*p)) { 121 for (savep = p; isdigit(*p); ++p); 122 if (!isspace(*p)) 123 badfmt(fmt); 124 tfu->bcnt = atoi(savep); 125 /* skip trailing white space */ 126 for (++p; isspace(*p); ++p); 127 } 128 129 /* format */ 130 if (*p != '"') 131 badfmt(fmt); 132 for (savep = ++p; *p != '"';) 133 if (*p++ == 0) 134 badfmt(fmt); 135 if (!(tfu->fmt = malloc(p - savep + 1))) 136 err(1, NULL); 137 (void) strncpy(tfu->fmt, savep, p - savep); 138 tfu->fmt[p - savep] = '\0'; 139 escape(tfu->fmt); 140 p++; 141 } 142 } 143 144 static const char *spec = ".#-+ 0123456789"; 145 146 int 147 size(FS *fs) 148 { 149 FU *fu; 150 int bcnt, cursize; 151 unsigned char *fmt; 152 int prec; 153 154 /* figure out the data block size needed for each format unit */ 155 for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) { 156 if (fu->bcnt) { 157 cursize += fu->bcnt * fu->reps; 158 continue; 159 } 160 for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) { 161 if (*fmt != '%') 162 continue; 163 /* 164 * skip any special chars -- save precision in 165 * case it's a %s format. 166 */ 167 while (strchr(spec + 1, *++fmt)); 168 if (*fmt == '.' && isdigit(*++fmt)) { 169 prec = atoi(fmt); 170 while (isdigit(*++fmt)); 171 } 172 switch(*fmt) { 173 case 'c': 174 bcnt += 1; 175 break; 176 case 'd': case 'i': case 'o': case 'u': 177 case 'x': case 'X': 178 bcnt += 4; 179 break; 180 case 'e': case 'E': case 'f': case 'g': case 'G': 181 bcnt += 8; 182 break; 183 case 's': 184 bcnt += prec; 185 break; 186 case '_': 187 switch(*++fmt) { 188 case 'c': case 'p': case 'u': 189 bcnt += 1; 190 break; 191 } 192 } 193 } 194 cursize += bcnt * fu->reps; 195 } 196 return (cursize); 197 } 198 199 void 200 rewrite(FS *fs) 201 { 202 enum { NOTOKAY, USEBCNT, USEPREC } sokay; 203 PR *pr, **nextpr = NULL; 204 FU *fu; 205 unsigned char *p1, *p2, *fmtp; 206 char savech, cs[3]; 207 int nconv, prec = 0; 208 209 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 210 /* 211 * Break each format unit into print units; each conversion 212 * character gets its own. 213 */ 214 for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) { 215 if ((pr = calloc(1, sizeof(PR))) == NULL) 216 err(1, NULL); 217 if (!fu->nextpr) 218 fu->nextpr = pr; 219 else 220 *nextpr = pr; 221 222 /* Skip preceding text and up to the next % sign. */ 223 for (p1 = fmtp; *p1 && *p1 != '%'; ++p1); 224 225 /* Only text in the string. */ 226 if (!*p1) { 227 pr->fmt = fmtp; 228 pr->flags = F_TEXT; 229 break; 230 } 231 232 /* 233 * Get precision for %s -- if have a byte count, don't 234 * need it. 235 */ 236 if (fu->bcnt) { 237 sokay = USEBCNT; 238 /* Skip to conversion character. */ 239 for (++p1; strchr(spec, *p1); ++p1); 240 } else { 241 /* Skip any special chars, field width. */ 242 while (strchr(spec + 1, *++p1)); 243 if (*p1 == '.' && isdigit(*++p1)) { 244 sokay = USEPREC; 245 prec = atoi(p1); 246 while (isdigit(*++p1)); 247 } else 248 sokay = NOTOKAY; 249 } 250 251 p2 = p1 + 1; /* Set end pointer. */ 252 cs[0] = *p1; /* Set conversion string. */ 253 cs[1] = '\0'; 254 255 /* 256 * Figure out the byte count for each conversion; 257 * rewrite the format as necessary, set up blank- 258 * padding for end of data. 259 */ 260 switch(cs[0]) { 261 case 'c': 262 pr->flags = F_CHAR; 263 switch(fu->bcnt) { 264 case 0: case 1: 265 pr->bcnt = 1; 266 break; 267 default: 268 p1[1] = '\0'; 269 badcnt(p1); 270 } 271 break; 272 case 'd': case 'i': 273 pr->flags = F_INT; 274 goto isint; 275 case 'o': case 'u': case 'x': case 'X': 276 pr->flags = F_UINT; 277 isint: cs[2] = '\0'; 278 cs[1] = cs[0]; 279 cs[0] = 'q'; 280 switch(fu->bcnt) { 281 case 0: case 4: 282 pr->bcnt = 4; 283 break; 284 case 1: 285 pr->bcnt = 1; 286 break; 287 case 2: 288 pr->bcnt = 2; 289 break; 290 default: 291 p1[1] = '\0'; 292 badcnt(p1); 293 } 294 break; 295 case 'e': case 'E': case 'f': case 'g': case 'G': 296 pr->flags = F_DBL; 297 switch(fu->bcnt) { 298 case 0: case 8: 299 pr->bcnt = 8; 300 break; 301 case 4: 302 pr->bcnt = 4; 303 break; 304 default: 305 if (fu->bcnt == sizeof(long double)) { 306 cs[2] = '\0'; 307 cs[1] = cs[0]; 308 cs[0] = 'L'; 309 pr->bcnt = sizeof(long double); 310 } else { 311 p1[1] = '\0'; 312 badcnt(p1); 313 } 314 } 315 break; 316 case 's': 317 pr->flags = F_STR; 318 switch(sokay) { 319 case NOTOKAY: 320 badsfmt(); 321 case USEBCNT: 322 pr->bcnt = fu->bcnt; 323 break; 324 case USEPREC: 325 pr->bcnt = prec; 326 break; 327 } 328 break; 329 case '_': 330 ++p2; 331 switch(p1[1]) { 332 case 'A': 333 endfu = fu; 334 fu->flags |= F_IGNORE; 335 /* FALLTHROUGH */ 336 case 'a': 337 pr->flags = F_ADDRESS; 338 ++p2; 339 switch(p1[2]) { 340 case 'd': case 'o': case'x': 341 cs[0] = 'q'; 342 cs[1] = p1[2]; 343 cs[2] = '\0'; 344 break; 345 default: 346 p1[3] = '\0'; 347 badconv(p1); 348 } 349 break; 350 case 'c': 351 pr->flags = F_C; 352 /* cs[0] = 'c'; set in conv_c */ 353 goto isint2; 354 case 'p': 355 pr->flags = F_P; 356 cs[0] = 'c'; 357 goto isint2; 358 case 'u': 359 pr->flags = F_U; 360 /* cs[0] = 'c'; set in conv_u */ 361 isint2: switch(fu->bcnt) { 362 case 0: case 1: 363 pr->bcnt = 1; 364 break; 365 default: 366 p1[2] = '\0'; 367 badcnt(p1); 368 } 369 break; 370 default: 371 p1[2] = '\0'; 372 badconv(p1); 373 } 374 break; 375 default: 376 p1[1] = '\0'; 377 badconv(p1); 378 } 379 380 /* 381 * Copy to PR format string, set conversion character 382 * pointer, update original. 383 */ 384 savech = *p2; 385 p1[0] = '\0'; 386 if ((pr->fmt = calloc(1, strlen(fmtp) + 2)) == NULL) 387 err(1, NULL); 388 (void)strcpy(pr->fmt, fmtp); 389 (void)strcat(pr->fmt, cs); 390 *p2 = savech; 391 pr->cchar = pr->fmt + (p1 - fmtp); 392 fmtp = p2; 393 394 /* Only one conversion character if byte count. */ 395 if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++) 396 errx(1, "byte count with multiple conversion characters"); 397 } 398 /* 399 * If format unit byte count not specified, figure it out 400 * so can adjust rep count later. 401 */ 402 if (!fu->bcnt) 403 for (pr = fu->nextpr; pr; pr = pr->nextpr) 404 fu->bcnt += pr->bcnt; 405 } 406 /* 407 * If the format string interprets any data at all, and it's 408 * not the same as the blocksize, and its last format unit 409 * interprets any data at all, and has no iteration count, 410 * repeat it as necessary. 411 * 412 * If, rep count is greater than 1, no trailing whitespace 413 * gets output from the last iteration of the format unit. 414 */ 415 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 416 if (!fu->nextfu && fs->bcnt < blocksize && 417 !(fu->flags&F_SETREP) && fu->bcnt) 418 fu->reps += (blocksize - fs->bcnt) / fu->bcnt; 419 if (fu->reps > 1) { 420 for (pr = fu->nextpr;; pr = pr->nextpr) 421 if (!pr->nextpr) 422 break; 423 for (p1 = pr->fmt, p2 = NULL; *p1; ++p1) 424 p2 = isspace(*p1) ? p1 : NULL; 425 if (p2) 426 pr->nospace = p2; 427 } 428 } 429 #ifdef DEBUG 430 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 431 (void)printf("fmt:"); 432 for (pr = fu->nextpr; pr; pr = pr->nextpr) 433 (void)printf(" {%s}", pr->fmt); 434 (void)printf("\n"); 435 } 436 #endif 437 } 438 439 void 440 escape(char *p1) 441 { 442 char *p2; 443 444 /* alphabetic escape sequences have to be done in place */ 445 for (p2 = p1;; ++p1, ++p2) { 446 if (!*p1) { 447 *p2 = *p1; 448 break; 449 } 450 if (*p1 == '\\') 451 switch(*++p1) { 452 case 'a': 453 /* *p2 = '\a'; */ 454 *p2 = '\007'; 455 break; 456 case 'b': 457 *p2 = '\b'; 458 break; 459 case 'f': 460 *p2 = '\f'; 461 break; 462 case 'n': 463 *p2 = '\n'; 464 break; 465 case 'r': 466 *p2 = '\r'; 467 break; 468 case 't': 469 *p2 = '\t'; 470 break; 471 case 'v': 472 *p2 = '\v'; 473 break; 474 default: 475 *p2 = *p1; 476 break; 477 } 478 } 479 } 480 481 void 482 badcnt(char *s) 483 { 484 errx(1, "%s: bad byte count", s); 485 } 486 487 void 488 badsfmt(void) 489 { 490 errx(1, "%%s: requires a precision or a byte count"); 491 } 492 493 void 494 badfmt(const char *fmt) 495 { 496 errx(1, "\"%s\": bad format", fmt); 497 } 498 499 void 500 badconv(char *ch) 501 { 502 errx(1, "%%%s: bad conversion character", ch); 503 } 504