1 /* $OpenBSD: parse.c,v 1.23 2018/04/26 12:42:51 guenther Exp $ */ 2 /* $NetBSD: parse.c,v 1.12 2001/12/07 13:37:39 bjh21 Exp $ */ 3 4 /* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #include <ctype.h> 34 #include <err.h> 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <string.h> 38 39 #include "hexdump.h" 40 41 FU *endfu; /* format at end-of-data */ 42 43 static __dead void badcnt(char *); 44 static __dead void badconv(char *); 45 static __dead void badfmt(const char *); 46 static __dead void badsfmt(void); 47 static void escape(char *); 48 49 void 50 addfile(char *name) 51 { 52 FILE *fp; 53 size_t len; 54 char *buf, *lbuf, *p; 55 56 if ((fp = fopen(name, "r")) == NULL) 57 err(1, "fopen %s", name); 58 59 lbuf = NULL; 60 while ((buf = fgetln(fp, &len))) { 61 if (buf[len - 1] == '\n') 62 buf[len - 1] = '\0'; 63 else { 64 /* EOF without EOL, copy and add the NUL */ 65 if ((lbuf = malloc(len + 1)) == NULL) 66 err(1, NULL); 67 memcpy(lbuf, buf, len); 68 lbuf[len] = '\0'; 69 buf = lbuf; 70 } 71 for (p = buf; isspace((unsigned char)*p); ++p); 72 if (!*p || *p == '#') 73 continue; 74 add(p); 75 } 76 free(lbuf); 77 (void)fclose(fp); 78 } 79 80 void 81 add(const char *fmt) 82 { 83 const char *p; 84 static FS **nextfs; 85 FS *tfs; 86 FU *tfu, **nextfu; 87 const char *savep; 88 89 /* start new linked list of format units */ 90 if ((tfs = calloc(1, sizeof(FS))) == NULL) 91 err(1, NULL); 92 if (!fshead) 93 fshead = tfs; 94 else 95 *nextfs = tfs; 96 nextfs = &tfs->nextfs; 97 nextfu = &tfs->nextfu; 98 99 /* take the format string and break it up into format units */ 100 for (p = fmt;;) { 101 /* skip leading white space */ 102 for (; isspace((unsigned char)*p); ++p); 103 if (!*p) 104 break; 105 106 /* allocate a new format unit and link it in */ 107 if ((tfu = calloc(1, sizeof(FU))) == NULL) 108 err(1, NULL); 109 *nextfu = tfu; 110 nextfu = &tfu->nextfu; 111 tfu->reps = 1; 112 113 /* if leading digit, repetition count */ 114 if (isdigit((unsigned char)*p)) { 115 for (savep = p; isdigit((unsigned char)*p); ++p); 116 if (!isspace((unsigned char)*p) && *p != '/') 117 badfmt(fmt); 118 /* may overwrite either white space or slash */ 119 tfu->reps = atoi(savep); 120 tfu->flags = F_SETREP; 121 /* skip trailing white space */ 122 for (++p; isspace((unsigned char)*p); ++p); 123 } 124 125 /* skip slash and trailing white space */ 126 if (*p == '/') 127 while (isspace((unsigned char)*++p)); 128 129 /* byte count */ 130 if (isdigit((unsigned char)*p)) { 131 for (savep = p; isdigit((unsigned char)*p); ++p); 132 if (!isspace((unsigned char)*p)) 133 badfmt(fmt); 134 tfu->bcnt = atoi(savep); 135 /* skip trailing white space */ 136 for (++p; isspace((unsigned char)*p); ++p); 137 } 138 139 /* format */ 140 if (*p != '"') 141 badfmt(fmt); 142 for (savep = ++p; *p != '"';) 143 if (*p++ == 0) 144 badfmt(fmt); 145 if ((tfu->fmt = strndup(savep, p - savep)) == NULL) 146 err(1, NULL); 147 escape(tfu->fmt); 148 p++; 149 } 150 } 151 152 static const char *spec = ".#-+ 0123456789"; 153 154 int 155 size(FS *fs) 156 { 157 FU *fu; 158 int bcnt, cursize; 159 char *fmt; 160 int prec; 161 162 /* figure out the data block size needed for each format unit */ 163 for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) { 164 if (fu->bcnt) { 165 cursize += fu->bcnt * fu->reps; 166 continue; 167 } 168 for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) { 169 if (*fmt != '%') 170 continue; 171 /* 172 * skip any special chars -- save precision in 173 * case it's a %s format. 174 */ 175 while (*++fmt && strchr(spec + 1, *fmt)); 176 if (*fmt == '.' && isdigit((unsigned char)*++fmt)) { 177 prec = atoi(fmt); 178 while (isdigit((unsigned char)*++fmt)); 179 } 180 switch(*fmt) { 181 case 'c': 182 bcnt += 1; 183 break; 184 case 'd': case 'i': case 'o': case 'u': 185 case 'x': case 'X': 186 bcnt += 4; 187 break; 188 case 'e': case 'E': case 'f': case 'g': case 'G': 189 bcnt += 8; 190 break; 191 case 's': 192 bcnt += prec; 193 break; 194 case '_': 195 switch(*++fmt) { 196 case 'c': case 'p': case 'u': 197 bcnt += 1; 198 break; 199 } 200 } 201 } 202 cursize += bcnt * fu->reps; 203 } 204 return (cursize); 205 } 206 207 void 208 rewrite(FS *fs) 209 { 210 enum { NOTOKAY, USEBCNT, USEPREC } sokay; 211 PR *pr, **nextpr; 212 FU *fu; 213 char *p1, *p2; 214 char savech, *fmtp, cs[4]; 215 int nconv, prec; 216 217 nextpr = NULL; 218 prec = 0; 219 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 220 /* 221 * Break each format unit into print units; each conversion 222 * character gets its own. 223 */ 224 for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) { 225 if ((pr = calloc(1, sizeof(PR))) == NULL) 226 err(1, NULL); 227 if (!fu->nextpr) 228 fu->nextpr = pr; 229 else 230 *nextpr = pr; 231 232 /* Skip preceding text and up to the next % sign. */ 233 for (p1 = fmtp; *p1 && *p1 != '%'; ++p1); 234 235 /* Only text in the string. */ 236 if (!*p1) { 237 pr->fmt = fmtp; 238 pr->flags = F_TEXT; 239 break; 240 } 241 242 /* 243 * Get precision for %s -- if have a byte count, don't 244 * need it. 245 */ 246 if (fu->bcnt) { 247 sokay = USEBCNT; 248 /* Skip to conversion character. */ 249 for (++p1; *p1 && strchr(spec, *p1); ++p1); 250 } else { 251 /* Skip any special chars, field width. */ 252 while (*++p1 && strchr(spec + 1, *p1)); 253 if (*p1 == '.' && 254 isdigit((unsigned char)*++p1)) { 255 sokay = USEPREC; 256 prec = atoi(p1); 257 while (isdigit((unsigned char)*++p1)) 258 continue; 259 } else 260 sokay = NOTOKAY; 261 } 262 263 p2 = *p1 ? p1 + 1 : p1; /* Set end pointer. */ 264 cs[0] = *p1; /* Set conversion string. */ 265 cs[1] = '\0'; 266 267 /* 268 * Figure out the byte count for each conversion; 269 * rewrite the format as necessary, set up blank- 270 * padding for end of data. 271 */ 272 switch(cs[0]) { 273 case 'c': 274 pr->flags = F_CHAR; 275 switch(fu->bcnt) { 276 case 0: case 1: 277 pr->bcnt = 1; 278 break; 279 default: 280 p1[1] = '\0'; 281 badcnt(p1); 282 } 283 break; 284 case 'd': case 'i': 285 case 'o': case 'u': case 'x': case 'X': 286 if (cs[0] == 'd' || cs[0] == 'i') 287 pr->flags = F_INT; 288 else 289 pr->flags = F_UINT; 290 291 cs[3] = '\0'; 292 cs[2] = cs[0]; 293 cs[1] = 'l'; 294 cs[0] = 'l'; 295 switch(fu->bcnt) { 296 case 0: case 4: 297 pr->bcnt = 4; 298 break; 299 case 1: 300 pr->bcnt = 1; 301 break; 302 case 2: 303 pr->bcnt = 2; 304 break; 305 case 8: 306 pr->bcnt = 8; 307 break; 308 default: 309 p1[1] = '\0'; 310 badcnt(p1); 311 } 312 break; 313 case 'e': case 'E': case 'f': case 'g': case 'G': 314 pr->flags = F_DBL; 315 switch(fu->bcnt) { 316 case 0: case 8: 317 pr->bcnt = 8; 318 break; 319 case 4: 320 pr->bcnt = 4; 321 break; 322 default: 323 p1[1] = '\0'; 324 badcnt(p1); 325 } 326 break; 327 case 's': 328 pr->flags = F_STR; 329 switch(sokay) { 330 case NOTOKAY: 331 badsfmt(); 332 case USEBCNT: 333 pr->bcnt = fu->bcnt; 334 break; 335 case USEPREC: 336 pr->bcnt = prec; 337 break; 338 } 339 break; 340 case '_': 341 ++p2; 342 switch(p1[1]) { 343 case 'A': 344 endfu = fu; 345 fu->flags |= F_IGNORE; 346 /* FALLTHROUGH */ 347 case 'a': 348 pr->flags = F_ADDRESS; 349 ++p2; 350 switch(p1[2]) { 351 case 'd': case 'o': case'x': 352 cs[0] = 'l'; 353 cs[1] = 'l'; 354 cs[2] = p1[2]; 355 cs[3] = '\0'; 356 break; 357 default: 358 if (p1[2]) 359 p1[3] = '\0'; 360 badconv(p1); 361 } 362 break; 363 case 'c': 364 case 'p': 365 case 'u': 366 if (p1[1] == 'c') { 367 pr->flags = F_C; 368 /* cs[0] = 'c'; set in conv_c */ 369 } else if (p1[1] == 'p') { 370 pr->flags = F_P; 371 cs[0] = 'c'; 372 } else { 373 pr->flags = F_U; 374 /* cs[0] = 'c'; set in conv_u */ 375 } 376 377 switch(fu->bcnt) { 378 case 0: case 1: 379 pr->bcnt = 1; 380 break; 381 default: 382 p1[2] = '\0'; 383 badcnt(p1); 384 } 385 break; 386 default: 387 if (p1[1]) 388 p1[2] = '\0'; 389 badconv(p1); 390 } 391 break; 392 default: 393 if (cs[0]) 394 p1[1] = '\0'; 395 badconv(p1); 396 } 397 398 /* 399 * Copy to PR format string, set conversion character 400 * pointer, update original. 401 */ 402 savech = *p2; 403 p1[0] = '\0'; 404 if (asprintf(&pr->fmt, "%s%s", fmtp, cs) == -1) 405 err(1, NULL); 406 *p2 = savech; 407 pr->cchar = pr->fmt + (p1 - fmtp); 408 fmtp = p2; 409 410 /* Only one conversion character if byte count. */ 411 if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++) 412 errx(1, 413 "byte count with multiple conversion characters"); 414 } 415 /* 416 * If format unit byte count not specified, figure it out 417 * so can adjust rep count later. 418 */ 419 if (!fu->bcnt) 420 for (pr = fu->nextpr; pr; pr = pr->nextpr) 421 fu->bcnt += pr->bcnt; 422 } 423 /* 424 * If the format string interprets any data at all, and it's 425 * not the same as the blocksize, and its last format unit 426 * interprets any data at all, and has no iteration count, 427 * repeat it as necessary. 428 * 429 * If, rep count is greater than 1, no trailing whitespace 430 * gets output from the last iteration of the format unit. 431 */ 432 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 433 if (!fu->nextfu && fs->bcnt < blocksize && 434 !(fu->flags&F_SETREP) && fu->bcnt) 435 fu->reps += (blocksize - fs->bcnt) / fu->bcnt; 436 if (fu->reps > 1) { 437 if (!fu->nextpr) 438 break; 439 for (pr = fu->nextpr;; pr = pr->nextpr) 440 if (!pr->nextpr) 441 break; 442 for (p1 = pr->fmt, p2 = NULL; *p1; ++p1) 443 p2 = isspace((unsigned char)*p1) ? p1 : NULL; 444 if (p2) 445 pr->nospace = p2; 446 } 447 } 448 #ifdef DEBUG 449 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 450 (void)printf("fmt:"); 451 for (pr = fu->nextpr; pr; pr = pr->nextpr) 452 (void)printf(" {%s}", pr->fmt); 453 (void)printf("\n"); 454 } 455 #endif 456 } 457 458 static void 459 escape(char *p1) 460 { 461 char *p2; 462 463 /* alphabetic escape sequences have to be done in place */ 464 for (p2 = p1;; ++p1, ++p2) { 465 if (!*p1) { 466 *p2 = *p1; 467 break; 468 } 469 if (*p1 == '\\') { 470 switch(*++p1) { 471 case '\0': 472 *p2++ = '\\'; 473 *p2 = '\0'; 474 return; /* incomplete escape sequence */ 475 case 'a': 476 /* *p2 = '\a'; */ 477 *p2 = '\007'; 478 break; 479 case 'b': 480 *p2 = '\b'; 481 break; 482 case 'f': 483 *p2 = '\f'; 484 break; 485 case 'n': 486 *p2 = '\n'; 487 break; 488 case 'r': 489 *p2 = '\r'; 490 break; 491 case 't': 492 *p2 = '\t'; 493 break; 494 case 'v': 495 *p2 = '\v'; 496 break; 497 default: 498 *p2 = *p1; 499 break; 500 } 501 } else 502 *p2 = *p1; 503 } 504 } 505 506 static __dead void 507 badcnt(char *s) 508 { 509 errx(1, "%s: bad byte count", s); 510 } 511 512 static __dead void 513 badsfmt(void) 514 { 515 errx(1, "%%s: requires a precision or a byte count"); 516 } 517 518 static __dead void 519 badfmt(const char *fmt) 520 { 521 errx(1, "\"%s\": bad format", fmt); 522 } 523 524 static __dead void 525 badconv(char *ch) 526 { 527 errx(1, "%%%s: bad conversion character", ch); 528 } 529