/*	$OpenBSD: vfscanf.c,v 1.22 2009/04/05 19:29:28 martynas Exp $ */
/*-
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Chris Torek.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
32 */ 33 34 #include <ctype.h> 35 #include <inttypes.h> 36 #include <stdarg.h> 37 #include <stddef.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include "local.h" 41 42 #ifdef FLOATING_POINT 43 #include "floatio.h" 44 #endif 45 46 #define BUF 513 /* Maximum length of numeric string. */ 47 48 /* 49 * Flags used during conversion. 50 */ 51 #define LONG 0x00001 /* l: long or double */ 52 #define LONGDBL 0x00002 /* L: long double; unimplemented */ 53 #define SHORT 0x00004 /* h: short */ 54 #define SHORTSHORT 0x00008 /* hh: 8 bit integer */ 55 #define LLONG 0x00010 /* ll: long long (+ deprecated q: quad) */ 56 #define POINTER 0x00020 /* p: void * (as hex) */ 57 #define SIZEINT 0x00040 /* z: (signed) size_t */ 58 #define MAXINT 0x00080 /* j: intmax_t */ 59 #define PTRINT 0x00100 /* t: ptrdiff_t */ 60 #define NOSKIP 0x00200 /* [ or c: do not skip blanks */ 61 #define SUPPRESS 0x00400 /* *: suppress assignment */ 62 #define UNSIGNED 0x00800 /* %[oupxX] conversions */ 63 64 /* 65 * The following are used in numeric conversions only: 66 * SIGNOK, HAVESIGN, NDIGITS, DPTOK, and EXPOK are for floating point; 67 * SIGNOK, HAVESIGN, NDIGITS, PFXOK, and NZDIGITS are for integral. 68 */ 69 #define SIGNOK 0x01000 /* +/- is (still) legal */ 70 #define HAVESIGN 0x02000 /* sign detected */ 71 #define NDIGITS 0x04000 /* no digits detected */ 72 73 #define DPTOK 0x08000 /* (float) decimal point is still legal */ 74 #define EXPOK 0x10000 /* (float) exponent (e+3, etc) still legal */ 75 76 #define PFXOK 0x08000 /* 0x prefix is (still) legal */ 77 #define NZDIGITS 0x10000 /* no zero digits detected */ 78 79 /* 80 * Conversion types. 81 */ 82 #define CT_CHAR 0 /* %c conversion */ 83 #define CT_CCL 1 /* %[...] 
conversion */ 84 #define CT_STRING 2 /* %s conversion */ 85 #define CT_INT 3 /* integer, i.e., strtoimax or strtoumax */ 86 #define CT_FLOAT 4 /* floating, i.e., strtod */ 87 88 #define u_char unsigned char 89 #define u_long unsigned long 90 91 static u_char *__sccl(char *, u_char *); 92 93 #if !defined(VFSCANF) 94 #define VFSCANF vfscanf 95 #endif 96 97 /* 98 * vfscanf 99 */ 100 int 101 VFSCANF(FILE *fp, const char *fmt0, __va_list ap) 102 { 103 u_char *fmt = (u_char *)fmt0; 104 int c; /* character from format, or conversion */ 105 size_t width; /* field width, or 0 */ 106 char *p; /* points into all kinds of strings */ 107 int n; /* handy integer */ 108 int flags; /* flags as defined above */ 109 char *p0; /* saves original value of p when necessary */ 110 int nassigned; /* number of fields assigned */ 111 int nread; /* number of characters consumed from fp */ 112 int base; /* base argument to strtoimax/strtouimax */ 113 char ccltab[256]; /* character class table for %[...] */ 114 char buf[BUF]; /* buffer for numeric conversions */ 115 116 /* `basefix' is used to avoid `if' tests in the integer scanner */ 117 static short basefix[17] = 118 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 119 120 _SET_ORIENTATION(fp, -1); 121 122 nassigned = 0; 123 nread = 0; 124 base = 0; /* XXX just to keep gcc happy */ 125 for (;;) { 126 c = *fmt++; 127 if (c == 0) 128 return (nassigned); 129 if (isspace(c)) { 130 while ((fp->_r > 0 || __srefill(fp) == 0) && 131 isspace(*fp->_p)) 132 nread++, fp->_r--, fp->_p++; 133 continue; 134 } 135 if (c != '%') 136 goto literal; 137 width = 0; 138 flags = 0; 139 /* 140 * switch on the format. continue if done; 141 * break once format type is derived. 
142 */ 143 again: c = *fmt++; 144 switch (c) { 145 case '%': 146 literal: 147 if (fp->_r <= 0 && __srefill(fp)) 148 goto input_failure; 149 if (*fp->_p != c) 150 goto match_failure; 151 fp->_r--, fp->_p++; 152 nread++; 153 continue; 154 155 case '*': 156 flags |= SUPPRESS; 157 goto again; 158 case 'j': 159 flags |= MAXINT; 160 goto again; 161 case 'L': 162 flags |= LONGDBL; 163 goto again; 164 case 'h': 165 if (*fmt == 'h') { 166 fmt++; 167 flags |= SHORTSHORT; 168 } else { 169 flags |= SHORT; 170 } 171 goto again; 172 case 'l': 173 if (*fmt == 'l') { 174 fmt++; 175 flags |= LLONG; 176 } else { 177 flags |= LONG; 178 } 179 goto again; 180 case 'q': 181 flags |= LLONG; /* deprecated */ 182 goto again; 183 case 't': 184 flags |= PTRINT; 185 goto again; 186 case 'z': 187 flags |= SIZEINT; 188 goto again; 189 190 case '0': case '1': case '2': case '3': case '4': 191 case '5': case '6': case '7': case '8': case '9': 192 width = width * 10 + c - '0'; 193 goto again; 194 195 /* 196 * Conversions. 197 * Those marked `compat' are for 4.[123]BSD compatibility. 198 * 199 * (According to ANSI, E and X formats are supposed 200 * to the same as e and x. Sorry about that.) 
201 */ 202 case 'D': /* compat */ 203 flags |= LONG; 204 /* FALLTHROUGH */ 205 case 'd': 206 c = CT_INT; 207 base = 10; 208 break; 209 210 case 'i': 211 c = CT_INT; 212 base = 0; 213 break; 214 215 case 'O': /* compat */ 216 flags |= LONG; 217 /* FALLTHROUGH */ 218 case 'o': 219 c = CT_INT; 220 flags |= UNSIGNED; 221 base = 8; 222 break; 223 224 case 'u': 225 c = CT_INT; 226 flags |= UNSIGNED; 227 base = 10; 228 break; 229 230 case 'X': 231 case 'x': 232 flags |= PFXOK; /* enable 0x prefixing */ 233 c = CT_INT; 234 flags |= UNSIGNED; 235 base = 16; 236 break; 237 238 #ifdef FLOATING_POINT 239 case 'E': 240 case 'G': 241 case 'e': 242 case 'f': 243 case 'F': 244 case 'g': 245 c = CT_FLOAT; 246 break; 247 #endif 248 249 case 's': 250 c = CT_STRING; 251 break; 252 253 case '[': 254 fmt = __sccl(ccltab, fmt); 255 flags |= NOSKIP; 256 c = CT_CCL; 257 break; 258 259 case 'c': 260 flags |= NOSKIP; 261 c = CT_CHAR; 262 break; 263 264 case 'p': /* pointer format is like hex */ 265 flags |= POINTER | PFXOK; 266 c = CT_INT; 267 flags |= UNSIGNED; 268 base = 16; 269 break; 270 271 case 'n': 272 if (flags & SUPPRESS) 273 continue; 274 if (flags & SHORTSHORT) 275 *va_arg(ap, __signed char *) = nread; 276 else if (flags & SHORT) 277 *va_arg(ap, short *) = nread; 278 else if (flags & LONG) 279 *va_arg(ap, long *) = nread; 280 else if (flags & SIZEINT) 281 *va_arg(ap, ssize_t *) = nread; 282 else if (flags & PTRINT) 283 *va_arg(ap, ptrdiff_t *) = nread; 284 else if (flags & LLONG) 285 *va_arg(ap, long long *) = nread; 286 else if (flags & MAXINT) 287 *va_arg(ap, intmax_t *) = nread; 288 else 289 *va_arg(ap, int *) = nread; 290 continue; 291 292 /* 293 * Disgusting backwards compatibility hacks. XXX 294 */ 295 case '\0': /* compat */ 296 return (EOF); 297 298 default: /* compat */ 299 if (isupper(c)) 300 flags |= LONG; 301 c = CT_INT; 302 base = 10; 303 break; 304 } 305 306 /* 307 * We have a conversion that requires input. 
308 */ 309 if (fp->_r <= 0 && __srefill(fp)) 310 goto input_failure; 311 312 /* 313 * Consume leading white space, except for formats 314 * that suppress this. 315 */ 316 if ((flags & NOSKIP) == 0) { 317 while (isspace(*fp->_p)) { 318 nread++; 319 if (--fp->_r > 0) 320 fp->_p++; 321 else if (__srefill(fp)) 322 goto input_failure; 323 } 324 /* 325 * Note that there is at least one character in 326 * the buffer, so conversions that do not set NOSKIP 327 * ca no longer result in an input failure. 328 */ 329 } 330 331 /* 332 * Do the conversion. 333 */ 334 switch (c) { 335 336 case CT_CHAR: 337 /* scan arbitrary characters (sets NOSKIP) */ 338 if (width == 0) 339 width = 1; 340 if (flags & SUPPRESS) { 341 size_t sum = 0; 342 for (;;) { 343 if ((n = fp->_r) < width) { 344 sum += n; 345 width -= n; 346 fp->_p += n; 347 if (__srefill(fp)) { 348 if (sum == 0) 349 goto input_failure; 350 break; 351 } 352 } else { 353 sum += width; 354 fp->_r -= width; 355 fp->_p += width; 356 break; 357 } 358 } 359 nread += sum; 360 } else { 361 size_t r = fread((void *)va_arg(ap, char *), 1, 362 width, fp); 363 364 if (r == 0) 365 goto input_failure; 366 nread += r; 367 nassigned++; 368 } 369 break; 370 371 case CT_CCL: 372 /* scan a (nonempty) character class (sets NOSKIP) */ 373 if (width == 0) 374 width = (size_t)~0; /* `infinity' */ 375 /* take only those things in the class */ 376 if (flags & SUPPRESS) { 377 n = 0; 378 while (ccltab[*fp->_p]) { 379 n++, fp->_r--, fp->_p++; 380 if (--width == 0) 381 break; 382 if (fp->_r <= 0 && __srefill(fp)) { 383 if (n == 0) 384 goto input_failure; 385 break; 386 } 387 } 388 if (n == 0) 389 goto match_failure; 390 } else { 391 p0 = p = va_arg(ap, char *); 392 while (ccltab[*fp->_p]) { 393 fp->_r--; 394 *p++ = *fp->_p++; 395 if (--width == 0) 396 break; 397 if (fp->_r <= 0 && __srefill(fp)) { 398 if (p == p0) 399 goto input_failure; 400 break; 401 } 402 } 403 n = p - p0; 404 if (n == 0) 405 goto match_failure; 406 *p = '\0'; 407 nassigned++; 408 } 
409 nread += n; 410 break; 411 412 case CT_STRING: 413 /* like CCL, but zero-length string OK, & no NOSKIP */ 414 if (width == 0) 415 width = (size_t)~0; 416 if (flags & SUPPRESS) { 417 n = 0; 418 while (!isspace(*fp->_p)) { 419 n++, fp->_r--, fp->_p++; 420 if (--width == 0) 421 break; 422 if (fp->_r <= 0 && __srefill(fp)) 423 break; 424 } 425 nread += n; 426 } else { 427 p0 = p = va_arg(ap, char *); 428 while (!isspace(*fp->_p)) { 429 fp->_r--; 430 *p++ = *fp->_p++; 431 if (--width == 0) 432 break; 433 if (fp->_r <= 0 && __srefill(fp)) 434 break; 435 } 436 *p = '\0'; 437 nread += p - p0; 438 nassigned++; 439 } 440 continue; 441 442 case CT_INT: 443 /* scan an integer as if by strtoimax/strtoumax */ 444 #ifdef hardway 445 if (width == 0 || width > sizeof(buf) - 1) 446 width = sizeof(buf) - 1; 447 #else 448 /* size_t is unsigned, hence this optimisation */ 449 if (--width > sizeof(buf) - 2) 450 width = sizeof(buf) - 2; 451 width++; 452 #endif 453 flags |= SIGNOK | NDIGITS | NZDIGITS; 454 for (p = buf; width; width--) { 455 c = *fp->_p; 456 /* 457 * Switch on the character; `goto ok' 458 * if we accept it as a part of number. 459 */ 460 switch (c) { 461 462 /* 463 * The digit 0 is always legal, but is 464 * special. For %i conversions, if no 465 * digits (zero or nonzero) have been 466 * scanned (only signs), we will have 467 * base==0. In that case, we should set 468 * it to 8 and enable 0x prefixing. 469 * Also, if we have not scanned zero digits 470 * before this, do not turn off prefixing 471 * (someone else will turn it off if we 472 * have scanned any nonzero digits). 
473 */ 474 case '0': 475 if (base == 0) { 476 base = 8; 477 flags |= PFXOK; 478 } 479 if (flags & NZDIGITS) 480 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 481 else 482 flags &= ~(SIGNOK|PFXOK|NDIGITS); 483 goto ok; 484 485 /* 1 through 7 always legal */ 486 case '1': case '2': case '3': 487 case '4': case '5': case '6': case '7': 488 base = basefix[base]; 489 flags &= ~(SIGNOK | PFXOK | NDIGITS); 490 goto ok; 491 492 /* digits 8 and 9 ok iff decimal or hex */ 493 case '8': case '9': 494 base = basefix[base]; 495 if (base <= 8) 496 break; /* not legal here */ 497 flags &= ~(SIGNOK | PFXOK | NDIGITS); 498 goto ok; 499 500 /* letters ok iff hex */ 501 case 'A': case 'B': case 'C': 502 case 'D': case 'E': case 'F': 503 case 'a': case 'b': case 'c': 504 case 'd': case 'e': case 'f': 505 /* no need to fix base here */ 506 if (base <= 10) 507 break; /* not legal here */ 508 flags &= ~(SIGNOK | PFXOK | NDIGITS); 509 goto ok; 510 511 /* sign ok only as first character */ 512 case '+': case '-': 513 if (flags & SIGNOK) { 514 flags &= ~SIGNOK; 515 flags |= HAVESIGN; 516 goto ok; 517 } 518 break; 519 520 /* 521 * x ok iff flag still set and 2nd char (or 522 * 3rd char if we have a sign). 523 */ 524 case 'x': case 'X': 525 if ((flags & PFXOK) && p == 526 buf + 1 + !!(flags & HAVESIGN)) { 527 base = 16; /* if %i */ 528 flags &= ~PFXOK; 529 goto ok; 530 } 531 break; 532 } 533 534 /* 535 * If we got here, c is not a legal character 536 * for a number. Stop accumulating digits. 537 */ 538 break; 539 ok: 540 /* 541 * c is legal: store it and look at the next. 542 */ 543 *p++ = c; 544 if (--fp->_r > 0) 545 fp->_p++; 546 else if (__srefill(fp)) 547 break; /* EOF */ 548 } 549 /* 550 * If we had only a sign, it is no good; push 551 * back the sign. If the number ends in `x', 552 * it was [sign] '0' 'x', so push back the x 553 * and treat it as [sign] '0'. 
554 */ 555 if (flags & NDIGITS) { 556 if (p > buf) 557 (void) ungetc(*(u_char *)--p, fp); 558 goto match_failure; 559 } 560 c = ((u_char *)p)[-1]; 561 if (c == 'x' || c == 'X') { 562 --p; 563 (void) ungetc(c, fp); 564 } 565 if ((flags & SUPPRESS) == 0) { 566 uintmax_t res; 567 568 *p = '\0'; 569 if (flags & UNSIGNED) 570 res = strtoumax(buf, NULL, base); 571 else 572 res = strtoimax(buf, NULL, base); 573 if (flags & POINTER) 574 *va_arg(ap, void **) = 575 (void *)(uintptr_t)res; 576 else if (flags & MAXINT) 577 *va_arg(ap, intmax_t *) = res; 578 else if (flags & LLONG) 579 *va_arg(ap, long long *) = res; 580 else if (flags & SIZEINT) 581 *va_arg(ap, ssize_t *) = res; 582 else if (flags & PTRINT) 583 *va_arg(ap, ptrdiff_t *) = res; 584 else if (flags & LONG) 585 *va_arg(ap, long *) = res; 586 else if (flags & SHORT) 587 *va_arg(ap, short *) = res; 588 else if (flags & SHORTSHORT) 589 *va_arg(ap, __signed char *) = res; 590 else 591 *va_arg(ap, int *) = res; 592 nassigned++; 593 } 594 nread += p - buf; 595 break; 596 597 #ifdef FLOATING_POINT 598 case CT_FLOAT: 599 /* scan a floating point number as if by strtod */ 600 #ifdef hardway 601 if (width == 0 || width > sizeof(buf) - 1) 602 width = sizeof(buf) - 1; 603 #else 604 /* size_t is unsigned, hence this optimisation */ 605 if (--width > sizeof(buf) - 2) 606 width = sizeof(buf) - 2; 607 width++; 608 #endif 609 flags |= SIGNOK | NDIGITS | DPTOK | EXPOK; 610 for (p = buf; width; width--) { 611 c = *fp->_p; 612 /* 613 * This code mimicks the integer conversion 614 * code, but is much simpler. 
615 */ 616 switch (c) { 617 618 case '0': case '1': case '2': case '3': 619 case '4': case '5': case '6': case '7': 620 case '8': case '9': 621 flags &= ~(SIGNOK | NDIGITS); 622 goto fok; 623 624 case '+': case '-': 625 if (flags & SIGNOK) { 626 flags &= ~SIGNOK; 627 goto fok; 628 } 629 break; 630 case '.': 631 if (flags & DPTOK) { 632 flags &= ~(SIGNOK | DPTOK); 633 goto fok; 634 } 635 break; 636 case 'e': case 'E': 637 /* no exponent without some digits */ 638 if ((flags&(NDIGITS|EXPOK)) == EXPOK) { 639 flags = 640 (flags & ~(EXPOK|DPTOK)) | 641 SIGNOK | NDIGITS; 642 goto fok; 643 } 644 break; 645 } 646 break; 647 fok: 648 *p++ = c; 649 if (--fp->_r > 0) 650 fp->_p++; 651 else if (__srefill(fp)) 652 break; /* EOF */ 653 } 654 /* 655 * If no digits, might be missing exponent digits 656 * (just give back the exponent) or might be missing 657 * regular digits, but had sign and/or decimal point. 658 */ 659 if (flags & NDIGITS) { 660 if (flags & EXPOK) { 661 /* no digits at all */ 662 while (p > buf) 663 ungetc(*(u_char *)--p, fp); 664 goto match_failure; 665 } 666 /* just a bad exponent (e and maybe sign) */ 667 c = *(u_char *)--p; 668 if (c != 'e' && c != 'E') { 669 (void) ungetc(c, fp);/* sign */ 670 c = *(u_char *)--p; 671 } 672 (void) ungetc(c, fp); 673 } 674 if ((flags & SUPPRESS) == 0) { 675 double res; 676 677 *p = '\0'; 678 res = strtod(buf, (char **) NULL); 679 if (flags & LONGDBL) 680 *va_arg(ap, long double *) = res; 681 else if (flags & LONG) 682 *va_arg(ap, double *) = res; 683 else 684 *va_arg(ap, float *) = res; 685 nassigned++; 686 } 687 nread += p - buf; 688 break; 689 #endif /* FLOATING_POINT */ 690 } 691 } 692 input_failure: 693 return (nassigned ? nassigned : -1); 694 match_failure: 695 return (nassigned); 696 } 697 698 /* 699 * Fill in the given table from the scanset at the given format 700 * (just after `['). Return a pointer to the character past the 701 * closing `]'. 
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]'.  The table has a 1 wherever characters should be
 * considered part of the scanset.
 *
 * tab must have room for 256 entries, one per unsigned char value.
 * If the format ends before a closing `]' is seen, the returned
 * pointer addresses the terminating NUL instead.
 */
static unsigned char *
__sccl(char *tab, unsigned char *fmt)
{
	int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */
	for (n = 0; n < 256; n++)
		tab[n] = v;
	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The test must be `<=', not `<': with n == c the
			 * fill loop below would still store tab[c + 1],
			 * marking a character outside the scanset and,
			 * for c == 255, writing past the end of the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			do {		/* fill in the range (c+1 .. n) */
				tab[++c] = v;
			} while (c < n);
			/*
			 * XXX another disgusting compatibility hack:
			 * the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}