1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Chris Torek. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)vfscanf.c 5.6 (Berkeley) 02/24/91"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <ctype.h> 18 #if __STDC__ 19 #include <stdarg.h> 20 #else 21 #include <varargs.h> 22 #endif 23 #include "local.h" 24 25 #define FLOATING_POINT 26 27 #ifdef FLOATING_POINT 28 #include "floatio.h" 29 #define BUF (MAXEXP+MAXFRACT+3) /* 3 = sign + decimal point + NUL */ 30 #else 31 #define BUF 40 32 #endif 33 34 /* 35 * Flags used during conversion. 36 */ 37 #define LONG 0x01 /* l: long or double */ 38 #define LONGDBL 0x02 /* L: long double; unimplemented */ 39 #define SHORT 0x04 /* h: short */ 40 #define SUPPRESS 0x08 /* suppress assignment */ 41 #define POINTER 0x10 /* weird %p pointer (`fake hex') */ 42 #define NOSKIP 0x20 /* do not skip blanks */ 43 44 /* 45 * The following are used in numeric conversions only: 46 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 47 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 48 */ 49 #define SIGNOK 0x40 /* +/- is (still) legal */ 50 #define NDIGITS 0x80 /* no digits detected */ 51 52 #define DPTOK 0x100 /* (float) decimal point is still legal */ 53 #define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ 54 55 #define PFXOK 0x100 /* 0x prefix is (still) legal */ 56 #define NZDIGITS 0x200 /* no zero digits detected */ 57 58 /* 59 * Conversion types. 60 */ 61 #define CT_CHAR 0 /* %c conversion */ 62 #define CT_CCL 1 /* %[...] 
conversion */ 63 #define CT_STRING 2 /* %s conversion */ 64 #define CT_INT 3 /* integer, i.e., strtol or strtoul */ 65 #define CT_FLOAT 4 /* floating, i.e., strtod */ 66 67 #define u_char unsigned char 68 #define u_long unsigned long 69 70 static u_char *__sccl(); 71 72 /* 73 * vfscanf 74 */ 75 __svfscanf(fp, fmt0, ap) 76 register FILE *fp; 77 char const *fmt0; 78 va_list ap; 79 { 80 register u_char *fmt = (u_char *)fmt0; 81 register int c; /* character from format, or conversion */ 82 register size_t width; /* field width, or 0 */ 83 register char *p; /* points into all kinds of strings */ 84 register int n; /* handy integer */ 85 register int flags; /* flags as defined above */ 86 register char *p0; /* saves original value of p when necessary */ 87 int nassigned; /* number of fields assigned */ 88 int nread; /* number of characters consumed from fp */ 89 int base; /* base argument to strtol/strtoul */ 90 u_long (*ccfn)(); /* conversion function (strtol/strtoul) */ 91 char ccltab[256]; /* character class table for %[...] */ 92 char buf[BUF]; /* buffer for numeric conversions */ 93 94 /* `basefix' is used to avoid `if' tests in the integer scanner */ 95 static short basefix[17] = 96 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 97 98 nassigned = 0; 99 nread = 0; 100 base = 0; /* XXX just to keep gcc happy */ 101 ccfn = NULL; /* XXX just to keep gcc happy */ 102 for (;;) { 103 c = *fmt++; 104 if (c == 0) 105 return (nassigned); 106 if (isspace(c)) { 107 for (;;) { 108 if (fp->_r <= 0 && __srefill(fp)) 109 return (nassigned); 110 if (!isspace(*fp->_p)) 111 break; 112 nread++, fp->_r--, fp->_p++; 113 } 114 continue; 115 } 116 if (c != '%') 117 goto literal; 118 width = 0; 119 flags = 0; 120 /* 121 * switch on the format. continue if done; 122 * break once format type is derived. 
123 */ 124 again: c = *fmt++; 125 switch (c) { 126 case '%': 127 literal: 128 if (fp->_r <= 0 && __srefill(fp)) 129 goto input_failure; 130 if (*fp->_p != c) 131 goto match_failure; 132 fp->_r--, fp->_p++; 133 nread++; 134 continue; 135 136 case '*': 137 flags |= SUPPRESS; 138 goto again; 139 case 'l': 140 flags |= LONG; 141 goto again; 142 case 'L': 143 flags |= LONGDBL; 144 goto again; 145 case 'h': 146 flags |= SHORT; 147 goto again; 148 149 case '0': case '1': case '2': case '3': case '4': 150 case '5': case '6': case '7': case '8': case '9': 151 width = width * 10 + c - '0'; 152 goto again; 153 154 /* 155 * Conversions. 156 * Those marked `compat' are for 4.[123]BSD compatibility. 157 * 158 * (According to ANSI, E and X formats are supposed 159 * to the same as e and x. Sorry about that.) 160 */ 161 case 'D': /* compat */ 162 flags |= LONG; 163 /* FALLTHROUGH */ 164 case 'd': 165 c = CT_INT; 166 ccfn = (u_long (*)())strtol; 167 base = 10; 168 break; 169 170 case 'i': 171 c = CT_INT; 172 ccfn = (u_long (*)())strtol; 173 base = 0; 174 break; 175 176 case 'O': /* compat */ 177 flags |= LONG; 178 /* FALLTHROUGH */ 179 case 'o': 180 c = CT_INT; 181 ccfn = strtoul; 182 base = 8; 183 break; 184 185 case 'u': 186 c = CT_INT; 187 ccfn = strtoul; 188 base = 10; 189 break; 190 191 case 'X': /* compat XXX */ 192 flags |= LONG; 193 /* FALLTHROUGH */ 194 case 'x': 195 flags |= PFXOK; /* enable 0x prefixing */ 196 c = CT_INT; 197 ccfn = strtoul; 198 base = 16; 199 break; 200 201 #ifdef FLOATING_POINT 202 case 'E': /* compat XXX */ 203 case 'F': /* compat */ 204 flags |= LONG; 205 /* FALLTHROUGH */ 206 case 'e': case 'f': case 'g': 207 c = CT_FLOAT; 208 break; 209 #endif 210 211 case 's': 212 c = CT_STRING; 213 break; 214 215 case '[': 216 fmt = __sccl(ccltab, fmt); 217 flags |= NOSKIP; 218 c = CT_CCL; 219 break; 220 221 case 'c': 222 flags |= NOSKIP; 223 c = CT_CHAR; 224 break; 225 226 case 'p': /* pointer format is like hex */ 227 flags |= POINTER | PFXOK; 228 c = CT_INT; 
229 ccfn = strtoul; 230 base = 16; 231 break; 232 233 case 'n': 234 if (flags & SUPPRESS) /* ??? */ 235 continue; 236 if (flags & SHORT) 237 *va_arg(ap, short *) = nread; 238 else if (flags & LONG) 239 *va_arg(ap, long *) = nread; 240 else 241 *va_arg(ap, int *) = nread; 242 continue; 243 244 /* 245 * Disgusting backwards compatibility hacks. XXX 246 */ 247 case '\0': /* compat */ 248 return (EOF); 249 250 default: /* compat */ 251 if (isupper(c)) 252 flags |= LONG; 253 c = CT_INT; 254 ccfn = (u_long (*)())strtol; 255 base = 10; 256 break; 257 } 258 259 /* 260 * We have a conversion that requires input. 261 */ 262 if (fp->_r <= 0 && __srefill(fp)) 263 goto input_failure; 264 265 /* 266 * Consume leading white space, except for formats 267 * that suppress this. 268 */ 269 if ((flags & NOSKIP) == 0) { 270 while (isspace(*fp->_p)) { 271 nread++; 272 if (--fp->_r > 0) 273 fp->_p++; 274 else if (__srefill(fp)) 275 goto input_failure; 276 } 277 /* 278 * Note that there is at least one character in 279 * the buffer, so conversions that do not set NOSKIP 280 * ca no longer result in an input failure. 281 */ 282 } 283 284 /* 285 * Do the conversion. 
286 */ 287 switch (c) { 288 289 case CT_CHAR: 290 /* scan arbitrary characters (sets NOSKIP) */ 291 if (width == 0) 292 width = 1; 293 if (flags & SUPPRESS) { 294 size_t sum = 0; 295 for (;;) { 296 if ((n = fp->_r) < width) { 297 sum += n; 298 width -= n; 299 fp->_p += n; 300 if (__srefill(fp)) { 301 if (sum == 0) 302 goto input_failure; 303 break; 304 } 305 } else { 306 sum += width; 307 fp->_r -= width; 308 fp->_p += width; 309 break; 310 } 311 } 312 nread += sum; 313 } else { 314 size_t r = fread((void *)va_arg(ap, char *), 1, 315 width, fp); 316 317 if (r == 0) 318 goto input_failure; 319 nread += r; 320 nassigned++; 321 } 322 break; 323 324 case CT_CCL: 325 /* scan a (nonempty) character class (sets NOSKIP) */ 326 if (width == 0) 327 width = ~0; /* `infinity' */ 328 /* take only those things in the class */ 329 if (flags & SUPPRESS) { 330 n = 0; 331 while (ccltab[*fp->_p]) { 332 n++, fp->_r--, fp->_p++; 333 if (--width == 0) 334 break; 335 if (fp->_r <= 0 && __srefill(fp)) { 336 if (n == 0) 337 goto input_failure; 338 break; 339 } 340 } 341 if (n == 0) 342 goto match_failure; 343 } else { 344 p0 = p = va_arg(ap, char *); 345 while (ccltab[*fp->_p]) { 346 fp->_r--; 347 *p++ = *fp->_p++; 348 if (--width == 0) 349 break; 350 if (fp->_r <= 0 && __srefill(fp)) { 351 if (p == p0) 352 goto input_failure; 353 break; 354 } 355 } 356 n = p - p0; 357 if (n == 0) 358 goto match_failure; 359 *p = 0; 360 nassigned++; 361 } 362 nread += n; 363 break; 364 365 case CT_STRING: 366 /* like CCL, but zero-length string OK, & no NOSKIP */ 367 if (width == 0) 368 width = ~0; 369 if (flags & SUPPRESS) { 370 n = 0; 371 while (!isspace(*fp->_p)) { 372 n++, fp->_r--, fp->_p++; 373 if (--width == 0) 374 break; 375 if (fp->_r <= 0 && __srefill(fp)) 376 break; 377 } 378 nread += n; 379 } else { 380 p0 = p = va_arg(ap, char *); 381 while (!isspace(*fp->_p)) { 382 fp->_r--; 383 *p++ = *fp->_p++; 384 if (--width == 0) 385 break; 386 if (fp->_r <= 0 && __srefill(fp)) 387 break; 388 } 389 *p = 
0; 390 nread += p - p0; 391 nassigned++; 392 } 393 continue; 394 395 case CT_INT: 396 /* scan an integer as if by strtol/strtoul */ 397 #ifdef hardway 398 if (width == 0 || width > sizeof(buf) - 1) 399 width = sizeof(buf) - 1; 400 #else 401 /* size_t is unsigned, hence this optimisation */ 402 if (--width > sizeof(buf) - 2) 403 width = sizeof(buf) - 2; 404 width++; 405 #endif 406 flags |= SIGNOK | NDIGITS | NZDIGITS; 407 for (p = buf; width; width--) { 408 c = *fp->_p; 409 /* 410 * Switch on the character; `goto ok' 411 * if we accept it as a part of number. 412 */ 413 switch (c) { 414 415 /* 416 * The digit 0 is always legal, but is 417 * special. For %i conversions, if no 418 * digits (zero or nonzero) have been 419 * scanned (only signs), we will have 420 * base==0. In that case, we should set 421 * it to 8 and enable 0x prefixing. 422 * Also, if we have not scanned zero digits 423 * before this, do not turn off prefixing 424 * (someone else will turn it off if we 425 * have scanned any nonzero digits). 
426 */ 427 case '0': 428 if (base == 0) { 429 base = 8; 430 flags |= PFXOK; 431 } 432 if (flags & NZDIGITS) 433 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 434 else 435 flags &= ~(SIGNOK|PFXOK|NDIGITS); 436 goto ok; 437 438 /* 1 through 7 always legal */ 439 case '1': case '2': case '3': 440 case '4': case '5': case '6': case '7': 441 base = basefix[base]; 442 flags &= ~(SIGNOK | PFXOK | NDIGITS); 443 goto ok; 444 445 /* digits 8 and 9 ok iff decimal or hex */ 446 case '8': case '9': 447 base = basefix[base]; 448 if (base <= 8) 449 break; /* not legal here */ 450 flags &= ~(SIGNOK | PFXOK | NDIGITS); 451 goto ok; 452 453 /* letters ok iff hex */ 454 case 'A': case 'B': case 'C': 455 case 'D': case 'E': case 'F': 456 case 'a': case 'b': case 'c': 457 case 'd': case 'e': case 'f': 458 /* no need to fix base here */ 459 if (base <= 10) 460 break; /* not legal here */ 461 flags &= ~(SIGNOK | PFXOK | NDIGITS); 462 goto ok; 463 464 /* sign ok only as first character */ 465 case '+': case '-': 466 if (flags & SIGNOK) { 467 flags &= ~SIGNOK; 468 goto ok; 469 } 470 break; 471 472 /* x ok iff flag still set & 2nd char */ 473 case 'x': case 'X': 474 if (flags & PFXOK && p == buf + 1) { 475 base = 16; /* if %i */ 476 flags &= ~PFXOK; 477 goto ok; 478 } 479 break; 480 } 481 482 /* 483 * If we got here, c is not a legal character 484 * for a number. Stop accumulating digits. 485 */ 486 break; 487 ok: 488 /* 489 * c is legal: store it and look at the next. 490 */ 491 *p++ = c; 492 if (--fp->_r > 0) 493 fp->_p++; 494 else if (__srefill(fp)) 495 break; /* EOF */ 496 } 497 /* 498 * If we had only a sign, it is no good; push 499 * back the sign. If the number ends in `x', 500 * it was [sign] '0' 'x', so push back the x 501 * and treat it as [sign] '0'. 
502 */ 503 if (flags & NDIGITS) { 504 if (p > buf) 505 (void) ungetc(*(u_char *)--p, fp); 506 goto match_failure; 507 } 508 c = ((u_char *)p)[-1]; 509 if (c == 'x' || c == 'X') { 510 --p; 511 (void) ungetc(c, fp); 512 } 513 if ((flags & SUPPRESS) == 0) { 514 u_long res; 515 516 *p = 0; 517 res = (*ccfn)(buf, (char **)NULL, base); 518 if (flags & POINTER) 519 *va_arg(ap, void **) = (void *)res; 520 else if (flags & SHORT) 521 *va_arg(ap, short *) = res; 522 else if (flags & LONG) 523 *va_arg(ap, long *) = res; 524 else 525 *va_arg(ap, int *) = res; 526 nassigned++; 527 } 528 nread += p - buf; 529 break; 530 531 #ifdef FLOATING_POINT 532 case CT_FLOAT: 533 /* scan a floating point number as if by strtod */ 534 #ifdef hardway 535 if (width == 0 || width > sizeof(buf) - 1) 536 width = sizeof(buf) - 1; 537 #else 538 /* size_t is unsigned, hence this optimisation */ 539 if (--width > sizeof(buf) - 2) 540 width = sizeof(buf) - 2; 541 width++; 542 #endif 543 flags |= SIGNOK | NDIGITS | DPTOK | EXPOK; 544 for (p = buf; width; width--) { 545 c = *fp->_p; 546 /* 547 * This code mimicks the integer conversion 548 * code, but is much simpler. 
549 */ 550 switch (c) { 551 552 case '0': case '1': case '2': case '3': 553 case '4': case '5': case '6': case '7': 554 case '8': case '9': 555 flags &= ~(SIGNOK | NDIGITS); 556 goto fok; 557 558 case '+': case '-': 559 if (flags & SIGNOK) { 560 flags &= ~SIGNOK; 561 goto fok; 562 } 563 break; 564 case '.': 565 if (flags & DPTOK) { 566 flags &= ~(SIGNOK | DPTOK); 567 goto fok; 568 } 569 break; 570 case 'e': case 'E': 571 /* no exponent without some digits */ 572 if ((flags&(NDIGITS|EXPOK)) == EXPOK) { 573 flags = 574 (flags & ~(EXPOK|DPTOK)) | 575 SIGNOK | NDIGITS; 576 goto fok; 577 } 578 break; 579 } 580 break; 581 fok: 582 *p++ = c; 583 if (--fp->_r > 0) 584 fp->_p++; 585 else if (__srefill(fp)) 586 break; /* EOF */ 587 } 588 /* 589 * If no digits, might be missing exponent digits 590 * (just give back the exponent) or might be missing 591 * regular digits, but had sign and/or decimal point. 592 */ 593 if (flags & NDIGITS) { 594 if (flags & EXPOK) { 595 /* no digits at all */ 596 while (p > buf) 597 ungetc(*(u_char *)--p, fp); 598 goto match_failure; 599 } 600 /* just a bad exponent (e and maybe sign) */ 601 c = *(u_char *)--p; 602 if (c != 'e' && c != 'E') { 603 (void) ungetc(c, fp);/* sign */ 604 c = *(u_char *)--p; 605 } 606 (void) ungetc(c, fp); 607 } 608 if ((flags & SUPPRESS) == 0) { 609 double res; 610 611 *p = 0; 612 res = atof(buf); 613 if (flags & LONG) 614 *va_arg(ap, double *) = res; 615 else 616 *va_arg(ap, float *) = res; 617 nassigned++; 618 } 619 nread += p - buf; 620 break; 621 #endif /* FLOATING_POINT */ 622 } 623 } 624 input_failure: 625 return (nassigned ? nassigned : -1); 626 match_failure: 627 return (nassigned); 628 } 629 630 /* 631 * Fill in the given table from the scanset at the given format 632 * (just after `['). Return a pointer to the character past the 633 * closing `]'. The table has a 1 wherever characters should be 634 * considered part of the scanset. 
/* __sccl: scanset table builder for %[...] */
/*
 * Fill in the 256-entry table `tab' from the scanset found in the
 * format string at `fmt', which points just past the opening `['.
 *
 * Every entry of tab is written: 1 for characters that belong to the
 * scanset, 0 for all others.  A leading `^' negates the set.
 *
 * Returns a pointer to the character after the closing `]'.  If the
 * format ends before a `]' is seen, returns a pointer to the
 * terminating NUL instead.
 */
static unsigned char *
__sccl(tab, fmt)
	register char *tab;
	register unsigned char *fmt;
{
	register int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */
	/* should probably use memset here */
	for (n = 0; n < 256; n++)
		tab[n] = v;
	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The test must be `<=': with n == c the fill loop
			 * below would still run once and mark c + 1, which
			 * is outside the set and, for c == 255, outside
			 * the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			do {		/* fill in the range; here c < n <= 255 */
				tab[++c] = v;
			} while (c < n);
			/*
			 * XXX another disgusting compatibility hack:
			 * the V7 Unix scanf also treats formats such as
			 * [a-c-e] as `the letters a through e', so go look
			 * at the next character with c left at the end of
			 * the range.  This too is permitted by the standard.
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}