1 /*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Chris Torek. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfscanf.c 8.1 (Berkeley) 6/4/93 37 * $FreeBSD: src/lib/libc/stdio/vfscanf.c,v 1.14.2.2 2002/04/17 14:58:23 ache Exp $ 38 * $DragonFly: src/lib/libc/stdio/vfscanf.c,v 1.3 2004/03/08 07:55:57 hmp Exp $ 39 */ 40 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <ctype.h> 44 #if __STDC__ 45 #include <stdarg.h> 46 #else 47 #include <varargs.h> 48 #endif 49 #include <string.h> 50 51 #include "collate.h" 52 #include "local.h" 53 54 #define FLOATING_POINT 55 56 #ifdef FLOATING_POINT 57 #include <locale.h> 58 #include "floatio.h" 59 #endif 60 61 #define BUF 513 /* Maximum length of numeric string. */ 62 63 /* 64 * Flags used during conversion. 65 */ 66 #define LONG 0x01 /* l: long or double */ 67 #define LONGDBL 0x02 /* L: long double */ 68 #define SHORT 0x04 /* h: short */ 69 #define SUPPRESS 0x08 /* suppress assignment */ 70 #define POINTER 0x10 /* weird %p pointer (`fake hex') */ 71 #define NOSKIP 0x20 /* do not skip blanks */ 72 #define QUAD 0x400 73 74 /* 75 * The following are used in numeric conversions only: 76 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 77 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 78 */ 79 #define SIGNOK 0x40 /* +/- is (still) legal */ 80 #define NDIGITS 0x80 /* no digits detected */ 81 82 #define DPTOK 0x100 /* (float) decimal point is still legal */ 83 #define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ 84 85 #define PFXOK 0x100 /* 0x prefix is (still) legal */ 86 #define NZDIGITS 0x200 /* no zero digits detected */ 87 #define HAVESIGN 0x10000 /* sign detected */ 88 89 /* 90 * Conversion types. 91 */ 92 #define CT_CHAR 0 /* %c conversion */ 93 #define CT_CCL 1 /* %[...] conversion */ 94 #define CT_STRING 2 /* %s conversion */ 95 #define CT_INT 3 /* integer, i.e., strtoq or strtouq */ 96 #define CT_FLOAT 4 /* floating, i.e., strtod */ 97 98 #define u_char unsigned char 99 #define u_long unsigned long 100 101 static u_char *__sccl(char *, u_char *); 102 103 /* 104 * vfscanf 105 */ 106 int 107 __svfscanf(FILE *fp, char const *fmt0, va_list ap) 108 { 109 u_char *fmt = (u_char *)fmt0; 110 int c; /* character from format, or conversion */ 111 size_t width; /* field width, or 0 */ 112 char *p; /* points into all kinds of strings */ 113 int n; /* handy integer */ 114 int flags; /* flags as defined above */ 115 char *p0; /* saves original value of p when necessary */ 116 int nassigned; /* number of fields assigned */ 117 int nconversions; /* number of conversions */ 118 int nread; /* number of characters consumed from fp */ 119 int base; /* base argument to strtoq/strtouq */ 120 u_quad_t(*ccfn)(); /* conversion function (strtoq/strtouq) */ 121 char ccltab[256]; /* character class table for %[...] */ 122 char buf[BUF]; /* buffer for numeric conversions */ 123 124 /* `basefix' is used to avoid `if' tests in the integer scanner */ 125 static short basefix[17] = 126 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 127 #ifdef FLOATING_POINT 128 char decimal_point = localeconv()->decimal_point[0]; 129 #endif 130 131 nassigned = 0; 132 nconversions = 0; 133 nread = 0; 134 base = 0; /* XXX just to keep gcc happy */ 135 ccfn = NULL; /* XXX just to keep gcc happy */ 136 for (;;) { 137 c = *fmt++; 138 if (c == 0) 139 return (nassigned); 140 if (isspace(c)) { 141 while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p)) 142 nread++, fp->_r--, fp->_p++; 143 continue; 144 } 145 if (c != '%') 146 goto literal; 147 width = 0; 148 flags = 0; 149 /* 150 * switch on the format. continue if done; 151 * break once format type is derived. 152 */ 153 again: c = *fmt++; 154 switch (c) { 155 case '%': 156 literal: 157 if (fp->_r <= 0 && __srefill(fp)) 158 goto input_failure; 159 if (*fp->_p != c) 160 goto match_failure; 161 fp->_r--, fp->_p++; 162 nread++; 163 continue; 164 165 case '*': 166 flags |= SUPPRESS; 167 goto again; 168 case 'l': 169 flags |= LONG; 170 goto again; 171 case 'q': 172 flags |= QUAD; 173 goto again; 174 case 'L': 175 flags |= LONGDBL; 176 goto again; 177 case 'h': 178 flags |= SHORT; 179 goto again; 180 181 case '0': case '1': case '2': case '3': case '4': 182 case '5': case '6': case '7': case '8': case '9': 183 width = width * 10 + c - '0'; 184 goto again; 185 186 /* 187 * Conversions. 188 * Those marked `compat' are for 4.[123]BSD compatibility. 189 * 190 * (According to ANSI, E and X formats are supposed 191 * to the same as e and x. Sorry about that.) 192 */ 193 case 'D': /* compat */ 194 flags |= LONG; 195 /* FALLTHROUGH */ 196 case 'd': 197 c = CT_INT; 198 ccfn = (u_quad_t (*)())strtoq; 199 base = 10; 200 break; 201 202 case 'i': 203 c = CT_INT; 204 ccfn = (u_quad_t (*)())strtoq; 205 base = 0; 206 break; 207 208 case 'O': /* compat */ 209 flags |= LONG; 210 /* FALLTHROUGH */ 211 case 'o': 212 c = CT_INT; 213 ccfn = strtouq; 214 base = 8; 215 break; 216 217 case 'u': 218 c = CT_INT; 219 ccfn = strtouq; 220 base = 10; 221 break; 222 223 case 'X': /* compat XXX */ 224 flags |= LONG; 225 /* FALLTHROUGH */ 226 case 'x': 227 flags |= PFXOK; /* enable 0x prefixing */ 228 c = CT_INT; 229 ccfn = strtouq; 230 base = 16; 231 break; 232 233 #ifdef FLOATING_POINT 234 case 'E': /* compat XXX */ 235 case 'F': /* compat */ 236 flags |= LONG; 237 /* FALLTHROUGH */ 238 case 'e': case 'f': case 'g': 239 c = CT_FLOAT; 240 break; 241 #endif 242 243 case 's': 244 c = CT_STRING; 245 break; 246 247 case '[': 248 fmt = __sccl(ccltab, fmt); 249 flags |= NOSKIP; 250 c = CT_CCL; 251 break; 252 253 case 'c': 254 flags |= NOSKIP; 255 c = CT_CHAR; 256 break; 257 258 case 'p': /* pointer format is like hex */ 259 flags |= POINTER | PFXOK; 260 c = CT_INT; 261 ccfn = strtouq; 262 base = 16; 263 break; 264 265 case 'n': 266 nconversions++; 267 if (flags & SUPPRESS) /* ??? */ 268 continue; 269 if (flags & SHORT) 270 *va_arg(ap, short *) = nread; 271 else if (flags & LONG) 272 *va_arg(ap, long *) = nread; 273 else if (flags & QUAD) 274 *va_arg(ap, quad_t *) = nread; 275 else 276 *va_arg(ap, int *) = nread; 277 continue; 278 279 /* 280 * Disgusting backwards compatibility hacks. XXX 281 */ 282 case '\0': /* compat */ 283 return (EOF); 284 285 default: /* compat */ 286 if (isupper(c)) 287 flags |= LONG; 288 c = CT_INT; 289 ccfn = (u_quad_t (*)())strtoq; 290 base = 10; 291 break; 292 } 293 294 /* 295 * We have a conversion that requires input. 296 */ 297 if (fp->_r <= 0 && __srefill(fp)) 298 goto input_failure; 299 300 /* 301 * Consume leading white space, except for formats 302 * that suppress this. 303 */ 304 if ((flags & NOSKIP) == 0) { 305 while (isspace(*fp->_p)) { 306 nread++; 307 if (--fp->_r > 0) 308 fp->_p++; 309 else if (__srefill(fp)) 310 goto input_failure; 311 } 312 /* 313 * Note that there is at least one character in 314 * the buffer, so conversions that do not set NOSKIP 315 * ca no longer result in an input failure. 316 */ 317 } 318 319 /* 320 * Do the conversion. 321 */ 322 switch (c) { 323 324 case CT_CHAR: 325 /* scan arbitrary characters (sets NOSKIP) */ 326 if (width == 0) 327 width = 1; 328 if (flags & SUPPRESS) { 329 size_t sum = 0; 330 for (;;) { 331 if ((n = fp->_r) < width) { 332 sum += n; 333 width -= n; 334 fp->_p += n; 335 if (__srefill(fp)) { 336 if (sum == 0) 337 goto input_failure; 338 break; 339 } 340 } else { 341 sum += width; 342 fp->_r -= width; 343 fp->_p += width; 344 break; 345 } 346 } 347 nread += sum; 348 } else { 349 size_t r = fread((void *)va_arg(ap, char *), 1, 350 width, fp); 351 352 if (r == 0) 353 goto input_failure; 354 nread += r; 355 nassigned++; 356 } 357 nconversions++; 358 break; 359 360 case CT_CCL: 361 /* scan a (nonempty) character class (sets NOSKIP) */ 362 if (width == 0) 363 width = (size_t)~0; /* `infinity' */ 364 /* take only those things in the class */ 365 if (flags & SUPPRESS) { 366 n = 0; 367 while (ccltab[*fp->_p]) { 368 n++, fp->_r--, fp->_p++; 369 if (--width == 0) 370 break; 371 if (fp->_r <= 0 && __srefill(fp)) { 372 if (n == 0) 373 goto input_failure; 374 break; 375 } 376 } 377 if (n == 0) 378 goto match_failure; 379 } else { 380 p0 = p = va_arg(ap, char *); 381 while (ccltab[*fp->_p]) { 382 fp->_r--; 383 *p++ = *fp->_p++; 384 if (--width == 0) 385 break; 386 if (fp->_r <= 0 && __srefill(fp)) { 387 if (p == p0) 388 goto input_failure; 389 break; 390 } 391 } 392 n = p - p0; 393 if (n == 0) 394 goto match_failure; 395 *p = 0; 396 nassigned++; 397 } 398 nread += n; 399 nconversions++; 400 break; 401 402 case CT_STRING: 403 /* like CCL, but zero-length string OK, & no NOSKIP */ 404 if (width == 0) 405 width = (size_t)~0; 406 if (flags & SUPPRESS) { 407 n = 0; 408 while (!isspace(*fp->_p)) { 409 n++, fp->_r--, fp->_p++; 410 if (--width == 0) 411 break; 412 if (fp->_r <= 0 && __srefill(fp)) 413 break; 414 } 415 nread += n; 416 } else { 417 p0 = p = va_arg(ap, char *); 418 while (!isspace(*fp->_p)) { 419 fp->_r--; 420 *p++ = *fp->_p++; 421 if (--width == 0) 422 break; 423 if (fp->_r <= 0 && __srefill(fp)) 424 break; 425 } 426 *p = 0; 427 nread += p - p0; 428 nassigned++; 429 } 430 nconversions++; 431 continue; 432 433 case CT_INT: 434 /* scan an integer as if by strtoq/strtouq */ 435 #ifdef hardway 436 if (width == 0 || width > sizeof(buf) - 1) 437 width = sizeof(buf) - 1; 438 #else 439 /* size_t is unsigned, hence this optimisation */ 440 if (--width > sizeof(buf) - 2) 441 width = sizeof(buf) - 2; 442 width++; 443 #endif 444 flags |= SIGNOK | NDIGITS | NZDIGITS; 445 for (p = buf; width; width--) { 446 c = *fp->_p; 447 /* 448 * Switch on the character; `goto ok' 449 * if we accept it as a part of number. 450 */ 451 switch (c) { 452 453 /* 454 * The digit 0 is always legal, but is 455 * special. For %i conversions, if no 456 * digits (zero or nonzero) have been 457 * scanned (only signs), we will have 458 * base==0. In that case, we should set 459 * it to 8 and enable 0x prefixing. 460 * Also, if we have not scanned zero digits 461 * before this, do not turn off prefixing 462 * (someone else will turn it off if we 463 * have scanned any nonzero digits). 464 */ 465 case '0': 466 if (base == 0) { 467 base = 8; 468 flags |= PFXOK; 469 } 470 if (flags & NZDIGITS) 471 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 472 else 473 flags &= ~(SIGNOK|PFXOK|NDIGITS); 474 goto ok; 475 476 /* 1 through 7 always legal */ 477 case '1': case '2': case '3': 478 case '4': case '5': case '6': case '7': 479 base = basefix[base]; 480 flags &= ~(SIGNOK | PFXOK | NDIGITS); 481 goto ok; 482 483 /* digits 8 and 9 ok iff decimal or hex */ 484 case '8': case '9': 485 base = basefix[base]; 486 if (base <= 8) 487 break; /* not legal here */ 488 flags &= ~(SIGNOK | PFXOK | NDIGITS); 489 goto ok; 490 491 /* letters ok iff hex */ 492 case 'A': case 'B': case 'C': 493 case 'D': case 'E': case 'F': 494 case 'a': case 'b': case 'c': 495 case 'd': case 'e': case 'f': 496 /* no need to fix base here */ 497 if (base <= 10) 498 break; /* not legal here */ 499 flags &= ~(SIGNOK | PFXOK | NDIGITS); 500 goto ok; 501 502 /* sign ok only as first character */ 503 case '+': case '-': 504 if (flags & SIGNOK) { 505 flags &= ~SIGNOK; 506 flags |= HAVESIGN; 507 goto ok; 508 } 509 break; 510 511 /* 512 * x ok iff flag still set & 2nd char (or 513 * 3rd char if we have a sign). 514 */ 515 case 'x': case 'X': 516 if (flags & PFXOK && p == 517 buf + 1 + !!(flags & HAVESIGN)) { 518 base = 16; /* if %i */ 519 flags &= ~PFXOK; 520 goto ok; 521 } 522 break; 523 } 524 525 /* 526 * If we got here, c is not a legal character 527 * for a number. Stop accumulating digits. 528 */ 529 break; 530 ok: 531 /* 532 * c is legal: store it and look at the next. 533 */ 534 *p++ = c; 535 if (--fp->_r > 0) 536 fp->_p++; 537 else if (__srefill(fp)) 538 break; /* EOF */ 539 } 540 /* 541 * If we had only a sign, it is no good; push 542 * back the sign. If the number ends in `x', 543 * it was [sign] '0' 'x', so push back the x 544 * and treat it as [sign] '0'. 545 */ 546 if (flags & NDIGITS) { 547 if (p > buf) 548 (void) ungetc(*(u_char *)--p, fp); 549 goto match_failure; 550 } 551 c = ((u_char *)p)[-1]; 552 if (c == 'x' || c == 'X') { 553 --p; 554 (void) ungetc(c, fp); 555 } 556 if ((flags & SUPPRESS) == 0) { 557 u_quad_t res; 558 559 *p = 0; 560 res = (*ccfn)(buf, (char **)NULL, base); 561 if (flags & POINTER) 562 *va_arg(ap, void **) = 563 (void *)(u_long)res; 564 else if (flags & SHORT) 565 *va_arg(ap, short *) = res; 566 else if (flags & LONG) 567 *va_arg(ap, long *) = res; 568 else if (flags & QUAD) 569 *va_arg(ap, quad_t *) = res; 570 else 571 *va_arg(ap, int *) = res; 572 nassigned++; 573 } 574 nread += p - buf; 575 nconversions++; 576 break; 577 578 #ifdef FLOATING_POINT 579 case CT_FLOAT: 580 /* scan a floating point number as if by strtod */ 581 #ifdef hardway 582 if (width == 0 || width > sizeof(buf) - 1) 583 width = sizeof(buf) - 1; 584 #else 585 /* size_t is unsigned, hence this optimisation */ 586 if (--width > sizeof(buf) - 2) 587 width = sizeof(buf) - 2; 588 width++; 589 #endif 590 flags |= SIGNOK | NDIGITS | DPTOK | EXPOK; 591 for (p = buf; width; width--) { 592 c = *fp->_p; 593 /* 594 * This code mimicks the integer conversion 595 * code, but is much simpler. 596 */ 597 switch (c) { 598 599 case '0': case '1': case '2': case '3': 600 case '4': case '5': case '6': case '7': 601 case '8': case '9': 602 flags &= ~(SIGNOK | NDIGITS); 603 goto fok; 604 605 case '+': case '-': 606 if (flags & SIGNOK) { 607 flags &= ~SIGNOK; 608 goto fok; 609 } 610 break; 611 case 'e': case 'E': 612 /* no exponent without some digits */ 613 if ((flags&(NDIGITS|EXPOK)) == EXPOK) { 614 flags = 615 (flags & ~(EXPOK|DPTOK)) | 616 SIGNOK | NDIGITS; 617 goto fok; 618 } 619 break; 620 default: 621 if ((char)c == decimal_point && 622 (flags & DPTOK)) { 623 flags &= ~(SIGNOK | DPTOK); 624 goto fok; 625 } 626 break; 627 } 628 break; 629 fok: 630 *p++ = c; 631 if (--fp->_r > 0) 632 fp->_p++; 633 else if (__srefill(fp)) 634 break; /* EOF */ 635 } 636 /* 637 * If no digits, might be missing exponent digits 638 * (just give back the exponent) or might be missing 639 * regular digits, but had sign and/or decimal point. 640 */ 641 if (flags & NDIGITS) { 642 if (flags & EXPOK) { 643 /* no digits at all */ 644 while (p > buf) 645 ungetc(*(u_char *)--p, fp); 646 goto match_failure; 647 } 648 /* just a bad exponent (e and maybe sign) */ 649 c = *(u_char *)--p; 650 if (c != 'e' && c != 'E') { 651 (void) ungetc(c, fp);/* sign */ 652 c = *(u_char *)--p; 653 } 654 (void) ungetc(c, fp); 655 } 656 if ((flags & SUPPRESS) == 0) { 657 double res; 658 659 *p = 0; 660 /* XXX this loses precision for long doubles. */ 661 res = strtod(buf, (char **) NULL); 662 if (flags & LONGDBL) 663 *va_arg(ap, long double *) = res; 664 else if (flags & LONG) 665 *va_arg(ap, double *) = res; 666 else 667 *va_arg(ap, float *) = res; 668 nassigned++; 669 } 670 nread += p - buf; 671 nconversions++; 672 break; 673 #endif /* FLOATING_POINT */ 674 } 675 } 676 input_failure: 677 return (nconversions != 0 ? nassigned : EOF); 678 match_failure: 679 return (nassigned); 680 } 681 682 /* 683 * Fill in the given table from the scanset at the given format 684 * (just after `['). Return a pointer to the character past the 685 * closing `]'. The table has a 1 wherever characters should be 686 * considered part of the scanset. 687 */ 688 static u_char * 689 __sccl(tab, fmt) 690 char *tab; 691 u_char *fmt; 692 { 693 int c, n, v, i; 694 695 /* first `clear' the whole table */ 696 c = *fmt++; /* first char hat => negated scanset */ 697 if (c == '^') { 698 v = 1; /* default => accept */ 699 c = *fmt++; /* get new first char */ 700 } else 701 v = 0; /* default => reject */ 702 703 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 704 (void) memset(tab, v, 256); 705 706 if (c == 0) 707 return (fmt - 1);/* format ended before closing ] */ 708 709 /* 710 * Now set the entries corresponding to the actual scanset 711 * to the opposite of the above. 712 * 713 * The first character may be ']' (or '-') without being special; 714 * the last character may be '-'. 715 */ 716 v = 1 - v; 717 for (;;) { 718 tab[c] = v; /* take character c */ 719 doswitch: 720 n = *fmt++; /* and examine the next */ 721 switch (n) { 722 723 case 0: /* format ended too soon */ 724 return (fmt - 1); 725 726 case '-': 727 /* 728 * A scanset of the form 729 * [01+-] 730 * is defined as `the digit 0, the digit 1, 731 * the character +, the character -', but 732 * the effect of a scanset such as 733 * [a-zA-Z0-9] 734 * is implementation defined. The V7 Unix 735 * scanf treats `a-z' as `the letters a through 736 * z', but treats `a-a' as `the letter a, the 737 * character -, and the letter a'. 738 * 739 * For compatibility, the `-' is not considerd 740 * to define a range if the character following 741 * it is either a close bracket (required by ANSI) 742 * or is not numerically greater than the character 743 * we just stored in the table (c). 744 */ 745 n = *fmt; 746 if (n == ']' 747 || (__collate_load_error ? n < c : 748 __collate_range_cmp (n, c) < 0 749 ) 750 ) { 751 c = '-'; 752 break; /* resume the for(;;) */ 753 } 754 fmt++; 755 /* fill in the range */ 756 if (__collate_load_error) { 757 do { 758 tab[++c] = v; 759 } while (c < n); 760 } else { 761 for (i = 0; i < 256; i ++) 762 if ( __collate_range_cmp (c, i) < 0 763 && __collate_range_cmp (i, n) <= 0 764 ) 765 tab[i] = v; 766 } 767 #if 1 /* XXX another disgusting compatibility hack */ 768 c = n; 769 /* 770 * Alas, the V7 Unix scanf also treats formats 771 * such as [a-c-e] as `the letters a through e'. 772 * This too is permitted by the standard.... 773 */ 774 goto doswitch; 775 #else 776 c = *fmt++; 777 if (c == 0) 778 return (fmt - 1); 779 if (c == ']') 780 return (fmt); 781 #endif 782 break; 783 784 case ']': /* end of scanset */ 785 return (fmt); 786 787 default: /* just another character */ 788 c = n; 789 break; 790 } 791 } 792 /* NOTREACHED */ 793 } 794