1 /*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Copyright (c) 2011 The FreeBSD Foundation 6 * All rights reserved. 7 * Portions of this software were developed by David Chisnall 8 * under sponsorship from the FreeBSD Foundation. 9 * 10 * This code is derived from software contributed to Berkeley by 11 * Chris Torek. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)vfscanf.c 8.1 (Berkeley) 6/4/93 38 * $FreeBSD: head/lib/libc/stdio/vfscanf.c 249808 2013-04-23 13:33:13Z emaste $ 39 */ 40 41 42 #include "namespace.h" 43 #include <ctype.h> 44 #include <inttypes.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <stddef.h> 48 #include <stdarg.h> 49 #include <string.h> 50 #include <wchar.h> 51 #include <wctype.h> 52 #include "un-namespace.h" 53 54 #include "collate.h" 55 #include "libc_private.h" 56 #include "local.h" 57 #include "xlocale_private.h" 58 59 #ifndef NO_FLOATING_POINT 60 #include <locale.h> 61 #endif 62 63 #define BUF 513 /* Maximum length of numeric string. */ 64 65 /* 66 * Flags used during conversion. 67 */ 68 #define LONG 0x01 /* l: long or double */ 69 #define LONGDBL 0x02 /* L: long double */ 70 #define SHORT 0x04 /* h: short */ 71 #define SUPPRESS 0x08 /* *: suppress assignment */ 72 #define POINTER 0x10 /* p: void * (as hex) */ 73 #define NOSKIP 0x20 /* [ or c: do not skip blanks */ 74 #define LONGLONG 0x400 /* ll: long long (+ deprecated q: quad) */ 75 #define INTMAXT 0x800 /* j: intmax_t */ 76 #define PTRDIFFT 0x1000 /* t: ptrdiff_t */ 77 #define SIZET 0x2000 /* z: size_t */ 78 #define SHORTSHORT 0x4000 /* hh: char */ 79 #define UNSIGNED 0x8000 /* %[oupxX] conversions */ 80 81 /* 82 * The following are used in integral conversions only: 83 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS 84 */ 85 #define SIGNOK 0x40 /* +/- is (still) legal */ 86 #define NDIGITS 0x80 /* no digits detected */ 87 #define PFXOK 0x100 /* 0x prefix is (still) legal */ 88 #define NZDIGITS 0x200 /* no zero digits detected */ 89 #define HAVESIGN 0x10000 /* sign detected */ 90 91 /* 92 * Conversion types. 93 */ 94 #define CT_CHAR 0 /* %c conversion */ 95 #define CT_CCL 1 /* %[...] conversion */ 96 #define CT_STRING 2 /* %s conversion */ 97 #define CT_INT 3 /* %[dioupxX] conversion */ 98 #define CT_FLOAT 4 /* %[efgEFG] conversion */ 99 100 static const u_char *__sccl(char *, const u_char *); 101 #ifndef NO_FLOATING_POINT 102 static int parsefloat(FILE *, char *, char *, locale_t); 103 #endif 104 105 __weak_reference(__vfscanf, vfscanf); 106 107 /* 108 * Conversion functions are passed a pointer to this object instead of 109 * a real parameter to indicate that the assignment-suppression (*) 110 * flag was specified. We could use a NULL pointer to indicate this, 111 * but that would mask bugs in applications that call scanf() with a 112 * NULL pointer. 113 */ 114 static const int suppress; 115 #define SUPPRESS_PTR ((void *)&suppress) 116 117 static const mbstate_t initial_mbs; 118 119 /* 120 * The following conversion functions return the number of characters consumed, 121 * or -1 on input failure. Character class conversion returns 0 on match 122 * failure. 123 */ 124 125 static __inline int 126 convert_char(FILE *fp, char * p, int width) 127 { 128 int n; 129 130 if (p == SUPPRESS_PTR) { 131 size_t sum = 0; 132 for (;;) { 133 if ((n = fp->pub._r) < width) { 134 sum += n; 135 width -= n; 136 fp->pub._p += n; 137 if (__srefill(fp)) { 138 if (sum == 0) 139 return (-1); 140 break; 141 } 142 } else { 143 sum += width; 144 fp->pub._r -= width; 145 fp->pub._p += width; 146 break; 147 } 148 } 149 return (sum); 150 } else { 151 size_t r = __fread(p, 1, width, fp); 152 153 if (r == 0) 154 return (-1); 155 return (r); 156 } 157 } 158 159 static __inline int 160 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale) 161 { 162 mbstate_t mbs; 163 int n, nread; 164 wint_t wi; 165 166 mbs = initial_mbs; 167 n = 0; 168 while (width-- != 0 && 169 (wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF) { 170 if (wcp != SUPPRESS_PTR) 171 *wcp++ = (wchar_t)wi; 172 n += nread; 173 } 174 if (n == 0) 175 return (-1); 176 return (n); 177 } 178 179 static __inline int 180 convert_ccl(FILE *fp, char * p, int width, const char *ccltab) 181 { 182 char *p0; 183 int n; 184 185 if (p == SUPPRESS_PTR) { 186 n = 0; 187 while (ccltab[*fp->pub._p]) { 188 n++, fp->pub._r--, fp->pub._p++; 189 if (--width == 0) 190 break; 191 if (fp->pub._r <= 0 && __srefill(fp)) { 192 if (n == 0) 193 return (-1); 194 break; 195 } 196 } 197 } else { 198 p0 = p; 199 while (ccltab[*fp->pub._p]) { 200 fp->pub._r--; 201 *p++ = *fp->pub._p++; 202 if (--width == 0) 203 break; 204 if (fp->pub._r <= 0 && __srefill(fp)) { 205 if (p == p0) 206 return (-1); 207 break; 208 } 209 } 210 n = p - p0; 211 if (n == 0) 212 return (0); 213 *p = 0; 214 } 215 return (n); 216 } 217 218 static __inline int 219 convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab, 220 locale_t locale) 221 { 222 mbstate_t mbs; 223 wint_t wi; 224 int n, nread; 225 226 mbs = initial_mbs; 227 n = 0; 228 if (wcp == SUPPRESS_PTR) { 229 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 230 width-- != 0 && ccltab[wctob(wi)]) 231 n += nread; 232 if (wi != WEOF) 233 __ungetwc(wi, fp, __get_locale()); 234 } else { 235 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 236 width-- != 0 && ccltab[wctob(wi)]) { 237 *wcp++ = (wchar_t)wi; 238 n += nread; 239 } 240 if (wi != WEOF) 241 __ungetwc(wi, fp, __get_locale()); 242 if (n == 0) 243 return (0); 244 *wcp = 0; 245 } 246 return (n); 247 } 248 249 static __inline int 250 convert_string(FILE *fp, char * p, int width) 251 { 252 char *p0; 253 int n; 254 255 if (p == SUPPRESS_PTR) { 256 n = 0; 257 while (!isspace(*fp->pub._p)) { 258 n++, fp->pub._r--, fp->pub._p++; 259 if (--width == 0) 260 break; 261 if (fp->pub._r <= 0 && __srefill(fp)) 262 break; 263 } 264 } else { 265 p0 = p; 266 while (!isspace(*fp->pub._p)) { 267 fp->pub._r--; 268 *p++ = *fp->pub._p++; 269 if (--width == 0) 270 break; 271 if (fp->pub._r <= 0 && __srefill(fp)) 272 break; 273 } 274 *p = 0; 275 n = p - p0; 276 } 277 return (n); 278 } 279 280 static __inline int 281 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale) 282 { 283 mbstate_t mbs; 284 wint_t wi; 285 int n, nread; 286 287 mbs = initial_mbs; 288 n = 0; 289 if (wcp == SUPPRESS_PTR) { 290 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 291 width-- != 0 && !iswspace(wi)) 292 n += nread; 293 if (wi != WEOF) 294 __ungetwc(wi, fp, __get_locale()); 295 } else { 296 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && 297 width-- != 0 && !iswspace(wi)) { 298 *wcp++ = (wchar_t)wi; 299 n += nread; 300 } 301 if (wi != WEOF) 302 __ungetwc(wi, fp, __get_locale()); 303 *wcp = '\0'; 304 } 305 return (n); 306 } 307 308 /* 309 * Read an integer, storing it in buf. The only relevant bit in the 310 * flags argument is PFXOK. 311 * 312 * Return 0 on a match failure, and the number of characters read 313 * otherwise. 314 */ 315 static __inline int 316 parseint(FILE *fp, char * __restrict buf, int width, int base, int flags) 317 { 318 /* `basefix' is used to avoid `if' tests */ 319 static const short basefix[17] = 320 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 321 char *p; 322 int c; 323 324 flags |= SIGNOK | NDIGITS | NZDIGITS; 325 for (p = buf; width; width--) { 326 c = *fp->pub._p; 327 /* 328 * Switch on the character; `goto ok' if we accept it 329 * as a part of number. 330 */ 331 switch (c) { 332 333 /* 334 * The digit 0 is always legal, but is special. For 335 * %i conversions, if no digits (zero or nonzero) have 336 * been scanned (only signs), we will have base==0. 337 * In that case, we should set it to 8 and enable 0x 338 * prefixing. Also, if we have not scanned zero 339 * digits before this, do not turn off prefixing 340 * (someone else will turn it off if we have scanned 341 * any nonzero digits). 342 */ 343 case '0': 344 if (base == 0) { 345 base = 8; 346 flags |= PFXOK; 347 } 348 if (flags & NZDIGITS) 349 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 350 else 351 flags &= ~(SIGNOK|PFXOK|NDIGITS); 352 goto ok; 353 354 /* 1 through 7 always legal */ 355 case '1': case '2': case '3': 356 case '4': case '5': case '6': case '7': 357 base = basefix[base]; 358 flags &= ~(SIGNOK | PFXOK | NDIGITS); 359 goto ok; 360 361 /* digits 8 and 9 ok iff decimal or hex */ 362 case '8': case '9': 363 base = basefix[base]; 364 if (base <= 8) 365 break; /* not legal here */ 366 flags &= ~(SIGNOK | PFXOK | NDIGITS); 367 goto ok; 368 369 /* letters ok iff hex */ 370 case 'A': case 'B': case 'C': 371 case 'D': case 'E': case 'F': 372 case 'a': case 'b': case 'c': 373 case 'd': case 'e': case 'f': 374 /* no need to fix base here */ 375 if (base <= 10) 376 break; /* not legal here */ 377 flags &= ~(SIGNOK | PFXOK | NDIGITS); 378 goto ok; 379 380 /* sign ok only as first character */ 381 case '+': case '-': 382 if (flags & SIGNOK) { 383 flags &= ~SIGNOK; 384 flags |= HAVESIGN; 385 goto ok; 386 } 387 break; 388 389 /* 390 * x ok iff flag still set & 2nd char (or 3rd char if 391 * we have a sign). 392 */ 393 case 'x': case 'X': 394 if (flags & PFXOK && p == 395 buf + 1 + !!(flags & HAVESIGN)) { 396 base = 16; /* if %i */ 397 flags &= ~PFXOK; 398 goto ok; 399 } 400 break; 401 } 402 403 /* 404 * If we got here, c is not a legal character for a 405 * number. Stop accumulating digits. 406 */ 407 break; 408 ok: 409 /* 410 * c is legal: store it and look at the next. 411 */ 412 *p++ = c; 413 if (--fp->pub._r > 0) 414 fp->pub._p++; 415 else if (__srefill(fp)) 416 break; /* EOF */ 417 } 418 /* 419 * If we had only a sign, it is no good; push back the sign. 420 * If the number ends in `x', it was [sign] '0' 'x', so push 421 * back the x and treat it as [sign] '0'. 422 */ 423 if (flags & NDIGITS) { 424 if (p > buf) 425 (void) __ungetc(*(u_char *)--p, fp); 426 return (0); 427 } 428 c = ((u_char *)p)[-1]; 429 if (c == 'x' || c == 'X') { 430 --p; 431 (void) __ungetc(c, fp); 432 } 433 return (p - buf); 434 } 435 436 /* 437 * __vfscanf - MT-safe version 438 */ 439 int 440 __vfscanf(FILE *fp, char const *fmt0, va_list ap) 441 { 442 int ret; 443 444 FLOCKFILE(fp); 445 ret = __svfscanf(fp, __get_locale(), fmt0, ap); 446 FUNLOCKFILE(fp); 447 return (ret); 448 } 449 int 450 vfscanf_l(FILE *fp, locale_t locale, char const *fmt0, va_list ap) 451 { 452 int ret; 453 FIX_LOCALE(locale); 454 455 FLOCKFILE(fp); 456 ret = __svfscanf(fp, locale, fmt0, ap); 457 FUNLOCKFILE(fp); 458 return (ret); 459 } 460 461 /* 462 * __svfscanf - non-MT-safe version of __vfscanf 463 */ 464 int 465 __svfscanf(FILE *fp, locale_t locale, const char *fmt0, va_list ap) 466 { 467 #define GETARG(type) ((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type)) 468 const u_char *fmt = (const u_char *)fmt0; 469 int c; /* character from format, or conversion */ 470 size_t width; /* field width, or 0 */ 471 int flags; /* flags as defined above */ 472 int nassigned; /* number of fields assigned */ 473 int nconversions; /* number of conversions */ 474 int nr; /* characters read by the current conversion */ 475 int nread; /* number of characters consumed from fp */ 476 int base; /* base argument to conversion function */ 477 char ccltab[256]; /* character class table for %[...] */ 478 char buf[BUF]; /* buffer for numeric conversions */ 479 480 ORIENT(fp, -1); 481 482 nassigned = 0; 483 nconversions = 0; 484 nread = 0; 485 for (;;) { 486 c = *fmt++; 487 if (c == 0) 488 return (nassigned); 489 if (isspace(c)) { 490 while ((fp->pub._r > 0 || __srefill(fp) == 0) && isspace(*fp->pub._p)) 491 nread++, fp->pub._r--, fp->pub._p++; 492 continue; 493 } 494 if (c != '%') 495 goto literal; 496 width = 0; 497 flags = 0; 498 /* 499 * switch on the format. continue if done; 500 * break once format type is derived. 501 */ 502 again: c = *fmt++; 503 switch (c) { 504 case '%': 505 literal: 506 if (fp->pub._r <= 0 && __srefill(fp)) 507 goto input_failure; 508 if (*fp->pub._p != c) 509 goto match_failure; 510 fp->pub._r--, fp->pub._p++; 511 nread++; 512 continue; 513 514 case '*': 515 flags |= SUPPRESS; 516 goto again; 517 case 'j': 518 flags |= INTMAXT; 519 goto again; 520 case 'l': 521 if (flags & LONG) { 522 flags &= ~LONG; 523 flags |= LONGLONG; 524 } else 525 flags |= LONG; 526 goto again; 527 case 'q': 528 flags |= LONGLONG; /* not quite */ 529 goto again; 530 case 't': 531 flags |= PTRDIFFT; 532 goto again; 533 case 'z': 534 flags |= SIZET; 535 goto again; 536 case 'L': 537 flags |= LONGDBL; 538 goto again; 539 case 'h': 540 if (flags & SHORT) { 541 flags &= ~SHORT; 542 flags |= SHORTSHORT; 543 } else 544 flags |= SHORT; 545 goto again; 546 547 case '0': case '1': case '2': case '3': case '4': 548 case '5': case '6': case '7': case '8': case '9': 549 width = width * 10 + c - '0'; 550 goto again; 551 552 /* 553 * Conversions. 554 */ 555 case 'd': 556 c = CT_INT; 557 base = 10; 558 break; 559 560 case 'i': 561 c = CT_INT; 562 base = 0; 563 break; 564 565 case 'o': 566 c = CT_INT; 567 flags |= UNSIGNED; 568 base = 8; 569 break; 570 571 case 'u': 572 c = CT_INT; 573 flags |= UNSIGNED; 574 base = 10; 575 break; 576 577 case 'X': 578 case 'x': 579 flags |= PFXOK; /* enable 0x prefixing */ 580 c = CT_INT; 581 flags |= UNSIGNED; 582 base = 16; 583 break; 584 585 #ifndef NO_FLOATING_POINT 586 case 'A': case 'E': case 'F': case 'G': 587 case 'a': case 'e': case 'f': case 'g': 588 c = CT_FLOAT; 589 break; 590 #endif 591 592 case 'S': 593 flags |= LONG; 594 /* FALLTHROUGH */ 595 case 's': 596 c = CT_STRING; 597 break; 598 599 case '[': 600 fmt = __sccl(ccltab, fmt); 601 flags |= NOSKIP; 602 c = CT_CCL; 603 break; 604 605 case 'C': 606 flags |= LONG; 607 /* FALLTHROUGH */ 608 case 'c': 609 flags |= NOSKIP; 610 c = CT_CHAR; 611 break; 612 613 case 'p': /* pointer format is like hex */ 614 flags |= POINTER | PFXOK; 615 c = CT_INT; /* assumes sizeof(uintmax_t) */ 616 flags |= UNSIGNED; /* >= sizeof(uintptr_t) */ 617 base = 16; 618 break; 619 620 case 'n': 621 if (flags & SUPPRESS) /* ??? */ 622 continue; 623 if (flags & SHORTSHORT) 624 *va_arg(ap, char *) = nread; 625 else if (flags & SHORT) 626 *va_arg(ap, short *) = nread; 627 else if (flags & LONG) 628 *va_arg(ap, long *) = nread; 629 else if (flags & LONGLONG) 630 *va_arg(ap, long long *) = nread; 631 else if (flags & INTMAXT) 632 *va_arg(ap, intmax_t *) = nread; 633 else if (flags & SIZET) 634 *va_arg(ap, size_t *) = nread; 635 else if (flags & PTRDIFFT) 636 *va_arg(ap, ptrdiff_t *) = nread; 637 else 638 *va_arg(ap, int *) = nread; 639 continue; 640 641 default: 642 goto match_failure; 643 644 /* 645 * Disgusting backwards compatibility hack. XXX 646 */ 647 case '\0': /* compat */ 648 return (EOF); 649 } 650 651 /* 652 * We have a conversion that requires input. 653 */ 654 if (fp->pub._r <= 0 && __srefill(fp)) 655 goto input_failure; 656 657 /* 658 * Consume leading white space, except for formats 659 * that suppress this. 660 */ 661 if ((flags & NOSKIP) == 0) { 662 while (isspace(*fp->pub._p)) { 663 nread++; 664 if (--fp->pub._r > 0) 665 fp->pub._p++; 666 else if (__srefill(fp)) 667 goto input_failure; 668 } 669 /* 670 * Note that there is at least one character in 671 * the buffer, so conversions that do not set NOSKIP 672 * ca no longer result in an input failure. 673 */ 674 } 675 676 /* 677 * Do the conversion. 678 */ 679 switch (c) { 680 681 case CT_CHAR: 682 /* scan arbitrary characters (sets NOSKIP) */ 683 if (width == 0) 684 width = 1; 685 if (flags & LONG) { 686 nr = convert_wchar(fp, GETARG(wchar_t *), 687 width, locale); 688 } else { 689 nr = convert_char(fp, GETARG(char *), width); 690 } 691 if (nr < 0) 692 goto input_failure; 693 break; 694 695 case CT_CCL: 696 /* scan a (nonempty) character class (sets NOSKIP) */ 697 if (width == 0) 698 width = (size_t)~0; /* `infinity' */ 699 if (flags & LONG) { 700 nr = convert_wccl(fp, GETARG(wchar_t *), width, 701 ccltab, locale); 702 } else { 703 nr = convert_ccl(fp, GETARG(char *), width, 704 ccltab); 705 } 706 if (nr <= 0) { 707 if (nr < 0) 708 goto input_failure; 709 else /* nr == 0 */ 710 goto match_failure; 711 } 712 break; 713 714 case CT_STRING: 715 /* like CCL, but zero-length string OK, & no NOSKIP */ 716 if (width == 0) 717 width = (size_t)~0; 718 if (flags & LONG) { 719 nr = convert_wstring(fp, GETARG(wchar_t *), 720 width, locale); 721 } else { 722 nr = convert_string(fp, GETARG(char *), width); 723 } 724 if (nr < 0) 725 goto input_failure; 726 break; 727 728 case CT_INT: 729 /* scan an integer as if by the conversion function */ 730 #ifdef hardway 731 if (width == 0 || width > sizeof(buf) - 1) 732 width = sizeof(buf) - 1; 733 #else 734 /* size_t is unsigned, hence this optimisation */ 735 if (--width > sizeof(buf) - 2) 736 width = sizeof(buf) - 2; 737 width++; 738 #endif 739 nr = parseint(fp, buf, width, base, flags); 740 if (nr == 0) 741 goto match_failure; 742 if ((flags & SUPPRESS) == 0) { 743 uintmax_t res; 744 745 buf[nr] = '\0'; 746 if ((flags & UNSIGNED) == 0) 747 res = strtoimax_l(buf, NULL, base, locale); 748 else 749 res = strtoumax_l(buf, NULL, base, locale); 750 if (flags & POINTER) 751 *va_arg(ap, void **) = 752 (void *)(uintptr_t)res; 753 else if (flags & SHORTSHORT) 754 *va_arg(ap, char *) = res; 755 else if (flags & SHORT) 756 *va_arg(ap, short *) = res; 757 else if (flags & LONG) 758 *va_arg(ap, long *) = res; 759 else if (flags & LONGLONG) 760 *va_arg(ap, long long *) = res; 761 else if (flags & INTMAXT) 762 *va_arg(ap, intmax_t *) = res; 763 else if (flags & PTRDIFFT) 764 *va_arg(ap, ptrdiff_t *) = res; 765 else if (flags & SIZET) 766 *va_arg(ap, size_t *) = res; 767 else 768 *va_arg(ap, int *) = res; 769 } 770 break; 771 772 #ifndef NO_FLOATING_POINT 773 case CT_FLOAT: 774 /* scan a floating point number as if by strtod */ 775 if (width == 0 || width > sizeof(buf) - 1) 776 width = sizeof(buf) - 1; 777 nr = parsefloat(fp, buf, buf + width, locale); 778 if (nr == 0) 779 goto match_failure; 780 if ((flags & SUPPRESS) == 0) { 781 if (flags & LONGDBL) { 782 long double res = strtold_l(buf, NULL, 783 locale); 784 *va_arg(ap, long double *) = res; 785 } else if (flags & LONG) { 786 double res = strtod_l(buf, NULL, 787 locale); 788 *va_arg(ap, double *) = res; 789 } else { 790 float res = strtof_l(buf, NULL, locale); 791 *va_arg(ap, float *) = res; 792 } 793 } 794 break; 795 #endif /* !NO_FLOATING_POINT */ 796 } 797 if (!(flags & SUPPRESS)) 798 nassigned++; 799 nread += nr; 800 nconversions++; 801 } 802 input_failure: 803 return (nconversions != 0 ? nassigned : EOF); 804 match_failure: 805 return (nassigned); 806 } 807 808 /* 809 * Fill in the given table from the scanset at the given format 810 * (just after `['). Return a pointer to the character past the 811 * closing `]'. The table has a 1 wherever characters should be 812 * considered part of the scanset. 813 */ 814 static const u_char * 815 __sccl(char *tab, const u_char *fmt) 816 { 817 int c, n, v, i; 818 struct xlocale_collate *table = 819 (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; 820 821 /* first `clear' the whole table */ 822 c = *fmt++; /* first char hat => negated scanset */ 823 if (c == '^') { 824 v = 1; /* default => accept */ 825 c = *fmt++; /* get new first char */ 826 } else 827 v = 0; /* default => reject */ 828 829 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 830 (void) memset(tab, v, 256); 831 832 if (c == 0) 833 return (fmt - 1);/* format ended before closing ] */ 834 835 /* 836 * Now set the entries corresponding to the actual scanset 837 * to the opposite of the above. 838 * 839 * The first character may be ']' (or '-') without being special; 840 * the last character may be '-'. 841 */ 842 v = 1 - v; 843 for (;;) { 844 tab[c] = v; /* take character c */ 845 doswitch: 846 n = *fmt++; /* and examine the next */ 847 switch (n) { 848 849 case 0: /* format ended too soon */ 850 return (fmt - 1); 851 852 case '-': 853 /* 854 * A scanset of the form 855 * [01+-] 856 * is defined as `the digit 0, the digit 1, 857 * the character +, the character -', but 858 * the effect of a scanset such as 859 * [a-zA-Z0-9] 860 * is implementation defined. The V7 Unix 861 * scanf treats `a-z' as `the letters a through 862 * z', but treats `a-a' as `the letter a, the 863 * character -, and the letter a'. 864 * 865 * For compatibility, the `-' is not considerd 866 * to define a range if the character following 867 * it is either a close bracket (required by ANSI) 868 * or is not numerically greater than the character 869 * we just stored in the table (c). 870 */ 871 n = *fmt; 872 if (n == ']' 873 || (table->__collate_load_error ? n < c : 874 __collate_range_cmp (table, n, c) < 0 875 ) 876 ) { 877 c = '-'; 878 break; /* resume the for(;;) */ 879 } 880 fmt++; 881 /* fill in the range */ 882 if (table->__collate_load_error) { 883 do { 884 tab[++c] = v; 885 } while (c < n); 886 } else { 887 for (i = 0; i < 256; i ++) 888 if ( __collate_range_cmp (table, c, i) < 0 889 && __collate_range_cmp (table, i, n) <= 0 890 ) 891 tab[i] = v; 892 } 893 #if 1 /* XXX another disgusting compatibility hack */ 894 c = n; 895 /* 896 * Alas, the V7 Unix scanf also treats formats 897 * such as [a-c-e] as `the letters a through e'. 898 * This too is permitted by the standard.... 899 */ 900 goto doswitch; 901 #else 902 c = *fmt++; 903 if (c == 0) 904 return (fmt - 1); 905 if (c == ']') 906 return (fmt); 907 #endif 908 break; 909 910 case ']': /* end of scanset */ 911 return (fmt); 912 913 default: /* just another character */ 914 c = n; 915 break; 916 } 917 } 918 /* NOTREACHED */ 919 } 920 921 #ifndef NO_FLOATING_POINT 922 static int 923 parsefloat(FILE *fp, char *buf, char *end, locale_t locale) 924 { 925 char *commit, *p; 926 int infnanpos = 0, decptpos = 0; 927 enum { 928 S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX, 929 S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS 930 } state = S_START; 931 unsigned char c; 932 const char *decpt = localeconv_l(locale)->decimal_point; 933 _Bool gotmantdig = 0, ishex = 0; 934 935 /* 936 * We set commit = p whenever the string we have read so far 937 * constitutes a valid representation of a floating point 938 * number by itself. At some point, the parse will complete 939 * or fail, and we will ungetc() back to the last commit point. 940 * To ensure that the file offset gets updated properly, it is 941 * always necessary to read at least one character that doesn't 942 * match; thus, we can't short-circuit "infinity" or "nan(...)". 943 */ 944 commit = buf - 1; 945 for (p = buf; p < end; ) { 946 c = *fp->pub._p; 947 reswitch: 948 switch (state) { 949 case S_START: 950 state = S_GOTSIGN; 951 if (c == '-' || c == '+') 952 break; 953 else 954 goto reswitch; 955 case S_GOTSIGN: 956 switch (c) { 957 case '0': 958 state = S_MAYBEHEX; 959 commit = p; 960 break; 961 case 'I': 962 case 'i': 963 state = S_INF; 964 break; 965 case 'N': 966 case 'n': 967 state = S_NAN; 968 break; 969 default: 970 state = S_DIGITS; 971 goto reswitch; 972 } 973 break; 974 case S_INF: 975 if (infnanpos > 6 || 976 (c != "nfinity"[infnanpos] && 977 c != "NFINITY"[infnanpos])) 978 goto parsedone; 979 if (infnanpos == 1 || infnanpos == 6) 980 commit = p; /* inf or infinity */ 981 infnanpos++; 982 break; 983 case S_NAN: 984 switch (infnanpos) { 985 case 0: 986 if (c != 'A' && c != 'a') 987 goto parsedone; 988 break; 989 case 1: 990 if (c != 'N' && c != 'n') 991 goto parsedone; 992 else 993 commit = p; 994 break; 995 case 2: 996 if (c != '(') 997 goto parsedone; 998 break; 999 default: 1000 if (c == ')') { 1001 commit = p; 1002 state = S_DONE; 1003 } else if (!isalnum(c) && c != '_') 1004 goto parsedone; 1005 break; 1006 } 1007 infnanpos++; 1008 break; 1009 case S_DONE: 1010 goto parsedone; 1011 case S_MAYBEHEX: 1012 state = S_DIGITS; 1013 if (c == 'X' || c == 'x') { 1014 ishex = 1; 1015 break; 1016 } else { /* we saw a '0', but no 'x' */ 1017 gotmantdig = 1; 1018 goto reswitch; 1019 } 1020 case S_DIGITS: 1021 if ((ishex && isxdigit(c)) || isdigit(c)) { 1022 gotmantdig = 1; 1023 commit = p; 1024 break; 1025 } else { 1026 state = S_DECPT; 1027 goto reswitch; 1028 } 1029 case S_DECPT: 1030 if (c == decpt[decptpos]) { 1031 if (decpt[++decptpos] == '\0') { 1032 /* We read the complete decpt seq. */ 1033 state = S_FRAC; 1034 if (gotmantdig) 1035 commit = p; 1036 } 1037 break; 1038 } else if (!decptpos) { 1039 /* We didn't read any decpt characters. */ 1040 state = S_FRAC; 1041 goto reswitch; 1042 } else { 1043 /* 1044 * We read part of a multibyte decimal point, 1045 * but the rest is invalid, so bail. 1046 */ 1047 goto parsedone; 1048 } 1049 case S_FRAC: 1050 if (((c == 'E' || c == 'e') && !ishex) || 1051 ((c == 'P' || c == 'p') && ishex)) { 1052 if (!gotmantdig) 1053 goto parsedone; 1054 else 1055 state = S_EXP; 1056 } else if ((ishex && isxdigit(c)) || isdigit(c)) { 1057 commit = p; 1058 gotmantdig = 1; 1059 } else 1060 goto parsedone; 1061 break; 1062 case S_EXP: 1063 state = S_EXPDIGITS; 1064 if (c == '-' || c == '+') 1065 break; 1066 else 1067 goto reswitch; 1068 case S_EXPDIGITS: 1069 if (isdigit(c)) 1070 commit = p; 1071 else 1072 goto parsedone; 1073 break; 1074 default: 1075 abort(); 1076 } 1077 *p++ = c; 1078 if (--fp->pub._r > 0) 1079 fp->pub._p++; 1080 else if (__srefill(fp)) 1081 break; /* EOF */ 1082 } 1083 1084 parsedone: 1085 while (commit < --p) 1086 __ungetc(*(u_char *)p, fp); 1087 *++commit = '\0'; 1088 return (commit - buf); 1089 } 1090 #endif 1091