1 /* numeric.c 2 * 3 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 4 * 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * "That only makes eleven (plus one mislaid) and not fourteen, 13 * unless wizards count differently to other people." --Beorn 14 * 15 * [p.115 of _The Hobbit_: "Queer Lodgings"] 16 */ 17 18 /* 19 =head1 Numeric functions 20 21 =cut 22 23 This file contains all the stuff needed by perl for manipulating numeric 24 values, including such things as replacements for the OS's atof() function 25 26 */ 27 28 #include "EXTERN.h" 29 #define PERL_IN_NUMERIC_C 30 #include "perl.h" 31 32 #ifdef Perl_strtod 33 34 PERL_STATIC_INLINE NV 35 S_strtod(pTHX_ const char * const s, char ** e) 36 { 37 NV result; 38 DECLARATION_FOR_LC_NUMERIC_MANIPULATION; 39 40 STORE_LC_NUMERIC_SET_TO_NEEDED(); 41 42 # ifdef USE_QUADMATH 43 44 result = strtoflt128(s, e); 45 46 # elif defined(HAS_STRTOLD) && defined(HAS_LONG_DOUBLE) \ 47 && defined(USE_LONG_DOUBLE) 48 # if defined(__MINGW64_VERSION_MAJOR) 49 /*********************************************** 50 We are unable to use strtold because of 51 https://sourceforge.net/p/mingw-w64/bugs/711/ 52 & 53 https://sourceforge.net/p/mingw-w64/bugs/725/ 54 55 but __mingw_strtold is fine. 56 ***********************************************/ 57 58 result = __mingw_strtold(s, e); 59 60 # else 61 62 result = strtold(s, e); 63 64 # endif 65 # elif defined(HAS_STRTOD) 66 67 result = strtod(s, e); 68 69 # endif 70 71 RESTORE_LC_NUMERIC(); 72 73 return result; 74 } 75 76 #endif /* #ifdef Perl_strtod */ 77 78 /* 79 80 =for apidoc my_strtod 81 82 This function is equivalent to the libc strtod() function, and is available 83 even on platforms that lack plain strtod(). Its return value is the best 84 available precision depending on platform capabilities and F<Configure> 85 options. 86 87 It properly handles the locale radix character, meaning it expects a dot except 88 when called from within the scope of S<C<use locale>>, in which case the radix 89 character should be that specified by the current locale. 90 91 The synonym Strod() may be used instead. 92 93 =cut 94 95 */ 96 97 NV 98 my_strtod(const char * const s, char **e) 99 { 100 dTHX; 101 102 PERL_ARGS_ASSERT_MY_STRTOD; 103 104 #ifdef Perl_strtod 105 106 return S_strtod(aTHX_ s, e); 107 108 #else 109 110 { 111 NV result; 112 char ** end_ptr = NULL; 113 114 *end_ptr = my_atof2(s, &result); 115 if (e) { 116 *e = *end_ptr; 117 } 118 119 if (! *end_ptr) { 120 result = 0.0; 121 } 122 123 return result; 124 } 125 126 #endif 127 128 } 129 130 131 U32 132 Perl_cast_ulong(NV f) 133 { 134 if (f < 0.0) 135 return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f; 136 if (f < U32_MAX_P1) { 137 #if CASTFLAGS & 2 138 if (f < U32_MAX_P1_HALF) 139 return (U32) f; 140 f -= U32_MAX_P1_HALF; 141 return ((U32) f) | (1 + (U32_MAX >> 1)); 142 #else 143 return (U32) f; 144 #endif 145 } 146 return f > 0 ? U32_MAX : 0 /* NaN */; 147 } 148 149 I32 150 Perl_cast_i32(NV f) 151 { 152 if (f < I32_MAX_P1) 153 return f < I32_MIN ? I32_MIN : (I32) f; 154 if (f < U32_MAX_P1) { 155 #if CASTFLAGS & 2 156 if (f < U32_MAX_P1_HALF) 157 return (I32)(U32) f; 158 f -= U32_MAX_P1_HALF; 159 return (I32)(((U32) f) | (1 + (U32_MAX >> 1))); 160 #else 161 return (I32)(U32) f; 162 #endif 163 } 164 return f > 0 ? (I32)U32_MAX : 0 /* NaN */; 165 } 166 167 IV 168 Perl_cast_iv(NV f) 169 { 170 if (f < IV_MAX_P1) 171 return f < IV_MIN ? IV_MIN : (IV) f; 172 if (f < UV_MAX_P1) { 173 #if CASTFLAGS & 2 174 /* For future flexibility allowing for sizeof(UV) >= sizeof(IV) */ 175 if (f < UV_MAX_P1_HALF) 176 return (IV)(UV) f; 177 f -= UV_MAX_P1_HALF; 178 return (IV)(((UV) f) | (1 + (UV_MAX >> 1))); 179 #else 180 return (IV)(UV) f; 181 #endif 182 } 183 return f > 0 ? (IV)UV_MAX : 0 /* NaN */; 184 } 185 186 UV 187 Perl_cast_uv(NV f) 188 { 189 if (f < 0.0) 190 return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f; 191 if (f < UV_MAX_P1) { 192 #if CASTFLAGS & 2 193 if (f < UV_MAX_P1_HALF) 194 return (UV) f; 195 f -= UV_MAX_P1_HALF; 196 return ((UV) f) | (1 + (UV_MAX >> 1)); 197 #else 198 return (UV) f; 199 #endif 200 } 201 return f > 0 ? UV_MAX : 0 /* NaN */; 202 } 203 204 /* 205 =for apidoc grok_bin 206 207 converts a string representing a binary number to numeric form. 208 209 On entry C<start> and C<*len> give the string to scan, C<*flags> gives 210 conversion flags, and C<result> should be C<NULL> or a pointer to an NV. 211 The scan stops at the end of the string, or the first invalid character. 212 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an 213 invalid character will also trigger a warning. 214 On return C<*len> is set to the length of the scanned string, 215 and C<*flags> gives output flags. 216 217 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear, 218 and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_bin> 219 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags, 220 and writes the value to C<*result> (or the value is discarded if C<result> 221 is NULL). 222 223 The binary number may optionally be prefixed with C<"0b"> or C<"b"> unless 224 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry. If 225 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the binary 226 number may use C<"_"> characters to separate digits. 227 228 =cut 229 230 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE 231 which suppresses any message for non-portable numbers that are still valid 232 on this platform. 233 */ 234 235 UV 236 Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) 237 { 238 const char *s = start; 239 STRLEN len = *len_p; 240 UV value = 0; 241 NV value_nv = 0; 242 243 const UV max_div_2 = UV_MAX / 2; 244 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES); 245 bool overflowed = FALSE; 246 char bit; 247 248 PERL_ARGS_ASSERT_GROK_BIN; 249 250 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { 251 /* strip off leading b or 0b. 252 for compatibility silently suffer "b" and "0b" as valid binary 253 numbers. */ 254 if (len >= 1) { 255 if (isALPHA_FOLD_EQ(s[0], 'b')) { 256 s++; 257 len--; 258 } 259 else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], 'b'))) { 260 s+=2; 261 len-=2; 262 } 263 } 264 } 265 266 for (; len-- && (bit = *s); s++) { 267 if (bit == '0' || bit == '1') { 268 /* Write it in this wonky order with a goto to attempt to get the 269 compiler to make the common case integer-only loop pretty tight. 270 With gcc seems to be much straighter code than old scan_bin. */ 271 redo: 272 if (!overflowed) { 273 if (value <= max_div_2) { 274 value = (value << 1) | (bit - '0'); 275 continue; 276 } 277 /* Bah. We're just overflowed. */ 278 /* diag_listed_as: Integer overflow in %s number */ 279 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 280 "Integer overflow in binary number"); 281 overflowed = TRUE; 282 value_nv = (NV) value; 283 } 284 value_nv *= 2.0; 285 /* If an NV has not enough bits in its mantissa to 286 * represent a UV this summing of small low-order numbers 287 * is a waste of time (because the NV cannot preserve 288 * the low-order bits anyway): we could just remember when 289 * did we overflow and in the end just multiply value_nv by the 290 * right amount. */ 291 value_nv += (NV)(bit - '0'); 292 continue; 293 } 294 if (bit == '_' && len && allow_underscores && (bit = s[1]) 295 && (bit == '0' || bit == '1')) 296 { 297 --len; 298 ++s; 299 goto redo; 300 } 301 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) 302 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT), 303 "Illegal binary digit '%c' ignored", *s); 304 break; 305 } 306 307 if ( ( overflowed && value_nv > 4294967295.0) 308 #if UVSIZE > 4 309 || (!overflowed && value > 0xffffffff 310 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE)) 311 #endif 312 ) { 313 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 314 "Binary number > 0b11111111111111111111111111111111 non-portable"); 315 } 316 *len_p = s - start; 317 if (!overflowed) { 318 *flags = 0; 319 return value; 320 } 321 *flags = PERL_SCAN_GREATER_THAN_UV_MAX; 322 if (result) 323 *result = value_nv; 324 return UV_MAX; 325 } 326 327 /* 328 =for apidoc grok_hex 329 330 converts a string representing a hex number to numeric form. 331 332 On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives 333 conversion flags, and C<result> should be C<NULL> or a pointer to an NV. 334 The scan stops at the end of the string, or the first invalid character. 335 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an 336 invalid character will also trigger a warning. 337 On return C<*len> is set to the length of the scanned string, 338 and C<*flags> gives output flags. 339 340 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear, 341 and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_hex> 342 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags, 343 and writes the value to C<*result> (or the value is discarded if C<result> 344 is C<NULL>). 345 346 The hex number may optionally be prefixed with C<"0x"> or C<"x"> unless 347 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry. If 348 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the hex 349 number may use C<"_"> characters to separate digits. 350 351 =cut 352 353 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE 354 which suppresses any message for non-portable numbers, but which are valid 355 on this platform. 356 */ 357 358 UV 359 Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) 360 { 361 const char *s = start; 362 STRLEN len = *len_p; 363 UV value = 0; 364 NV value_nv = 0; 365 const UV max_div_16 = UV_MAX / 16; 366 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES); 367 bool overflowed = FALSE; 368 369 PERL_ARGS_ASSERT_GROK_HEX; 370 371 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { 372 /* strip off leading x or 0x. 373 for compatibility silently suffer "x" and "0x" as valid hex numbers. 374 */ 375 if (len >= 1) { 376 if (isALPHA_FOLD_EQ(s[0], 'x')) { 377 s++; 378 len--; 379 } 380 else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], 'x'))) { 381 s+=2; 382 len-=2; 383 } 384 } 385 } 386 387 for (; len-- && *s; s++) { 388 if (isXDIGIT(*s)) { 389 /* Write it in this wonky order with a goto to attempt to get the 390 compiler to make the common case integer-only loop pretty tight. 391 With gcc seems to be much straighter code than old scan_hex. */ 392 redo: 393 if (!overflowed) { 394 if (value <= max_div_16) { 395 value = (value << 4) | XDIGIT_VALUE(*s); 396 continue; 397 } 398 /* Bah. We're just overflowed. */ 399 /* diag_listed_as: Integer overflow in %s number */ 400 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 401 "Integer overflow in hexadecimal number"); 402 overflowed = TRUE; 403 value_nv = (NV) value; 404 } 405 value_nv *= 16.0; 406 /* If an NV has not enough bits in its mantissa to 407 * represent a UV this summing of small low-order numbers 408 * is a waste of time (because the NV cannot preserve 409 * the low-order bits anyway): we could just remember when 410 * did we overflow and in the end just multiply value_nv by the 411 * right amount of 16-tuples. */ 412 value_nv += (NV) XDIGIT_VALUE(*s); 413 continue; 414 } 415 if (*s == '_' && len && allow_underscores && s[1] 416 && isXDIGIT(s[1])) 417 { 418 --len; 419 ++s; 420 goto redo; 421 } 422 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) 423 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT), 424 "Illegal hexadecimal digit '%c' ignored", *s); 425 break; 426 } 427 428 if ( ( overflowed && value_nv > 4294967295.0) 429 #if UVSIZE > 4 430 || (!overflowed && value > 0xffffffff 431 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE)) 432 #endif 433 ) { 434 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 435 "Hexadecimal number > 0xffffffff non-portable"); 436 } 437 *len_p = s - start; 438 if (!overflowed) { 439 *flags = 0; 440 return value; 441 } 442 *flags = PERL_SCAN_GREATER_THAN_UV_MAX; 443 if (result) 444 *result = value_nv; 445 return UV_MAX; 446 } 447 448 /* 449 =for apidoc grok_oct 450 451 converts a string representing an octal number to numeric form. 452 453 On entry C<start> and C<*len> give the string to scan, C<*flags> gives 454 conversion flags, and C<result> should be C<NULL> or a pointer to an NV. 455 The scan stops at the end of the string, or the first invalid character. 456 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an 457 8 or 9 will also trigger a warning. 458 On return C<*len> is set to the length of the scanned string, 459 and C<*flags> gives output flags. 460 461 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear, 462 and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_oct> 463 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags, 464 and writes the value to C<*result> (or the value is discarded if C<result> 465 is C<NULL>). 466 467 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the octal 468 number may use C<"_"> characters to separate digits. 469 470 =cut 471 472 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE> 473 which suppresses any message for non-portable numbers, but which are valid 474 on this platform. 475 */ 476 477 UV 478 Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) 479 { 480 const char *s = start; 481 STRLEN len = *len_p; 482 UV value = 0; 483 NV value_nv = 0; 484 const UV max_div_8 = UV_MAX / 8; 485 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES); 486 bool overflowed = FALSE; 487 488 PERL_ARGS_ASSERT_GROK_OCT; 489 490 for (; len-- && *s; s++) { 491 if (isOCTAL(*s)) { 492 /* Write it in this wonky order with a goto to attempt to get the 493 compiler to make the common case integer-only loop pretty tight. 494 */ 495 redo: 496 if (!overflowed) { 497 if (value <= max_div_8) { 498 value = (value << 3) | OCTAL_VALUE(*s); 499 continue; 500 } 501 /* Bah. We're just overflowed. */ 502 /* diag_listed_as: Integer overflow in %s number */ 503 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 504 "Integer overflow in octal number"); 505 overflowed = TRUE; 506 value_nv = (NV) value; 507 } 508 value_nv *= 8.0; 509 /* If an NV has not enough bits in its mantissa to 510 * represent a UV this summing of small low-order numbers 511 * is a waste of time (because the NV cannot preserve 512 * the low-order bits anyway): we could just remember when 513 * did we overflow and in the end just multiply value_nv by the 514 * right amount of 8-tuples. */ 515 value_nv += (NV) OCTAL_VALUE(*s); 516 continue; 517 } 518 if (*s == '_' && len && allow_underscores && isOCTAL(s[1])) { 519 --len; 520 ++s; 521 goto redo; 522 } 523 /* Allow \octal to work the DWIM way (that is, stop scanning 524 * as soon as non-octal characters are seen, complain only if 525 * someone seems to want to use the digits eight and nine. Since we 526 * know it is not octal, then if isDIGIT, must be an 8 or 9). */ 527 if (isDIGIT(*s)) { 528 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) 529 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT), 530 "Illegal octal digit '%c' ignored", *s); 531 } 532 break; 533 } 534 535 if ( ( overflowed && value_nv > 4294967295.0) 536 #if UVSIZE > 4 537 || (!overflowed && value > 0xffffffff 538 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE)) 539 #endif 540 ) { 541 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 542 "Octal number > 037777777777 non-portable"); 543 } 544 *len_p = s - start; 545 if (!overflowed) { 546 *flags = 0; 547 return value; 548 } 549 *flags = PERL_SCAN_GREATER_THAN_UV_MAX; 550 if (result) 551 *result = value_nv; 552 return UV_MAX; 553 } 554 555 /* 556 =for apidoc scan_bin 557 558 For backwards compatibility. Use C<grok_bin> instead. 559 560 =for apidoc scan_hex 561 562 For backwards compatibility. Use C<grok_hex> instead. 563 564 =for apidoc scan_oct 565 566 For backwards compatibility. Use C<grok_oct> instead. 567 568 =cut 569 */ 570 571 NV 572 Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen) 573 { 574 NV rnv; 575 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0; 576 const UV ruv = grok_bin (start, &len, &flags, &rnv); 577 578 PERL_ARGS_ASSERT_SCAN_BIN; 579 580 *retlen = len; 581 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv; 582 } 583 584 NV 585 Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen) 586 { 587 NV rnv; 588 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0; 589 const UV ruv = grok_oct (start, &len, &flags, &rnv); 590 591 PERL_ARGS_ASSERT_SCAN_OCT; 592 593 *retlen = len; 594 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv; 595 } 596 597 NV 598 Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen) 599 { 600 NV rnv; 601 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0; 602 const UV ruv = grok_hex (start, &len, &flags, &rnv); 603 604 PERL_ARGS_ASSERT_SCAN_HEX; 605 606 *retlen = len; 607 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv; 608 } 609 610 /* 611 =for apidoc grok_numeric_radix 612 613 Scan and skip for a numeric decimal separator (radix). 614 615 =cut 616 */ 617 bool 618 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send) 619 { 620 PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX; 621 622 #ifdef USE_LOCALE_NUMERIC 623 624 if (IN_LC(LC_NUMERIC)) { 625 STRLEN len; 626 char * radix; 627 bool matches_radix = FALSE; 628 DECLARATION_FOR_LC_NUMERIC_MANIPULATION; 629 630 STORE_LC_NUMERIC_FORCE_TO_UNDERLYING(); 631 632 radix = SvPV(PL_numeric_radix_sv, len); 633 radix = savepvn(radix, len); 634 635 RESTORE_LC_NUMERIC(); 636 637 if (*sp + len <= send) { 638 matches_radix = memEQ(*sp, radix, len); 639 } 640 641 Safefree(radix); 642 643 if (matches_radix) { 644 *sp += len; 645 return TRUE; 646 } 647 } 648 649 #endif 650 651 /* always try "." if numeric radix didn't match because 652 * we may have data from different locales mixed */ 653 if (*sp < send && **sp == '.') { 654 ++*sp; 655 return TRUE; 656 } 657 658 return FALSE; 659 } 660 661 /* 662 =for apidoc grok_infnan 663 664 Helper for C<grok_number()>, accepts various ways of spelling "infinity" 665 or "not a number", and returns one of the following flag combinations: 666 667 IS_NUMBER_INFINITY 668 IS_NUMBER_NAN 669 IS_NUMBER_INFINITY | IS_NUMBER_NEG 670 IS_NUMBER_NAN | IS_NUMBER_NEG 671 0 672 673 possibly |-ed with C<IS_NUMBER_TRAILING>. 674 675 If an infinity or a not-a-number is recognized, C<*sp> will point to 676 one byte past the end of the recognized string. If the recognition fails, 677 zero is returned, and C<*sp> will not move. 678 679 =cut 680 */ 681 682 int 683 Perl_grok_infnan(pTHX_ const char** sp, const char* send) 684 { 685 const char* s = *sp; 686 int flags = 0; 687 #if defined(NV_INF) || defined(NV_NAN) 688 bool odh = FALSE; /* one-dot-hash: 1.#INF */ 689 690 PERL_ARGS_ASSERT_GROK_INFNAN; 691 692 if (*s == '+') { 693 s++; if (s == send) return 0; 694 } 695 else if (*s == '-') { 696 flags |= IS_NUMBER_NEG; /* Yes, -NaN happens. Incorrect but happens. */ 697 s++; if (s == send) return 0; 698 } 699 700 if (*s == '1') { 701 /* Visual C: 1.#SNAN, -1.#QNAN, 1#INF, 1.#IND (maybe also 1.#NAN) 702 * Let's keep the dot optional. */ 703 s++; if (s == send) return 0; 704 if (*s == '.') { 705 s++; if (s == send) return 0; 706 } 707 if (*s == '#') { 708 s++; if (s == send) return 0; 709 } else 710 return 0; 711 odh = TRUE; 712 } 713 714 if (isALPHA_FOLD_EQ(*s, 'I')) { 715 /* INF or IND (1.#IND is "indeterminate", a certain type of NAN) */ 716 717 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0; 718 s++; if (s == send) return 0; 719 if (isALPHA_FOLD_EQ(*s, 'F')) { 720 s++; 721 if (s < send && (isALPHA_FOLD_EQ(*s, 'I'))) { 722 int fail = 723 flags | IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT | IS_NUMBER_TRAILING; 724 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return fail; 725 s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return fail; 726 s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return fail; 727 s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return fail; 728 s++; 729 } else if (odh) { 730 while (*s == '0') { /* 1.#INF00 */ 731 s++; 732 } 733 } 734 while (s < send && isSPACE(*s)) 735 s++; 736 if (s < send && *s) { 737 flags |= IS_NUMBER_TRAILING; 738 } 739 flags |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; 740 } 741 else if (isALPHA_FOLD_EQ(*s, 'D') && odh) { /* 1.#IND */ 742 s++; 743 flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; 744 while (*s == '0') { /* 1.#IND00 */ 745 s++; 746 } 747 if (*s) { 748 flags |= IS_NUMBER_TRAILING; 749 } 750 } else 751 return 0; 752 } 753 else { 754 /* Maybe NAN of some sort */ 755 756 if (isALPHA_FOLD_EQ(*s, 'S') || isALPHA_FOLD_EQ(*s, 'Q')) { 757 /* snan, qNaN */ 758 /* XXX do something with the snan/qnan difference */ 759 s++; if (s == send) return 0; 760 } 761 762 if (isALPHA_FOLD_EQ(*s, 'N')) { 763 s++; if (s == send || isALPHA_FOLD_NE(*s, 'A')) return 0; 764 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0; 765 s++; 766 767 flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; 768 if (s == send) { 769 return flags; 770 } 771 772 /* NaN can be followed by various stuff (NaNQ, NaNS), but 773 * there are also multiple different NaN values, and some 774 * implementations output the "payload" values, 775 * e.g. NaN123, NAN(abc), while some legacy implementations 776 * have weird stuff like NaN%. */ 777 if (isALPHA_FOLD_EQ(*s, 'q') || 778 isALPHA_FOLD_EQ(*s, 's')) { 779 /* "nanq" or "nans" are ok, though generating 780 * these portably is tricky. */ 781 s++; 782 if (s == send) { 783 return flags; 784 } 785 } 786 if (*s == '(') { 787 /* C99 style "nan(123)" or Perlish equivalent "nan($uv)". */ 788 const char *t; 789 s++; 790 if (s == send) { 791 return flags | IS_NUMBER_TRAILING; 792 } 793 t = s + 1; 794 while (t < send && *t && *t != ')') { 795 t++; 796 } 797 if (t == send) { 798 return flags | IS_NUMBER_TRAILING; 799 } 800 if (*t == ')') { 801 int nantype; 802 UV nanval; 803 if (s[0] == '0' && s + 2 < t && 804 isALPHA_FOLD_EQ(s[1], 'x') && 805 isXDIGIT(s[2])) { 806 STRLEN len = t - s; 807 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES; 808 nanval = grok_hex(s, &len, &flags, NULL); 809 if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) { 810 nantype = 0; 811 } else { 812 nantype = IS_NUMBER_IN_UV; 813 } 814 s += len; 815 } else if (s[0] == '0' && s + 2 < t && 816 isALPHA_FOLD_EQ(s[1], 'b') && 817 (s[2] == '0' || s[2] == '1')) { 818 STRLEN len = t - s; 819 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES; 820 nanval = grok_bin(s, &len, &flags, NULL); 821 if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) { 822 nantype = 0; 823 } else { 824 nantype = IS_NUMBER_IN_UV; 825 } 826 s += len; 827 } else { 828 const char *u; 829 nantype = 830 grok_number_flags(s, t - s, &nanval, 831 PERL_SCAN_TRAILING | 832 PERL_SCAN_ALLOW_UNDERSCORES); 833 /* Unfortunately grok_number_flags() doesn't 834 * tell how far we got and the ')' will always 835 * be "trailing", so we need to double-check 836 * whether we had something dubious. */ 837 for (u = s; u < t; u++) { 838 if (!isDIGIT(*u)) { 839 flags |= IS_NUMBER_TRAILING; 840 break; 841 } 842 } 843 s = u; 844 } 845 846 /* XXX Doesn't do octal: nan("0123"). 847 * Probably not a big loss. */ 848 849 if ((nantype & IS_NUMBER_NOT_INT) || 850 !(nantype && IS_NUMBER_IN_UV)) { 851 /* XXX the nanval is currently unused, that is, 852 * not inserted as the NaN payload of the NV. 853 * But the above code already parses the C99 854 * nan(...) format. See below, and see also 855 * the nan() in POSIX.xs. 856 * 857 * Certain configuration combinations where 858 * NVSIZE is greater than UVSIZE mean that 859 * a single UV cannot contain all the possible 860 * NaN payload bits. There would need to be 861 * some more generic syntax than "nan($uv)". 862 * 863 * Issues to keep in mind: 864 * 865 * (1) In most common cases there would 866 * not be an integral number of bytes that 867 * could be set, only a certain number of bits. 868 * For example for the common case of 869 * NVSIZE == UVSIZE == 8 there is room for 52 870 * bits in the payload, but the most significant 871 * bit is commonly reserved for the 872 * signaling/quiet bit, leaving 51 bits. 873 * Furthermore, the C99 nan() is supposed 874 * to generate quiet NaNs, so it is doubtful 875 * whether it should be able to generate 876 * signaling NaNs. For the x86 80-bit doubles 877 * (if building a long double Perl) there would 878 * be 62 bits (s/q bit being the 63rd). 879 * 880 * (2) Endianness of the payload bits. If the 881 * payload is specified as an UV, the low-order 882 * bits of the UV are naturally little-endianed 883 * (rightmost) bits of the payload. The endianness 884 * of UVs and NVs can be different. */ 885 return 0; 886 } 887 if (s < t) { 888 flags |= IS_NUMBER_TRAILING; 889 } 890 } else { 891 /* Looked like nan(...), but no close paren. */ 892 flags |= IS_NUMBER_TRAILING; 893 } 894 } else { 895 while (s < send && isSPACE(*s)) 896 s++; 897 if (s < send && *s) { 898 /* Note that we here implicitly accept (parse as 899 * "nan", but with warnings) also any other weird 900 * trailing stuff for "nan". In the above we just 901 * check that if we got the C99-style "nan(...)", 902 * the "..." looks sane. 903 * If in future we accept more ways of specifying 904 * the nan payload, the accepting would happen around 905 * here. */ 906 flags |= IS_NUMBER_TRAILING; 907 } 908 } 909 s = send; 910 } 911 else 912 return 0; 913 } 914 915 while (s < send && isSPACE(*s)) 916 s++; 917 918 #else 919 PERL_UNUSED_ARG(send); 920 #endif /* #if defined(NV_INF) || defined(NV_NAN) */ 921 *sp = s; 922 return flags; 923 } 924 925 /* 926 =for apidoc grok_number_flags 927 928 Recognise (or not) a number. The type of the number is returned 929 (0 if unrecognised), otherwise it is a bit-ORed combination of 930 C<IS_NUMBER_IN_UV>, C<IS_NUMBER_GREATER_THAN_UV_MAX>, C<IS_NUMBER_NOT_INT>, 931 C<IS_NUMBER_NEG>, C<IS_NUMBER_INFINITY>, C<IS_NUMBER_NAN> (defined in perl.h). 932 933 If the value of the number can fit in a UV, it is returned in C<*valuep>. 934 C<IS_NUMBER_IN_UV> will be set to indicate that C<*valuep> is valid, C<IS_NUMBER_IN_UV> 935 will never be set unless C<*valuep> is valid, but C<*valuep> may have been assigned 936 to during processing even though C<IS_NUMBER_IN_UV> is not set on return. 937 If C<valuep> is C<NULL>, C<IS_NUMBER_IN_UV> will be set for the same cases as when 938 C<valuep> is non-C<NULL>, but no actual assignment (or SEGV) will occur. 939 940 C<IS_NUMBER_NOT_INT> will be set with C<IS_NUMBER_IN_UV> if trailing decimals were 941 seen (in which case C<*valuep> gives the true value truncated to an integer), and 942 C<IS_NUMBER_NEG> if the number is negative (in which case C<*valuep> holds the 943 absolute value). C<IS_NUMBER_IN_UV> is not set if e notation was used or the 944 number is larger than a UV. 945 946 C<flags> allows only C<PERL_SCAN_TRAILING>, which allows for trailing 947 non-numeric text on an otherwise successful I<grok>, setting 948 C<IS_NUMBER_TRAILING> on the result. 949 950 =for apidoc grok_number 951 952 Identical to C<grok_number_flags()> with C<flags> set to zero. 953 954 =cut 955 */ 956 int 957 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep) 958 { 959 PERL_ARGS_ASSERT_GROK_NUMBER; 960 961 return grok_number_flags(pv, len, valuep, 0); 962 } 963 964 static const UV uv_max_div_10 = UV_MAX / 10; 965 static const U8 uv_max_mod_10 = UV_MAX % 10; 966 967 int 968 Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags) 969 { 970 const char *s = pv; 971 const char * const send = pv + len; 972 const char *d; 973 int numtype = 0; 974 975 PERL_ARGS_ASSERT_GROK_NUMBER_FLAGS; 976 977 while (s < send && isSPACE(*s)) 978 s++; 979 if (s == send) { 980 return 0; 981 } else if (*s == '-') { 982 s++; 983 numtype = IS_NUMBER_NEG; 984 } 985 else if (*s == '+') 986 s++; 987 988 if (s == send) 989 return 0; 990 991 /* The first digit (after optional sign): note that might 992 * also point to "infinity" or "nan", or "1.#INF". */ 993 d = s; 994 995 /* next must be digit or the radix separator or beginning of infinity/nan */ 996 if (isDIGIT(*s)) { 997 /* UVs are at least 32 bits, so the first 9 decimal digits cannot 998 overflow. */ 999 UV value = *s - '0'; 1000 /* This construction seems to be more optimiser friendly. 1001 (without it gcc does the isDIGIT test and the *s - '0' separately) 1002 With it gcc on arm is managing 6 instructions (6 cycles) per digit. 1003 In theory the optimiser could deduce how far to unroll the loop 1004 before checking for overflow. */ 1005 if (++s < send) { 1006 int digit = *s - '0'; 1007 if (inRANGE(digit, 0, 9)) { 1008 value = value * 10 + digit; 1009 if (++s < send) { 1010 digit = *s - '0'; 1011 if (inRANGE(digit, 0, 9)) { 1012 value = value * 10 + digit; 1013 if (++s < send) { 1014 digit = *s - '0'; 1015 if (inRANGE(digit, 0, 9)) { 1016 value = value * 10 + digit; 1017 if (++s < send) { 1018 digit = *s - '0'; 1019 if (inRANGE(digit, 0, 9)) { 1020 value = value * 10 + digit; 1021 if (++s < send) { 1022 digit = *s - '0'; 1023 if (inRANGE(digit, 0, 9)) { 1024 value = value * 10 + digit; 1025 if (++s < send) { 1026 digit = *s - '0'; 1027 if (inRANGE(digit, 0, 9)) { 1028 value = value * 10 + digit; 1029 if (++s < send) { 1030 digit = *s - '0'; 1031 if (inRANGE(digit, 0, 9)) { 1032 value = value * 10 + digit; 1033 if (++s < send) { 1034 digit = *s - '0'; 1035 if (inRANGE(digit, 0, 9)) { 1036 value = value * 10 + digit; 1037 if (++s < send) { 1038 /* Now got 9 digits, so need to check 1039 each time for overflow. */ 1040 digit = *s - '0'; 1041 while ( inRANGE(digit, 0, 9) 1042 && (value < uv_max_div_10 1043 || (value == uv_max_div_10 1044 && digit <= uv_max_mod_10))) { 1045 value = value * 10 + digit; 1046 if (++s < send) 1047 digit = *s - '0'; 1048 else 1049 break; 1050 } 1051 if (inRANGE(digit, 0, 9) 1052 && (s < send)) { 1053 /* value overflowed. 1054 skip the remaining digits, don't 1055 worry about setting *valuep. */ 1056 do { 1057 s++; 1058 } while (s < send && isDIGIT(*s)); 1059 numtype |= 1060 IS_NUMBER_GREATER_THAN_UV_MAX; 1061 goto skip_value; 1062 } 1063 } 1064 } 1065 } 1066 } 1067 } 1068 } 1069 } 1070 } 1071 } 1072 } 1073 } 1074 } 1075 } 1076 } 1077 } 1078 } 1079 } 1080 numtype |= IS_NUMBER_IN_UV; 1081 if (valuep) 1082 *valuep = value; 1083 1084 skip_value: 1085 if (GROK_NUMERIC_RADIX(&s, send)) { 1086 numtype |= IS_NUMBER_NOT_INT; 1087 while (s < send && isDIGIT(*s)) /* optional digits after the radix */ 1088 s++; 1089 } 1090 } 1091 else if (GROK_NUMERIC_RADIX(&s, send)) { 1092 numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */ 1093 /* no digits before the radix means we need digits after it */ 1094 if (s < send && isDIGIT(*s)) { 1095 do { 1096 s++; 1097 } while (s < send && isDIGIT(*s)); 1098 if (valuep) { 1099 /* integer approximation is valid - it's 0. */ 1100 *valuep = 0; 1101 } 1102 } 1103 else 1104 return 0; 1105 } 1106 1107 if (s > d && s < send) { 1108 /* we can have an optional exponent part */ 1109 if (isALPHA_FOLD_EQ(*s, 'e')) { 1110 s++; 1111 if (s < send && (*s == '-' || *s == '+')) 1112 s++; 1113 if (s < send && isDIGIT(*s)) { 1114 do { 1115 s++; 1116 } while (s < send && isDIGIT(*s)); 1117 } 1118 else if (flags & PERL_SCAN_TRAILING) 1119 return numtype | IS_NUMBER_TRAILING; 1120 else 1121 return 0; 1122 1123 /* The only flag we keep is sign. Blow away any "it's UV" */ 1124 numtype &= IS_NUMBER_NEG; 1125 numtype |= IS_NUMBER_NOT_INT; 1126 } 1127 } 1128 while (s < send && isSPACE(*s)) 1129 s++; 1130 if (s >= send) 1131 return numtype; 1132 if (memEQs(pv, len, "0 but true")) { 1133 if (valuep) 1134 *valuep = 0; 1135 return IS_NUMBER_IN_UV; 1136 } 1137 /* We could be e.g. at "Inf" or "NaN", or at the "#" of "1.#INF". */ 1138 if ((s + 2 < send) && strchr("inqs#", toFOLD(*s))) { 1139 /* Really detect inf/nan. Start at d, not s, since the above 1140 * code might have already consumed the "1." or "1". */ 1141 const int infnan = Perl_grok_infnan(aTHX_ &d, send); 1142 if ((infnan & IS_NUMBER_INFINITY)) { 1143 return (numtype | infnan); /* Keep sign for infinity. */ 1144 } 1145 else if ((infnan & IS_NUMBER_NAN)) { 1146 return (numtype | infnan) & ~IS_NUMBER_NEG; /* Clear sign for nan. */ 1147 } 1148 } 1149 else if (flags & PERL_SCAN_TRAILING) { 1150 return numtype | IS_NUMBER_TRAILING; 1151 } 1152 1153 return 0; 1154 } 1155 1156 /* 1157 =for apidoc grok_atoUV 1158 1159 parse a string, looking for a decimal unsigned integer. 1160 1161 On entry, C<pv> points to the beginning of the string; 1162 C<valptr> points to a UV that will receive the converted value, if found; 1163 C<endptr> is either NULL or points to a variable that points to one byte 1164 beyond the point in C<pv> that this routine should examine. 1165 If C<endptr> is NULL, C<pv> is assumed to be NUL-terminated. 1166 1167 Returns FALSE if C<pv> doesn't represent a valid unsigned integer value (with 1168 no leading zeros). Otherwise it returns TRUE, and sets C<*valptr> to that 1169 value. 1170 1171 If you constrain the portion of C<pv> that is looked at by this function (by 1172 passing a non-NULL C<endptr>), and if the intial bytes of that portion form a 1173 valid value, it will return TRUE, setting C<*endptr> to the byte following the 1174 final digit of the value. But if there is no constraint at what's looked at, 1175 all of C<pv> must be valid in order for TRUE to be returned. 1176 1177 The only characters this accepts are the decimal digits '0'..'9'. 1178 1179 As opposed to L<atoi(3)> or L<strtol(3)>, C<grok_atoUV> does NOT allow optional 1180 leading whitespace, nor negative inputs. If such features are required, the 1181 calling code needs to explicitly implement those. 1182 1183 Note that this function returns FALSE for inputs that would overflow a UV, 1184 or have leading zeros. Thus a single C<0> is accepted, but not C<00> nor 1185 C<01>, C<002>, I<etc>. 1186 1187 Background: C<atoi> has severe problems with illegal inputs, it cannot be 1188 used for incremental parsing, and therefore should be avoided 1189 C<atoi> and C<strtol> are also affected by locale settings, which can also be 1190 seen as a bug (global state controlled by user environment). 1191 1192 =cut 1193 1194 */ 1195 1196 bool 1197 Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr) 1198 { 1199 const char* s = pv; 1200 const char** eptr; 1201 const char* end2; /* Used in case endptr is NULL. */ 1202 UV val = 0; /* The parsed value. */ 1203 1204 PERL_ARGS_ASSERT_GROK_ATOUV; 1205 1206 if (endptr) { 1207 eptr = endptr; 1208 } 1209 else { 1210 end2 = s + strlen(s); 1211 eptr = &end2; 1212 } 1213 1214 if ( *eptr <= s 1215 || ! isDIGIT(*s)) 1216 { 1217 return FALSE; 1218 } 1219 1220 /* Single-digit inputs are quite common. */ 1221 val = *s++ - '0'; 1222 if (s < *eptr && isDIGIT(*s)) { 1223 /* Fail on extra leading zeros. */ 1224 if (val == 0) 1225 return FALSE; 1226 while (s < *eptr && isDIGIT(*s)) { 1227 /* This could be unrolled like in grok_number(), but 1228 * the expected uses of this are not speed-needy, and 1229 * unlikely to need full 64-bitness. */ 1230 const U8 digit = *s++ - '0'; 1231 if (val < uv_max_div_10 || 1232 (val == uv_max_div_10 && digit <= uv_max_mod_10)) { 1233 val = val * 10 + digit; 1234 } else { 1235 return FALSE; 1236 } 1237 } 1238 } 1239 1240 if (endptr == NULL) { 1241 if (*s) { 1242 return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */ 1243 } 1244 } 1245 else { 1246 *endptr = s; 1247 } 1248 1249 *valptr = val; 1250 return TRUE; 1251 } 1252 1253 #ifndef Perl_strtod 1254 STATIC NV 1255 S_mulexp10(NV value, I32 exponent) 1256 { 1257 NV result = 1.0; 1258 NV power = 10.0; 1259 bool negative = 0; 1260 I32 bit; 1261 1262 if (exponent == 0) 1263 return value; 1264 if (value == 0) 1265 return (NV)0; 1266 1267 /* On OpenVMS VAX we by default use the D_FLOAT double format, 1268 * and that format does not have *easy* capabilities [1] for 1269 * overflowing doubles 'silently' as IEEE fp does. We also need 1270 * to support G_FLOAT on both VAX and Alpha, and though the exponent 1271 * range is much larger than D_FLOAT it still doesn't do silent 1272 * overflow. Therefore we need to detect early whether we would 1273 * overflow (this is the behaviour of the native string-to-float 1274 * conversion routines, and therefore of native applications, too). 1275 * 1276 * [1] Trying to establish a condition handler to trap floating point 1277 * exceptions is not a good idea. */ 1278 1279 /* In UNICOS and in certain Cray models (such as T90) there is no 1280 * IEEE fp, and no way at all from C to catch fp overflows gracefully. 1281 * There is something you can do if you are willing to use some 1282 * inline assembler: the instruction is called DFI-- but that will 1283 * disable *all* floating point interrupts, a little bit too large 1284 * a hammer. Therefore we need to catch potential overflows before 1285 * it's too late. */ 1286 1287 #if ((defined(VMS) && !defined(_IEEE_FP)) || defined(_UNICOS) || defined(DOUBLE_IS_VAX_FLOAT)) && defined(NV_MAX_10_EXP) 1288 STMT_START { 1289 const NV exp_v = log10(value); 1290 if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP) 1291 return NV_MAX; 1292 if (exponent < 0) { 1293 if (-(exponent + exp_v) >= NV_MAX_10_EXP) 1294 return 0.0; 1295 while (-exponent >= NV_MAX_10_EXP) { 1296 /* combination does not overflow, but 10^(-exponent) does */ 1297 value /= 10; 1298 ++exponent; 1299 } 1300 } 1301 } STMT_END; 1302 #endif 1303 1304 if (exponent < 0) { 1305 negative = 1; 1306 exponent = -exponent; 1307 #ifdef NV_MAX_10_EXP 1308 /* for something like 1234 x 10^-309, the action of calculating 1309 * the intermediate value 10^309 then returning 1234 / (10^309) 1310 * will fail, since 10^309 becomes infinity. In this case try to 1311 * refactor it as 123 / (10^308) etc. 1312 */ 1313 while (value && exponent > NV_MAX_10_EXP) { 1314 exponent--; 1315 value /= 10; 1316 } 1317 if (value == 0.0) 1318 return value; 1319 #endif 1320 } 1321 #if defined(__osf__) 1322 /* Even with cc -ieee + ieee_set_fp_control(IEEE_TRAP_ENABLE_INV) 1323 * Tru64 fp behavior on inf/nan is somewhat broken. Another way 1324 * to do this would be ieee_set_fp_control(IEEE_TRAP_ENABLE_OVF) 1325 * but that breaks another set of infnan.t tests. */ 1326 # define FP_OVERFLOWS_TO_ZERO 1327 #endif 1328 for (bit = 1; exponent; bit <<= 1) { 1329 if (exponent & bit) { 1330 exponent ^= bit; 1331 result *= power; 1332 #ifdef FP_OVERFLOWS_TO_ZERO 1333 if (result == 0) 1334 # ifdef NV_INF 1335 return value < 0 ? -NV_INF : NV_INF; 1336 # else 1337 return value < 0 ? -FLT_MAX : FLT_MAX; 1338 # endif 1339 #endif 1340 /* Floating point exceptions are supposed to be turned off, 1341 * but if we're obviously done, don't risk another iteration. 1342 */ 1343 if (exponent == 0) break; 1344 } 1345 power *= power; 1346 } 1347 return negative ? value / result : value * result; 1348 } 1349 #endif /* #ifndef Perl_strtod */ 1350 1351 #ifdef Perl_strtod 1352 # define ATOF(s, x) my_atof2(s, &x) 1353 #else 1354 # define ATOF(s, x) Perl_atof2(s, x) 1355 #endif 1356 1357 NV 1358 Perl_my_atof(pTHX_ const char* s) 1359 { 1360 /* 's' must be NUL terminated */ 1361 1362 NV x = 0.0; 1363 1364 PERL_ARGS_ASSERT_MY_ATOF; 1365 1366 #if ! defined(USE_LOCALE_NUMERIC) 1367 1368 ATOF(s, x); 1369 1370 #else 1371 1372 { 1373 DECLARATION_FOR_LC_NUMERIC_MANIPULATION; 1374 STORE_LC_NUMERIC_SET_TO_NEEDED(); 1375 if (! (PL_numeric_radix_sv && IN_LC(LC_NUMERIC))) { 1376 ATOF(s,x); 1377 } 1378 else { 1379 1380 /* Look through the string for the first thing that looks like a 1381 * decimal point: either the value in the current locale or the 1382 * standard fallback of '.'. The one which appears earliest in the 1383 * input string is the one that we should have atof look for. Note 1384 * that we have to determine this beforehand because on some 1385 * systems, Perl_atof2 is just a wrapper around the system's atof. 1386 * */ 1387 const char * const standard_pos = strchr(s, '.'); 1388 const char * const local_pos 1389 = strstr(s, SvPV_nolen(PL_numeric_radix_sv)); 1390 const bool use_standard_radix 1391 = standard_pos && (!local_pos || standard_pos < local_pos); 1392 1393 if (use_standard_radix) { 1394 SET_NUMERIC_STANDARD(); 1395 LOCK_LC_NUMERIC_STANDARD(); 1396 } 1397 1398 ATOF(s,x); 1399 1400 if (use_standard_radix) { 1401 UNLOCK_LC_NUMERIC_STANDARD(); 1402 SET_NUMERIC_UNDERLYING(); 1403 } 1404 } 1405 RESTORE_LC_NUMERIC(); 1406 } 1407 1408 #endif 1409 1410 return x; 1411 } 1412 1413 #if defined(NV_INF) || defined(NV_NAN) 1414 1415 #ifdef USING_MSVC6 1416 # pragma warning(push) 1417 # pragma warning(disable:4756;disable:4056) 1418 #endif 1419 static char* 1420 S_my_atof_infnan(pTHX_ const char* s, bool negative, const char* send, NV* value) 1421 { 1422 const char *p0 = negative ? s - 1 : s; 1423 const char *p = p0; 1424 const int infnan = grok_infnan(&p, send); 1425 if (infnan && p != p0) { 1426 /* If we can generate inf/nan directly, let's do so. */ 1427 #ifdef NV_INF 1428 if ((infnan & IS_NUMBER_INFINITY)) { 1429 *value = (infnan & IS_NUMBER_NEG) ? -NV_INF: NV_INF; 1430 return (char*)p; 1431 } 1432 #endif 1433 #ifdef NV_NAN 1434 if ((infnan & IS_NUMBER_NAN)) { 1435 *value = NV_NAN; 1436 return (char*)p; 1437 } 1438 #endif 1439 #ifdef Perl_strtod 1440 /* If still here, we didn't have either NV_INF or NV_NAN, 1441 * and can try falling back to native strtod/strtold. 1442 * 1443 * The native interface might not recognize all the possible 1444 * inf/nan strings Perl recognizes. What we can try 1445 * is to try faking the input. We will try inf/-inf/nan 1446 * as the most promising/portable input. */ 1447 { 1448 const char* fake = "silence compiler warning"; 1449 char* endp; 1450 NV nv; 1451 #ifdef NV_INF 1452 if ((infnan & IS_NUMBER_INFINITY)) { 1453 fake = ((infnan & IS_NUMBER_NEG)) ? "-inf" : "inf"; 1454 } 1455 #endif 1456 #ifdef NV_NAN 1457 if ((infnan & IS_NUMBER_NAN)) { 1458 fake = "nan"; 1459 } 1460 #endif 1461 assert(strNE(fake, "silence compiler warning")); 1462 nv = S_strtod(aTHX_ fake, &endp); 1463 if (fake != endp) { 1464 #ifdef NV_INF 1465 if ((infnan & IS_NUMBER_INFINITY)) { 1466 # ifdef Perl_isinf 1467 if (Perl_isinf(nv)) 1468 *value = nv; 1469 # else 1470 /* last resort, may generate SIGFPE */ 1471 *value = Perl_exp((NV)1e9); 1472 if ((infnan & IS_NUMBER_NEG)) 1473 *value = -*value; 1474 # endif 1475 return (char*)p; /* p, not endp */ 1476 } 1477 #endif 1478 #ifdef NV_NAN 1479 if ((infnan & IS_NUMBER_NAN)) { 1480 # ifdef Perl_isnan 1481 if (Perl_isnan(nv)) 1482 *value = nv; 1483 # else 1484 /* last resort, may generate SIGFPE */ 1485 *value = Perl_log((NV)-1.0); 1486 # endif 1487 return (char*)p; /* p, not endp */ 1488 #endif 1489 } 1490 } 1491 } 1492 #endif /* #ifdef Perl_strtod */ 1493 } 1494 return NULL; 1495 } 1496 #ifdef USING_MSVC6 1497 # pragma warning(pop) 1498 #endif 1499 1500 #endif /* if defined(NV_INF) || defined(NV_NAN) */ 1501 1502 char* 1503 Perl_my_atof2(pTHX_ const char* orig, NV* value) 1504 { 1505 PERL_ARGS_ASSERT_MY_ATOF2; 1506 return my_atof3(orig, value, 0); 1507 } 1508 1509 char* 1510 Perl_my_atof3(pTHX_ const char* orig, NV* value, const STRLEN len) 1511 { 1512 const char* s = orig; 1513 NV result[3] = {0.0, 0.0, 0.0}; 1514 #if defined(USE_PERL_ATOF) || defined(Perl_strtod) 1515 const char* send = s + ((len != 0) 1516 ? len 1517 : strlen(orig)); /* one past the last */ 1518 bool negative = 0; 1519 #endif 1520 #if defined(USE_PERL_ATOF) && !defined(Perl_strtod) 1521 UV accumulator[2] = {0,0}; /* before/after dp */ 1522 bool seen_digit = 0; 1523 I32 exp_adjust[2] = {0,0}; 1524 I32 exp_acc[2] = {-1, -1}; 1525 /* the current exponent adjust for the accumulators */ 1526 I32 exponent = 0; 1527 I32 seen_dp = 0; 1528 I32 digit = 0; 1529 I32 old_digit = 0; 1530 I32 sig_digits = 0; /* noof significant digits seen so far */ 1531 #endif 1532 1533 #if defined(USE_PERL_ATOF) || defined(Perl_strtod) 1534 PERL_ARGS_ASSERT_MY_ATOF3; 1535 1536 /* leading whitespace */ 1537 while (s < send && isSPACE(*s)) 1538 ++s; 1539 1540 /* sign */ 1541 switch (*s) { 1542 case '-': 1543 negative = 1; 1544 /* FALLTHROUGH */ 1545 case '+': 1546 ++s; 1547 } 1548 #endif 1549 1550 #ifdef Perl_strtod 1551 { 1552 char* endp; 1553 char* copy = NULL; 1554 1555 if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value))) 1556 return endp; 1557 1558 /* If the length is passed in, the input string isn't NUL-terminated, 1559 * and in it turns out the function below assumes it is; therefore we 1560 * create a copy and NUL-terminate that */ 1561 if (len) { 1562 Newx(copy, len + 1, char); 1563 Copy(orig, copy, len, char); 1564 copy[len] = '\0'; 1565 s = copy + (s - orig); 1566 } 1567 1568 result[2] = S_strtod(aTHX_ s, &endp); 1569 1570 /* If we created a copy, 'endp' is in terms of that. Convert back to 1571 * the original */ 1572 if (copy) { 1573 s = (s - copy) + (char *) orig; 1574 endp = (endp - copy) + (char *) orig; 1575 Safefree(copy); 1576 } 1577 1578 if (s != endp) { 1579 *value = negative ? -result[2] : result[2]; 1580 return endp; 1581 } 1582 return NULL; 1583 } 1584 #elif defined(USE_PERL_ATOF) 1585 1586 /* There is no point in processing more significant digits 1587 * than the NV can hold. Note that NV_DIG is a lower-bound value, 1588 * while we need an upper-bound value. We add 2 to account for this; 1589 * since it will have been conservative on both the first and last digit. 1590 * For example a 32-bit mantissa with an exponent of 4 would have 1591 * exact values in the set 1592 * 4 1593 * 8 1594 * .. 1595 * 17179869172 1596 * 17179869176 1597 * 17179869180 1598 * 1599 * where for the purposes of calculating NV_DIG we would have to discount 1600 * both the first and last digit, since neither can hold all values from 1601 * 0..9; but for calculating the value we must examine those two digits. 1602 */ 1603 #ifdef MAX_SIG_DIG_PLUS 1604 /* It is not necessarily the case that adding 2 to NV_DIG gets all the 1605 possible digits in a NV, especially if NVs are not IEEE compliant 1606 (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */ 1607 # define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS) 1608 #else 1609 # define MAX_SIG_DIGITS (NV_DIG+2) 1610 #endif 1611 1612 /* the max number we can accumulate in a UV, and still safely do 10*N+9 */ 1613 #define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10)) 1614 1615 #if defined(NV_INF) || defined(NV_NAN) 1616 { 1617 char* endp; 1618 if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value))) 1619 return endp; 1620 } 1621 #endif 1622 1623 /* we accumulate digits into an integer; when this becomes too 1624 * large, we add the total to NV and start again */ 1625 1626 while (s < send) { 1627 if (isDIGIT(*s)) { 1628 seen_digit = 1; 1629 old_digit = digit; 1630 digit = *s++ - '0'; 1631 if (seen_dp) 1632 exp_adjust[1]++; 1633 1634 /* don't start counting until we see the first significant 1635 * digit, eg the 5 in 0.00005... */ 1636 if (!sig_digits && digit == 0) 1637 continue; 1638 1639 if (++sig_digits > MAX_SIG_DIGITS) { 1640 /* limits of precision reached */ 1641 if (digit > 5) { 1642 ++accumulator[seen_dp]; 1643 } else if (digit == 5) { 1644 if (old_digit % 2) { /* round to even - Allen */ 1645 ++accumulator[seen_dp]; 1646 } 1647 } 1648 if (seen_dp) { 1649 exp_adjust[1]--; 1650 } else { 1651 exp_adjust[0]++; 1652 } 1653 /* skip remaining digits */ 1654 while (s < send && isDIGIT(*s)) { 1655 ++s; 1656 if (! seen_dp) { 1657 exp_adjust[0]++; 1658 } 1659 } 1660 /* warn of loss of precision? */ 1661 } 1662 else { 1663 if (accumulator[seen_dp] > MAX_ACCUMULATE) { 1664 /* add accumulator to result and start again */ 1665 result[seen_dp] = S_mulexp10(result[seen_dp], 1666 exp_acc[seen_dp]) 1667 + (NV)accumulator[seen_dp]; 1668 accumulator[seen_dp] = 0; 1669 exp_acc[seen_dp] = 0; 1670 } 1671 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit; 1672 ++exp_acc[seen_dp]; 1673 } 1674 } 1675 else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) { 1676 seen_dp = 1; 1677 if (sig_digits > MAX_SIG_DIGITS) { 1678 while (s < send && isDIGIT(*s)) { 1679 ++s; 1680 } 1681 break; 1682 } 1683 } 1684 else { 1685 break; 1686 } 1687 } 1688 1689 result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0]; 1690 if (seen_dp) { 1691 result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1]; 1692 } 1693 1694 if (s < send && seen_digit && (isALPHA_FOLD_EQ(*s, 'e'))) { 1695 bool expnegative = 0; 1696 1697 ++s; 1698 switch (*s) { 1699 case '-': 1700 expnegative = 1; 1701 /* FALLTHROUGH */ 1702 case '+': 1703 ++s; 1704 } 1705 while (s < send && isDIGIT(*s)) 1706 exponent = exponent * 10 + (*s++ - '0'); 1707 if (expnegative) 1708 exponent = -exponent; 1709 } 1710 1711 /* now apply the exponent */ 1712 1713 if (seen_dp) { 1714 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]) 1715 + S_mulexp10(result[1],exponent-exp_adjust[1]); 1716 } else { 1717 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]); 1718 } 1719 1720 /* now apply the sign */ 1721 if (negative) 1722 result[2] = -result[2]; 1723 #endif /* USE_PERL_ATOF */ 1724 *value = result[2]; 1725 return (char *)s; 1726 } 1727 1728 /* 1729 =for apidoc isinfnan 1730 1731 C<Perl_isinfnan()> is utility function that returns true if the NV 1732 argument is either an infinity or a C<NaN>, false otherwise. To test 1733 in more detail, use C<Perl_isinf()> and C<Perl_isnan()>. 1734 1735 This is also the logical inverse of Perl_isfinite(). 1736 1737 =cut 1738 */ 1739 bool 1740 Perl_isinfnan(NV nv) 1741 { 1742 PERL_UNUSED_ARG(nv); 1743 #ifdef Perl_isinf 1744 if (Perl_isinf(nv)) 1745 return TRUE; 1746 #endif 1747 #ifdef Perl_isnan 1748 if (Perl_isnan(nv)) 1749 return TRUE; 1750 #endif 1751 return FALSE; 1752 } 1753 1754 /* 1755 =for apidoc 1756 1757 Checks whether the argument would be either an infinity or C<NaN> when used 1758 as a number, but is careful not to trigger non-numeric or uninitialized 1759 warnings. it assumes the caller has done C<SvGETMAGIC(sv)> already. 1760 1761 =cut 1762 */ 1763 1764 bool 1765 Perl_isinfnansv(pTHX_ SV *sv) 1766 { 1767 PERL_ARGS_ASSERT_ISINFNANSV; 1768 if (!SvOK(sv)) 1769 return FALSE; 1770 if (SvNOKp(sv)) 1771 return Perl_isinfnan(SvNVX(sv)); 1772 if (SvIOKp(sv)) 1773 return FALSE; 1774 { 1775 STRLEN len; 1776 const char *s = SvPV_nomg_const(sv, len); 1777 return cBOOL(grok_infnan(&s, s+len)); 1778 } 1779 } 1780 1781 #ifndef HAS_MODFL 1782 /* C99 has truncl, pre-C99 Solaris had aintl. We can use either with 1783 * copysignl to emulate modfl, which is in some platforms missing or 1784 * broken. */ 1785 # if defined(HAS_TRUNCL) && defined(HAS_COPYSIGNL) 1786 long double 1787 Perl_my_modfl(long double x, long double *ip) 1788 { 1789 *ip = truncl(x); 1790 return (x == *ip ? copysignl(0.0L, x) : x - *ip); 1791 } 1792 # elif defined(HAS_AINTL) && defined(HAS_COPYSIGNL) 1793 long double 1794 Perl_my_modfl(long double x, long double *ip) 1795 { 1796 *ip = aintl(x); 1797 return (x == *ip ? copysignl(0.0L, x) : x - *ip); 1798 } 1799 # endif 1800 #endif 1801 1802 /* Similarly, with ilogbl and scalbnl we can emulate frexpl. */ 1803 #if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL) 1804 long double 1805 Perl_my_frexpl(long double x, int *e) { 1806 *e = x == 0.0L ? 0 : ilogbl(x) + 1; 1807 return (scalbnl(x, -*e)); 1808 } 1809 #endif 1810 1811 /* 1812 =for apidoc Perl_signbit 1813 1814 Return a non-zero integer if the sign bit on an NV is set, and 0 if 1815 it is not. 1816 1817 If F<Configure> detects this system has a C<signbit()> that will work with 1818 our NVs, then we just use it via the C<#define> in F<perl.h>. Otherwise, 1819 fall back on this implementation. The main use of this function 1820 is catching C<-0.0>. 1821 1822 C<Configure> notes: This function is called C<'Perl_signbit'> instead of a 1823 plain C<'signbit'> because it is easy to imagine a system having a C<signbit()> 1824 function or macro that doesn't happen to work with our particular choice 1825 of NVs. We shouldn't just re-C<#define> C<signbit> as C<Perl_signbit> and expect 1826 the standard system headers to be happy. Also, this is a no-context 1827 function (no C<pTHX_>) because C<Perl_signbit()> is usually re-C<#defined> in 1828 F<perl.h> as a simple macro call to the system's C<signbit()>. 1829 Users should just always call C<Perl_signbit()>. 1830 1831 =cut 1832 */ 1833 #if !defined(HAS_SIGNBIT) 1834 int 1835 Perl_signbit(NV x) { 1836 # ifdef Perl_fp_class_nzero 1837 return Perl_fp_class_nzero(x); 1838 /* Try finding the high byte, and assume it's highest bit 1839 * is the sign. This assumption is probably wrong somewhere. */ 1840 # elif defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN 1841 return (((unsigned char *)&x)[9] & 0x80); 1842 # elif defined(NV_LITTLE_ENDIAN) 1843 /* Note that NVSIZE is sizeof(NV), which would make the below be 1844 * wrong if the end bytes are unused, which happens with the x86 1845 * 80-bit long doubles, which is why take care of that above. */ 1846 return (((unsigned char *)&x)[NVSIZE - 1] & 0x80); 1847 # elif defined(NV_BIG_ENDIAN) 1848 return (((unsigned char *)&x)[0] & 0x80); 1849 # else 1850 /* This last resort fallback is wrong for the negative zero. */ 1851 return (x < 0.0) ? 1 : 0; 1852 # endif 1853 } 1854 #endif 1855 1856 /* 1857 * ex: set ts=8 sts=4 sw=4 et: 1858 */ 1859