1 /* numeric.c 2 * 3 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 4 * 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * "That only makes eleven (plus one mislaid) and not fourteen, 13 * unless wizards count differently to other people." --Beorn 14 * 15 * [p.115 of _The Hobbit_: "Queer Lodgings"] 16 */ 17 18 /* 19 =head1 Numeric functions 20 21 This file contains all the stuff needed by perl for manipulating numeric 22 values, including such things as replacements for the OS's atof() function 23 24 =cut 25 26 */ 27 28 #include "EXTERN.h" 29 #define PERL_IN_NUMERIC_C 30 #include "perl.h" 31 32 U32 33 Perl_cast_ulong(pTHX_ NV f) 34 { 35 PERL_UNUSED_CONTEXT; 36 if (f < 0.0) 37 return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f; 38 if (f < U32_MAX_P1) { 39 #if CASTFLAGS & 2 40 if (f < U32_MAX_P1_HALF) 41 return (U32) f; 42 f -= U32_MAX_P1_HALF; 43 return ((U32) f) | (1 + U32_MAX >> 1); 44 #else 45 return (U32) f; 46 #endif 47 } 48 return f > 0 ? U32_MAX : 0 /* NaN */; 49 } 50 51 I32 52 Perl_cast_i32(pTHX_ NV f) 53 { 54 PERL_UNUSED_CONTEXT; 55 if (f < I32_MAX_P1) 56 return f < I32_MIN ? I32_MIN : (I32) f; 57 if (f < U32_MAX_P1) { 58 #if CASTFLAGS & 2 59 if (f < U32_MAX_P1_HALF) 60 return (I32)(U32) f; 61 f -= U32_MAX_P1_HALF; 62 return (I32)(((U32) f) | (1 + U32_MAX >> 1)); 63 #else 64 return (I32)(U32) f; 65 #endif 66 } 67 return f > 0 ? (I32)U32_MAX : 0 /* NaN */; 68 } 69 70 IV 71 Perl_cast_iv(pTHX_ NV f) 72 { 73 PERL_UNUSED_CONTEXT; 74 if (f < IV_MAX_P1) 75 return f < IV_MIN ? IV_MIN : (IV) f; 76 if (f < UV_MAX_P1) { 77 #if CASTFLAGS & 2 78 /* For future flexibility allowing for sizeof(UV) >= sizeof(IV) */ 79 if (f < UV_MAX_P1_HALF) 80 return (IV)(UV) f; 81 f -= UV_MAX_P1_HALF; 82 return (IV)(((UV) f) | (1 + UV_MAX >> 1)); 83 #else 84 return (IV)(UV) f; 85 #endif 86 } 87 return f > 0 ? (IV)UV_MAX : 0 /* NaN */; 88 } 89 90 UV 91 Perl_cast_uv(pTHX_ NV f) 92 { 93 PERL_UNUSED_CONTEXT; 94 if (f < 0.0) 95 return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f; 96 if (f < UV_MAX_P1) { 97 #if CASTFLAGS & 2 98 if (f < UV_MAX_P1_HALF) 99 return (UV) f; 100 f -= UV_MAX_P1_HALF; 101 return ((UV) f) | (1 + UV_MAX >> 1); 102 #else 103 return (UV) f; 104 #endif 105 } 106 return f > 0 ? UV_MAX : 0 /* NaN */; 107 } 108 109 /* 110 =for apidoc grok_bin 111 112 converts a string representing a binary number to numeric form. 113 114 On entry I<start> and I<*len> give the string to scan, I<*flags> gives 115 conversion flags, and I<result> should be NULL or a pointer to an NV. 116 The scan stops at the end of the string, or the first invalid character. 117 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an 118 invalid character will also trigger a warning. 119 On return I<*len> is set to the length of the scanned string, 120 and I<*flags> gives output flags. 121 122 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear, 123 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_bin> 124 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags, 125 and writes the value to I<*result> (or the value is discarded if I<result> 126 is NULL). 127 128 The binary number may optionally be prefixed with "0b" or "b" unless 129 C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If 130 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the binary 131 number may use '_' characters to separate digits. 132 133 =cut 134 135 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE 136 which suppresses any message for non-portable numbers that are still valid 137 on this platform. 138 */ 139 140 UV 141 Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) 142 { 143 const char *s = start; 144 STRLEN len = *len_p; 145 UV value = 0; 146 NV value_nv = 0; 147 148 const UV max_div_2 = UV_MAX / 2; 149 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES); 150 bool overflowed = FALSE; 151 char bit; 152 153 PERL_ARGS_ASSERT_GROK_BIN; 154 155 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { 156 /* strip off leading b or 0b. 157 for compatibility silently suffer "b" and "0b" as valid binary 158 numbers. */ 159 if (len >= 1) { 160 if (s[0] == 'b' || s[0] == 'B') { 161 s++; 162 len--; 163 } 164 else if (len >= 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) { 165 s+=2; 166 len-=2; 167 } 168 } 169 } 170 171 for (; len-- && (bit = *s); s++) { 172 if (bit == '0' || bit == '1') { 173 /* Write it in this wonky order with a goto to attempt to get the 174 compiler to make the common case integer-only loop pretty tight. 175 With gcc seems to be much straighter code than old scan_bin. */ 176 redo: 177 if (!overflowed) { 178 if (value <= max_div_2) { 179 value = (value << 1) | (bit - '0'); 180 continue; 181 } 182 /* Bah. We're just overflowed. */ 183 /* diag_listed_as: Integer overflow in %s number */ 184 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 185 "Integer overflow in binary number"); 186 overflowed = TRUE; 187 value_nv = (NV) value; 188 } 189 value_nv *= 2.0; 190 /* If an NV has not enough bits in its mantissa to 191 * represent a UV this summing of small low-order numbers 192 * is a waste of time (because the NV cannot preserve 193 * the low-order bits anyway): we could just remember when 194 * did we overflow and in the end just multiply value_nv by the 195 * right amount. */ 196 value_nv += (NV)(bit - '0'); 197 continue; 198 } 199 if (bit == '_' && len && allow_underscores && (bit = s[1]) 200 && (bit == '0' || bit == '1')) 201 { 202 --len; 203 ++s; 204 goto redo; 205 } 206 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) 207 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT), 208 "Illegal binary digit '%c' ignored", *s); 209 break; 210 } 211 212 if ( ( overflowed && value_nv > 4294967295.0) 213 #if UVSIZE > 4 214 || (!overflowed && value > 0xffffffff 215 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE)) 216 #endif 217 ) { 218 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 219 "Binary number > 0b11111111111111111111111111111111 non-portable"); 220 } 221 *len_p = s - start; 222 if (!overflowed) { 223 *flags = 0; 224 return value; 225 } 226 *flags = PERL_SCAN_GREATER_THAN_UV_MAX; 227 if (result) 228 *result = value_nv; 229 return UV_MAX; 230 } 231 232 /* 233 =for apidoc grok_hex 234 235 converts a string representing a hex number to numeric form. 236 237 On entry I<start> and I<*len_p> give the string to scan, I<*flags> gives 238 conversion flags, and I<result> should be NULL or a pointer to an NV. 239 The scan stops at the end of the string, or the first invalid character. 240 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an 241 invalid character will also trigger a warning. 242 On return I<*len> is set to the length of the scanned string, 243 and I<*flags> gives output flags. 244 245 If the value is <= UV_MAX it is returned as a UV, the output flags are clear, 246 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_hex> 247 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags, 248 and writes the value to I<*result> (or the value is discarded if I<result> 249 is NULL). 250 251 The hex number may optionally be prefixed with "0x" or "x" unless 252 C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If 253 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the hex 254 number may use '_' characters to separate digits. 255 256 =cut 257 258 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE 259 which suppresses any message for non-portable numbers that are still valid 260 on this platform. 261 */ 262 263 UV 264 Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) 265 { 266 dVAR; 267 const char *s = start; 268 STRLEN len = *len_p; 269 UV value = 0; 270 NV value_nv = 0; 271 const UV max_div_16 = UV_MAX / 16; 272 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES); 273 bool overflowed = FALSE; 274 275 PERL_ARGS_ASSERT_GROK_HEX; 276 277 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { 278 /* strip off leading x or 0x. 279 for compatibility silently suffer "x" and "0x" as valid hex numbers. 280 */ 281 if (len >= 1) { 282 if (s[0] == 'x' || s[0] == 'X') { 283 s++; 284 len--; 285 } 286 else if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { 287 s+=2; 288 len-=2; 289 } 290 } 291 } 292 293 for (; len-- && *s; s++) { 294 if (isXDIGIT(*s)) { 295 /* Write it in this wonky order with a goto to attempt to get the 296 compiler to make the common case integer-only loop pretty tight. 297 With gcc seems to be much straighter code than old scan_hex. */ 298 redo: 299 if (!overflowed) { 300 if (value <= max_div_16) { 301 value = (value << 4) | XDIGIT_VALUE(*s); 302 continue; 303 } 304 /* Bah. We're just overflowed. */ 305 /* diag_listed_as: Integer overflow in %s number */ 306 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 307 "Integer overflow in hexadecimal number"); 308 overflowed = TRUE; 309 value_nv = (NV) value; 310 } 311 value_nv *= 16.0; 312 /* If an NV has not enough bits in its mantissa to 313 * represent a UV this summing of small low-order numbers 314 * is a waste of time (because the NV cannot preserve 315 * the low-order bits anyway): we could just remember when 316 * did we overflow and in the end just multiply value_nv by the 317 * right amount of 16-tuples. */ 318 value_nv += (NV) XDIGIT_VALUE(*s); 319 continue; 320 } 321 if (*s == '_' && len && allow_underscores && s[1] 322 && isXDIGIT(s[1])) 323 { 324 --len; 325 ++s; 326 goto redo; 327 } 328 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) 329 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT), 330 "Illegal hexadecimal digit '%c' ignored", *s); 331 break; 332 } 333 334 if ( ( overflowed && value_nv > 4294967295.0) 335 #if UVSIZE > 4 336 || (!overflowed && value > 0xffffffff 337 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE)) 338 #endif 339 ) { 340 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 341 "Hexadecimal number > 0xffffffff non-portable"); 342 } 343 *len_p = s - start; 344 if (!overflowed) { 345 *flags = 0; 346 return value; 347 } 348 *flags = PERL_SCAN_GREATER_THAN_UV_MAX; 349 if (result) 350 *result = value_nv; 351 return UV_MAX; 352 } 353 354 /* 355 =for apidoc grok_oct 356 357 converts a string representing an octal number to numeric form. 358 359 On entry I<start> and I<*len> give the string to scan, I<*flags> gives 360 conversion flags, and I<result> should be NULL or a pointer to an NV. 361 The scan stops at the end of the string, or the first invalid character. 362 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an 363 8 or 9 will also trigger a warning. 364 On return I<*len> is set to the length of the scanned string, 365 and I<*flags> gives output flags. 366 367 If the value is <= UV_MAX it is returned as a UV, the output flags are clear, 368 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_oct> 369 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags, 370 and writes the value to I<*result> (or the value is discarded if I<result> 371 is NULL). 372 373 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the octal 374 number may use '_' characters to separate digits. 375 376 =cut 377 378 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE> 379 which suppresses any message for non-portable numbers, but which are valid 380 on this platform. 381 */ 382 383 UV 384 Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) 385 { 386 const char *s = start; 387 STRLEN len = *len_p; 388 UV value = 0; 389 NV value_nv = 0; 390 const UV max_div_8 = UV_MAX / 8; 391 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES); 392 bool overflowed = FALSE; 393 394 PERL_ARGS_ASSERT_GROK_OCT; 395 396 for (; len-- && *s; s++) { 397 if (isOCTAL(*s)) { 398 /* Write it in this wonky order with a goto to attempt to get the 399 compiler to make the common case integer-only loop pretty tight. 400 */ 401 redo: 402 if (!overflowed) { 403 if (value <= max_div_8) { 404 value = (value << 3) | OCTAL_VALUE(*s); 405 continue; 406 } 407 /* Bah. We're just overflowed. */ 408 /* diag_listed_as: Integer overflow in %s number */ 409 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 410 "Integer overflow in octal number"); 411 overflowed = TRUE; 412 value_nv = (NV) value; 413 } 414 value_nv *= 8.0; 415 /* If an NV has not enough bits in its mantissa to 416 * represent a UV this summing of small low-order numbers 417 * is a waste of time (because the NV cannot preserve 418 * the low-order bits anyway): we could just remember when 419 * did we overflow and in the end just multiply value_nv by the 420 * right amount of 8-tuples. */ 421 value_nv += (NV) OCTAL_VALUE(*s); 422 continue; 423 } 424 if (*s == '_' && len && allow_underscores && isOCTAL(s[1])) { 425 --len; 426 ++s; 427 goto redo; 428 } 429 /* Allow \octal to work the DWIM way (that is, stop scanning 430 * as soon as non-octal characters are seen, complain only if 431 * someone seems to want to use the digits eight and nine. Since we 432 * know it is not octal, then if isDIGIT, must be an 8 or 9). */ 433 if (isDIGIT(*s)) { 434 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) 435 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT), 436 "Illegal octal digit '%c' ignored", *s); 437 } 438 break; 439 } 440 441 if ( ( overflowed && value_nv > 4294967295.0) 442 #if UVSIZE > 4 443 || (!overflowed && value > 0xffffffff 444 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE)) 445 #endif 446 ) { 447 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 448 "Octal number > 037777777777 non-portable"); 449 } 450 *len_p = s - start; 451 if (!overflowed) { 452 *flags = 0; 453 return value; 454 } 455 *flags = PERL_SCAN_GREATER_THAN_UV_MAX; 456 if (result) 457 *result = value_nv; 458 return UV_MAX; 459 } 460 461 /* 462 =for apidoc scan_bin 463 464 For backwards compatibility. Use C<grok_bin> instead. 465 466 =for apidoc scan_hex 467 468 For backwards compatibility. Use C<grok_hex> instead. 469 470 =for apidoc scan_oct 471 472 For backwards compatibility. Use C<grok_oct> instead. 473 474 =cut 475 */ 476 477 NV 478 Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen) 479 { 480 NV rnv; 481 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0; 482 const UV ruv = grok_bin (start, &len, &flags, &rnv); 483 484 PERL_ARGS_ASSERT_SCAN_BIN; 485 486 *retlen = len; 487 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv; 488 } 489 490 NV 491 Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen) 492 { 493 NV rnv; 494 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0; 495 const UV ruv = grok_oct (start, &len, &flags, &rnv); 496 497 PERL_ARGS_ASSERT_SCAN_OCT; 498 499 *retlen = len; 500 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv; 501 } 502 503 NV 504 Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen) 505 { 506 NV rnv; 507 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0; 508 const UV ruv = grok_hex (start, &len, &flags, &rnv); 509 510 PERL_ARGS_ASSERT_SCAN_HEX; 511 512 *retlen = len; 513 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv; 514 } 515 516 /* 517 =for apidoc grok_numeric_radix 518 519 Scan and skip for a numeric decimal separator (radix). 520 521 =cut 522 */ 523 bool 524 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send) 525 { 526 #ifdef USE_LOCALE_NUMERIC 527 dVAR; 528 529 PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX; 530 531 if (PL_numeric_radix_sv && IN_SOME_LOCALE_FORM) { 532 STRLEN len; 533 const char * const radix = SvPV(PL_numeric_radix_sv, len); 534 if (*sp + len <= send && memEQ(*sp, radix, len)) { 535 *sp += len; 536 return TRUE; 537 } 538 } 539 /* always try "." if numeric radix didn't match because 540 * we may have data from different locales mixed */ 541 #endif 542 543 PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX; 544 545 if (*sp < send && **sp == '.') { 546 ++*sp; 547 return TRUE; 548 } 549 return FALSE; 550 } 551 552 /* 553 =for apidoc grok_number 554 555 Recognise (or not) a number. The type of the number is returned 556 (0 if unrecognised), otherwise it is a bit-ORed combination of 557 IS_NUMBER_IN_UV, IS_NUMBER_GREATER_THAN_UV_MAX, IS_NUMBER_NOT_INT, 558 IS_NUMBER_NEG, IS_NUMBER_INFINITY, IS_NUMBER_NAN (defined in perl.h). 559 560 If the value of the number can fit in a UV, it is returned in the *valuep 561 IS_NUMBER_IN_UV will be set to indicate that *valuep is valid, IS_NUMBER_IN_UV 562 will never be set unless *valuep is valid, but *valuep may have been assigned 563 to during processing even though IS_NUMBER_IN_UV is not set on return. 564 If valuep is NULL, IS_NUMBER_IN_UV will be set for the same cases as when 565 valuep is non-NULL, but no actual assignment (or SEGV) will occur. 566 567 IS_NUMBER_NOT_INT will be set with IS_NUMBER_IN_UV if trailing decimals were 568 seen (in which case *valuep gives the true value truncated to an integer), and 569 IS_NUMBER_NEG if the number is negative (in which case *valuep holds the 570 absolute value). IS_NUMBER_IN_UV is not set if e notation was used or the 571 number is larger than a UV. 572 573 =cut 574 */ 575 int 576 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep) 577 { 578 const char *s = pv; 579 const char * const send = pv + len; 580 const UV max_div_10 = UV_MAX / 10; 581 const char max_mod_10 = UV_MAX % 10; 582 int numtype = 0; 583 int sawinf = 0; 584 int sawnan = 0; 585 586 PERL_ARGS_ASSERT_GROK_NUMBER; 587 588 while (s < send && isSPACE(*s)) 589 s++; 590 if (s == send) { 591 return 0; 592 } else if (*s == '-') { 593 s++; 594 numtype = IS_NUMBER_NEG; 595 } 596 else if (*s == '+') 597 s++; 598 599 if (s == send) 600 return 0; 601 602 /* next must be digit or the radix separator or beginning of infinity */ 603 if (isDIGIT(*s)) { 604 /* UVs are at least 32 bits, so the first 9 decimal digits cannot 605 overflow. */ 606 UV value = *s - '0'; 607 /* This construction seems to be more optimiser friendly. 608 (without it gcc does the isDIGIT test and the *s - '0' separately) 609 With it gcc on arm is managing 6 instructions (6 cycles) per digit. 610 In theory the optimiser could deduce how far to unroll the loop 611 before checking for overflow. */ 612 if (++s < send) { 613 int digit = *s - '0'; 614 if (digit >= 0 && digit <= 9) { 615 value = value * 10 + digit; 616 if (++s < send) { 617 digit = *s - '0'; 618 if (digit >= 0 && digit <= 9) { 619 value = value * 10 + digit; 620 if (++s < send) { 621 digit = *s - '0'; 622 if (digit >= 0 && digit <= 9) { 623 value = value * 10 + digit; 624 if (++s < send) { 625 digit = *s - '0'; 626 if (digit >= 0 && digit <= 9) { 627 value = value * 10 + digit; 628 if (++s < send) { 629 digit = *s - '0'; 630 if (digit >= 0 && digit <= 9) { 631 value = value * 10 + digit; 632 if (++s < send) { 633 digit = *s - '0'; 634 if (digit >= 0 && digit <= 9) { 635 value = value * 10 + digit; 636 if (++s < send) { 637 digit = *s - '0'; 638 if (digit >= 0 && digit <= 9) { 639 value = value * 10 + digit; 640 if (++s < send) { 641 digit = *s - '0'; 642 if (digit >= 0 && digit <= 9) { 643 value = value * 10 + digit; 644 if (++s < send) { 645 /* Now got 9 digits, so need to check 646 each time for overflow. */ 647 digit = *s - '0'; 648 while (digit >= 0 && digit <= 9 649 && (value < max_div_10 650 || (value == max_div_10 651 && digit <= max_mod_10))) { 652 value = value * 10 + digit; 653 if (++s < send) 654 digit = *s - '0'; 655 else 656 break; 657 } 658 if (digit >= 0 && digit <= 9 659 && (s < send)) { 660 /* value overflowed. 661 skip the remaining digits, don't 662 worry about setting *valuep. */ 663 do { 664 s++; 665 } while (s < send && isDIGIT(*s)); 666 numtype |= 667 IS_NUMBER_GREATER_THAN_UV_MAX; 668 goto skip_value; 669 } 670 } 671 } 672 } 673 } 674 } 675 } 676 } 677 } 678 } 679 } 680 } 681 } 682 } 683 } 684 } 685 } 686 } 687 numtype |= IS_NUMBER_IN_UV; 688 if (valuep) 689 *valuep = value; 690 691 skip_value: 692 if (GROK_NUMERIC_RADIX(&s, send)) { 693 numtype |= IS_NUMBER_NOT_INT; 694 while (s < send && isDIGIT(*s)) /* optional digits after the radix */ 695 s++; 696 } 697 } 698 else if (GROK_NUMERIC_RADIX(&s, send)) { 699 numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */ 700 /* no digits before the radix means we need digits after it */ 701 if (s < send && isDIGIT(*s)) { 702 do { 703 s++; 704 } while (s < send && isDIGIT(*s)); 705 if (valuep) { 706 /* integer approximation is valid - it's 0. */ 707 *valuep = 0; 708 } 709 } 710 else 711 return 0; 712 } else if (*s == 'I' || *s == 'i') { 713 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; 714 s++; if (s == send || (*s != 'F' && *s != 'f')) return 0; 715 s++; if (s < send && (*s == 'I' || *s == 'i')) { 716 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; 717 s++; if (s == send || (*s != 'I' && *s != 'i')) return 0; 718 s++; if (s == send || (*s != 'T' && *s != 't')) return 0; 719 s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0; 720 s++; 721 } 722 sawinf = 1; 723 } else if (*s == 'N' || *s == 'n') { 724 /* XXX TODO: There are signaling NaNs and quiet NaNs. */ 725 s++; if (s == send || (*s != 'A' && *s != 'a')) return 0; 726 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; 727 s++; 728 sawnan = 1; 729 } else 730 return 0; 731 732 if (sawinf) { 733 numtype &= IS_NUMBER_NEG; /* Keep track of sign */ 734 numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; 735 } else if (sawnan) { 736 numtype &= IS_NUMBER_NEG; /* Keep track of sign */ 737 numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; 738 } else if (s < send) { 739 /* we can have an optional exponent part */ 740 if (*s == 'e' || *s == 'E') { 741 /* The only flag we keep is sign. Blow away any "it's UV" */ 742 numtype &= IS_NUMBER_NEG; 743 numtype |= IS_NUMBER_NOT_INT; 744 s++; 745 if (s < send && (*s == '-' || *s == '+')) 746 s++; 747 if (s < send && isDIGIT(*s)) { 748 do { 749 s++; 750 } while (s < send && isDIGIT(*s)); 751 } 752 else 753 return 0; 754 } 755 } 756 while (s < send && isSPACE(*s)) 757 s++; 758 if (s >= send) 759 return numtype; 760 if (len == 10 && memEQ(pv, "0 but true", 10)) { 761 if (valuep) 762 *valuep = 0; 763 return IS_NUMBER_IN_UV; 764 } 765 return 0; 766 } 767 768 STATIC NV 769 S_mulexp10(NV value, I32 exponent) 770 { 771 NV result = 1.0; 772 NV power = 10.0; 773 bool negative = 0; 774 I32 bit; 775 776 if (exponent == 0) 777 return value; 778 if (value == 0) 779 return (NV)0; 780 781 /* On OpenVMS VAX we by default use the D_FLOAT double format, 782 * and that format does not have *easy* capabilities [1] for 783 * overflowing doubles 'silently' as IEEE fp does. We also need 784 * to support G_FLOAT on both VAX and Alpha, and though the exponent 785 * range is much larger than D_FLOAT it still doesn't do silent 786 * overflow. Therefore we need to detect early whether we would 787 * overflow (this is the behaviour of the native string-to-float 788 * conversion routines, and therefore of native applications, too). 789 * 790 * [1] Trying to establish a condition handler to trap floating point 791 * exceptions is not a good idea. */ 792 793 /* In UNICOS and in certain Cray models (such as T90) there is no 794 * IEEE fp, and no way at all from C to catch fp overflows gracefully. 795 * There is something you can do if you are willing to use some 796 * inline assembler: the instruction is called DFI-- but that will 797 * disable *all* floating point interrupts, a little bit too large 798 * a hammer. Therefore we need to catch potential overflows before 799 * it's too late. */ 800 801 #if ((defined(VMS) && !defined(_IEEE_FP)) || defined(_UNICOS)) && defined(NV_MAX_10_EXP) 802 STMT_START { 803 const NV exp_v = log10(value); 804 if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP) 805 return NV_MAX; 806 if (exponent < 0) { 807 if (-(exponent + exp_v) >= NV_MAX_10_EXP) 808 return 0.0; 809 while (-exponent >= NV_MAX_10_EXP) { 810 /* combination does not overflow, but 10^(-exponent) does */ 811 value /= 10; 812 ++exponent; 813 } 814 } 815 } STMT_END; 816 #endif 817 818 if (exponent < 0) { 819 negative = 1; 820 exponent = -exponent; 821 #ifdef NV_MAX_10_EXP 822 /* for something like 1234 x 10^-309, the action of calculating 823 * the intermediate value 10^309 then returning 1234 / (10^309) 824 * will fail, since 10^309 becomes infinity. In this case try to 825 * refactor it as 123 / (10^308) etc. 826 */ 827 while (value && exponent > NV_MAX_10_EXP) { 828 exponent--; 829 value /= 10; 830 } 831 #endif 832 } 833 for (bit = 1; exponent; bit <<= 1) { 834 if (exponent & bit) { 835 exponent ^= bit; 836 result *= power; 837 /* Floating point exceptions are supposed to be turned off, 838 * but if we're obviously done, don't risk another iteration. 839 */ 840 if (exponent == 0) break; 841 } 842 power *= power; 843 } 844 return negative ? value / result : value * result; 845 } 846 847 NV 848 Perl_my_atof(pTHX_ const char* s) 849 { 850 NV x = 0.0; 851 #ifdef USE_LOCALE_NUMERIC 852 dVAR; 853 854 PERL_ARGS_ASSERT_MY_ATOF; 855 856 { 857 DECLARE_STORE_LC_NUMERIC_SET_TO_NEEDED(); 858 if (PL_numeric_local && PL_numeric_radix_sv && IN_SOME_LOCALE_FORM) { 859 const char *standard = NULL, *local = NULL; 860 bool use_standard_radix; 861 862 /* Look through the string for the first thing that looks like a 863 * decimal point: either the value in the current locale or the 864 * standard fallback of '.'. The one which appears earliest in the 865 * input string is the one that we should have atof look for. Note 866 * that we have to determine this beforehand because on some 867 * systems, Perl_atof2 is just a wrapper around the system's atof. 868 * */ 869 standard = strchr(s, '.'); 870 local = strstr(s, SvPV_nolen(PL_numeric_radix_sv)); 871 872 use_standard_radix = standard && (!local || standard < local); 873 874 if (use_standard_radix) 875 SET_NUMERIC_STANDARD(); 876 877 Perl_atof2(s, x); 878 879 if (use_standard_radix) 880 SET_NUMERIC_LOCAL(); 881 } 882 else 883 Perl_atof2(s, x); 884 RESTORE_LC_NUMERIC(); 885 } 886 #else 887 Perl_atof2(s, x); 888 #endif 889 return x; 890 } 891 892 char* 893 Perl_my_atof2(pTHX_ const char* orig, NV* value) 894 { 895 NV result[3] = {0.0, 0.0, 0.0}; 896 const char* s = orig; 897 #ifdef USE_PERL_ATOF 898 UV accumulator[2] = {0,0}; /* before/after dp */ 899 bool negative = 0; 900 const char* send = s + strlen(orig) - 1; 901 bool seen_digit = 0; 902 I32 exp_adjust[2] = {0,0}; 903 I32 exp_acc[2] = {-1, -1}; 904 /* the current exponent adjust for the accumulators */ 905 I32 exponent = 0; 906 I32 seen_dp = 0; 907 I32 digit = 0; 908 I32 old_digit = 0; 909 I32 sig_digits = 0; /* noof significant digits seen so far */ 910 911 PERL_ARGS_ASSERT_MY_ATOF2; 912 913 /* There is no point in processing more significant digits 914 * than the NV can hold. Note that NV_DIG is a lower-bound value, 915 * while we need an upper-bound value. We add 2 to account for this; 916 * since it will have been conservative on both the first and last digit. 917 * For example a 32-bit mantissa with an exponent of 4 would have 918 * exact values in the set 919 * 4 920 * 8 921 * .. 922 * 17179869172 923 * 17179869176 924 * 17179869180 925 * 926 * where for the purposes of calculating NV_DIG we would have to discount 927 * both the first and last digit, since neither can hold all values from 928 * 0..9; but for calculating the value we must examine those two digits. 929 */ 930 #ifdef MAX_SIG_DIG_PLUS 931 /* It is not necessarily the case that adding 2 to NV_DIG gets all the 932 possible digits in a NV, especially if NVs are not IEEE compliant 933 (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */ 934 # define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS) 935 #else 936 # define MAX_SIG_DIGITS (NV_DIG+2) 937 #endif 938 939 /* the max number we can accumulate in a UV, and still safely do 10*N+9 */ 940 #define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10)) 941 942 /* leading whitespace */ 943 while (isSPACE(*s)) 944 ++s; 945 946 /* sign */ 947 switch (*s) { 948 case '-': 949 negative = 1; 950 /* fall through */ 951 case '+': 952 ++s; 953 } 954 955 /* punt to strtod for NaN/Inf; if no support for it there, tough luck */ 956 957 #ifdef HAS_STRTOD 958 if (*s == 'n' || *s == 'N' || *s == 'i' || *s == 'I') { 959 const char *p = negative ? s - 1 : s; 960 char *endp; 961 NV rslt; 962 rslt = strtod(p, &endp); 963 if (endp != p) { 964 *value = rslt; 965 return (char *)endp; 966 } 967 } 968 #endif 969 970 /* we accumulate digits into an integer; when this becomes too 971 * large, we add the total to NV and start again */ 972 973 while (1) { 974 if (isDIGIT(*s)) { 975 seen_digit = 1; 976 old_digit = digit; 977 digit = *s++ - '0'; 978 if (seen_dp) 979 exp_adjust[1]++; 980 981 /* don't start counting until we see the first significant 982 * digit, eg the 5 in 0.00005... */ 983 if (!sig_digits && digit == 0) 984 continue; 985 986 if (++sig_digits > MAX_SIG_DIGITS) { 987 /* limits of precision reached */ 988 if (digit > 5) { 989 ++accumulator[seen_dp]; 990 } else if (digit == 5) { 991 if (old_digit % 2) { /* round to even - Allen */ 992 ++accumulator[seen_dp]; 993 } 994 } 995 if (seen_dp) { 996 exp_adjust[1]--; 997 } else { 998 exp_adjust[0]++; 999 } 1000 /* skip remaining digits */ 1001 while (isDIGIT(*s)) { 1002 ++s; 1003 if (! seen_dp) { 1004 exp_adjust[0]++; 1005 } 1006 } 1007 /* warn of loss of precision? */ 1008 } 1009 else { 1010 if (accumulator[seen_dp] > MAX_ACCUMULATE) { 1011 /* add accumulator to result and start again */ 1012 result[seen_dp] = S_mulexp10(result[seen_dp], 1013 exp_acc[seen_dp]) 1014 + (NV)accumulator[seen_dp]; 1015 accumulator[seen_dp] = 0; 1016 exp_acc[seen_dp] = 0; 1017 } 1018 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit; 1019 ++exp_acc[seen_dp]; 1020 } 1021 } 1022 else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) { 1023 seen_dp = 1; 1024 if (sig_digits > MAX_SIG_DIGITS) { 1025 do { 1026 ++s; 1027 } while (isDIGIT(*s)); 1028 break; 1029 } 1030 } 1031 else { 1032 break; 1033 } 1034 } 1035 1036 result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0]; 1037 if (seen_dp) { 1038 result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1]; 1039 } 1040 1041 if (seen_digit && (*s == 'e' || *s == 'E')) { 1042 bool expnegative = 0; 1043 1044 ++s; 1045 switch (*s) { 1046 case '-': 1047 expnegative = 1; 1048 /* fall through */ 1049 case '+': 1050 ++s; 1051 } 1052 while (isDIGIT(*s)) 1053 exponent = exponent * 10 + (*s++ - '0'); 1054 if (expnegative) 1055 exponent = -exponent; 1056 } 1057 1058 1059 1060 /* now apply the exponent */ 1061 1062 if (seen_dp) { 1063 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]) 1064 + S_mulexp10(result[1],exponent-exp_adjust[1]); 1065 } else { 1066 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]); 1067 } 1068 1069 /* now apply the sign */ 1070 if (negative) 1071 result[2] = -result[2]; 1072 #endif /* USE_PERL_ATOF */ 1073 *value = result[2]; 1074 return (char *)s; 1075 } 1076 1077 #if ! defined(HAS_MODFL) && defined(HAS_AINTL) && defined(HAS_COPYSIGNL) 1078 long double 1079 Perl_my_modfl(long double x, long double *ip) 1080 { 1081 *ip = aintl(x); 1082 return (x == *ip ? copysignl(0.0L, x) : x - *ip); 1083 } 1084 #endif 1085 1086 #if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL) 1087 long double 1088 Perl_my_frexpl(long double x, int *e) { 1089 *e = x == 0.0L ? 0 : ilogbl(x) + 1; 1090 return (scalbnl(x, -*e)); 1091 } 1092 #endif 1093 1094 /* 1095 =for apidoc Perl_signbit 1096 1097 Return a non-zero integer if the sign bit on an NV is set, and 0 if 1098 it is not. 1099 1100 If Configure detects this system has a signbit() that will work with 1101 our NVs, then we just use it via the #define in perl.h. Otherwise, 1102 fall back on this implementation. As a first pass, this gets everything 1103 right except -0.0. Alas, catching -0.0 is the main use for this function, 1104 so this is not too helpful yet. Still, at least we have the scaffolding 1105 in place to support other systems, should that prove useful. 1106 1107 1108 Configure notes: This function is called 'Perl_signbit' instead of a 1109 plain 'signbit' because it is easy to imagine a system having a signbit() 1110 function or macro that doesn't happen to work with our particular choice 1111 of NVs. We shouldn't just re-#define signbit as Perl_signbit and expect 1112 the standard system headers to be happy. Also, this is a no-context 1113 function (no pTHX_) because Perl_signbit() is usually re-#defined in 1114 perl.h as a simple macro call to the system's signbit(). 1115 Users should just always call Perl_signbit(). 1116 1117 =cut 1118 */ 1119 #if !defined(HAS_SIGNBIT) 1120 int 1121 Perl_signbit(NV x) { 1122 return (x < 0.0) ? 1 : 0; 1123 } 1124 #endif 1125 1126 /* 1127 * Local variables: 1128 * c-indentation-style: bsd 1129 * c-basic-offset: 4 1130 * indent-tabs-mode: nil 1131 * End: 1132 * 1133 * ex: set ts=8 sts=4 sw=4 et: 1134 */ 1135