1 /* numeric.c 2 * 3 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 4 * 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * "That only makes eleven (plus one mislaid) and not fourteen, 13 * unless wizards count differently to other people." --Beorn 14 * 15 * [p.115 of _The Hobbit_: "Queer Lodgings"] 16 */ 17 18 /* 19 =head1 Numeric functions 20 21 This file contains all the stuff needed by perl for manipulating numeric 22 values, including such things as replacements for the OS's atof() function 23 24 =cut 25 26 */ 27 28 #include "EXTERN.h" 29 #define PERL_IN_NUMERIC_C 30 #include "perl.h" 31 32 U32 33 Perl_cast_ulong(pTHX_ NV f) 34 { 35 PERL_UNUSED_CONTEXT; 36 if (f < 0.0) 37 return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f; 38 if (f < U32_MAX_P1) { 39 #if CASTFLAGS & 2 40 if (f < U32_MAX_P1_HALF) 41 return (U32) f; 42 f -= U32_MAX_P1_HALF; 43 return ((U32) f) | (1 + U32_MAX >> 1); 44 #else 45 return (U32) f; 46 #endif 47 } 48 return f > 0 ? U32_MAX : 0 /* NaN */; 49 } 50 51 I32 52 Perl_cast_i32(pTHX_ NV f) 53 { 54 PERL_UNUSED_CONTEXT; 55 if (f < I32_MAX_P1) 56 return f < I32_MIN ? I32_MIN : (I32) f; 57 if (f < U32_MAX_P1) { 58 #if CASTFLAGS & 2 59 if (f < U32_MAX_P1_HALF) 60 return (I32)(U32) f; 61 f -= U32_MAX_P1_HALF; 62 return (I32)(((U32) f) | (1 + U32_MAX >> 1)); 63 #else 64 return (I32)(U32) f; 65 #endif 66 } 67 return f > 0 ? (I32)U32_MAX : 0 /* NaN */; 68 } 69 70 IV 71 Perl_cast_iv(pTHX_ NV f) 72 { 73 PERL_UNUSED_CONTEXT; 74 if (f < IV_MAX_P1) 75 return f < IV_MIN ? IV_MIN : (IV) f; 76 if (f < UV_MAX_P1) { 77 #if CASTFLAGS & 2 78 /* For future flexibility allowing for sizeof(UV) >= sizeof(IV) */ 79 if (f < UV_MAX_P1_HALF) 80 return (IV)(UV) f; 81 f -= UV_MAX_P1_HALF; 82 return (IV)(((UV) f) | (1 + UV_MAX >> 1)); 83 #else 84 return (IV)(UV) f; 85 #endif 86 } 87 return f > 0 ? (IV)UV_MAX : 0 /* NaN */; 88 } 89 90 UV 91 Perl_cast_uv(pTHX_ NV f) 92 { 93 PERL_UNUSED_CONTEXT; 94 if (f < 0.0) 95 return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f; 96 if (f < UV_MAX_P1) { 97 #if CASTFLAGS & 2 98 if (f < UV_MAX_P1_HALF) 99 return (UV) f; 100 f -= UV_MAX_P1_HALF; 101 return ((UV) f) | (1 + UV_MAX >> 1); 102 #else 103 return (UV) f; 104 #endif 105 } 106 return f > 0 ? UV_MAX : 0 /* NaN */; 107 } 108 109 /* 110 =for apidoc grok_bin 111 112 converts a string representing a binary number to numeric form. 113 114 On entry I<start> and I<*len> give the string to scan, I<*flags> gives 115 conversion flags, and I<result> should be NULL or a pointer to an NV. 116 The scan stops at the end of the string, or the first invalid character. 117 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an 118 invalid character will also trigger a warning. 119 On return I<*len> is set to the length of the scanned string, 120 and I<*flags> gives output flags. 121 122 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear, 123 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_bin> 124 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags, 125 and writes the value to I<*result> (or the value is discarded if I<result> 126 is NULL). 127 128 The binary number may optionally be prefixed with "0b" or "b" unless 129 C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If 130 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the binary 131 number may use '_' characters to separate digits. 132 133 =cut 134 135 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE 136 which suppresses any message for non-portable numbers that are still valid 137 on this platform. 138 */ 139 140 UV 141 Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) 142 { 143 const char *s = start; 144 STRLEN len = *len_p; 145 UV value = 0; 146 NV value_nv = 0; 147 148 const UV max_div_2 = UV_MAX / 2; 149 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES); 150 bool overflowed = FALSE; 151 char bit; 152 153 PERL_ARGS_ASSERT_GROK_BIN; 154 155 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { 156 /* strip off leading b or 0b. 157 for compatibility silently suffer "b" and "0b" as valid binary 158 numbers. */ 159 if (len >= 1) { 160 if (s[0] == 'b' || s[0] == 'B') { 161 s++; 162 len--; 163 } 164 else if (len >= 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) { 165 s+=2; 166 len-=2; 167 } 168 } 169 } 170 171 for (; len-- && (bit = *s); s++) { 172 if (bit == '0' || bit == '1') { 173 /* Write it in this wonky order with a goto to attempt to get the 174 compiler to make the common case integer-only loop pretty tight. 175 With gcc seems to be much straighter code than old scan_bin. */ 176 redo: 177 if (!overflowed) { 178 if (value <= max_div_2) { 179 value = (value << 1) | (bit - '0'); 180 continue; 181 } 182 /* Bah. We're just overflowed. */ 183 /* diag_listed_as: Integer overflow in %s number */ 184 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 185 "Integer overflow in binary number"); 186 overflowed = TRUE; 187 value_nv = (NV) value; 188 } 189 value_nv *= 2.0; 190 /* If an NV has not enough bits in its mantissa to 191 * represent a UV this summing of small low-order numbers 192 * is a waste of time (because the NV cannot preserve 193 * the low-order bits anyway): we could just remember when 194 * did we overflow and in the end just multiply value_nv by the 195 * right amount. */ 196 value_nv += (NV)(bit - '0'); 197 continue; 198 } 199 if (bit == '_' && len && allow_underscores && (bit = s[1]) 200 && (bit == '0' || bit == '1')) 201 { 202 --len; 203 ++s; 204 goto redo; 205 } 206 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) 207 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT), 208 "Illegal binary digit '%c' ignored", *s); 209 break; 210 } 211 212 if ( ( overflowed && value_nv > 4294967295.0) 213 #if UVSIZE > 4 214 || (!overflowed && value > 0xffffffff 215 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE)) 216 #endif 217 ) { 218 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 219 "Binary number > 0b11111111111111111111111111111111 non-portable"); 220 } 221 *len_p = s - start; 222 if (!overflowed) { 223 *flags = 0; 224 return value; 225 } 226 *flags = PERL_SCAN_GREATER_THAN_UV_MAX; 227 if (result) 228 *result = value_nv; 229 return UV_MAX; 230 } 231 232 /* 233 =for apidoc grok_hex 234 235 converts a string representing a hex number to numeric form. 236 237 On entry I<start> and I<*len> give the string to scan, I<*flags> gives 238 conversion flags, and I<result> should be NULL or a pointer to an NV. 239 The scan stops at the end of the string, or the first invalid character. 240 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an 241 invalid character will also trigger a warning. 242 On return I<*len> is set to the length of the scanned string, 243 and I<*flags> gives output flags. 244 245 If the value is <= UV_MAX it is returned as a UV, the output flags are clear, 246 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_hex> 247 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags, 248 and writes the value to I<*result> (or the value is discarded if I<result> 249 is NULL). 250 251 The hex number may optionally be prefixed with "0x" or "x" unless 252 C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If 253 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the hex 254 number may use '_' characters to separate digits. 255 256 =cut 257 258 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE 259 which suppresses any message for non-portable numbers that are still valid 260 on this platform. 261 */ 262 263 UV 264 Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) 265 { 266 dVAR; 267 const char *s = start; 268 STRLEN len = *len_p; 269 UV value = 0; 270 NV value_nv = 0; 271 const UV max_div_16 = UV_MAX / 16; 272 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES); 273 bool overflowed = FALSE; 274 275 PERL_ARGS_ASSERT_GROK_HEX; 276 277 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { 278 /* strip off leading x or 0x. 279 for compatibility silently suffer "x" and "0x" as valid hex numbers. 280 */ 281 if (len >= 1) { 282 if (s[0] == 'x' || s[0] == 'X') { 283 s++; 284 len--; 285 } 286 else if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { 287 s+=2; 288 len-=2; 289 } 290 } 291 } 292 293 for (; len-- && *s; s++) { 294 const char *hexdigit = strchr(PL_hexdigit, *s); 295 if (hexdigit) { 296 /* Write it in this wonky order with a goto to attempt to get the 297 compiler to make the common case integer-only loop pretty tight. 298 With gcc seems to be much straighter code than old scan_hex. */ 299 redo: 300 if (!overflowed) { 301 if (value <= max_div_16) { 302 value = (value << 4) | ((hexdigit - PL_hexdigit) & 15); 303 continue; 304 } 305 /* Bah. We're just overflowed. */ 306 /* diag_listed_as: Integer overflow in %s number */ 307 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 308 "Integer overflow in hexadecimal number"); 309 overflowed = TRUE; 310 value_nv = (NV) value; 311 } 312 value_nv *= 16.0; 313 /* If an NV has not enough bits in its mantissa to 314 * represent a UV this summing of small low-order numbers 315 * is a waste of time (because the NV cannot preserve 316 * the low-order bits anyway): we could just remember when 317 * did we overflow and in the end just multiply value_nv by the 318 * right amount of 16-tuples. */ 319 value_nv += (NV)((hexdigit - PL_hexdigit) & 15); 320 continue; 321 } 322 if (*s == '_' && len && allow_underscores && s[1] 323 && (hexdigit = strchr(PL_hexdigit, s[1]))) 324 { 325 --len; 326 ++s; 327 goto redo; 328 } 329 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) 330 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT), 331 "Illegal hexadecimal digit '%c' ignored", *s); 332 break; 333 } 334 335 if ( ( overflowed && value_nv > 4294967295.0) 336 #if UVSIZE > 4 337 || (!overflowed && value > 0xffffffff 338 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE)) 339 #endif 340 ) { 341 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 342 "Hexadecimal number > 0xffffffff non-portable"); 343 } 344 *len_p = s - start; 345 if (!overflowed) { 346 *flags = 0; 347 return value; 348 } 349 *flags = PERL_SCAN_GREATER_THAN_UV_MAX; 350 if (result) 351 *result = value_nv; 352 return UV_MAX; 353 } 354 355 /* 356 =for apidoc grok_oct 357 358 converts a string representing an octal number to numeric form. 359 360 On entry I<start> and I<*len> give the string to scan, I<*flags> gives 361 conversion flags, and I<result> should be NULL or a pointer to an NV. 362 The scan stops at the end of the string, or the first invalid character. 363 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an 364 8 or 9 will also trigger a warning. 365 On return I<*len> is set to the length of the scanned string, 366 and I<*flags> gives output flags. 367 368 If the value is <= UV_MAX it is returned as a UV, the output flags are clear, 369 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_oct> 370 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags, 371 and writes the value to I<*result> (or the value is discarded if I<result> 372 is NULL). 373 374 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the octal 375 number may use '_' characters to separate digits. 376 377 =cut 378 379 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE> 380 which suppresses any message for non-portable numbers, but which are valid 381 on this platform. 382 */ 383 384 UV 385 Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) 386 { 387 const char *s = start; 388 STRLEN len = *len_p; 389 UV value = 0; 390 NV value_nv = 0; 391 const UV max_div_8 = UV_MAX / 8; 392 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES); 393 bool overflowed = FALSE; 394 395 PERL_ARGS_ASSERT_GROK_OCT; 396 397 for (; len-- && *s; s++) { 398 /* gcc 2.95 optimiser not smart enough to figure that this subtraction 399 out front allows slicker code. */ 400 int digit = *s - '0'; 401 if (digit >= 0 && digit <= 7) { 402 /* Write it in this wonky order with a goto to attempt to get the 403 compiler to make the common case integer-only loop pretty tight. 404 */ 405 redo: 406 if (!overflowed) { 407 if (value <= max_div_8) { 408 value = (value << 3) | digit; 409 continue; 410 } 411 /* Bah. We're just overflowed. */ 412 /* diag_listed_as: Integer overflow in %s number */ 413 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 414 "Integer overflow in octal number"); 415 overflowed = TRUE; 416 value_nv = (NV) value; 417 } 418 value_nv *= 8.0; 419 /* If an NV has not enough bits in its mantissa to 420 * represent a UV this summing of small low-order numbers 421 * is a waste of time (because the NV cannot preserve 422 * the low-order bits anyway): we could just remember when 423 * did we overflow and in the end just multiply value_nv by the 424 * right amount of 8-tuples. */ 425 value_nv += (NV)digit; 426 continue; 427 } 428 if (digit == ('_' - '0') && len && allow_underscores 429 && (digit = s[1] - '0') && (digit >= 0 && digit <= 7)) 430 { 431 --len; 432 ++s; 433 goto redo; 434 } 435 /* Allow \octal to work the DWIM way (that is, stop scanning 436 * as soon as non-octal characters are seen, complain only if 437 * someone seems to want to use the digits eight and nine). */ 438 if (digit == 8 || digit == 9) { 439 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) 440 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT), 441 "Illegal octal digit '%c' ignored", *s); 442 } 443 break; 444 } 445 446 if ( ( overflowed && value_nv > 4294967295.0) 447 #if UVSIZE > 4 448 || (!overflowed && value > 0xffffffff 449 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE)) 450 #endif 451 ) { 452 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 453 "Octal number > 037777777777 non-portable"); 454 } 455 *len_p = s - start; 456 if (!overflowed) { 457 *flags = 0; 458 return value; 459 } 460 *flags = PERL_SCAN_GREATER_THAN_UV_MAX; 461 if (result) 462 *result = value_nv; 463 return UV_MAX; 464 } 465 466 /* 467 =for apidoc scan_bin 468 469 For backwards compatibility. Use C<grok_bin> instead. 470 471 =for apidoc scan_hex 472 473 For backwards compatibility. Use C<grok_hex> instead. 474 475 =for apidoc scan_oct 476 477 For backwards compatibility. Use C<grok_oct> instead. 478 479 =cut 480 */ 481 482 NV 483 Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen) 484 { 485 NV rnv; 486 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0; 487 const UV ruv = grok_bin (start, &len, &flags, &rnv); 488 489 PERL_ARGS_ASSERT_SCAN_BIN; 490 491 *retlen = len; 492 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv; 493 } 494 495 NV 496 Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen) 497 { 498 NV rnv; 499 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0; 500 const UV ruv = grok_oct (start, &len, &flags, &rnv); 501 502 PERL_ARGS_ASSERT_SCAN_OCT; 503 504 *retlen = len; 505 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv; 506 } 507 508 NV 509 Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen) 510 { 511 NV rnv; 512 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0; 513 const UV ruv = grok_hex (start, &len, &flags, &rnv); 514 515 PERL_ARGS_ASSERT_SCAN_HEX; 516 517 *retlen = len; 518 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv; 519 } 520 521 /* 522 =for apidoc grok_numeric_radix 523 524 Scan and skip for a numeric decimal separator (radix). 525 526 =cut 527 */ 528 bool 529 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send) 530 { 531 #ifdef USE_LOCALE_NUMERIC 532 dVAR; 533 534 PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX; 535 536 if (PL_numeric_radix_sv && IN_SOME_LOCALE_FORM) { 537 STRLEN len; 538 const char * const radix = SvPV(PL_numeric_radix_sv, len); 539 if (*sp + len <= send && memEQ(*sp, radix, len)) { 540 *sp += len; 541 return TRUE; 542 } 543 } 544 /* always try "." if numeric radix didn't match because 545 * we may have data from different locales mixed */ 546 #endif 547 548 PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX; 549 550 if (*sp < send && **sp == '.') { 551 ++*sp; 552 return TRUE; 553 } 554 return FALSE; 555 } 556 557 /* 558 =for apidoc grok_number 559 560 Recognise (or not) a number. The type of the number is returned 561 (0 if unrecognised), otherwise it is a bit-ORed combination of 562 IS_NUMBER_IN_UV, IS_NUMBER_GREATER_THAN_UV_MAX, IS_NUMBER_NOT_INT, 563 IS_NUMBER_NEG, IS_NUMBER_INFINITY, IS_NUMBER_NAN (defined in perl.h). 564 565 If the value of the number can fit an in UV, it is returned in the *valuep 566 IS_NUMBER_IN_UV will be set to indicate that *valuep is valid, IS_NUMBER_IN_UV 567 will never be set unless *valuep is valid, but *valuep may have been assigned 568 to during processing even though IS_NUMBER_IN_UV is not set on return. 569 If valuep is NULL, IS_NUMBER_IN_UV will be set for the same cases as when 570 valuep is non-NULL, but no actual assignment (or SEGV) will occur. 571 572 IS_NUMBER_NOT_INT will be set with IS_NUMBER_IN_UV if trailing decimals were 573 seen (in which case *valuep gives the true value truncated to an integer), and 574 IS_NUMBER_NEG if the number is negative (in which case *valuep holds the 575 absolute value). IS_NUMBER_IN_UV is not set if e notation was used or the 576 number is larger than a UV. 577 578 =cut 579 */ 580 int 581 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep) 582 { 583 const char *s = pv; 584 const char * const send = pv + len; 585 const UV max_div_10 = UV_MAX / 10; 586 const char max_mod_10 = UV_MAX % 10; 587 int numtype = 0; 588 int sawinf = 0; 589 int sawnan = 0; 590 591 PERL_ARGS_ASSERT_GROK_NUMBER; 592 593 while (s < send && isSPACE(*s)) 594 s++; 595 if (s == send) { 596 return 0; 597 } else if (*s == '-') { 598 s++; 599 numtype = IS_NUMBER_NEG; 600 } 601 else if (*s == '+') 602 s++; 603 604 if (s == send) 605 return 0; 606 607 /* next must be digit or the radix separator or beginning of infinity */ 608 if (isDIGIT(*s)) { 609 /* UVs are at least 32 bits, so the first 9 decimal digits cannot 610 overflow. */ 611 UV value = *s - '0'; 612 /* This construction seems to be more optimiser friendly. 613 (without it gcc does the isDIGIT test and the *s - '0' separately) 614 With it gcc on arm is managing 6 instructions (6 cycles) per digit. 615 In theory the optimiser could deduce how far to unroll the loop 616 before checking for overflow. */ 617 if (++s < send) { 618 int digit = *s - '0'; 619 if (digit >= 0 && digit <= 9) { 620 value = value * 10 + digit; 621 if (++s < send) { 622 digit = *s - '0'; 623 if (digit >= 0 && digit <= 9) { 624 value = value * 10 + digit; 625 if (++s < send) { 626 digit = *s - '0'; 627 if (digit >= 0 && digit <= 9) { 628 value = value * 10 + digit; 629 if (++s < send) { 630 digit = *s - '0'; 631 if (digit >= 0 && digit <= 9) { 632 value = value * 10 + digit; 633 if (++s < send) { 634 digit = *s - '0'; 635 if (digit >= 0 && digit <= 9) { 636 value = value * 10 + digit; 637 if (++s < send) { 638 digit = *s - '0'; 639 if (digit >= 0 && digit <= 9) { 640 value = value * 10 + digit; 641 if (++s < send) { 642 digit = *s - '0'; 643 if (digit >= 0 && digit <= 9) { 644 value = value * 10 + digit; 645 if (++s < send) { 646 digit = *s - '0'; 647 if (digit >= 0 && digit <= 9) { 648 value = value * 10 + digit; 649 if (++s < send) { 650 /* Now got 9 digits, so need to check 651 each time for overflow. */ 652 digit = *s - '0'; 653 while (digit >= 0 && digit <= 9 654 && (value < max_div_10 655 || (value == max_div_10 656 && digit <= max_mod_10))) { 657 value = value * 10 + digit; 658 if (++s < send) 659 digit = *s - '0'; 660 else 661 break; 662 } 663 if (digit >= 0 && digit <= 9 664 && (s < send)) { 665 /* value overflowed. 666 skip the remaining digits, don't 667 worry about setting *valuep. */ 668 do { 669 s++; 670 } while (s < send && isDIGIT(*s)); 671 numtype |= 672 IS_NUMBER_GREATER_THAN_UV_MAX; 673 goto skip_value; 674 } 675 } 676 } 677 } 678 } 679 } 680 } 681 } 682 } 683 } 684 } 685 } 686 } 687 } 688 } 689 } 690 } 691 } 692 numtype |= IS_NUMBER_IN_UV; 693 if (valuep) 694 *valuep = value; 695 696 skip_value: 697 if (GROK_NUMERIC_RADIX(&s, send)) { 698 numtype |= IS_NUMBER_NOT_INT; 699 while (s < send && isDIGIT(*s)) /* optional digits after the radix */ 700 s++; 701 } 702 } 703 else if (GROK_NUMERIC_RADIX(&s, send)) { 704 numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */ 705 /* no digits before the radix means we need digits after it */ 706 if (s < send && isDIGIT(*s)) { 707 do { 708 s++; 709 } while (s < send && isDIGIT(*s)); 710 if (valuep) { 711 /* integer approximation is valid - it's 0. */ 712 *valuep = 0; 713 } 714 } 715 else 716 return 0; 717 } else if (*s == 'I' || *s == 'i') { 718 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; 719 s++; if (s == send || (*s != 'F' && *s != 'f')) return 0; 720 s++; if (s < send && (*s == 'I' || *s == 'i')) { 721 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; 722 s++; if (s == send || (*s != 'I' && *s != 'i')) return 0; 723 s++; if (s == send || (*s != 'T' && *s != 't')) return 0; 724 s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0; 725 s++; 726 } 727 sawinf = 1; 728 } else if (*s == 'N' || *s == 'n') { 729 /* XXX TODO: There are signaling NaNs and quiet NaNs. */ 730 s++; if (s == send || (*s != 'A' && *s != 'a')) return 0; 731 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; 732 s++; 733 sawnan = 1; 734 } else 735 return 0; 736 737 if (sawinf) { 738 numtype &= IS_NUMBER_NEG; /* Keep track of sign */ 739 numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; 740 } else if (sawnan) { 741 numtype &= IS_NUMBER_NEG; /* Keep track of sign */ 742 numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; 743 } else if (s < send) { 744 /* we can have an optional exponent part */ 745 if (*s == 'e' || *s == 'E') { 746 /* The only flag we keep is sign. Blow away any "it's UV" */ 747 numtype &= IS_NUMBER_NEG; 748 numtype |= IS_NUMBER_NOT_INT; 749 s++; 750 if (s < send && (*s == '-' || *s == '+')) 751 s++; 752 if (s < send && isDIGIT(*s)) { 753 do { 754 s++; 755 } while (s < send && isDIGIT(*s)); 756 } 757 else 758 return 0; 759 } 760 } 761 while (s < send && isSPACE(*s)) 762 s++; 763 if (s >= send) 764 return numtype; 765 if (len == 10 && memEQ(pv, "0 but true", 10)) { 766 if (valuep) 767 *valuep = 0; 768 return IS_NUMBER_IN_UV; 769 } 770 return 0; 771 } 772 773 STATIC NV 774 S_mulexp10(NV value, I32 exponent) 775 { 776 NV result = 1.0; 777 NV power = 10.0; 778 bool negative = 0; 779 I32 bit; 780 781 if (exponent == 0) 782 return value; 783 if (value == 0) 784 return (NV)0; 785 786 /* On OpenVMS VAX we by default use the D_FLOAT double format, 787 * and that format does not have *easy* capabilities [1] for 788 * overflowing doubles 'silently' as IEEE fp does. We also need 789 * to support G_FLOAT on both VAX and Alpha, and though the exponent 790 * range is much larger than D_FLOAT it still doesn't do silent 791 * overflow. Therefore we need to detect early whether we would 792 * overflow (this is the behaviour of the native string-to-float 793 * conversion routines, and therefore of native applications, too). 794 * 795 * [1] Trying to establish a condition handler to trap floating point 796 * exceptions is not a good idea. */ 797 798 /* In UNICOS and in certain Cray models (such as T90) there is no 799 * IEEE fp, and no way at all from C to catch fp overflows gracefully. 800 * There is something you can do if you are willing to use some 801 * inline assembler: the instruction is called DFI-- but that will 802 * disable *all* floating point interrupts, a little bit too large 803 * a hammer. Therefore we need to catch potential overflows before 804 * it's too late. */ 805 806 #if ((defined(VMS) && !defined(_IEEE_FP)) || defined(_UNICOS)) && defined(NV_MAX_10_EXP) 807 STMT_START { 808 const NV exp_v = log10(value); 809 if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP) 810 return NV_MAX; 811 if (exponent < 0) { 812 if (-(exponent + exp_v) >= NV_MAX_10_EXP) 813 return 0.0; 814 while (-exponent >= NV_MAX_10_EXP) { 815 /* combination does not overflow, but 10^(-exponent) does */ 816 value /= 10; 817 ++exponent; 818 } 819 } 820 } STMT_END; 821 #endif 822 823 if (exponent < 0) { 824 negative = 1; 825 exponent = -exponent; 826 } 827 for (bit = 1; exponent; bit <<= 1) { 828 if (exponent & bit) { 829 exponent ^= bit; 830 result *= power; 831 /* Floating point exceptions are supposed to be turned off, 832 * but if we're obviously done, don't risk another iteration. 833 */ 834 if (exponent == 0) break; 835 } 836 power *= power; 837 } 838 return negative ? value / result : value * result; 839 } 840 841 NV 842 Perl_my_atof(pTHX_ const char* s) 843 { 844 NV x = 0.0; 845 #ifdef USE_LOCALE_NUMERIC 846 dVAR; 847 848 PERL_ARGS_ASSERT_MY_ATOF; 849 850 if (PL_numeric_local && PL_numeric_radix_sv && IN_SOME_LOCALE_FORM) { 851 const char *standard = NULL, *local = NULL; 852 bool use_standard_radix; 853 854 /* Look through the string for the first thing that looks like a 855 * decimal point: either the value in the current locale or the 856 * standard fallback of '.'. The one which appears earliest in the 857 * input string is the one that we should have atof look for. Note that 858 * we have to determine this beforehand because on some systems, 859 * Perl_atof2 is just a wrapper around the system's atof. */ 860 standard = strchr(s, '.'); 861 local = strstr(s, SvPV_nolen(PL_numeric_radix_sv)); 862 863 use_standard_radix = standard && (!local || standard < local); 864 865 if (use_standard_radix) 866 SET_NUMERIC_STANDARD(); 867 868 Perl_atof2(s, x); 869 870 if (use_standard_radix) 871 SET_NUMERIC_LOCAL(); 872 } 873 else 874 Perl_atof2(s, x); 875 #else 876 Perl_atof2(s, x); 877 #endif 878 return x; 879 } 880 881 char* 882 Perl_my_atof2(pTHX_ const char* orig, NV* value) 883 { 884 NV result[3] = {0.0, 0.0, 0.0}; 885 const char* s = orig; 886 #ifdef USE_PERL_ATOF 887 UV accumulator[2] = {0,0}; /* before/after dp */ 888 bool negative = 0; 889 const char* send = s + strlen(orig) - 1; 890 bool seen_digit = 0; 891 I32 exp_adjust[2] = {0,0}; 892 I32 exp_acc[2] = {-1, -1}; 893 /* the current exponent adjust for the accumulators */ 894 I32 exponent = 0; 895 I32 seen_dp = 0; 896 I32 digit = 0; 897 I32 old_digit = 0; 898 I32 sig_digits = 0; /* noof significant digits seen so far */ 899 900 PERL_ARGS_ASSERT_MY_ATOF2; 901 902 /* There is no point in processing more significant digits 903 * than the NV can hold. Note that NV_DIG is a lower-bound value, 904 * while we need an upper-bound value. We add 2 to account for this; 905 * since it will have been conservative on both the first and last digit. 906 * For example a 32-bit mantissa with an exponent of 4 would have 907 * exact values in the set 908 * 4 909 * 8 910 * .. 911 * 17179869172 912 * 17179869176 913 * 17179869180 914 * 915 * where for the purposes of calculating NV_DIG we would have to discount 916 * both the first and last digit, since neither can hold all values from 917 * 0..9; but for calculating the value we must examine those two digits. 918 */ 919 #ifdef MAX_SIG_DIG_PLUS 920 /* It is not necessarily the case that adding 2 to NV_DIG gets all the 921 possible digits in a NV, especially if NVs are not IEEE compliant 922 (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */ 923 # define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS) 924 #else 925 # define MAX_SIG_DIGITS (NV_DIG+2) 926 #endif 927 928 /* the max number we can accumulate in a UV, and still safely do 10*N+9 */ 929 #define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10)) 930 931 /* leading whitespace */ 932 while (isSPACE(*s)) 933 ++s; 934 935 /* sign */ 936 switch (*s) { 937 case '-': 938 negative = 1; 939 /* fall through */ 940 case '+': 941 ++s; 942 } 943 944 /* punt to strtod for NaN/Inf; if no support for it there, tough luck */ 945 946 #ifdef HAS_STRTOD 947 if (*s == 'n' || *s == 'N' || *s == 'i' || *s == 'I') { 948 const char *p = negative ? s - 1 : s; 949 char *endp; 950 NV rslt; 951 rslt = strtod(p, &endp); 952 if (endp != p) { 953 *value = rslt; 954 return (char *)endp; 955 } 956 } 957 #endif 958 959 /* we accumulate digits into an integer; when this becomes too 960 * large, we add the total to NV and start again */ 961 962 while (1) { 963 if (isDIGIT(*s)) { 964 seen_digit = 1; 965 old_digit = digit; 966 digit = *s++ - '0'; 967 if (seen_dp) 968 exp_adjust[1]++; 969 970 /* don't start counting until we see the first significant 971 * digit, eg the 5 in 0.00005... */ 972 if (!sig_digits && digit == 0) 973 continue; 974 975 if (++sig_digits > MAX_SIG_DIGITS) { 976 /* limits of precision reached */ 977 if (digit > 5) { 978 ++accumulator[seen_dp]; 979 } else if (digit == 5) { 980 if (old_digit % 2) { /* round to even - Allen */ 981 ++accumulator[seen_dp]; 982 } 983 } 984 if (seen_dp) { 985 exp_adjust[1]--; 986 } else { 987 exp_adjust[0]++; 988 } 989 /* skip remaining digits */ 990 while (isDIGIT(*s)) { 991 ++s; 992 if (! seen_dp) { 993 exp_adjust[0]++; 994 } 995 } 996 /* warn of loss of precision? */ 997 } 998 else { 999 if (accumulator[seen_dp] > MAX_ACCUMULATE) { 1000 /* add accumulator to result and start again */ 1001 result[seen_dp] = S_mulexp10(result[seen_dp], 1002 exp_acc[seen_dp]) 1003 + (NV)accumulator[seen_dp]; 1004 accumulator[seen_dp] = 0; 1005 exp_acc[seen_dp] = 0; 1006 } 1007 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit; 1008 ++exp_acc[seen_dp]; 1009 } 1010 } 1011 else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) { 1012 seen_dp = 1; 1013 if (sig_digits > MAX_SIG_DIGITS) { 1014 do { 1015 ++s; 1016 } while (isDIGIT(*s)); 1017 break; 1018 } 1019 } 1020 else { 1021 break; 1022 } 1023 } 1024 1025 result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0]; 1026 if (seen_dp) { 1027 result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1]; 1028 } 1029 1030 if (seen_digit && (*s == 'e' || *s == 'E')) { 1031 bool expnegative = 0; 1032 1033 ++s; 1034 switch (*s) { 1035 case '-': 1036 expnegative = 1; 1037 /* fall through */ 1038 case '+': 1039 ++s; 1040 } 1041 while (isDIGIT(*s)) 1042 exponent = exponent * 10 + (*s++ - '0'); 1043 if (expnegative) 1044 exponent = -exponent; 1045 } 1046 1047 1048 1049 /* now apply the exponent */ 1050 1051 if (seen_dp) { 1052 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]) 1053 + S_mulexp10(result[1],exponent-exp_adjust[1]); 1054 } else { 1055 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]); 1056 } 1057 1058 /* now apply the sign */ 1059 if (negative) 1060 result[2] = -result[2]; 1061 #endif /* USE_PERL_ATOF */ 1062 *value = result[2]; 1063 return (char *)s; 1064 } 1065 1066 #if ! defined(HAS_MODFL) && defined(HAS_AINTL) && defined(HAS_COPYSIGNL) 1067 long double 1068 Perl_my_modfl(long double x, long double *ip) 1069 { 1070 *ip = aintl(x); 1071 return (x == *ip ? copysignl(0.0L, x) : x - *ip); 1072 } 1073 #endif 1074 1075 #if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL) 1076 long double 1077 Perl_my_frexpl(long double x, int *e) { 1078 *e = x == 0.0L ? 0 : ilogbl(x) + 1; 1079 return (scalbnl(x, -*e)); 1080 } 1081 #endif 1082 1083 /* 1084 =for apidoc Perl_signbit 1085 1086 Return a non-zero integer if the sign bit on an NV is set, and 0 if 1087 it is not. 1088 1089 If Configure detects this system has a signbit() that will work with 1090 our NVs, then we just use it via the #define in perl.h. Otherwise, 1091 fall back on this implementation. As a first pass, this gets everything 1092 right except -0.0. Alas, catching -0.0 is the main use for this function, 1093 so this is not too helpful yet. Still, at least we have the scaffolding 1094 in place to support other systems, should that prove useful. 1095 1096 1097 Configure notes: This function is called 'Perl_signbit' instead of a 1098 plain 'signbit' because it is easy to imagine a system having a signbit() 1099 function or macro that doesn't happen to work with our particular choice 1100 of NVs. We shouldn't just re-#define signbit as Perl_signbit and expect 1101 the standard system headers to be happy. Also, this is a no-context 1102 function (no pTHX_) because Perl_signbit() is usually re-#defined in 1103 perl.h as a simple macro call to the system's signbit(). 1104 Users should just always call Perl_signbit(). 1105 1106 =cut 1107 */ 1108 #if !defined(HAS_SIGNBIT) 1109 int 1110 Perl_signbit(NV x) { 1111 return (x < 0.0) ? 1 : 0; 1112 } 1113 #endif 1114 1115 /* 1116 * Local variables: 1117 * c-indentation-style: bsd 1118 * c-basic-offset: 4 1119 * indent-tabs-mode: nil 1120 * End: 1121 * 1122 * ex: set ts=8 sts=4 sw=4 et: 1123 */ 1124