1################################################################################ 2## 3## Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz. 4## Version 2.x, Copyright (C) 2001, Paul Marquess. 5## Version 1.x, Copyright (C) 1999, Kenneth Albanowski. 6## 7## This program is free software; you can redistribute it and/or 8## modify it under the same terms as Perl itself. 9## 10################################################################################ 11 12=provides 13 14grok_hex 15grok_oct 16grok_bin 17grok_numeric_radix 18grok_number 19__UNDEFINED__ 20 21=implementation 22 23__UNDEFINED__ IN_PERL_COMPILETIME (PL_curcop == &PL_compiling) 24__UNDEFINED__ IN_LOCALE_RUNTIME (PL_curcop->op_private & HINT_LOCALE) 25__UNDEFINED__ IN_LOCALE_COMPILETIME (PL_hints & HINT_LOCALE) 26__UNDEFINED__ IN_LOCALE (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME) 27 28__UNDEFINED__ IS_NUMBER_IN_UV 0x01 29__UNDEFINED__ IS_NUMBER_GREATER_THAN_UV_MAX 0x02 30__UNDEFINED__ IS_NUMBER_NOT_INT 0x04 31__UNDEFINED__ IS_NUMBER_NEG 0x08 32__UNDEFINED__ IS_NUMBER_INFINITY 0x10 33__UNDEFINED__ IS_NUMBER_NAN 0x20 34 35__UNDEFINED__ GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send) 36 37__UNDEFINED__ PERL_SCAN_GREATER_THAN_UV_MAX 0x02 38__UNDEFINED__ PERL_SCAN_SILENT_ILLDIGIT 0x04 39__UNDEFINED__ PERL_SCAN_ALLOW_UNDERSCORES 0x01 40__UNDEFINED__ PERL_SCAN_DISALLOW_PREFIX 0x02 41 42#ifndef grok_numeric_radix 43#if { NEED grok_numeric_radix } 44bool 45grok_numeric_radix(pTHX_ const char **sp, const char *send) 46{ 47#ifdef USE_LOCALE_NUMERIC 48#ifdef PL_numeric_radix_sv 49 if (PL_numeric_radix_sv && IN_LOCALE) { 50 STRLEN len; 51 char* radix = SvPV(PL_numeric_radix_sv, len); 52 if (*sp + len <= send && memEQ(*sp, radix, len)) { 53 *sp += len; 54 return TRUE; 55 } 56 } 57#else 58 /* older perls don't have PL_numeric_radix_sv so the radix 59 * must manually be requested from locale.h 60 */ 61#include <locale.h> 62 dTHR; /* needed for older threaded perls */ 63 struct lconv *lc = localeconv(); 64 char *radix = lc->decimal_point; 65 if (radix && IN_LOCALE) { 66 STRLEN len = strlen(radix); 67 if (*sp + len <= send && memEQ(*sp, radix, len)) { 68 *sp += len; 69 return TRUE; 70 } 71 } 72#endif 73#endif /* USE_LOCALE_NUMERIC */ 74 /* always try "." if numeric radix didn't match because 75 * we may have data from different locales mixed */ 76 if (*sp < send && **sp == '.') { 77 ++*sp; 78 return TRUE; 79 } 80 return FALSE; 81} 82#endif 83#endif 84 85#ifndef grok_number 86#if { NEED grok_number } 87int 88grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep) 89{ 90 const char *s = pv; 91 const char *send = pv + len; 92 const UV max_div_10 = UV_MAX / 10; 93 const char max_mod_10 = UV_MAX % 10; 94 int numtype = 0; 95 int sawinf = 0; 96 int sawnan = 0; 97 98 while (s < send && isSPACE(*s)) 99 s++; 100 if (s == send) { 101 return 0; 102 } else if (*s == '-') { 103 s++; 104 numtype = IS_NUMBER_NEG; 105 } 106 else if (*s == '+') 107 s++; 108 109 if (s == send) 110 return 0; 111 112 /* next must be digit or the radix separator or beginning of infinity */ 113 if (isDIGIT(*s)) { 114 /* UVs are at least 32 bits, so the first 9 decimal digits cannot 115 overflow. */ 116 UV value = *s - '0'; 117 /* This construction seems to be more optimiser friendly. 118 (without it gcc does the isDIGIT test and the *s - '0' separately) 119 With it gcc on arm is managing 6 instructions (6 cycles) per digit. 120 In theory the optimiser could deduce how far to unroll the loop 121 before checking for overflow. */ 122 if (++s < send) { 123 int digit = *s - '0'; 124 if (digit >= 0 && digit <= 9) { 125 value = value * 10 + digit; 126 if (++s < send) { 127 digit = *s - '0'; 128 if (digit >= 0 && digit <= 9) { 129 value = value * 10 + digit; 130 if (++s < send) { 131 digit = *s - '0'; 132 if (digit >= 0 && digit <= 9) { 133 value = value * 10 + digit; 134 if (++s < send) { 135 digit = *s - '0'; 136 if (digit >= 0 && digit <= 9) { 137 value = value * 10 + digit; 138 if (++s < send) { 139 digit = *s - '0'; 140 if (digit >= 0 && digit <= 9) { 141 value = value * 10 + digit; 142 if (++s < send) { 143 digit = *s - '0'; 144 if (digit >= 0 && digit <= 9) { 145 value = value * 10 + digit; 146 if (++s < send) { 147 digit = *s - '0'; 148 if (digit >= 0 && digit <= 9) { 149 value = value * 10 + digit; 150 if (++s < send) { 151 digit = *s - '0'; 152 if (digit >= 0 && digit <= 9) { 153 value = value * 10 + digit; 154 if (++s < send) { 155 /* Now got 9 digits, so need to check 156 each time for overflow. */ 157 digit = *s - '0'; 158 while (digit >= 0 && digit <= 9 159 && (value < max_div_10 160 || (value == max_div_10 161 && digit <= max_mod_10))) { 162 value = value * 10 + digit; 163 if (++s < send) 164 digit = *s - '0'; 165 else 166 break; 167 } 168 if (digit >= 0 && digit <= 9 169 && (s < send)) { 170 /* value overflowed. 171 skip the remaining digits, don't 172 worry about setting *valuep. */ 173 do { 174 s++; 175 } while (s < send && isDIGIT(*s)); 176 numtype |= 177 IS_NUMBER_GREATER_THAN_UV_MAX; 178 goto skip_value; 179 } 180 } 181 } 182 } 183 } 184 } 185 } 186 } 187 } 188 } 189 } 190 } 191 } 192 } 193 } 194 } 195 } 196 } 197 numtype |= IS_NUMBER_IN_UV; 198 if (valuep) 199 *valuep = value; 200 201 skip_value: 202 if (GROK_NUMERIC_RADIX(&s, send)) { 203 numtype |= IS_NUMBER_NOT_INT; 204 while (s < send && isDIGIT(*s)) /* optional digits after the radix */ 205 s++; 206 } 207 } 208 else if (GROK_NUMERIC_RADIX(&s, send)) { 209 numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */ 210 /* no digits before the radix means we need digits after it */ 211 if (s < send && isDIGIT(*s)) { 212 do { 213 s++; 214 } while (s < send && isDIGIT(*s)); 215 if (valuep) { 216 /* integer approximation is valid - it's 0. */ 217 *valuep = 0; 218 } 219 } 220 else 221 return 0; 222 } else if (*s == 'I' || *s == 'i') { 223 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; 224 s++; if (s == send || (*s != 'F' && *s != 'f')) return 0; 225 s++; if (s < send && (*s == 'I' || *s == 'i')) { 226 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; 227 s++; if (s == send || (*s != 'I' && *s != 'i')) return 0; 228 s++; if (s == send || (*s != 'T' && *s != 't')) return 0; 229 s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0; 230 s++; 231 } 232 sawinf = 1; 233 } else if (*s == 'N' || *s == 'n') { 234 /* XXX TODO: There are signaling NaNs and quiet NaNs. */ 235 s++; if (s == send || (*s != 'A' && *s != 'a')) return 0; 236 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; 237 s++; 238 sawnan = 1; 239 } else 240 return 0; 241 242 if (sawinf) { 243 numtype &= IS_NUMBER_NEG; /* Keep track of sign */ 244 numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; 245 } else if (sawnan) { 246 numtype &= IS_NUMBER_NEG; /* Keep track of sign */ 247 numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; 248 } else if (s < send) { 249 /* we can have an optional exponent part */ 250 if (*s == 'e' || *s == 'E') { 251 /* The only flag we keep is sign. Blow away any "it's UV" */ 252 numtype &= IS_NUMBER_NEG; 253 numtype |= IS_NUMBER_NOT_INT; 254 s++; 255 if (s < send && (*s == '-' || *s == '+')) 256 s++; 257 if (s < send && isDIGIT(*s)) { 258 do { 259 s++; 260 } while (s < send && isDIGIT(*s)); 261 } 262 else 263 return 0; 264 } 265 } 266 while (s < send && isSPACE(*s)) 267 s++; 268 if (s >= send) 269 return numtype; 270 if (len == 10 && memEQ(pv, "0 but true", 10)) { 271 if (valuep) 272 *valuep = 0; 273 return IS_NUMBER_IN_UV; 274 } 275 return 0; 276} 277#endif 278#endif 279 280/* 281 * The grok_* routines have been modified to use warn() instead of 282 * Perl_warner(). Also, 'hexdigit' was the former name of PL_hexdigit, 283 * which is why the stack variable has been renamed to 'xdigit'. 284 */ 285 286#ifndef grok_bin 287#if { NEED grok_bin } 288UV 289grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) 290{ 291 const char *s = start; 292 STRLEN len = *len_p; 293 UV value = 0; 294 NV value_nv = 0; 295 296 const UV max_div_2 = UV_MAX / 2; 297 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; 298 bool overflowed = FALSE; 299 300 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { 301 /* strip off leading b or 0b. 302 for compatibility silently suffer "b" and "0b" as valid binary 303 numbers. */ 304 if (len >= 1) { 305 if (s[0] == 'b') { 306 s++; 307 len--; 308 } 309 else if (len >= 2 && s[0] == '0' && s[1] == 'b') { 310 s+=2; 311 len-=2; 312 } 313 } 314 } 315 316 for (; len-- && *s; s++) { 317 char bit = *s; 318 if (bit == '0' || bit == '1') { 319 /* Write it in this wonky order with a goto to attempt to get the 320 compiler to make the common case integer-only loop pretty tight. 321 With gcc seems to be much straighter code than old scan_bin. */ 322 redo: 323 if (!overflowed) { 324 if (value <= max_div_2) { 325 value = (value << 1) | (bit - '0'); 326 continue; 327 } 328 /* Bah. We're just overflowed. */ 329 warn("Integer overflow in binary number"); 330 overflowed = TRUE; 331 value_nv = (NV) value; 332 } 333 value_nv *= 2.0; 334 /* If an NV has not enough bits in its mantissa to 335 * represent a UV this summing of small low-order numbers 336 * is a waste of time (because the NV cannot preserve 337 * the low-order bits anyway): we could just remember when 338 * did we overflow and in the end just multiply value_nv by the 339 * right amount. */ 340 value_nv += (NV)(bit - '0'); 341 continue; 342 } 343 if (bit == '_' && len && allow_underscores && (bit = s[1]) 344 && (bit == '0' || bit == '1')) 345 { 346 --len; 347 ++s; 348 goto redo; 349 } 350 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) 351 warn("Illegal binary digit '%c' ignored", *s); 352 break; 353 } 354 355 if ( ( overflowed && value_nv > 4294967295.0) 356#if UVSIZE > 4 357 || (!overflowed && value > 0xffffffff ) 358#endif 359 ) { 360 warn("Binary number > 0b11111111111111111111111111111111 non-portable"); 361 } 362 *len_p = s - start; 363 if (!overflowed) { 364 *flags = 0; 365 return value; 366 } 367 *flags = PERL_SCAN_GREATER_THAN_UV_MAX; 368 if (result) 369 *result = value_nv; 370 return UV_MAX; 371} 372#endif 373#endif 374 375#ifndef grok_hex 376#if { NEED grok_hex } 377UV 378grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) 379{ 380 const char *s = start; 381 STRLEN len = *len_p; 382 UV value = 0; 383 NV value_nv = 0; 384 385 const UV max_div_16 = UV_MAX / 16; 386 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; 387 bool overflowed = FALSE; 388 const char *xdigit; 389 390 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { 391 /* strip off leading x or 0x. 392 for compatibility silently suffer "x" and "0x" as valid hex numbers. 393 */ 394 if (len >= 1) { 395 if (s[0] == 'x') { 396 s++; 397 len--; 398 } 399 else if (len >= 2 && s[0] == '0' && s[1] == 'x') { 400 s+=2; 401 len-=2; 402 } 403 } 404 } 405 406 for (; len-- && *s; s++) { 407 xdigit = strchr((char *) PL_hexdigit, *s); 408 if (xdigit) { 409 /* Write it in this wonky order with a goto to attempt to get the 410 compiler to make the common case integer-only loop pretty tight. 411 With gcc seems to be much straighter code than old scan_hex. */ 412 redo: 413 if (!overflowed) { 414 if (value <= max_div_16) { 415 value = (value << 4) | ((xdigit - PL_hexdigit) & 15); 416 continue; 417 } 418 warn("Integer overflow in hexadecimal number"); 419 overflowed = TRUE; 420 value_nv = (NV) value; 421 } 422 value_nv *= 16.0; 423 /* If an NV has not enough bits in its mantissa to 424 * represent a UV this summing of small low-order numbers 425 * is a waste of time (because the NV cannot preserve 426 * the low-order bits anyway): we could just remember when 427 * did we overflow and in the end just multiply value_nv by the 428 * right amount of 16-tuples. */ 429 value_nv += (NV)((xdigit - PL_hexdigit) & 15); 430 continue; 431 } 432 if (*s == '_' && len && allow_underscores && s[1] 433 && (xdigit = strchr((char *) PL_hexdigit, s[1]))) 434 { 435 --len; 436 ++s; 437 goto redo; 438 } 439 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) 440 warn("Illegal hexadecimal digit '%c' ignored", *s); 441 break; 442 } 443 444 if ( ( overflowed && value_nv > 4294967295.0) 445#if UVSIZE > 4 446 || (!overflowed && value > 0xffffffff ) 447#endif 448 ) { 449 warn("Hexadecimal number > 0xffffffff non-portable"); 450 } 451 *len_p = s - start; 452 if (!overflowed) { 453 *flags = 0; 454 return value; 455 } 456 *flags = PERL_SCAN_GREATER_THAN_UV_MAX; 457 if (result) 458 *result = value_nv; 459 return UV_MAX; 460} 461#endif 462#endif 463 464#ifndef grok_oct 465#if { NEED grok_oct } 466UV 467grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) 468{ 469 const char *s = start; 470 STRLEN len = *len_p; 471 UV value = 0; 472 NV value_nv = 0; 473 474 const UV max_div_8 = UV_MAX / 8; 475 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; 476 bool overflowed = FALSE; 477 478 for (; len-- && *s; s++) { 479 /* gcc 2.95 optimiser not smart enough to figure that this subtraction 480 out front allows slicker code. */ 481 int digit = *s - '0'; 482 if (digit >= 0 && digit <= 7) { 483 /* Write it in this wonky order with a goto to attempt to get the 484 compiler to make the common case integer-only loop pretty tight. 485 */ 486 redo: 487 if (!overflowed) { 488 if (value <= max_div_8) { 489 value = (value << 3) | digit; 490 continue; 491 } 492 /* Bah. We're just overflowed. */ 493 warn("Integer overflow in octal number"); 494 overflowed = TRUE; 495 value_nv = (NV) value; 496 } 497 value_nv *= 8.0; 498 /* If an NV has not enough bits in its mantissa to 499 * represent a UV this summing of small low-order numbers 500 * is a waste of time (because the NV cannot preserve 501 * the low-order bits anyway): we could just remember when 502 * did we overflow and in the end just multiply value_nv by the 503 * right amount of 8-tuples. */ 504 value_nv += (NV)digit; 505 continue; 506 } 507 if (digit == ('_' - '0') && len && allow_underscores 508 && (digit = s[1] - '0') && (digit >= 0 && digit <= 7)) 509 { 510 --len; 511 ++s; 512 goto redo; 513 } 514 /* Allow \octal to work the DWIM way (that is, stop scanning 515 * as soon as non-octal characters are seen, complain only iff 516 * someone seems to want to use the digits eight and nine). */ 517 if (digit == 8 || digit == 9) { 518 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) 519 warn("Illegal octal digit '%c' ignored", *s); 520 } 521 break; 522 } 523 524 if ( ( overflowed && value_nv > 4294967295.0) 525#if UVSIZE > 4 526 || (!overflowed && value > 0xffffffff ) 527#endif 528 ) { 529 warn("Octal number > 037777777777 non-portable"); 530 } 531 *len_p = s - start; 532 if (!overflowed) { 533 *flags = 0; 534 return value; 535 } 536 *flags = PERL_SCAN_GREATER_THAN_UV_MAX; 537 if (result) 538 *result = value_nv; 539 return UV_MAX; 540} 541#endif 542#endif 543 544=xsinit 545 546#define NEED_grok_number 547#define NEED_grok_numeric_radix 548#define NEED_grok_bin 549#define NEED_grok_hex 550#define NEED_grok_oct 551 552=xsubs 553 554UV 555grok_number(string) 556 SV *string 557 PREINIT: 558 const char *pv; 559 STRLEN len; 560 CODE: 561 pv = SvPV(string, len); 562 if (!grok_number(pv, len, &RETVAL)) 563 XSRETURN_UNDEF; 564 OUTPUT: 565 RETVAL 566 567UV 568grok_bin(string) 569 SV *string 570 PREINIT: 571 char *pv; 572 I32 flags = 0; 573 STRLEN len; 574 CODE: 575 pv = SvPV(string, len); 576 RETVAL = grok_bin(pv, &len, &flags, NULL); 577 OUTPUT: 578 RETVAL 579 580UV 581grok_hex(string) 582 SV *string 583 PREINIT: 584 char *pv; 585 I32 flags = 0; 586 STRLEN len; 587 CODE: 588 pv = SvPV(string, len); 589 RETVAL = grok_hex(pv, &len, &flags, NULL); 590 OUTPUT: 591 RETVAL 592 593UV 594grok_oct(string) 595 SV *string 596 PREINIT: 597 char *pv; 598 I32 flags = 0; 599 STRLEN len; 600 CODE: 601 pv = SvPV(string, len); 602 RETVAL = grok_oct(pv, &len, &flags, NULL); 603 OUTPUT: 604 RETVAL 605 606UV 607Perl_grok_number(string) 608 SV *string 609 PREINIT: 610 const char *pv; 611 STRLEN len; 612 CODE: 613 pv = SvPV(string, len); 614 if (!Perl_grok_number(aTHX_ pv, len, &RETVAL)) 615 XSRETURN_UNDEF; 616 OUTPUT: 617 RETVAL 618 619UV 620Perl_grok_bin(string) 621 SV *string 622 PREINIT: 623 char *pv; 624 I32 flags = 0; 625 STRLEN len; 626 CODE: 627 pv = SvPV(string, len); 628 RETVAL = Perl_grok_bin(aTHX_ pv, &len, &flags, NULL); 629 OUTPUT: 630 RETVAL 631 632UV 633Perl_grok_hex(string) 634 SV *string 635 PREINIT: 636 char *pv; 637 I32 flags = 0; 638 STRLEN len; 639 CODE: 640 pv = SvPV(string, len); 641 RETVAL = Perl_grok_hex(aTHX_ pv, &len, &flags, NULL); 642 OUTPUT: 643 RETVAL 644 645UV 646Perl_grok_oct(string) 647 SV *string 648 PREINIT: 649 char *pv; 650 I32 flags = 0; 651 STRLEN len; 652 CODE: 653 pv = SvPV(string, len); 654 RETVAL = Perl_grok_oct(aTHX_ pv, &len, &flags, NULL); 655 OUTPUT: 656 RETVAL 657 658=tests plan => 10 659 660is(&Devel::PPPort::grok_number("42"), 42); 661ok(!defined(&Devel::PPPort::grok_number("A"))); 662is(&Devel::PPPort::grok_bin("10000001"), 129); 663is(&Devel::PPPort::grok_hex("deadbeef"), 0xdeadbeef); 664is(&Devel::PPPort::grok_oct("377"), 255); 665 666is(&Devel::PPPort::Perl_grok_number("42"), 42); 667ok(!defined(&Devel::PPPort::Perl_grok_number("A"))); 668is(&Devel::PPPort::Perl_grok_bin("10000001"), 129); 669is(&Devel::PPPort::Perl_grok_hex("deadbeef"), 0xdeadbeef); 670is(&Devel::PPPort::Perl_grok_oct("377"), 255); 671