1 /*    numeric.c
2  *
3  *    Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
4  *    2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  * "That only makes eleven (plus one mislaid) and not fourteen,
13  *  unless wizards count differently to other people."  --Beorn
14  *
15  *     [p.115 of _The Hobbit_: "Queer Lodgings"]
16  */
17 
18 /*
19 
20 This file contains all the stuff needed by perl for manipulating numeric
21 values, including such things as replacements for the OS's atof() function
22 
23 */
24 
25 #include "EXTERN.h"
26 #define PERL_IN_NUMERIC_C
27 #include "perl.h"
28 
29 #ifdef Perl_strtod
30 
31 PERL_STATIC_INLINE NV
S_strtod(pTHX_ const char * const s,char ** e)32 S_strtod(pTHX_ const char * const s, char ** e)
33 {
34     DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
35     NV result;
36 
37     STORE_LC_NUMERIC_SET_TO_NEEDED();
38 
39 #  ifdef USE_QUADMATH
40 
41     result = strtoflt128(s, e);
42 
43 #  elif defined(HAS_STRTOLD) && defined(HAS_LONG_DOUBLE)    \
44                              && defined(USE_LONG_DOUBLE)
45 #    if defined(__MINGW64_VERSION_MAJOR)
46       /***********************************************
47        We are unable to use strtold because of
48         https://sourceforge.net/p/mingw-w64/bugs/711/
49         &
50         https://sourceforge.net/p/mingw-w64/bugs/725/
51 
52        but __mingw_strtold is fine.
53       ***********************************************/
54 
55     result = __mingw_strtold(s, e);
56 
57 #    else
58 
59     result = strtold(s, e);
60 
61 #    endif
62 #  elif defined(HAS_STRTOD)
63 
64     result = strtod(s, e);
65 
66 #  else
67 #    error No strtod() equivalent found
68 #  endif
69 
70     RESTORE_LC_NUMERIC();
71 
72     return result;
73 }
74 
75 #endif  /* #ifdef Perl_strtod */
76 
77 /*
78 
79 =for apidoc my_strtod
80 
81 This function is equivalent to the libc strtod() function, and is available
82 even on platforms that lack plain strtod().  Its return value is the best
83 available precision depending on platform capabilities and F<Configure>
84 options.
85 
86 It properly handles the locale radix character, meaning it expects a dot except
87 when called from within the scope of S<C<use locale>>, in which case the radix
88 character should be that specified by the current locale.
89 
90 The synonym Strtod() may be used instead.
91 
92 =cut
93 
94 */
95 
96 NV
Perl_my_strtod(const char * const s,char ** e)97 Perl_my_strtod(const char * const s, char **e)
98 {
99     dTHX;
100 
101     PERL_ARGS_ASSERT_MY_STRTOD;
102 
103 #ifdef Perl_strtod
104 
105     return S_strtod(aTHX_ s, e);
106 
107 #else
108 
109     {
110         NV result;
111         char ** end_ptr = NULL;
112 
113         *end_ptr = my_atof2(s, &result);
114         if (e) {
115             *e = *end_ptr;
116         }
117 
118         if (! *end_ptr) {
119             result = 0.0;
120         }
121 
122         return result;
123     }
124 
125 #endif
126 
127 }
128 
129 
130 U32
Perl_cast_ulong(NV f)131 Perl_cast_ulong(NV f)
132 {
133   if (f < 0.0)
134     return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
135   if (f < U32_MAX_P1) {
136 #if CASTFLAGS & 2
137     if (f < U32_MAX_P1_HALF)
138       return (U32) f;
139     f -= U32_MAX_P1_HALF;
140     return ((U32) f) | (1 + (U32_MAX >> 1));
141 #else
142     return (U32) f;
143 #endif
144   }
145   return f > 0 ? U32_MAX : 0 /* NaN */;
146 }
147 
148 I32
Perl_cast_i32(NV f)149 Perl_cast_i32(NV f)
150 {
151   if (f < I32_MAX_P1)
152     return f < I32_MIN ? I32_MIN : (I32) f;
153   if (f < U32_MAX_P1) {
154 #if CASTFLAGS & 2
155     if (f < U32_MAX_P1_HALF)
156       return (I32)(U32) f;
157     f -= U32_MAX_P1_HALF;
158     return (I32)(((U32) f) | (1 + (U32_MAX >> 1)));
159 #else
160     return (I32)(U32) f;
161 #endif
162   }
163   return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
164 }
165 
166 IV
Perl_cast_iv(NV f)167 Perl_cast_iv(NV f)
168 {
169   if (f < IV_MAX_P1)
170     return f < IV_MIN ? IV_MIN : (IV) f;
171   if (f < UV_MAX_P1) {
172 #if CASTFLAGS & 2
173     /* For future flexibility allowing for sizeof(UV) >= sizeof(IV)  */
174     if (f < UV_MAX_P1_HALF)
175       return (IV)(UV) f;
176     f -= UV_MAX_P1_HALF;
177     return (IV)(((UV) f) | (1 + (UV_MAX >> 1)));
178 #else
179     return (IV)(UV) f;
180 #endif
181   }
182   return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
183 }
184 
185 UV
Perl_cast_uv(NV f)186 Perl_cast_uv(NV f)
187 {
188   if (f < 0.0)
189     return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
190   if (f < UV_MAX_P1) {
191 #if CASTFLAGS & 2
192     if (f < UV_MAX_P1_HALF)
193       return (UV) f;
194     f -= UV_MAX_P1_HALF;
195     return ((UV) f) | (1 + (UV_MAX >> 1));
196 #else
197     return (UV) f;
198 #endif
199   }
200   return f > 0 ? UV_MAX : 0 /* NaN */;
201 }
202 
203 /*
204 =for apidoc grok_bin
205 
206 converts a string representing a binary number to numeric form.
207 
208 On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
209 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.  The
210 scan stops at the end of the string, or at just before the first invalid
211 character.  Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
212 encountering an invalid character (except NUL) will also trigger a warning.  On
213 return C<*len_p> is set to the length of the scanned string, and C<*flags>
214 gives output flags.
215 
216 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
217 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_bin>
218 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
219 and writes an approximation of the correct value into C<*result> (which is an
220 NV; or the approximation is discarded if C<result> is NULL).
221 
222 The binary number may optionally be prefixed with C<"0b"> or C<"b"> unless
223 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.
224 
225 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
226 digits may be separated from each other by a single underscore; also a single
227 leading underscore is accepted.
228 
229 =for apidoc Amnh||PERL_SCAN_ALLOW_UNDERSCORES
230 =for apidoc Amnh||PERL_SCAN_DISALLOW_PREFIX
231 =for apidoc Amnh||PERL_SCAN_GREATER_THAN_UV_MAX
232 =for apidoc Amnh||PERL_SCAN_SILENT_ILLDIGIT
233 
234 =cut
235 
236 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE
237 which suppresses any message for non-portable numbers that are still valid
238 on this platform.
239  */
240 
241 UV
Perl_grok_bin(pTHX_ const char * start,STRLEN * len_p,I32 * flags,NV * result)242 Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
243 {
244     PERL_ARGS_ASSERT_GROK_BIN;
245 
246     return grok_bin(start, len_p, flags, result);
247 }
248 
249 /*
250 =for apidoc grok_hex
251 
252 converts a string representing a hex number to numeric form.
253 
254 On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
255 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.  The
256 scan stops at the end of the string, or at just before the first invalid
257 character.  Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
258 encountering an invalid character (except NUL) will also trigger a warning.  On
259 return C<*len_p> is set to the length of the scanned string, and C<*flags>
260 gives output flags.
261 
262 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
263 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_hex>
264 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
265 and writes an approximation of the correct value into C<*result> (which is an
266 NV; or the approximation is discarded if C<result> is NULL).
267 
268 The hex number may optionally be prefixed with C<"0x"> or C<"x"> unless
269 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.
270 
271 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
272 digits may be separated from each other by a single underscore; also a single
273 leading underscore is accepted.
274 
275 =cut
276 
277 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
278 which suppresses any message for non-portable numbers, but which are valid
279 on this platform.  But, C<*flags>  will have the corresponding flag bit set.
280  */
281 
282 UV
Perl_grok_hex(pTHX_ const char * start,STRLEN * len_p,I32 * flags,NV * result)283 Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
284 {
285     PERL_ARGS_ASSERT_GROK_HEX;
286 
287     return grok_hex(start, len_p, flags, result);
288 }
289 
290 /*
291 =for apidoc grok_oct
292 
293 converts a string representing an octal number to numeric form.
294 
295 On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
296 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.  The
297 scan stops at the end of the string, or at just before the first invalid
298 character.  Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
299 encountering an invalid character (except NUL) will also trigger a warning.  On
300 return C<*len_p> is set to the length of the scanned string, and C<*flags>
301 gives output flags.
302 
303 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
304 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_oct>
305 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
306 and writes an approximation of the correct value into C<*result> (which is an
307 NV; or the approximation is discarded if C<result> is NULL).
308 
309 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
310 digits may be separated from each other by a single underscore; also a single
311 leading underscore is accepted.
312 
313 The C<PERL_SCAN_DISALLOW_PREFIX> flag is always treated as being set for
314 this function.
315 
316 =cut
317 
318 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
319 which suppresses any message for non-portable numbers, but which are valid
320 on this platform.
321  */
322 
323 UV
Perl_grok_oct(pTHX_ const char * start,STRLEN * len_p,I32 * flags,NV * result)324 Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
325 {
326     PERL_ARGS_ASSERT_GROK_OCT;
327 
328     return grok_oct(start, len_p, flags, result);
329 }
330 
331 STATIC void
S_output_non_portable(pTHX_ const U8 base)332 S_output_non_portable(pTHX_ const U8 base)
333 {
334     /* Display the proper message for a number in the given input base not
335      * fitting in 32 bits */
336     const char * which = (base == 2)
337                       ? "Binary number > 0b11111111111111111111111111111111"
338                       : (base == 8)
339                         ? "Octal number > 037777777777"
340                         : "Hexadecimal number > 0xffffffff";
341 
342     PERL_ARGS_ASSERT_OUTPUT_NON_PORTABLE;
343 
344     /* Also there are listings for the other two.  That's because, since they
345      * are the first word, it would be hard for a user to find them there
346      * starting with a %s */
347     /* diag_listed_as: Hexadecimal number > 0xffffffff non-portable */
348     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), "%s non-portable", which);
349 }
350 
351 UV
Perl_grok_bin_oct_hex(pTHX_ const char * start,STRLEN * len_p,I32 * flags,NV * result,const unsigned shift,const U8 class_bit,const char prefix)352 Perl_grok_bin_oct_hex(pTHX_ const char *start,
353                         STRLEN *len_p,
354                         I32 *flags,
355                         NV *result,
356                         const unsigned shift, /* 1 for binary; 3 for octal;
357                                                  4 for hex */
358                         const U8 class_bit,
359                         const char prefix
360                      )
361 
362 {
363     const char *s0 = start;
364     const char *s;
365     STRLEN len = *len_p;
366     STRLEN bytes_so_far;    /* How many real digits have been processed */
367     UV value = 0;
368     NV value_nv = 0;
369     const PERL_UINT_FAST8_T base = 1 << shift;  /* 2, 8, or 16 */
370     const UV max_div= UV_MAX / base;    /* Value above which, the next digit
371                                            processed would overflow */
372     const I32 input_flags = *flags;
373     const bool allow_underscores =
374                                 cBOOL(input_flags & PERL_SCAN_ALLOW_UNDERSCORES);
375     bool overflowed = FALSE;
376 
377     /* In overflows, this keeps track of how much to multiply the overflowed NV
378      * by as we continue to parse the remaining digits */
379     NV factor = 0;
380 
381     /* This function unifies the core of grok_bin, grok_oct, and grok_hex.  It
382      * is optimized for hex conversion.  For example, it uses XDIGIT_VALUE to
383      * find the numeric value of a digit.  That requires more instructions than
384      * OCTAL_VALUE would, but gives the same result for the narrowed range of
385      * octal digits; same for binary.  If it were ever critical to squeeze more
386      * performance from this, the function could become grok_hex, and a regen
387      * perl script could scan it and write out two edited copies for the other
388      * two functions.  That would improve the performance of all three
389      * somewhat.  Besides eliminating XDIGIT_VALUE for the other two, extra
390      * parameters are now passed to this to avoid conditionals.  Those could
391      * become declared consts, like:
392      *      const U8 base = 16;
393      *      const U8 base = 8;
394      *      ...
395      */
396 
397     PERL_ARGS_ASSERT_GROK_BIN_OCT_HEX;
398 
399     ASSUME(inRANGE(shift, 1, 4) && shift != 2);
400 
401     /* Clear output flags; unlikely to find a problem that sets them */
402     *flags = 0;
403 
404     if (!(input_flags & PERL_SCAN_DISALLOW_PREFIX)) {
405 
406         /* strip off leading b or 0b; x or 0x.
407            for compatibility silently suffer "b" and "0b" as valid binary; "x"
408            and "0x" as valid hex numbers. */
409         if (len >= 1) {
410             if (isALPHA_FOLD_EQ(s0[0], prefix)) {
411                 s0++;
412                 len--;
413             }
414             else if (len >= 2 && s0[0] == '0' && (isALPHA_FOLD_EQ(s0[1], prefix))) {
415                 s0+=2;
416                 len-=2;
417             }
418         }
419     }
420 
421     s = s0; /* s0 potentially advanced from 'start' */
422 
423     /* Unroll the loop so that the first 8 digits are branchless except for the
424      * switch.  A ninth hex one overflows a 32 bit word. */
425     switch (len) {
426       case 0:
427           return 0;
428       default:
429           if (UNLIKELY(! _generic_isCC(*s, class_bit)))  break;
430           value = (value << shift) | XDIGIT_VALUE(*s);
431           s++;
432           /* FALLTHROUGH */
433       case 7:
434           if (UNLIKELY(! _generic_isCC(*s, class_bit)))  break;
435           value = (value << shift) | XDIGIT_VALUE(*s);
436           s++;
437           /* FALLTHROUGH */
438       case 6:
439           if (UNLIKELY(! _generic_isCC(*s, class_bit)))  break;
440           value = (value << shift) | XDIGIT_VALUE(*s);
441           s++;
442           /* FALLTHROUGH */
443       case 5:
444           if (UNLIKELY(! _generic_isCC(*s, class_bit)))  break;
445           value = (value << shift) | XDIGIT_VALUE(*s);
446           s++;
447           /* FALLTHROUGH */
448       case 4:
449           if (UNLIKELY(! _generic_isCC(*s, class_bit)))  break;
450           value = (value << shift) | XDIGIT_VALUE(*s);
451           s++;
452           /* FALLTHROUGH */
453       case 3:
454           if (UNLIKELY(! _generic_isCC(*s, class_bit)))  break;
455           value = (value << shift) | XDIGIT_VALUE(*s);
456           s++;
457           /* FALLTHROUGH */
458       case 2:
459           if (UNLIKELY(! _generic_isCC(*s, class_bit)))  break;
460           value = (value << shift) | XDIGIT_VALUE(*s);
461           s++;
462           /* FALLTHROUGH */
463       case 1:
464           if (UNLIKELY(! _generic_isCC(*s, class_bit)))  break;
465           value = (value << shift) | XDIGIT_VALUE(*s);
466 
467           if (LIKELY(len <= 8)) {
468               return value;
469           }
470 
471           s++;
472           break;
473     }
474 
475     bytes_so_far = s - s0;
476     factor = shift << bytes_so_far;
477     len -= bytes_so_far;
478 
479     for (; len--; s++) {
480         if (_generic_isCC(*s, class_bit)) {
481             /* Write it in this wonky order with a goto to attempt to get the
482                compiler to make the common case integer-only loop pretty tight.
483                With gcc seems to be much straighter code than old scan_hex.
484                (khw suspects that adding a LIKELY() just above would do the
485                same thing) */
486           redo:
487             if (LIKELY(value <= max_div)) {
488                 value = (value << shift) | XDIGIT_VALUE(*s);
489                     /* Note XDIGIT_VALUE() is branchless, works on binary
490                      * and octal as well, so can be used here, without
491                      * slowing those down */
492                 factor *= 1 << shift;
493                 continue;
494             }
495 
496             /* Bah. We are about to overflow.  Instead, add the unoverflowed
497              * value to an NV that contains an approximation to the correct
498              * value.  Each time through the loop we have increased 'factor' so
499              * that it gives how much the current approximation needs to
500              * effectively be shifted to make room for this new value */
501             value_nv *= factor;
502             value_nv += (NV) value;
503 
504             /* Then we keep accumulating digits, until all are parsed.  We
505              * start over using the current input value.  This will be added to
506              * 'value_nv' eventually, either when all digits are gone, or we
507              * have overflowed this fresh start. */
508             value = XDIGIT_VALUE(*s);
509             factor = 1 << shift;
510 
511             if (! overflowed) {
512                 overflowed = TRUE;
513                 if (   ! (input_flags & PERL_SCAN_SILENT_OVERFLOW)
514                     &&    ckWARN_d(WARN_OVERFLOW))
515                 {
516                     Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
517                                        "Integer overflow in %s number",
518                                        (base == 16) ? "hexadecimal"
519                                                     : (base == 2)
520                                                       ? "binary"
521                                                       : "octal");
522                 }
523             }
524             continue;
525         }
526 
527         if (   *s == '_'
528             && len
529             && allow_underscores
530             && _generic_isCC(s[1], class_bit)
531 
532                 /* Don't allow a leading underscore if the only-medial bit is
533                  * set */
534             && (   LIKELY(s > s0)
535                 || UNLIKELY((input_flags & PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES)
536                                         != PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES)))
537         {
538             --len;
539             ++s;
540             goto redo;
541         }
542 
543         if (*s) {
544             if (   ! (input_flags & PERL_SCAN_SILENT_ILLDIGIT)
545                 &&    ckWARN(WARN_DIGIT))
546             {
547                 if (base != 8) {
548                     Perl_warner(aTHX_ packWARN(WARN_DIGIT),
549                                            "Illegal %s digit '%c' ignored",
550                                            ((base == 2)
551                                             ? "binary"
552                                               : "hexadecimal"),
553                                             *s);
554                 }
555                 else if (isDIGIT(*s)) { /* octal base */
556 
557                     /* Allow \octal to work the DWIM way (that is, stop
558                      * scanning as soon as non-octal characters are seen,
559                      * complain only if someone seems to want to use the digits
560                      * eight and nine.  Since we know it is not octal, then if
561                      * isDIGIT, must be an 8 or 9). */
562                     Perl_warner(aTHX_ packWARN(WARN_DIGIT),
563                                        "Illegal octal digit '%c' ignored", *s);
564                 }
565             }
566 
567             if (input_flags & PERL_SCAN_NOTIFY_ILLDIGIT) {
568                 *flags |= PERL_SCAN_NOTIFY_ILLDIGIT;
569             }
570         }
571 
572         break;
573     }
574 
575     *len_p = s - start;
576 
577     if (LIKELY(! overflowed)) {
578 #if UVSIZE > 4
579         if (      UNLIKELY(value > 0xffffffff)
580             && ! (input_flags & PERL_SCAN_SILENT_NON_PORTABLE))
581         {
582             output_non_portable(base);
583             *flags |= PERL_SCAN_SILENT_NON_PORTABLE;
584         }
585 #endif
586         return value;
587     }
588 
589     /* Overflowed: Calculate the final overflow approximation */
590     value_nv *= factor;
591     value_nv += (NV) value;
592 
593     output_non_portable(base);
594 
595     *flags |= PERL_SCAN_GREATER_THAN_UV_MAX
596            |  PERL_SCAN_SILENT_NON_PORTABLE;
597     if (result)
598         *result = value_nv;
599     return UV_MAX;
600 }
601 
602 /*
603 =for apidoc scan_bin
604 
605 For backwards compatibility.  Use C<grok_bin> instead.
606 
607 =for apidoc scan_hex
608 
609 For backwards compatibility.  Use C<grok_hex> instead.
610 
611 =for apidoc scan_oct
612 
613 For backwards compatibility.  Use C<grok_oct> instead.
614 
615 =cut
616  */
617 
618 NV
Perl_scan_bin(pTHX_ const char * start,STRLEN len,STRLEN * retlen)619 Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
620 {
621     NV rnv;
622     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
623     const UV ruv = grok_bin (start, &len, &flags, &rnv);
624 
625     PERL_ARGS_ASSERT_SCAN_BIN;
626 
627     *retlen = len;
628     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
629 }
630 
631 NV
Perl_scan_oct(pTHX_ const char * start,STRLEN len,STRLEN * retlen)632 Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
633 {
634     NV rnv;
635     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
636     const UV ruv = grok_oct (start, &len, &flags, &rnv);
637 
638     PERL_ARGS_ASSERT_SCAN_OCT;
639 
640     *retlen = len;
641     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
642 }
643 
644 NV
Perl_scan_hex(pTHX_ const char * start,STRLEN len,STRLEN * retlen)645 Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
646 {
647     NV rnv;
648     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
649     const UV ruv = grok_hex (start, &len, &flags, &rnv);
650 
651     PERL_ARGS_ASSERT_SCAN_HEX;
652 
653     *retlen = len;
654     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
655 }
656 
657 /*
658 =for apidoc grok_numeric_radix
659 
660 Scan and skip for a numeric decimal separator (radix).
661 
662 =cut
663  */
664 bool
Perl_grok_numeric_radix(pTHX_ const char ** sp,const char * send)665 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
666 {
667     PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
668 
669 #ifdef USE_LOCALE_NUMERIC
670 
671     if (IN_LC(LC_NUMERIC)) {
672         STRLEN len;
673         char * radix;
674         bool matches_radix = FALSE;
675         DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
676 
677         STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();
678 
679         radix = SvPV(PL_numeric_radix_sv, len);
680         radix = savepvn(radix, len);
681 
682         RESTORE_LC_NUMERIC();
683 
684         if (*sp + len <= send) {
685             matches_radix = memEQ(*sp, radix, len);
686         }
687 
688         Safefree(radix);
689 
690         if (matches_radix) {
691             *sp += len;
692             return TRUE;
693         }
694     }
695 
696 #endif
697 
698     /* always try "." if numeric radix didn't match because
699      * we may have data from different locales mixed */
700     if (*sp < send && **sp == '.') {
701         ++*sp;
702         return TRUE;
703     }
704 
705     return FALSE;
706 }
707 
708 /*
709 =for apidoc grok_infnan
710 
711 Helper for C<grok_number()>, accepts various ways of spelling "infinity"
712 or "not a number", and returns one of the following flag combinations:
713 
714   IS_NUMBER_INFINITY
715   IS_NUMBER_NAN
716   IS_NUMBER_INFINITY | IS_NUMBER_NEG
717   IS_NUMBER_NAN | IS_NUMBER_NEG
718   0
719 
720 possibly |-ed with C<IS_NUMBER_TRAILING>.
721 
722 If an infinity or a not-a-number is recognized, C<*sp> will point to
723 one byte past the end of the recognized string.  If the recognition fails,
724 zero is returned, and C<*sp> will not move.
725 
726 =for apidoc Amnh|bool|IS_NUMBER_GREATER_THAN_UV_MAX
727 =for apidoc Amnh|bool|IS_NUMBER_INFINITY
728 =for apidoc Amnh|bool|IS_NUMBER_IN_UV
729 =for apidoc Amnh|bool|IS_NUMBER_NAN
730 =for apidoc Amnh|bool|IS_NUMBER_NEG
731 =for apidoc Amnh|bool|IS_NUMBER_NOT_INT
732 
733 =cut
734 */
735 
736 int
Perl_grok_infnan(pTHX_ const char ** sp,const char * send)737 Perl_grok_infnan(pTHX_ const char** sp, const char* send)
738 {
739     const char* s = *sp;
740     int flags = 0;
741 #if defined(NV_INF) || defined(NV_NAN)
742     bool odh = FALSE; /* one-dot-hash: 1.#INF */
743 
744     PERL_ARGS_ASSERT_GROK_INFNAN;
745 
746     if (*s == '+') {
747         s++; if (s == send) return 0;
748     }
749     else if (*s == '-') {
750         flags |= IS_NUMBER_NEG; /* Yes, -NaN happens. Incorrect but happens. */
751         s++; if (s == send) return 0;
752     }
753 
754     if (*s == '1') {
755         /* Visual C: 1.#SNAN, -1.#QNAN, 1#INF, 1.#IND (maybe also 1.#NAN)
756          * Let's keep the dot optional. */
757         s++; if (s == send) return 0;
758         if (*s == '.') {
759             s++; if (s == send) return 0;
760         }
761         if (*s == '#') {
762             s++; if (s == send) return 0;
763         } else
764             return 0;
765         odh = TRUE;
766     }
767 
768     if (isALPHA_FOLD_EQ(*s, 'I')) {
769         /* INF or IND (1.#IND is "indeterminate", a certain type of NAN) */
770 
771         s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
772         s++; if (s == send) return 0;
773         if (isALPHA_FOLD_EQ(*s, 'F')) {
774             s++;
775             if (s < send && (isALPHA_FOLD_EQ(*s, 'I'))) {
776                 int fail =
777                     flags | IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT | IS_NUMBER_TRAILING;
778                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return fail;
779                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return fail;
780                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return fail;
781                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return fail;
782                 s++;
783             } else if (odh) {
784                 while (s < send && *s == '0') { /* 1.#INF00 */
785                     s++;
786                 }
787             }
788             while (s < send && isSPACE(*s))
789                 s++;
790             if (s < send && *s) {
791                 flags |= IS_NUMBER_TRAILING;
792             }
793             flags |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
794         }
795         else if (isALPHA_FOLD_EQ(*s, 'D') && odh) { /* 1.#IND */
796             s++;
797             flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
798             while (s < send && *s == '0') { /* 1.#IND00 */
799                 s++;
800             }
801             if (s < send && *s) {
802                 flags |= IS_NUMBER_TRAILING;
803             }
804         } else
805             return 0;
806     }
807     else {
808         /* Maybe NAN of some sort */
809 
810         if (isALPHA_FOLD_EQ(*s, 'S') || isALPHA_FOLD_EQ(*s, 'Q')) {
811             /* snan, qNaN */
812             /* XXX do something with the snan/qnan difference */
813             s++; if (s == send) return 0;
814         }
815 
816         if (isALPHA_FOLD_EQ(*s, 'N')) {
817             s++; if (s == send || isALPHA_FOLD_NE(*s, 'A')) return 0;
818             s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
819             s++;
820 
821             flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
822             if (s == send) {
823                 return flags;
824             }
825 
826             /* NaN can be followed by various stuff (NaNQ, NaNS), but
827              * there are also multiple different NaN values, and some
828              * implementations output the "payload" values,
829              * e.g. NaN123, NAN(abc), while some legacy implementations
830              * have weird stuff like NaN%. */
831             if (isALPHA_FOLD_EQ(*s, 'q') ||
832                 isALPHA_FOLD_EQ(*s, 's')) {
833                 /* "nanq" or "nans" are ok, though generating
834                  * these portably is tricky. */
835                 s++;
836                 if (s == send) {
837                     return flags;
838                 }
839             }
840             if (*s == '(') {
841                 /* C99 style "nan(123)" or Perlish equivalent "nan($uv)". */
842                 const char *t;
843                 s++;
844                 if (s == send) {
845                     return flags | IS_NUMBER_TRAILING;
846                 }
847                 t = s + 1;
848                 while (t < send && *t && *t != ')') {
849                     t++;
850                 }
851                 if (t == send) {
852                     return flags | IS_NUMBER_TRAILING;
853                 }
854                 if (*t == ')') {
855                     int nantype;
856                     UV nanval;
857                     if (s[0] == '0' && s + 2 < t &&
858                         isALPHA_FOLD_EQ(s[1], 'x') &&
859                         isXDIGIT(s[2])) {
860                         STRLEN len = t - s;
861                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
862                         nanval = grok_hex(s, &len, &flags, NULL);
863                         if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
864                             nantype = 0;
865                         } else {
866                             nantype = IS_NUMBER_IN_UV;
867                         }
868                         s += len;
869                     } else if (s[0] == '0' && s + 2 < t &&
870                                isALPHA_FOLD_EQ(s[1], 'b') &&
871                                (s[2] == '0' || s[2] == '1')) {
872                         STRLEN len = t - s;
873                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
874                         nanval = grok_bin(s, &len, &flags, NULL);
875                         if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
876                             nantype = 0;
877                         } else {
878                             nantype = IS_NUMBER_IN_UV;
879                         }
880                         s += len;
881                     } else {
882                         const char *u;
883                         nantype =
884                             grok_number_flags(s, t - s, &nanval,
885                                               PERL_SCAN_TRAILING |
886                                               PERL_SCAN_ALLOW_UNDERSCORES);
887                         /* Unfortunately grok_number_flags() doesn't
888                          * tell how far we got and the ')' will always
889                          * be "trailing", so we need to double-check
890                          * whether we had something dubious. */
891                         for (u = s; u < t; u++) {
892                             if (!isDIGIT(*u)) {
893                                 flags |= IS_NUMBER_TRAILING;
894                                 break;
895                             }
896                         }
897                         s = u;
898                     }
899 
900                     /* XXX Doesn't do octal: nan("0123").
901                      * Probably not a big loss. */
902 
903                     if ((nantype & IS_NUMBER_NOT_INT) ||
904                         !(nantype && IS_NUMBER_IN_UV)) {
905                         /* XXX the nanval is currently unused, that is,
906                          * not inserted as the NaN payload of the NV.
907                          * But the above code already parses the C99
908                          * nan(...)  format.  See below, and see also
909                          * the nan() in POSIX.xs.
910                          *
911                          * Certain configuration combinations where
912                          * NVSIZE is greater than UVSIZE mean that
913                          * a single UV cannot contain all the possible
914                          * NaN payload bits.  There would need to be
915                          * some more generic syntax than "nan($uv)".
916                          *
917                          * Issues to keep in mind:
918                          *
919                          * (1) In most common cases there would
920                          * not be an integral number of bytes that
921                          * could be set, only a certain number of bits.
922                          * For example for the common case of
923                          * NVSIZE == UVSIZE == 8 there is room for 52
924                          * bits in the payload, but the most significant
925                          * bit is commonly reserved for the
926                          * signaling/quiet bit, leaving 51 bits.
927                          * Furthermore, the C99 nan() is supposed
928                          * to generate quiet NaNs, so it is doubtful
929                          * whether it should be able to generate
930                          * signaling NaNs.  For the x86 80-bit doubles
931                          * (if building a long double Perl) there would
932                          * be 62 bits (s/q bit being the 63rd).
933                          *
934                          * (2) Endianness of the payload bits. If the
935                          * payload is specified as an UV, the low-order
936                          * bits of the UV are naturally little-endianed
937                          * (rightmost) bits of the payload.  The endianness
938                          * of UVs and NVs can be different. */
939                         return 0;
940                     }
941                     if (s < t) {
942                         flags |= IS_NUMBER_TRAILING;
943                     }
944                 } else {
945                     /* Looked like nan(...), but no close paren. */
946                     flags |= IS_NUMBER_TRAILING;
947                 }
948             } else {
949                 while (s < send && isSPACE(*s))
950                     s++;
951                 if (s < send && *s) {
952                     /* Note that we here implicitly accept (parse as
953                      * "nan", but with warnings) also any other weird
954                      * trailing stuff for "nan".  In the above we just
955                      * check that if we got the C99-style "nan(...)",
956                      * the "..."  looks sane.
957                      * If in future we accept more ways of specifying
958                      * the nan payload, the accepting would happen around
959                      * here. */
960                     flags |= IS_NUMBER_TRAILING;
961                 }
962             }
963             s = send;
964         }
965         else
966             return 0;
967     }
968 
969     while (s < send && isSPACE(*s))
970         s++;
971 
972 #else
973     PERL_UNUSED_ARG(send);
974 #endif /* #if defined(NV_INF) || defined(NV_NAN) */
975     *sp = s;
976     return flags;
977 }
978 
979 /*
980 =for apidoc grok_number_flags
981 
982 Recognise (or not) a number.  The type of the number is returned
983 (0 if unrecognised), otherwise it is a bit-ORed combination of
984 C<IS_NUMBER_IN_UV>, C<IS_NUMBER_GREATER_THAN_UV_MAX>, C<IS_NUMBER_NOT_INT>,
985 C<IS_NUMBER_NEG>, C<IS_NUMBER_INFINITY>, C<IS_NUMBER_NAN> (defined in perl.h).
986 
987 If the value of the number can fit in a UV, it is returned in C<*valuep>.
988 C<IS_NUMBER_IN_UV> will be set to indicate that C<*valuep> is valid, C<IS_NUMBER_IN_UV>
989 will never be set unless C<*valuep> is valid, but C<*valuep> may have been assigned
990 to during processing even though C<IS_NUMBER_IN_UV> is not set on return.
991 If C<valuep> is C<NULL>, C<IS_NUMBER_IN_UV> will be set for the same cases as when
992 C<valuep> is non-C<NULL>, but no actual assignment (or SEGV) will occur.
993 
994 C<IS_NUMBER_NOT_INT> will be set with C<IS_NUMBER_IN_UV> if trailing decimals were
995 seen (in which case C<*valuep> gives the true value truncated to an integer), and
996 C<IS_NUMBER_NEG> if the number is negative (in which case C<*valuep> holds the
997 absolute value).  C<IS_NUMBER_IN_UV> is not set if C<e> notation was used or the
998 number is larger than a UV.
999 
1000 C<flags> allows only C<PERL_SCAN_TRAILING>, which allows for trailing
1001 non-numeric text on an otherwise successful I<grok>, setting
1002 C<IS_NUMBER_TRAILING> on the result.
1003 
1004 =for apidoc Amnh||PERL_SCAN_TRAILING
1005 
1006 =for apidoc grok_number
1007 
1008 Identical to C<grok_number_flags()> with C<flags> set to zero.
1009 
1010 =cut
1011  */
1012 int
Perl_grok_number(pTHX_ const char * pv,STRLEN len,UV * valuep)1013 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
1014 {
1015     PERL_ARGS_ASSERT_GROK_NUMBER;
1016 
1017     return grok_number_flags(pv, len, valuep, 0);
1018 }
1019 
1020 static const UV uv_max_div_10 = UV_MAX / 10;
1021 static const U8 uv_max_mod_10 = UV_MAX % 10;
1022 
1023 int
Perl_grok_number_flags(pTHX_ const char * pv,STRLEN len,UV * valuep,U32 flags)1024 Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
1025 {
1026   const char *s = pv;
1027   const char * const send = pv + len;
1028   const char *d;
1029   int numtype = 0;
1030 
1031   PERL_ARGS_ASSERT_GROK_NUMBER_FLAGS;
1032 
1033   if (UNLIKELY(isSPACE(*s))) {
1034       s++;
1035       while (s < send) {
1036         if (LIKELY(! isSPACE(*s))) goto non_space;
1037         s++;
1038       }
1039       return 0;
1040     non_space: ;
1041   }
1042 
1043   /* See if signed.  This assumes it is more likely to be unsigned, so
1044    * penalizes signed by an extra conditional; rewarding unsigned by one fewer
1045    * (because we detect '+' and '-' with a single test and then add a
1046    * conditional to determine which) */
1047   if (UNLIKELY((*s & ~('+' ^ '-')) == ('+' & '-') )) {
1048 
1049     /* Here, on ASCII platforms, *s is one of: 0x29 = ')', 2B = '+', 2D = '-',
1050      * 2F = '/'.  That is, it is either a sign, or a character that doesn't
1051      * belong in a number at all (unless it's a radix character in a weird
1052      * locale).  Given this, it's far more likely to be a minus than the
1053      * others.  (On EBCDIC it is one of 42, 44, 46, 48, 4A, 4C, 4E,  (not 40
1054      * because can't be a space)    60, 62, 64, 66, 68, 6A, 6C, 6E.  Again,
1055      * only potentially a weird radix character, or 4E='+', or 60='-') */
1056     if (LIKELY(*s == '-')) {
1057         s++;
1058         numtype = IS_NUMBER_NEG;
1059     }
1060     else if (LIKELY(*s == '+'))
1061         s++;
1062     else  /* Can't just return failure here, as it could be a weird radix
1063              character */
1064         goto done_sign;
1065 
1066     if (UNLIKELY(s == send))
1067         return 0;
1068   done_sign: ;
1069     }
1070 
1071   /* The first digit (after optional sign): note that might
1072    * also point to "infinity" or "nan", or "1.#INF". */
1073   d = s;
1074 
1075   /* next must be digit or the radix separator or beginning of infinity/nan */
1076   if (LIKELY(isDIGIT(*s))) {
1077     /* UVs are at least 32 bits, so the first 9 decimal digits cannot
1078        overflow.  */
1079     UV value = *s - '0';    /* Process this first (perhaps only) digit */
1080     int digit;
1081 
1082     s++;
1083 
1084     switch(send - s) {
1085       default:      /* 8 or more remaining characters */
1086         digit = *s - '0';
1087         if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1088         value = value * 10 + digit;
1089         s++;
1090         /* FALLTHROUGH */
1091       case 7:
1092         digit = *s - '0';
1093         if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1094         value = value * 10 + digit;
1095         s++;
1096         /* FALLTHROUGH */
1097       case 6:
1098         digit = *s - '0';
1099         if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1100         value = value * 10 + digit;
1101         s++;
1102         /* FALLTHROUGH */
1103       case 5:
1104         digit = *s - '0';
1105         if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1106         value = value * 10 + digit;
1107         s++;
1108         /* FALLTHROUGH */
1109       case 4:
1110         digit = *s - '0';
1111         if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1112         value = value * 10 + digit;
1113         s++;
1114         /* FALLTHROUGH */
1115       case 3:
1116         digit = *s - '0';
1117         if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1118         value = value * 10 + digit;
1119         s++;
1120         /* FALLTHROUGH */
1121       case 2:
1122         digit = *s - '0';
1123         if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1124         value = value * 10 + digit;
1125         s++;
1126         /* FALLTHROUGH */
1127       case 1:
1128         digit = *s - '0';
1129         if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1130         value = value * 10 + digit;
1131         s++;
1132         /* FALLTHROUGH */
1133       case 0:       /* This case means the string consists of just the one
1134                        digit we already have processed */
1135 
1136         /* If we got here by falling through other than the default: case, we
1137          * have processed the whole string, and know it consists entirely of
1138          * digits, and can't have overflowed. */
1139         if (s >= send) {
1140             if (valuep)
1141               *valuep = value;
1142             return numtype|IS_NUMBER_IN_UV;
1143         }
1144 
1145         /* Here, there are extra characters beyond the first 9 digits.  Use a
1146          * loop to accumulate any remaining digits, until we get a non-digit or
1147          * would overflow.  Note that leading zeros could cause us to get here
1148          * without being close to overflowing.
1149          *
1150          * (The conditional 's >= send' above could be eliminated by making the
1151          * default: in the switch to instead be 'case 8:', and process longer
1152          * strings separately by using the loop below.  This would penalize
1153          * these inputs by the extra instructions needed for looping.  That
1154          * could be eliminated by copying the unwound code from above to handle
1155          * the firt 9 digits of these.  khw didn't think this saving of a
1156          * single conditional was worth it.) */
1157         do {
1158             digit = *s - '0';
1159             if (! inRANGE(digit, 0, 9)) goto mantissa_done;
1160             if (       value < uv_max_div_10
1161                 || (   value == uv_max_div_10
1162                     && digit <= uv_max_mod_10))
1163             {
1164                 value = value * 10 + digit;
1165                 s++;
1166             }
1167             else { /* value would overflow.  skip the remaining digits, don't
1168                       worry about setting *valuep.  */
1169                 do {
1170                     s++;
1171                 } while (s < send && isDIGIT(*s));
1172                 numtype |=
1173                     IS_NUMBER_GREATER_THAN_UV_MAX;
1174                 goto skip_value;
1175             }
1176         } while (s < send);
1177     }   /* End switch on input length */
1178 
1179   mantissa_done:
1180     numtype |= IS_NUMBER_IN_UV;
1181     if (valuep)
1182       *valuep = value;
1183 
1184   skip_value:
1185     if (GROK_NUMERIC_RADIX(&s, send)) {
1186       numtype |= IS_NUMBER_NOT_INT;
1187       while (s < send && isDIGIT(*s))  /* optional digits after the radix */
1188         s++;
1189     }
1190   } /* End of *s is a digit */
1191   else if (GROK_NUMERIC_RADIX(&s, send)) {
1192     numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
1193     /* no digits before the radix means we need digits after it */
1194     if (s < send && isDIGIT(*s)) {
1195       do {
1196         s++;
1197       } while (s < send && isDIGIT(*s));
1198       if (valuep) {
1199         /* integer approximation is valid - it's 0.  */
1200         *valuep = 0;
1201       }
1202     }
1203     else
1204         return 0;
1205   }
1206 
1207   if (LIKELY(s > d) && s < send) {
1208     /* we can have an optional exponent part */
1209     if (UNLIKELY(isALPHA_FOLD_EQ(*s, 'e'))) {
1210       s++;
1211       if (s < send && (*s == '-' || *s == '+'))
1212         s++;
1213       if (s < send && isDIGIT(*s)) {
1214         do {
1215           s++;
1216         } while (s < send && isDIGIT(*s));
1217       }
1218       else if (flags & PERL_SCAN_TRAILING)
1219         return numtype | IS_NUMBER_TRAILING;
1220       else
1221         return 0;
1222 
1223       /* The only flag we keep is sign.  Blow away any "it's UV"  */
1224       numtype &= IS_NUMBER_NEG;
1225       numtype |= IS_NUMBER_NOT_INT;
1226     }
1227   }
1228 
1229   while (s < send) {
1230     if (LIKELY(! isSPACE(*s))) goto end_space;
1231     s++;
1232   }
1233   return numtype;
1234 
1235  end_space:
1236 
1237   if (UNLIKELY(memEQs(pv, len, "0 but true"))) {
1238     if (valuep)
1239       *valuep = 0;
1240     return IS_NUMBER_IN_UV;
1241   }
1242 
1243   /* We could be e.g. at "Inf" or "NaN", or at the "#" of "1.#INF". */
1244   if ((s + 2 < send) && UNLIKELY(memCHRs("inqs#", toFOLD(*s)))) {
1245       /* Really detect inf/nan. Start at d, not s, since the above
1246        * code might have already consumed the "1." or "1". */
1247       const int infnan = Perl_grok_infnan(aTHX_ &d, send);
1248       if ((infnan & IS_NUMBER_INFINITY)) {
1249           return (numtype | infnan); /* Keep sign for infinity. */
1250       }
1251       else if ((infnan & IS_NUMBER_NAN)) {
1252           return (numtype | infnan) & ~IS_NUMBER_NEG; /* Clear sign for nan. */
1253       }
1254   }
1255   else if (flags & PERL_SCAN_TRAILING) {
1256     return numtype | IS_NUMBER_TRAILING;
1257   }
1258 
1259   return 0;
1260 }
1261 
1262 /*
1263 =for apidoc grok_atoUV
1264 
1265 parse a string, looking for a decimal unsigned integer.
1266 
1267 On entry, C<pv> points to the beginning of the string;
1268 C<valptr> points to a UV that will receive the converted value, if found;
1269 C<endptr> is either NULL or points to a variable that points to one byte
1270 beyond the point in C<pv> that this routine should examine.
1271 If C<endptr> is NULL, C<pv> is assumed to be NUL-terminated.
1272 
1273 Returns FALSE if C<pv> doesn't represent a valid unsigned integer value (with
1274 no leading zeros).  Otherwise it returns TRUE, and sets C<*valptr> to that
1275 value.
1276 
1277 If you constrain the portion of C<pv> that is looked at by this function (by
1278 passing a non-NULL C<endptr>), and if the intial bytes of that portion form a
1279 valid value, it will return TRUE, setting C<*endptr> to the byte following the
1280 final digit of the value.  But if there is no constraint at what's looked at,
1281 all of C<pv> must be valid in order for TRUE to be returned.  C<*endptr> is
1282 unchanged from its value on input if FALSE is returned;
1283 
1284 The only characters this accepts are the decimal digits '0'..'9'.
1285 
1286 As opposed to L<atoi(3)> or L<strtol(3)>, C<grok_atoUV> does NOT allow optional
1287 leading whitespace, nor negative inputs.  If such features are required, the
1288 calling code needs to explicitly implement those.
1289 
1290 Note that this function returns FALSE for inputs that would overflow a UV,
1291 or have leading zeros.  Thus a single C<0> is accepted, but not C<00> nor
1292 C<01>, C<002>, I<etc>.
1293 
1294 Background: C<atoi> has severe problems with illegal inputs, it cannot be
1295 used for incremental parsing, and therefore should be avoided
1296 C<atoi> and C<strtol> are also affected by locale settings, which can also be
1297 seen as a bug (global state controlled by user environment).
1298 
1299 =cut
1300 
1301 */
1302 
1303 bool
Perl_grok_atoUV(const char * pv,UV * valptr,const char ** endptr)1304 Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr)
1305 {
1306     const char* s = pv;
1307     const char** eptr;
1308     const char* end2; /* Used in case endptr is NULL. */
1309     UV val = 0; /* The parsed value. */
1310 
1311     PERL_ARGS_ASSERT_GROK_ATOUV;
1312 
1313     if (endptr) {
1314         eptr = endptr;
1315     }
1316     else {
1317         end2 = s + strlen(s);
1318         eptr = &end2;
1319     }
1320 
1321     if (   *eptr <= s
1322         || ! isDIGIT(*s))
1323     {
1324         return FALSE;
1325     }
1326 
1327     /* Single-digit inputs are quite common. */
1328     val = *s++ - '0';
1329     if (s < *eptr && isDIGIT(*s)) {
1330         /* Fail on extra leading zeros. */
1331         if (val == 0)
1332             return FALSE;
1333         while (s < *eptr && isDIGIT(*s)) {
1334             /* This could be unrolled like in grok_number(), but
1335                 * the expected uses of this are not speed-needy, and
1336                 * unlikely to need full 64-bitness. */
1337             const U8 digit = *s++ - '0';
1338             if (val < uv_max_div_10 ||
1339                 (val == uv_max_div_10 && digit <= uv_max_mod_10)) {
1340                 val = val * 10 + digit;
1341             } else {
1342                 return FALSE;
1343             }
1344         }
1345     }
1346 
1347     if (endptr == NULL) {
1348         if (*s) {
1349             return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */
1350         }
1351     }
1352     else {
1353         *endptr = s;
1354     }
1355 
1356     *valptr = val;
1357     return TRUE;
1358 }
1359 
1360 #ifndef Perl_strtod
1361 STATIC NV
S_mulexp10(NV value,I32 exponent)1362 S_mulexp10(NV value, I32 exponent)
1363 {
1364     NV result = 1.0;
1365     NV power = 10.0;
1366     bool negative = 0;
1367     I32 bit;
1368 
1369     if (exponent == 0)
1370         return value;
1371     if (value == 0)
1372         return (NV)0;
1373 
1374     /* On OpenVMS VAX we by default use the D_FLOAT double format,
1375      * and that format does not have *easy* capabilities [1] for
1376      * overflowing doubles 'silently' as IEEE fp does.  We also need
1377      * to support G_FLOAT on both VAX and Alpha, and though the exponent
1378      * range is much larger than D_FLOAT it still doesn't do silent
1379      * overflow.  Therefore we need to detect early whether we would
1380      * overflow (this is the behaviour of the native string-to-float
1381      * conversion routines, and therefore of native applications, too).
1382      *
1383      * [1] Trying to establish a condition handler to trap floating point
1384      *     exceptions is not a good idea. */
1385 
1386     /* In UNICOS and in certain Cray models (such as T90) there is no
1387      * IEEE fp, and no way at all from C to catch fp overflows gracefully.
1388      * There is something you can do if you are willing to use some
1389      * inline assembler: the instruction is called DFI-- but that will
1390      * disable *all* floating point interrupts, a little bit too large
1391      * a hammer.  Therefore we need to catch potential overflows before
1392      * it's too late. */
1393 
1394 #if ((defined(VMS) && !defined(_IEEE_FP)) || defined(_UNICOS) || defined(DOUBLE_IS_VAX_FLOAT)) && defined(NV_MAX_10_EXP)
1395     STMT_START {
1396         const NV exp_v = log10(value);
1397         if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
1398             return NV_MAX;
1399         if (exponent < 0) {
1400             if (-(exponent + exp_v) >= NV_MAX_10_EXP)
1401                 return 0.0;
1402             while (-exponent >= NV_MAX_10_EXP) {
1403                 /* combination does not overflow, but 10^(-exponent) does */
1404                 value /= 10;
1405                 ++exponent;
1406             }
1407         }
1408     } STMT_END;
1409 #endif
1410 
1411     if (exponent < 0) {
1412         negative = 1;
1413         exponent = -exponent;
1414 #ifdef NV_MAX_10_EXP
1415         /* for something like 1234 x 10^-309, the action of calculating
1416          * the intermediate value 10^309 then returning 1234 / (10^309)
1417          * will fail, since 10^309 becomes infinity. In this case try to
1418          * refactor it as 123 / (10^308) etc.
1419          */
1420         while (value && exponent > NV_MAX_10_EXP) {
1421             exponent--;
1422             value /= 10;
1423         }
1424         if (value == 0.0)
1425             return value;
1426 #endif
1427     }
1428 #if defined(__osf__)
1429     /* Even with cc -ieee + ieee_set_fp_control(IEEE_TRAP_ENABLE_INV)
1430      * Tru64 fp behavior on inf/nan is somewhat broken. Another way
1431      * to do this would be ieee_set_fp_control(IEEE_TRAP_ENABLE_OVF)
1432      * but that breaks another set of infnan.t tests. */
1433 #  define FP_OVERFLOWS_TO_ZERO
1434 #endif
1435     for (bit = 1; exponent; bit <<= 1) {
1436         if (exponent & bit) {
1437             exponent ^= bit;
1438             result *= power;
1439 #ifdef FP_OVERFLOWS_TO_ZERO
1440             if (result == 0)
1441 # ifdef NV_INF
1442                 return value < 0 ? -NV_INF : NV_INF;
1443 # else
1444                 return value < 0 ? -FLT_MAX : FLT_MAX;
1445 # endif
1446 #endif
1447             /* Floating point exceptions are supposed to be turned off,
1448              *  but if we're obviously done, don't risk another iteration.
1449              */
1450              if (exponent == 0) break;
1451         }
1452         power *= power;
1453     }
1454     return negative ? value / result : value * result;
1455 }
1456 #endif /* #ifndef Perl_strtod */
1457 
1458 #ifdef Perl_strtod
1459 #  define ATOF(s, x) my_atof2(s, &x)
1460 #else
1461 #  define ATOF(s, x) Perl_atof2(s, x)
1462 #endif
1463 
1464 NV
Perl_my_atof(pTHX_ const char * s)1465 Perl_my_atof(pTHX_ const char* s)
1466 {
1467 
1468 /*
1469 =for apidoc my_atof
1470 
1471 L<C<atof>(3)>, but properly works with Perl locale handling, accepting a dot
1472 radix character always, but also the current locale's radix character if and
1473 only if called from within the lexical scope of a Perl C<use locale> statement.
1474 
1475 N.B. C<s> must be NUL terminated.
1476 
1477 =cut
1478 */
1479 
1480     NV x = 0.0;
1481 
1482     PERL_ARGS_ASSERT_MY_ATOF;
1483 
1484 #if ! defined(USE_LOCALE_NUMERIC)
1485 
1486     ATOF(s, x);
1487 
1488 #else
1489 
1490     {
1491         DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
1492         STORE_LC_NUMERIC_SET_TO_NEEDED();
1493         if (! (PL_numeric_radix_sv && IN_LC(LC_NUMERIC))) {
1494             ATOF(s,x);
1495         }
1496         else {
1497 
1498             /* Look through the string for the first thing that looks like a
1499              * decimal point: either the value in the current locale or the
1500              * standard fallback of '.'. The one which appears earliest in the
1501              * input string is the one that we should have atof look for. Note
1502              * that we have to determine this beforehand because on some
1503              * systems, Perl_atof2 is just a wrapper around the system's atof.
1504              * */
1505             const char * const standard_pos = strchr(s, '.');
1506             const char * const local_pos
1507                                   = strstr(s, SvPV_nolen(PL_numeric_radix_sv));
1508             const bool use_standard_radix
1509                     = standard_pos && (!local_pos || standard_pos < local_pos);
1510 
1511             if (use_standard_radix) {
1512                 SET_NUMERIC_STANDARD();
1513                 LOCK_LC_NUMERIC_STANDARD();
1514             }
1515 
1516             ATOF(s,x);
1517 
1518             if (use_standard_radix) {
1519                 UNLOCK_LC_NUMERIC_STANDARD();
1520                 SET_NUMERIC_UNDERLYING();
1521             }
1522         }
1523         RESTORE_LC_NUMERIC();
1524     }
1525 
1526 #endif
1527 
1528     return x;
1529 }
1530 
1531 #if defined(NV_INF) || defined(NV_NAN)
1532 
1533 static char*
S_my_atof_infnan(pTHX_ const char * s,bool negative,const char * send,NV * value)1534 S_my_atof_infnan(pTHX_ const char* s, bool negative, const char* send, NV* value)
1535 {
1536     const char *p0 = negative ? s - 1 : s;
1537     const char *p = p0;
1538     const int infnan = grok_infnan(&p, send);
1539     if (infnan && p != p0) {
1540         /* If we can generate inf/nan directly, let's do so. */
1541 #ifdef NV_INF
1542         if ((infnan & IS_NUMBER_INFINITY)) {
1543             *value = (infnan & IS_NUMBER_NEG) ? -NV_INF: NV_INF;
1544             return (char*)p;
1545         }
1546 #endif
1547 #ifdef NV_NAN
1548         if ((infnan & IS_NUMBER_NAN)) {
1549             *value = NV_NAN;
1550             return (char*)p;
1551         }
1552 #endif
1553 #ifdef Perl_strtod
1554         /* If still here, we didn't have either NV_INF or NV_NAN,
1555          * and can try falling back to native strtod/strtold.
1556          *
1557          * The native interface might not recognize all the possible
1558          * inf/nan strings Perl recognizes.  What we can try
1559          * is to try faking the input.  We will try inf/-inf/nan
1560          * as the most promising/portable input. */
1561         {
1562             const char* fake = "silence compiler warning";
1563             char* endp;
1564             NV nv;
1565 #ifdef NV_INF
1566             if ((infnan & IS_NUMBER_INFINITY)) {
1567                 fake = ((infnan & IS_NUMBER_NEG)) ? "-inf" : "inf";
1568             }
1569 #endif
1570 #ifdef NV_NAN
1571             if ((infnan & IS_NUMBER_NAN)) {
1572                 fake = "nan";
1573             }
1574 #endif
1575             assert(strNE(fake, "silence compiler warning"));
1576             nv = S_strtod(aTHX_ fake, &endp);
1577             if (fake != endp) {
1578 #ifdef NV_INF
1579                 if ((infnan & IS_NUMBER_INFINITY)) {
1580 #  ifdef Perl_isinf
1581                     if (Perl_isinf(nv))
1582                         *value = nv;
1583 #  else
1584                     /* last resort, may generate SIGFPE */
1585                     *value = Perl_exp((NV)1e9);
1586                     if ((infnan & IS_NUMBER_NEG))
1587                         *value = -*value;
1588 #  endif
1589                     return (char*)p; /* p, not endp */
1590                 }
1591 #endif
1592 #ifdef NV_NAN
1593                 if ((infnan & IS_NUMBER_NAN)) {
1594 #  ifdef Perl_isnan
1595                     if (Perl_isnan(nv))
1596                         *value = nv;
1597 #  else
1598                     /* last resort, may generate SIGFPE */
1599                     *value = Perl_log((NV)-1.0);
1600 #  endif
1601                     return (char*)p; /* p, not endp */
1602 #endif
1603                 }
1604             }
1605         }
1606 #endif /* #ifdef Perl_strtod */
1607     }
1608     return NULL;
1609 }
1610 
1611 #endif /* if defined(NV_INF) || defined(NV_NAN) */
1612 
1613 char*
Perl_my_atof2(pTHX_ const char * orig,NV * value)1614 Perl_my_atof2(pTHX_ const char* orig, NV* value)
1615 {
1616     PERL_ARGS_ASSERT_MY_ATOF2;
1617     return my_atof3(orig, value, 0);
1618 }
1619 
1620 char*
Perl_my_atof3(pTHX_ const char * orig,NV * value,const STRLEN len)1621 Perl_my_atof3(pTHX_ const char* orig, NV* value, const STRLEN len)
1622 {
1623     const char* s = orig;
1624     NV result[3] = {0.0, 0.0, 0.0};
1625 #if defined(USE_PERL_ATOF) || defined(Perl_strtod)
1626     const char* send = s + ((len != 0)
1627                            ? len
1628                            : strlen(orig)); /* one past the last */
1629     bool negative = 0;
1630 #endif
1631 #if defined(USE_PERL_ATOF) && !defined(Perl_strtod)
1632     UV accumulator[2] = {0,0};	/* before/after dp */
1633     bool seen_digit = 0;
1634     I32 exp_adjust[2] = {0,0};
1635     I32 exp_acc[2] = {-1, -1};
1636     /* the current exponent adjust for the accumulators */
1637     I32 exponent = 0;
1638     I32	seen_dp  = 0;
1639     I32 digit = 0;
1640     I32 old_digit = 0;
1641     I32 sig_digits = 0; /* noof significant digits seen so far */
1642 #endif
1643 
1644 #if defined(USE_PERL_ATOF) || defined(Perl_strtod)
1645     PERL_ARGS_ASSERT_MY_ATOF3;
1646 
1647     /* leading whitespace */
1648     while (s < send && isSPACE(*s))
1649         ++s;
1650 
1651     /* sign */
1652     switch (*s) {
1653         case '-':
1654             negative = 1;
1655             /* FALLTHROUGH */
1656         case '+':
1657             ++s;
1658     }
1659 #endif
1660 
1661 #ifdef Perl_strtod
1662     {
1663         char* endp;
1664         char* copy = NULL;
1665 
1666         if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
1667             return endp;
1668 
1669         /* strtold() accepts 0x-prefixed hex and in POSIX implementations,
1670            0b-prefixed binary numbers, which is backward incompatible
1671         */
1672         if ((len == 0 || len - (s-orig) >= 2) && *s == '0' &&
1673             (isALPHA_FOLD_EQ(s[1], 'x') || isALPHA_FOLD_EQ(s[1], 'b'))) {
1674             *value = 0;
1675             return (char *)s+1;
1676         }
1677 
1678         /* If the length is passed in, the input string isn't NUL-terminated,
1679          * and in it turns out the function below assumes it is; therefore we
1680          * create a copy and NUL-terminate that */
1681         if (len) {
1682             Newx(copy, len + 1, char);
1683             Copy(orig, copy, len, char);
1684             copy[len] = '\0';
1685             s = copy + (s - orig);
1686         }
1687 
1688         result[2] = S_strtod(aTHX_ s, &endp);
1689 
1690         /* If we created a copy, 'endp' is in terms of that.  Convert back to
1691          * the original */
1692         if (copy) {
1693             s = (s - copy) + (char *) orig;
1694             endp = (endp - copy) + (char *) orig;
1695             Safefree(copy);
1696         }
1697 
1698         if (s != endp) {
1699             *value = negative ? -result[2] : result[2];
1700             return endp;
1701         }
1702         return NULL;
1703     }
1704 #elif defined(USE_PERL_ATOF)
1705 
1706 /* There is no point in processing more significant digits
1707  * than the NV can hold. Note that NV_DIG is a lower-bound value,
1708  * while we need an upper-bound value. We add 2 to account for this;
1709  * since it will have been conservative on both the first and last digit.
1710  * For example a 32-bit mantissa with an exponent of 4 would have
1711  * exact values in the set
1712  *               4
1713  *               8
1714  *              ..
1715  *     17179869172
1716  *     17179869176
1717  *     17179869180
1718  *
1719  * where for the purposes of calculating NV_DIG we would have to discount
1720  * both the first and last digit, since neither can hold all values from
1721  * 0..9; but for calculating the value we must examine those two digits.
1722  */
1723 #ifdef MAX_SIG_DIG_PLUS
1724     /* It is not necessarily the case that adding 2 to NV_DIG gets all the
1725        possible digits in a NV, especially if NVs are not IEEE compliant
1726        (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */
1727 # define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS)
1728 #else
1729 # define MAX_SIG_DIGITS (NV_DIG+2)
1730 #endif
1731 
1732 /* the max number we can accumulate in a UV, and still safely do 10*N+9 */
1733 #define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10))
1734 
1735 #if defined(NV_INF) || defined(NV_NAN)
1736     {
1737         char* endp;
1738         if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
1739             return endp;
1740     }
1741 #endif
1742 
1743     /* we accumulate digits into an integer; when this becomes too
1744      * large, we add the total to NV and start again */
1745 
1746     while (s < send) {
1747         if (isDIGIT(*s)) {
1748             seen_digit = 1;
1749             old_digit = digit;
1750             digit = *s++ - '0';
1751             if (seen_dp)
1752                 exp_adjust[1]++;
1753 
1754             /* don't start counting until we see the first significant
1755              * digit, eg the 5 in 0.00005... */
1756             if (!sig_digits && digit == 0)
1757                 continue;
1758 
1759             if (++sig_digits > MAX_SIG_DIGITS) {
1760                 /* limits of precision reached */
1761                 if (digit > 5) {
1762                     ++accumulator[seen_dp];
1763                 } else if (digit == 5) {
1764                     if (old_digit % 2) { /* round to even - Allen */
1765                         ++accumulator[seen_dp];
1766                     }
1767                 }
1768                 if (seen_dp) {
1769                     exp_adjust[1]--;
1770                 } else {
1771                     exp_adjust[0]++;
1772                 }
1773                 /* skip remaining digits */
1774                 while (s < send && isDIGIT(*s)) {
1775                     ++s;
1776                     if (! seen_dp) {
1777                         exp_adjust[0]++;
1778                     }
1779                 }
1780                 /* warn of loss of precision? */
1781             }
1782             else {
1783                 if (accumulator[seen_dp] > MAX_ACCUMULATE) {
1784                     /* add accumulator to result and start again */
1785                     result[seen_dp] = S_mulexp10(result[seen_dp],
1786                                                  exp_acc[seen_dp])
1787                         + (NV)accumulator[seen_dp];
1788                     accumulator[seen_dp] = 0;
1789                     exp_acc[seen_dp] = 0;
1790                 }
1791                 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit;
1792                 ++exp_acc[seen_dp];
1793             }
1794         }
1795         else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) {
1796             seen_dp = 1;
1797             if (sig_digits > MAX_SIG_DIGITS) {
1798                 while (s < send && isDIGIT(*s)) {
1799                     ++s;
1800                 }
1801                 break;
1802             }
1803         }
1804         else {
1805             break;
1806         }
1807     }
1808 
1809     result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0];
1810     if (seen_dp) {
1811         result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1];
1812     }
1813 
1814     if (s < send && seen_digit && (isALPHA_FOLD_EQ(*s, 'e'))) {
1815         bool expnegative = 0;
1816 
1817         ++s;
1818         switch (*s) {
1819             case '-':
1820                 expnegative = 1;
1821                 /* FALLTHROUGH */
1822             case '+':
1823                 ++s;
1824         }
1825         while (s < send && isDIGIT(*s))
1826             exponent = exponent * 10 + (*s++ - '0');
1827         if (expnegative)
1828             exponent = -exponent;
1829     }
1830 
1831     /* now apply the exponent */
1832 
1833     if (seen_dp) {
1834         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0])
1835                 + S_mulexp10(result[1],exponent-exp_adjust[1]);
1836     } else {
1837         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]);
1838     }
1839 
1840     /* now apply the sign */
1841     if (negative)
1842         result[2] = -result[2];
1843     *value = result[2];
1844     return (char *)s;
1845 #else  /* USE_PERL_ATOF */
1846     /* If you see this error you both don't have strtod (or configured -Ud_strtod or
1847        or it's long double/quadmath equivalent) and disabled USE_PERL_ATOF, thus
1848        removing any way for perl to convert strings to floating point numbers.
1849     */
1850 # error No mechanism to convert strings to numbers available
1851 #endif
1852 }
1853 
1854 /*
1855 =for apidoc isinfnan
1856 
1857 C<Perl_isinfnan()> is a utility function that returns true if the NV
1858 argument is either an infinity or a C<NaN>, false otherwise.  To test
1859 in more detail, use C<Perl_isinf()> and C<Perl_isnan()>.
1860 
1861 This is also the logical inverse of Perl_isfinite().
1862 
1863 =cut
1864 */
1865 bool
Perl_isinfnan(NV nv)1866 Perl_isinfnan(NV nv)
1867 {
1868   PERL_UNUSED_ARG(nv);
1869 #ifdef Perl_isinf
1870     if (Perl_isinf(nv))
1871         return TRUE;
1872 #endif
1873 #ifdef Perl_isnan
1874     if (Perl_isnan(nv))
1875         return TRUE;
1876 #endif
1877     return FALSE;
1878 }
1879 
1880 /*
1881 =for apidoc isinfnansv
1882 
1883 Checks whether the argument would be either an infinity or C<NaN> when used
1884 as a number, but is careful not to trigger non-numeric or uninitialized
1885 warnings.  it assumes the caller has done C<SvGETMAGIC(sv)> already.
1886 
1887 =cut
1888 */
1889 
1890 bool
Perl_isinfnansv(pTHX_ SV * sv)1891 Perl_isinfnansv(pTHX_ SV *sv)
1892 {
1893     PERL_ARGS_ASSERT_ISINFNANSV;
1894     if (!SvOK(sv))
1895         return FALSE;
1896     if (SvNOKp(sv))
1897         return Perl_isinfnan(SvNVX(sv));
1898     if (SvIOKp(sv))
1899         return FALSE;
1900     {
1901         STRLEN len;
1902         const char *s = SvPV_nomg_const(sv, len);
1903         return cBOOL(grok_infnan(&s, s+len));
1904     }
1905 }
1906 
1907 #ifndef HAS_MODFL
1908 /* C99 has truncl, pre-C99 Solaris had aintl.  We can use either with
1909  * copysignl to emulate modfl, which is in some platforms missing or
1910  * broken. */
1911 #  if defined(HAS_TRUNCL) && defined(HAS_COPYSIGNL)
1912 long double
Perl_my_modfl(long double x,long double * ip)1913 Perl_my_modfl(long double x, long double *ip)
1914 {
1915     *ip = truncl(x);
1916     return (x == *ip ? copysignl(0.0L, x) : x - *ip);
1917 }
1918 #  elif defined(HAS_AINTL) && defined(HAS_COPYSIGNL)
1919 long double
Perl_my_modfl(long double x,long double * ip)1920 Perl_my_modfl(long double x, long double *ip)
1921 {
1922     *ip = aintl(x);
1923     return (x == *ip ? copysignl(0.0L, x) : x - *ip);
1924 }
1925 #  endif
1926 #endif
1927 
1928 /* Similarly, with ilogbl and scalbnl we can emulate frexpl. */
1929 #if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL)
1930 long double
Perl_my_frexpl(long double x,int * e)1931 Perl_my_frexpl(long double x, int *e) {
1932     *e = x == 0.0L ? 0 : ilogbl(x) + 1;
1933     return (scalbnl(x, -*e));
1934 }
1935 #endif
1936 
1937 /*
1938 =for apidoc Perl_signbit
1939 
1940 Return a non-zero integer if the sign bit on an NV is set, and 0 if
1941 it is not.
1942 
1943 If F<Configure> detects this system has a C<signbit()> that will work with
1944 our NVs, then we just use it via the C<#define> in F<perl.h>.  Otherwise,
1945 fall back on this implementation.  The main use of this function
1946 is catching C<-0.0>.
1947 
1948 C<Configure> notes:  This function is called C<'Perl_signbit'> instead of a
1949 plain C<'signbit'> because it is easy to imagine a system having a C<signbit()>
1950 function or macro that doesn't happen to work with our particular choice
1951 of NVs.  We shouldn't just re-C<#define> C<signbit> as C<Perl_signbit> and expect
1952 the standard system headers to be happy.  Also, this is a no-context
1953 function (no C<pTHX_>) because C<Perl_signbit()> is usually re-C<#defined> in
1954 F<perl.h> as a simple macro call to the system's C<signbit()>.
1955 Users should just always call C<Perl_signbit()>.
1956 
1957 =cut
1958 */
1959 #if !defined(HAS_SIGNBIT)
1960 int
Perl_signbit(NV x)1961 Perl_signbit(NV x) {
1962 #  ifdef Perl_fp_class_nzero
1963     return Perl_fp_class_nzero(x);
1964     /* Try finding the high byte, and assume it's highest bit
1965      * is the sign.  This assumption is probably wrong somewhere. */
1966 #  elif defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN
1967     return (((unsigned char *)&x)[9] & 0x80);
1968 #  elif defined(NV_LITTLE_ENDIAN)
1969     /* Note that NVSIZE is sizeof(NV), which would make the below be
1970      * wrong if the end bytes are unused, which happens with the x86
1971      * 80-bit long doubles, which is why take care of that above. */
1972     return (((unsigned char *)&x)[NVSIZE - 1] & 0x80);
1973 #  elif defined(NV_BIG_ENDIAN)
1974     return (((unsigned char *)&x)[0] & 0x80);
1975 #  else
1976     /* This last resort fallback is wrong for the negative zero. */
1977     return (x < 0.0) ? 1 : 0;
1978 #  endif
1979 }
1980 #endif
1981 
1982 /*
1983  * ex: set ts=8 sts=4 sw=4 et:
1984  */
1985