1 /* -*- Mode: C; c-file-style: "python" -*- */
2 
3 #include <Python.h>
4 #include <locale.h>
5 
/* Test whether the text at s begins with the pattern t, ignoring the case of
   the characters in s.  The pattern t must already be lower-case.  Used to
   recognise the "nan" and "inf" spellings.

   Returns 1 when every character of t was matched, 0 otherwise. */

static int
case_insensitive_match(const char *s, const char *t)
{
    for (; *t != '\0'; s++, t++) {
        /* Stop at the first character of s that differs from the pattern. */
        if (Py_TOLOWER(*s) != *t) {
            return 0;
        }
    }
    /* The whole pattern was consumed. */
    return 1;
}
18 
19 /* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
20    "infinity", with an optional leading sign of "+" or "-".  On success,
21    return the NaN or Infinity as a double and set *endptr to point just beyond
22    the successfully parsed portion of the string.  On failure, return -1.0 and
23    set *endptr to point to the start of the string. */
24 
25 #ifndef PY_NO_SHORT_FLOAT_REPR
26 
/* Variant compiled when PY_NO_SHORT_FLOAT_REPR is not defined: the special
   values are obtained from _Py_dg_infinity() and _Py_dg_stdnan().

   p:       NUL-terminated input text.
   endptr:  receives the position just past the recognised text, or p itself
            when nothing was recognised.

   Returns the infinity or NaN, or -1.0 when the text is not recognised. */
double
_Py_parse_inf_or_nan(const char *p, char **endptr)
{
    const char *cursor = p;
    double value;
    int is_negative = 0;

    /* Consume an optional leading sign. */
    switch (*cursor) {
    case '-':
        is_negative = 1;
        cursor++;
        break;
    case '+':
        cursor++;
        break;
    default:
        break;
    }

    if (case_insensitive_match(cursor, "inf")) {
        cursor += 3;
        /* Accept the long spelling "infinity" as well. */
        if (case_insensitive_match(cursor, "inity")) {
            cursor += 5;
        }
        value = _Py_dg_infinity(is_negative);
    }
    else if (case_insensitive_match(cursor, "nan")) {
        cursor += 3;
        value = _Py_dg_stdnan(is_negative);
    }
    else {
        /* Nothing recognised: report failure at the start of the input,
           even if a sign character had been skipped. */
        cursor = p;
        value = -1.0;
    }

    *endptr = (char *)cursor;
    return value;
}
59 
60 #else
61 
62 double
_Py_parse_inf_or_nan(const char * p,char ** endptr)63 _Py_parse_inf_or_nan(const char *p, char **endptr)
64 {
65     double retval;
66     const char *s;
67     int negate = 0;
68 
69     s = p;
70     if (*s == '-') {
71         negate = 1;
72         s++;
73     }
74     else if (*s == '+') {
75         s++;
76     }
77     if (case_insensitive_match(s, "inf")) {
78         s += 3;
79         if (case_insensitive_match(s, "inity"))
80             s += 5;
81         retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
82     }
83 #ifdef Py_NAN
84     else if (case_insensitive_match(s, "nan")) {
85         s += 3;
86         retval = negate ? -Py_NAN : Py_NAN;
87     }
88 #endif
89     else {
90         s = p;
91         retval = -1.0;
92     }
93     *endptr = (char *)s;
94     return retval;
95 }
96 
97 #endif
98 
99 /**
100  * _PyOS_ascii_strtod:
101  * @nptr:    the string to convert to a numeric value.
102  * @endptr:  if non-%NULL, it returns the character after
103  *           the last character used in the conversion.
104  *
 * Converts a string to a double value.
106  * This function behaves like the standard strtod() function
107  * does in the C locale. It does this without actually
108  * changing the current locale, since that would not be
109  * thread-safe.
110  *
111  * This function is typically used when reading configuration
112  * files or other non-user input that should be locale independent.
113  * To handle input from the user you should normally use the
114  * locale-sensitive system strtod() function.
115  *
116  * If the correct value would cause overflow, plus or minus %HUGE_VAL
117  * is returned (according to the sign of the value), and %ERANGE is
118  * stored in %errno. If the correct value would cause underflow,
119  * zero is returned and %ERANGE is stored in %errno.
120  * If memory allocation fails, %ENOMEM is stored in %errno.
121  *
122  * This function resets %errno before calling strtod() so that
123  * you can reliably detect overflow and underflow.
124  *
 * Return value: the double value.
126  **/
127 
128 #ifndef PY_NO_SHORT_FLOAT_REPR
129 
130 static double
_PyOS_ascii_strtod(const char * nptr,char ** endptr)131 _PyOS_ascii_strtod(const char *nptr, char **endptr)
132 {
133     double result;
134     _Py_SET_53BIT_PRECISION_HEADER;
135 
136     assert(nptr != NULL);
137     /* Set errno to zero, so that we can distinguish zero results
138        and underflows */
139     errno = 0;
140 
141     _Py_SET_53BIT_PRECISION_START;
142     result = _Py_dg_strtod(nptr, endptr);
143     _Py_SET_53BIT_PRECISION_END;
144 
145     if (*endptr == nptr)
146         /* string might represent an inf or nan */
147         result = _Py_parse_inf_or_nan(nptr, endptr);
148 
149     return result;
150 
151 }
152 
153 #else
154 
/*
   Variant compiled when PY_NO_SHORT_FLOAT_REPR is defined.

   Use system strtod;  since strtod is locale aware, we may
   have to first fix the decimal separator.

   Note that unlike _Py_dg_strtod, the system strtod may not always give
   correctly rounded results.

   nptr:    NUL-terminated string to parse; must not be NULL.
   endptr:  must not be NULL; receives a pointer just past the parsed text,
            or nptr itself when nothing could be parsed.

   Returns the parsed value.  On invalid input, returns -1.0 with errno set
   to EINVAL and *endptr == nptr.  If the temporary copy of the input cannot
   be allocated, returns -1.0 with errno set to ENOMEM and *endptr == nptr.
*/

static double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* Parse infinities and nans */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr)
        return val;

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725)  */
    p = nptr;
    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
        goto invalid_string;

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.')
        goto invalid_string;

    digits_pos = p;
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (Py_ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
            /* Python bug #1417699 */
            goto invalid_string;
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            *endptr = (char *)nptr;
            errno = ENOMEM;
            /* val still holds the -1.0 failure value returned by
               _Py_parse_inf_or_nan above. */
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* Translate fail_pos from a position in the copy back to the
               matching position in the original input.  The test must
               compare offsets: fail_pos points into copy while
               decimal_point_pos points into the caller's string, so
               comparing the two pointers directly is meaningless.  Only
               positions past the decimal point need to be corrected for
               the extra length of the locale's decimal point. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    /* strtod consumed nothing after the optional sign */
    if (fail_pos == digits_pos)
        goto invalid_string;

    if (negate && fail_pos != nptr)
        val = -val;
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
309 
310 #endif
311 
312 /* PyOS_string_to_double converts a null-terminated byte string s (interpreted
313    as a string of ASCII characters) to a float.  The string should not have
314    leading or trailing whitespace.  The conversion is independent of the
315    current locale.
316 
317    If endptr is NULL, try to convert the whole string.  Raise ValueError and
318    return -1.0 if the string is not a valid representation of a floating-point
319    number.
320 
321    If endptr is non-NULL, try to convert as much of the string as possible.
322    If no initial segment of the string is the valid representation of a
323    floating-point number then *endptr is set to point to the beginning of the
324    string, -1.0 is returned and again ValueError is raised.
325 
326    On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
327    if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
328    exception is raised.  Otherwise, overflow_exception should point to
329    a Python exception, this exception will be raised, -1.0 will be returned,
330    and *endptr will point just past the end of the converted value.
331 
332    If any other failure occurs (for example lack of memory), -1.0 is returned
333    and the appropriate Python exception will have been set.
334 */
335 
336 double
PyOS_string_to_double(const char * s,char ** endptr,PyObject * overflow_exception)337 PyOS_string_to_double(const char *s,
338                       char **endptr,
339                       PyObject *overflow_exception)
340 {
341     double x, result=-1.0;
342     char *fail_pos;
343 
344     errno = 0;
345     PyFPE_START_PROTECT("PyOS_string_to_double", return -1.0)
346     x = _PyOS_ascii_strtod(s, &fail_pos);
347     PyFPE_END_PROTECT(x)
348 
349     if (errno == ENOMEM) {
350         PyErr_NoMemory();
351         fail_pos = (char *)s;
352     }
353     else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
354         PyErr_Format(PyExc_ValueError,
355                       "could not convert string to float: "
356                       "'%.200s'", s);
357     else if (fail_pos == s)
358         PyErr_Format(PyExc_ValueError,
359                       "could not convert string to float: "
360                       "'%.200s'", s);
361     else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
362         PyErr_Format(overflow_exception,
363                       "value too large to convert to float: "
364                       "'%.200s'", s);
365     else
366         result = x;
367 
368     if (endptr != NULL)
369         *endptr = fail_pos;
370     return result;
371 }
372 
373 /* Remove underscores that follow the underscore placement rule from
374    the string and then call the `innerfunc` function on the result.
375    It should return a new object or NULL on exception.
376 
377    `what` is used for the error message emitted when underscores are detected
378    that don't follow the rule. `arg` is an opaque pointer passed to the inner
379    function.
380 
381    This is used to implement underscore-agnostic conversion for floats
382    and complex numbers.
383 */
384 PyObject *
_Py_string_to_number_with_underscores(const char * s,Py_ssize_t orig_len,const char * what,PyObject * obj,void * arg,PyObject * (* innerfunc)(const char *,Py_ssize_t,void *))385 _Py_string_to_number_with_underscores(
386     const char *s, Py_ssize_t orig_len, const char *what, PyObject *obj, void *arg,
387     PyObject *(*innerfunc)(const char *, Py_ssize_t, void *))
388 {
389     char prev;
390     const char *p, *last;
391     char *dup, *end;
392     PyObject *result;
393 
394     assert(s[orig_len] == '\0');
395 
396     if (strchr(s, '_') == NULL) {
397         return innerfunc(s, orig_len, arg);
398     }
399 
400     dup = PyMem_Malloc(orig_len + 1);
401     if (dup == NULL) {
402         return PyErr_NoMemory();
403     }
404     end = dup;
405     prev = '\0';
406     last = s + orig_len;
407     for (p = s; *p; p++) {
408         if (*p == '_') {
409             /* Underscores are only allowed after digits. */
410             if (!(prev >= '0' && prev <= '9')) {
411                 goto error;
412             }
413         }
414         else {
415             *end++ = *p;
416             /* Underscores are only allowed before digits. */
417             if (prev == '_' && !(*p >= '0' && *p <= '9')) {
418                 goto error;
419             }
420         }
421         prev = *p;
422     }
423     /* Underscores are not allowed at the end. */
424     if (prev == '_') {
425         goto error;
426     }
427     /* No embedded NULs allowed. */
428     if (p != last) {
429         goto error;
430     }
431     *end = '\0';
432     result = innerfunc(dup, end - dup, arg);
433     PyMem_Free(dup);
434     return result;
435 
436   error:
437     PyMem_Free(dup);
438     PyErr_Format(PyExc_ValueError,
439                  "could not convert string to %s: "
440                  "%R", what, obj);
441     return NULL;
442 }
443 
444 #ifdef PY_NO_SHORT_FLOAT_REPR
445 
446 /* Given a string that may have a decimal point in the current
447    locale, change it back to a dot.  Since the string cannot get
448    longer, no need for a maximum buffer size parameter. */
449 Py_LOCAL_INLINE(void)
change_decimal_from_locale_to_dot(char * buffer)450 change_decimal_from_locale_to_dot(char* buffer)
451 {
452     struct lconv *locale_data = localeconv();
453     const char *decimal_point = locale_data->decimal_point;
454 
455     if (decimal_point[0] != '.' || decimal_point[1] != 0) {
456         size_t decimal_point_len = strlen(decimal_point);
457 
458         if (*buffer == '+' || *buffer == '-')
459             buffer++;
460         while (Py_ISDIGIT(*buffer))
461             buffer++;
462         if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
463             *buffer = '.';
464             buffer++;
465             if (decimal_point_len > 1) {
466                 /* buffer needs to get smaller */
467                 size_t rest_len = strlen(buffer +
468                                      (decimal_point_len - 1));
469                 memmove(buffer,
470                     buffer + (decimal_point_len - 1),
471                     rest_len);
472                 buffer[rest_len] = 0;
473             }
474         }
475     }
476 }
477 
478 
479 /* From the C99 standard, section 7.19.6:
480 The exponent always contains at least two digits, and only as many more digits
481 as necessary to represent the exponent.
482 */
483 #define MIN_EXPONENT_DIGITS 2
484 
485 /* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
486    in length. */
487 Py_LOCAL_INLINE(void)
ensure_minimum_exponent_length(char * buffer,size_t buf_size)488 ensure_minimum_exponent_length(char* buffer, size_t buf_size)
489 {
490     char *p = strpbrk(buffer, "eE");
491     if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
492         char *start = p + 2;
493         int exponent_digit_cnt = 0;
494         int leading_zero_cnt = 0;
495         int in_leading_zeros = 1;
496         int significant_digit_cnt;
497 
498         /* Skip over the exponent and the sign. */
499         p += 2;
500 
501         /* Find the end of the exponent, keeping track of leading
502            zeros. */
503         while (*p && Py_ISDIGIT(*p)) {
504             if (in_leading_zeros && *p == '0')
505                 ++leading_zero_cnt;
506             if (*p != '0')
507                 in_leading_zeros = 0;
508             ++p;
509             ++exponent_digit_cnt;
510         }
511 
512         significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
513         if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
514             /* If there are 2 exactly digits, we're done,
515                regardless of what they contain */
516         }
517         else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
518             int extra_zeros_cnt;
519 
520             /* There are more than 2 digits in the exponent.  See
521                if we can delete some of the leading zeros */
522             if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
523                 significant_digit_cnt = MIN_EXPONENT_DIGITS;
524             extra_zeros_cnt = exponent_digit_cnt -
525                 significant_digit_cnt;
526 
527             /* Delete extra_zeros_cnt worth of characters from the
528                front of the exponent */
529             assert(extra_zeros_cnt >= 0);
530 
531             /* Add one to significant_digit_cnt to copy the
532                trailing 0 byte, thus setting the length */
533             memmove(start,
534                 start + extra_zeros_cnt,
535                 significant_digit_cnt + 1);
536         }
537         else {
538             /* If there are fewer than 2 digits, add zeros
539                until there are 2, if there's enough room */
540             int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
541             if (start + zeros + exponent_digit_cnt + 1
542                   < buffer + buf_size) {
543                 memmove(start + zeros, start,
544                     exponent_digit_cnt + 1);
545                 memset(start, '0', zeros);
546             }
547         }
548     }
549 }
550 
551 /* Remove trailing zeros after the decimal point from a numeric string; also
552    remove the decimal point if all digits following it are zero.  The numeric
553    string must end in '\0', and should not have any leading or trailing
554    whitespace.  Assumes that the decimal point is '.'. */
555 Py_LOCAL_INLINE(void)
remove_trailing_zeros(char * buffer)556 remove_trailing_zeros(char *buffer)
557 {
558     char *old_fraction_end, *new_fraction_end, *end, *p;
559 
560     p = buffer;
561     if (*p == '-' || *p == '+')
562         /* Skip leading sign, if present */
563         ++p;
564     while (Py_ISDIGIT(*p))
565         ++p;
566 
567     /* if there's no decimal point there's nothing to do */
568     if (*p++ != '.')
569         return;
570 
571     /* scan any digits after the point */
572     while (Py_ISDIGIT(*p))
573         ++p;
574     old_fraction_end = p;
575 
576     /* scan up to ending '\0' */
577     while (*p != '\0')
578         p++;
579     /* +1 to make sure that we move the null byte as well */
580     end = p+1;
581 
582     /* scan back from fraction_end, looking for removable zeros */
583     p = old_fraction_end;
584     while (*(p-1) == '0')
585         --p;
586     /* and remove point if we've got that far */
587     if (*(p-1) == '.')
588         --p;
589     new_fraction_end = p;
590 
591     memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
592 }
593 
594 /* Ensure that buffer has a decimal point in it.  The decimal point will not
595    be in the current locale, it will always be '.'. Don't add a decimal point
596    if an exponent is present.  Also, convert to exponential notation where
597    adding a '.0' would produce too many significant digits (see issue 5864).
598 
599    Returns a pointer to the fixed buffer, or NULL on failure.
600 */
601 Py_LOCAL_INLINE(char *)
ensure_decimal_point(char * buffer,size_t buf_size,int precision)602 ensure_decimal_point(char* buffer, size_t buf_size, int precision)
603 {
604     int digit_count, insert_count = 0, convert_to_exp = 0;
605     const char *chars_to_insert;
606     char *digits_start;
607 
608     /* search for the first non-digit character */
609     char *p = buffer;
610     if (*p == '-' || *p == '+')
611         /* Skip leading sign, if present.  I think this could only
612            ever be '-', but it can't hurt to check for both. */
613         ++p;
614     digits_start = p;
615     while (*p && Py_ISDIGIT(*p))
616         ++p;
617     digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
618 
619     if (*p == '.') {
620         if (Py_ISDIGIT(*(p+1))) {
621             /* Nothing to do, we already have a decimal
622                point and a digit after it */
623         }
624         else {
625             /* We have a decimal point, but no following
626                digit.  Insert a zero after the decimal. */
627             /* can't ever get here via PyOS_double_to_string */
628             assert(precision == -1);
629             ++p;
630             chars_to_insert = "0";
631             insert_count = 1;
632         }
633     }
634     else if (!(*p == 'e' || *p == 'E')) {
635         /* Don't add ".0" if we have an exponent. */
636         if (digit_count == precision) {
637             /* issue 5864: don't add a trailing .0 in the case
638                where the '%g'-formatted result already has as many
639                significant digits as were requested.  Switch to
640                exponential notation instead. */
641             convert_to_exp = 1;
642             /* no exponent, no point, and we shouldn't land here
643                for infs and nans, so we must be at the end of the
644                string. */
645             assert(*p == '\0');
646         }
647         else {
648             assert(precision == -1 || digit_count < precision);
649             chars_to_insert = ".0";
650             insert_count = 2;
651         }
652     }
653     if (insert_count) {
654         size_t buf_len = strlen(buffer);
655         if (buf_len + insert_count + 1 >= buf_size) {
656             /* If there is not enough room in the buffer
657                for the additional text, just skip it.  It's
658                not worth generating an error over. */
659         }
660         else {
661             memmove(p + insert_count, p,
662                 buffer + strlen(buffer) - p + 1);
663             memcpy(p, chars_to_insert, insert_count);
664         }
665     }
666     if (convert_to_exp) {
667         int written;
668         size_t buf_avail;
669         p = digits_start;
670         /* insert decimal point */
671         assert(digit_count >= 1);
672         memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
673         p[1] = '.';
674         p += digit_count+1;
675         assert(p <= buf_size+buffer);
676         buf_avail = buf_size+buffer-p;
677         if (buf_avail == 0)
678             return NULL;
679         /* Add exponent.  It's okay to use lower case 'e': we only
680            arrive here as a result of using the empty format code or
681            repr/str builtins and those never want an upper case 'E' */
682         written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
683         if (!(0 <= written &&
684               written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
685             /* output truncated, or something else bad happened */
686             return NULL;
687         remove_trailing_zeros(buffer);
688     }
689     return buffer;
690 }
691 
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120

/**
 * _PyOS_ascii_formatd:
 * @buffer: A buffer to place the resulting string in
 * @buf_size: The length of the buffer.
 * @format: The printf()-style format to use for the
 *          code to use for converting.
 * @d: The double to convert
 * @precision: The precision to use when formatting.
 *
 * Converts a double to a string, using the '.' as
 * decimal point. To format the number you pass in
 * a printf()-style format string. Allowed conversion
 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
 *
 * 'Z' is the same as 'g', except it always has a decimal and
 *     at least one digit after the decimal.
 *
 * The format must start with '%', must end with one of the allowed
 * conversion specifiers, and must not contain a single quote, a
 * lowercase 'l' or a second '%'.
 *
 * Return value: The pointer to the buffer with the converted string.
 * On failure (including an empty or otherwise invalid format) returns
 * NULL but does not set any Python exception.
 **/
static char *
_PyOS_ascii_formatd(char       *buffer,
                   size_t      buf_size,
                   const char *format,
                   double      d,
                   int         precision)
{
    char format_char;
    size_t format_len = strlen(format);

    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
       also with at least one character past the decimal. */
    char tmp_format[FLOAT_FORMATBUFLEN];

    /* An empty format has no conversion character at all; reject it
       before indexing format[format_len - 1], which would otherwise read
       the byte in front of the string. */
    if (format_len == 0)
        return NULL;

    /* The last character in the format string must be the format char */
    format_char = format[format_len - 1];

    if (format[0] != '%')
        return NULL;

    /* Reject formats that, after the first character, contain a single
       quote, a lowercase l, or a percent. */
    if (strpbrk(format + 1, "'l%"))
        return NULL;

    /* Also curious about this function is that it accepts format strings
       like "%xg", which are invalid for floats.  In general, the
       interface to this function is not very good, but changing it is
       difficult because it's a public API. */

    if (!(format_char == 'e' || format_char == 'E' ||
          format_char == 'f' || format_char == 'F' ||
          format_char == 'g' || format_char == 'G' ||
          format_char == 'Z'))
        return NULL;

    /* Map 'Z' format_char to 'g', by copying the format string and
       replacing the final char with a 'g' */
    if (format_char == 'Z') {
        if (format_len + 1 >= sizeof(tmp_format)) {
            /* The format won't fit in our copy.  Error out.  In
               practice, this will never happen and will be
               detected by returning NULL */
            return NULL;
        }
        strcpy(tmp_format, format);
        tmp_format[format_len - 1] = 'g';
        format = tmp_format;
    }


    /* Have PyOS_snprintf do the hard work */
    PyOS_snprintf(buffer, buf_size, format, d);

    /* Do various fixups on the return string */

    /* Get the current locale, and find the decimal point string.
       Convert that string back to a dot. */
    change_decimal_from_locale_to_dot(buffer);

    /* If an exponent exists, ensure that the exponent is at least
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
       for the extra zeros.  Also, if there are more than
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
       back to MIN_EXPONENT_DIGITS */
    ensure_minimum_exponent_length(buffer, buf_size);

    /* If format_char is 'Z', make sure we have at least one character
       after the decimal point (and make sure we have a decimal point);
       also switch to exponential notation in some edge cases where the
       extra character would produce more significant digits than we
       really want. */
    if (format_char == 'Z')
        buffer = ensure_decimal_point(buffer, buf_size, precision);

    return buffer;
}
794 
795 /* The fallback code to use if _Py_dg_dtoa is not available. */
796 
PyOS_double_to_string(double val,char format_code,int precision,int flags,int * type)797 char * PyOS_double_to_string(double val,
798                                          char format_code,
799                                          int precision,
800                                          int flags,
801                                          int *type)
802 {
803     char format[32];
804     Py_ssize_t bufsize;
805     char *buf;
806     int t, exp;
807     int upper = 0;
808 
809     /* Validate format_code, and map upper and lower case */
810     switch (format_code) {
811     case 'e':          /* exponent */
812     case 'f':          /* fixed */
813     case 'g':          /* general */
814         break;
815     case 'E':
816         upper = 1;
817         format_code = 'e';
818         break;
819     case 'F':
820         upper = 1;
821         format_code = 'f';
822         break;
823     case 'G':
824         upper = 1;
825         format_code = 'g';
826         break;
827     case 'r':          /* repr format */
828         /* Supplied precision is unused, must be 0. */
829         if (precision != 0) {
830             PyErr_BadInternalCall();
831             return NULL;
832         }
833         /* The repr() precision (17 significant decimal digits) is the
834            minimal number that is guaranteed to have enough precision
835            so that if the number is read back in the exact same binary
836            value is recreated.  This is true for IEEE floating point
837            by design, and also happens to work for all other modern
838            hardware. */
839         precision = 17;
840         format_code = 'g';
841         break;
842     default:
843         PyErr_BadInternalCall();
844         return NULL;
845     }
846 
847     /* Here's a quick-and-dirty calculation to figure out how big a buffer
848        we need.  In general, for a finite float we need:
849 
850          1 byte for each digit of the decimal significand, and
851 
852          1 for a possible sign
853          1 for a possible decimal point
854          2 for a possible [eE][+-]
855          1 for each digit of the exponent;  if we allow 19 digits
856            total then we're safe up to exponents of 2**63.
857          1 for the trailing nul byte
858 
859        This gives a total of 24 + the number of digits in the significand,
860        and the number of digits in the significand is:
861 
862          for 'g' format: at most precision, except possibly
863            when precision == 0, when it's 1.
864          for 'e' format: precision+1
865          for 'f' format: precision digits after the point, at least 1
866            before.  To figure out how many digits appear before the point
867            we have to examine the size of the number.  If fabs(val) < 1.0
868            then there will be only one digit before the point.  If
869            fabs(val) >= 1.0, then there are at most
870 
871          1+floor(log10(ceiling(fabs(val))))
872 
873            digits before the point (where the 'ceiling' allows for the
874            possibility that the rounding rounds the integer part of val
875            up).  A safe upper bound for the above quantity is
876            1+floor(exp/3), where exp is the unique integer such that 0.5
877            <= fabs(val)/2**exp < 1.0.  This exp can be obtained from
878            frexp.
879 
880        So we allow room for precision+1 digits for all formats, plus an
881        extra floor(exp/3) digits for 'f' format.
882 
883     */
884 
885     if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
886         /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
887         bufsize = 5;
888     else {
889         bufsize = 25 + precision;
890         if (format_code == 'f' && fabs(val) >= 1.0) {
891             frexp(val, &exp);
892             bufsize += exp/3;
893         }
894     }
895 
896     buf = PyMem_Malloc(bufsize);
897     if (buf == NULL) {
898         PyErr_NoMemory();
899         return NULL;
900     }
901 
902     /* Handle nan and inf. */
903     if (Py_IS_NAN(val)) {
904         strcpy(buf, "nan");
905         t = Py_DTST_NAN;
906     } else if (Py_IS_INFINITY(val)) {
907         if (copysign(1., val) == 1.)
908             strcpy(buf, "inf");
909         else
910             strcpy(buf, "-inf");
911         t = Py_DTST_INFINITE;
912     } else {
913         t = Py_DTST_FINITE;
914         if (flags & Py_DTSF_ADD_DOT_0)
915             format_code = 'Z';
916 
917         PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
918                       (flags & Py_DTSF_ALT ? "#" : ""), precision,
919                       format_code);
920         _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
921     }
922 
923     /* Add sign when requested.  It's convenient (esp. when formatting
924      complex numbers) to include a sign even for inf and nan. */
925     if (flags & Py_DTSF_SIGN && buf[0] != '-') {
926         size_t len = strlen(buf);
927         /* the bufsize calculations above should ensure that we've got
928            space to add a sign */
929         assert((size_t)bufsize >= len+2);
930         memmove(buf+1, buf, len+1);
931         buf[0] = '+';
932     }
933     if (upper) {
934         /* Convert to upper case. */
935         char *p1;
936         for (p1 = buf; *p1; p1++)
937             *p1 = Py_TOUPPER(*p1);
938     }
939 
940     if (type)
941         *type = t;
942     return buf;
943 }
944 
945 #else
946 
947 /* _Py_dg_dtoa is available. */
948 
/* I'm using a lookup table here so that I don't have to invent a non-locale
   specific way to convert to uppercase.  There is one table per case; the
   OFS_* macros are the indices shared by both tables, and the caller
   selects a table and passes it down as 'float_strings'. */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2

/* Lower-case fragments, used by default.

   The lengths of these are known to the code below, so don't change them:
   format_float_short copies exactly 3 bytes from the OFS_INF and OFS_NAN
   entries and uses only the first character of the OFS_E entry. */
static const char * const lc_float_strings[] = {
    "inf",
    "nan",
    "e",
};
/* Upper-case fragments; PyOS_double_to_string switches to this table for
   the 'E', 'F' and 'G' format codes.  The same length constraints apply. */
static const char * const uc_float_strings[] = {
    "INF",
    "NAN",
    "E",
};
966 
967 
968 /* Convert a double d to a string, and return a PyMem_Malloc'd block of
969    memory contain the resulting string.
970 
971    Arguments:
972      d is the double to be converted
973      format_code is one of 'e', 'f', 'g', 'r'.  'e', 'f' and 'g'
974        correspond to '%e', '%f' and '%g';  'r' corresponds to repr.
975      mode is one of '0', '2' or '3', and is completely determined by
976        format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
977      precision is the desired precision
978      always_add_sign is nonzero if a '+' sign should be included for positive
979        numbers
980      add_dot_0_if_integer is nonzero if integers in non-exponential form
981        should have ".0" added.  Only applies to format codes 'r' and 'g'.
982      use_alt_formatting is nonzero if alternative formatting should be
983        used.  Only applies to format codes 'e', 'f' and 'g'.  For code 'g',
984        at most one of use_alt_formatting and add_dot_0_if_integer should
985        be nonzero.
986      type, if non-NULL, will be set to one of these constants to identify
987        the type of the 'd' argument:
988      Py_DTST_FINITE
989      Py_DTST_INFINITE
990      Py_DTST_NAN
991 
992    Returns a PyMem_Malloc'd block of memory containing the resulting string,
993     or NULL on error. If NULL is returned, the Python error has been set.
994  */
995 
996 static char *
format_float_short(double d,char format_code,int mode,int precision,int always_add_sign,int add_dot_0_if_integer,int use_alt_formatting,const char * const * float_strings,int * type)997 format_float_short(double d, char format_code,
998                    int mode, int precision,
999                    int always_add_sign, int add_dot_0_if_integer,
1000                    int use_alt_formatting, const char * const *float_strings,
1001                    int *type)
1002 {
1003     char *buf = NULL;
1004     char *p = NULL;
1005     Py_ssize_t bufsize = 0;
1006     char *digits, *digits_end;
1007     int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
1008     Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
1009     _Py_SET_53BIT_PRECISION_HEADER;
1010 
1011     /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
1012        Must be matched by a call to _Py_dg_freedtoa. */
1013     _Py_SET_53BIT_PRECISION_START;
1014     digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
1015                          &digits_end);
1016     _Py_SET_53BIT_PRECISION_END;
1017 
1018     decpt = (Py_ssize_t)decpt_as_int;
1019     if (digits == NULL) {
1020         /* The only failure mode is no memory. */
1021         PyErr_NoMemory();
1022         goto exit;
1023     }
1024     assert(digits_end != NULL && digits_end >= digits);
1025     digits_len = digits_end - digits;
1026 
1027     if (digits_len && !Py_ISDIGIT(digits[0])) {
1028         /* Infinities and nans here; adapt Gay's output,
1029            so convert Infinity to inf and NaN to nan, and
1030            ignore sign of nan. Then return. */
1031 
1032         /* ignore the actual sign of a nan */
1033         if (digits[0] == 'n' || digits[0] == 'N')
1034             sign = 0;
1035 
1036         /* We only need 5 bytes to hold the result "+inf\0" . */
1037         bufsize = 5; /* Used later in an assert. */
1038         buf = (char *)PyMem_Malloc(bufsize);
1039         if (buf == NULL) {
1040             PyErr_NoMemory();
1041             goto exit;
1042         }
1043         p = buf;
1044 
1045         if (sign == 1) {
1046             *p++ = '-';
1047         }
1048         else if (always_add_sign) {
1049             *p++ = '+';
1050         }
1051         if (digits[0] == 'i' || digits[0] == 'I') {
1052             strncpy(p, float_strings[OFS_INF], 3);
1053             p += 3;
1054 
1055             if (type)
1056                 *type = Py_DTST_INFINITE;
1057         }
1058         else if (digits[0] == 'n' || digits[0] == 'N') {
1059             strncpy(p, float_strings[OFS_NAN], 3);
1060             p += 3;
1061 
1062             if (type)
1063                 *type = Py_DTST_NAN;
1064         }
1065         else {
1066             /* shouldn't get here: Gay's code should always return
1067                something starting with a digit, an 'I',  or 'N' */
1068             Py_UNREACHABLE();
1069         }
1070         goto exit;
1071     }
1072 
1073     /* The result must be finite (not inf or nan). */
1074     if (type)
1075         *type = Py_DTST_FINITE;
1076 
1077 
1078     /* We got digits back, format them.  We may need to pad 'digits'
1079        either on the left or right (or both) with extra zeros, so in
1080        general the resulting string has the form
1081 
1082          [<sign>]<zeros><digits><zeros>[<exponent>]
1083 
1084        where either of the <zeros> pieces could be empty, and there's a
1085        decimal point that could appear either in <digits> or in the
1086        leading or trailing <zeros>.
1087 
1088        Imagine an infinite 'virtual' string vdigits, consisting of the
1089        string 'digits' (starting at index 0) padded on both the left and
1090        right with infinite strings of zeros.  We want to output a slice
1091 
1092          vdigits[vdigits_start : vdigits_end]
1093 
1094        of this virtual string.  Thus if vdigits_start < 0 then we'll end
1095        up producing some leading zeros; if vdigits_end > digits_len there
1096        will be trailing zeros in the output.  The next section of code
1097        determines whether to use an exponent or not, figures out the
1098        position 'decpt' of the decimal point, and computes 'vdigits_start'
1099        and 'vdigits_end'. */
1100     vdigits_end = digits_len;
1101     switch (format_code) {
1102     case 'e':
1103         use_exp = 1;
1104         vdigits_end = precision;
1105         break;
1106     case 'f':
1107         vdigits_end = decpt + precision;
1108         break;
1109     case 'g':
1110         if (decpt <= -4 || decpt >
1111             (add_dot_0_if_integer ? precision-1 : precision))
1112             use_exp = 1;
1113         if (use_alt_formatting)
1114             vdigits_end = precision;
1115         break;
1116     case 'r':
1117         /* convert to exponential format at 1e16.  We used to convert
1118            at 1e17, but that gives odd-looking results for some values
1119            when a 16-digit 'shortest' repr is padded with bogus zeros.
1120            For example, repr(2e16+8) would give 20000000000000010.0;
1121            the true value is 20000000000000008.0. */
1122         if (decpt <= -4 || decpt > 16)
1123             use_exp = 1;
1124         break;
1125     default:
1126         PyErr_BadInternalCall();
1127         goto exit;
1128     }
1129 
1130     /* if using an exponent, reset decimal point position to 1 and adjust
1131        exponent accordingly.*/
1132     if (use_exp) {
1133         exp = (int)decpt - 1;
1134         decpt = 1;
1135     }
1136     /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1137        decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1138     vdigits_start = decpt <= 0 ? decpt-1 : 0;
1139     if (!use_exp && add_dot_0_if_integer)
1140         vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1141     else
1142         vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
1143 
1144     /* double check inequalities */
1145     assert(vdigits_start <= 0 &&
1146            0 <= digits_len &&
1147            digits_len <= vdigits_end);
1148     /* decimal point should be in (vdigits_start, vdigits_end] */
1149     assert(vdigits_start < decpt && decpt <= vdigits_end);
1150 
1151     /* Compute an upper bound how much memory we need. This might be a few
1152        chars too long, but no big deal. */
1153     bufsize =
1154         /* sign, decimal point and trailing 0 byte */
1155         3 +
1156 
1157         /* total digit count (including zero padding on both sides) */
1158         (vdigits_end - vdigits_start) +
1159 
1160         /* exponent "e+100", max 3 numerical digits */
1161         (use_exp ? 5 : 0);
1162 
1163     /* Now allocate the memory and initialize p to point to the start of
1164        it. */
1165     buf = (char *)PyMem_Malloc(bufsize);
1166     if (buf == NULL) {
1167         PyErr_NoMemory();
1168         goto exit;
1169     }
1170     p = buf;
1171 
1172     /* Add a negative sign if negative, and a plus sign if non-negative
1173        and always_add_sign is true. */
1174     if (sign == 1)
1175         *p++ = '-';
1176     else if (always_add_sign)
1177         *p++ = '+';
1178 
1179     /* note that exactly one of the three 'if' conditions is true,
1180        so we include exactly one decimal point */
1181     /* Zero padding on left of digit string */
1182     if (decpt <= 0) {
1183         memset(p, '0', decpt-vdigits_start);
1184         p += decpt - vdigits_start;
1185         *p++ = '.';
1186         memset(p, '0', 0-decpt);
1187         p += 0-decpt;
1188     }
1189     else {
1190         memset(p, '0', 0-vdigits_start);
1191         p += 0 - vdigits_start;
1192     }
1193 
1194     /* Digits, with included decimal point */
1195     if (0 < decpt && decpt <= digits_len) {
1196         strncpy(p, digits, decpt-0);
1197         p += decpt-0;
1198         *p++ = '.';
1199         strncpy(p, digits+decpt, digits_len-decpt);
1200         p += digits_len-decpt;
1201     }
1202     else {
1203         strncpy(p, digits, digits_len);
1204         p += digits_len;
1205     }
1206 
1207     /* And zeros on the right */
1208     if (digits_len < decpt) {
1209         memset(p, '0', decpt-digits_len);
1210         p += decpt-digits_len;
1211         *p++ = '.';
1212         memset(p, '0', vdigits_end-decpt);
1213         p += vdigits_end-decpt;
1214     }
1215     else {
1216         memset(p, '0', vdigits_end-digits_len);
1217         p += vdigits_end-digits_len;
1218     }
1219 
1220     /* Delete a trailing decimal pt unless using alternative formatting. */
1221     if (p[-1] == '.' && !use_alt_formatting)
1222         p--;
1223 
1224     /* Now that we've done zero padding, add an exponent if needed. */
1225     if (use_exp) {
1226         *p++ = float_strings[OFS_E][0];
1227         exp_len = sprintf(p, "%+.02d", exp);
1228         p += exp_len;
1229     }
1230   exit:
1231     if (buf) {
1232         *p = '\0';
1233         /* It's too late if this fails, as we've already stepped on
1234            memory that isn't ours. But it's an okay debugging test. */
1235         assert(p-buf < bufsize);
1236     }
1237     if (digits)
1238         _Py_dg_freedtoa(digits);
1239 
1240     return buf;
1241 }
1242 
1243 
PyOS_double_to_string(double val,char format_code,int precision,int flags,int * type)1244 char * PyOS_double_to_string(double val,
1245                                          char format_code,
1246                                          int precision,
1247                                          int flags,
1248                                          int *type)
1249 {
1250     const char * const *float_strings = lc_float_strings;
1251     int mode;
1252 
1253     /* Validate format_code, and map upper and lower case. Compute the
1254        mode and make any adjustments as needed. */
1255     switch (format_code) {
1256     /* exponent */
1257     case 'E':
1258         float_strings = uc_float_strings;
1259         format_code = 'e';
1260         /* Fall through. */
1261     case 'e':
1262         mode = 2;
1263         precision++;
1264         break;
1265 
1266     /* fixed */
1267     case 'F':
1268         float_strings = uc_float_strings;
1269         format_code = 'f';
1270         /* Fall through. */
1271     case 'f':
1272         mode = 3;
1273         break;
1274 
1275     /* general */
1276     case 'G':
1277         float_strings = uc_float_strings;
1278         format_code = 'g';
1279         /* Fall through. */
1280     case 'g':
1281         mode = 2;
1282         /* precision 0 makes no sense for 'g' format; interpret as 1 */
1283         if (precision == 0)
1284             precision = 1;
1285         break;
1286 
1287     /* repr format */
1288     case 'r':
1289         mode = 0;
1290         /* Supplied precision is unused, must be 0. */
1291         if (precision != 0) {
1292             PyErr_BadInternalCall();
1293             return NULL;
1294         }
1295         break;
1296 
1297     default:
1298         PyErr_BadInternalCall();
1299         return NULL;
1300     }
1301 
1302     return format_float_short(val, format_code, mode, precision,
1303                               flags & Py_DTSF_SIGN,
1304                               flags & Py_DTSF_ADD_DOT_0,
1305                               flags & Py_DTSF_ALT,
1306                               float_strings, type);
1307 }
1308 #endif /* ifdef PY_NO_SHORT_FLOAT_REPR */
1309