1 /* -*- Mode: C; c-file-style: "python" -*- */
2
3 #include <Python.h>
4 #include <locale.h>
5
/* Prefix comparison used for nan and inf detection.  Each character of s is
   passed through Py_TOLOWER before being compared with the corresponding
   character of t, so t must be supplied in lower case.  Returns 1 when all
   of t matched the start of s, 0 otherwise. */

static int
case_insensitive_match(const char *s, const char *t)
{
    for (; *t != '\0'; s++, t++) {
        /* A NUL in s can never equal a non-NUL character of t, so the
           scan cannot run past the end of s. */
        if (Py_TOLOWER(*s) != *t) {
            return 0;
        }
    }
    return 1;
}
18
19 /* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
20 "infinity", with an optional leading sign of "+" or "-". On success,
21 return the NaN or Infinity as a double and set *endptr to point just beyond
22 the successfully parsed portion of the string. On failure, return -1.0 and
23 set *endptr to point to the start of the string. */
24
25 #ifndef PY_NO_SHORT_FLOAT_REPR
26
/* Variant compiled when PY_NO_SHORT_FLOAT_REPR is not defined: the special
   values are obtained from _Py_dg_infinity() and _Py_dg_stdnan(), each of
   which is passed the sign flag. */
double
_Py_parse_inf_or_nan(const char *p, char **endptr)
{
    const char *cursor = p;
    const int negative = (*cursor == '-');
    double value;

    /* Step over an optional leading sign. */
    if (negative || *cursor == '+') {
        cursor++;
    }

    if (case_insensitive_match(cursor, "inf")) {
        cursor += 3;
        /* Also accept the long spelling "infinity". */
        if (case_insensitive_match(cursor, "inity")) {
            cursor += 5;
        }
        value = _Py_dg_infinity(negative);
    }
    else if (case_insensitive_match(cursor, "nan")) {
        cursor += 3;
        value = _Py_dg_stdnan(negative);
    }
    else {
        /* Nothing recognised: consume no input and report failure. */
        *endptr = (char *)p;
        return -1.0;
    }

    *endptr = (char *)cursor;
    return value;
}
59
60 #else
61
62 double
_Py_parse_inf_or_nan(const char * p,char ** endptr)63 _Py_parse_inf_or_nan(const char *p, char **endptr)
64 {
65 double retval;
66 const char *s;
67 int negate = 0;
68
69 s = p;
70 if (*s == '-') {
71 negate = 1;
72 s++;
73 }
74 else if (*s == '+') {
75 s++;
76 }
77 if (case_insensitive_match(s, "inf")) {
78 s += 3;
79 if (case_insensitive_match(s, "inity"))
80 s += 5;
81 retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
82 }
83 #ifdef Py_NAN
84 else if (case_insensitive_match(s, "nan")) {
85 s += 3;
86 retval = negate ? -Py_NAN : Py_NAN;
87 }
88 #endif
89 else {
90 s = p;
91 retval = -1.0;
92 }
93 *endptr = (char *)s;
94 return retval;
95 }
96
97 #endif
98
/**
 * _PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value; must not be NULL.
 * @endptr:  must not be NULL (it is dereferenced unconditionally); on
 *           return it points to the character after the last character
 *           used in the conversion.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale.  It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * If the correct value would cause overflow, plus or minus HUGE_VAL
 * is returned (according to the sign of the value), and ERANGE is
 * stored in errno.  If the correct value would cause underflow,
 * zero is returned and ERANGE is stored in errno.
 * If memory allocation fails, ENOMEM is stored in errno.
 *
 * This function resets errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the double value.
 **/
127
128 #ifndef PY_NO_SHORT_FLOAT_REPR
129
130 static double
_PyOS_ascii_strtod(const char * nptr,char ** endptr)131 _PyOS_ascii_strtod(const char *nptr, char **endptr)
132 {
133 double result;
134 _Py_SET_53BIT_PRECISION_HEADER;
135
136 assert(nptr != NULL);
137 /* Set errno to zero, so that we can distinguish zero results
138 and underflows */
139 errno = 0;
140
141 _Py_SET_53BIT_PRECISION_START;
142 result = _Py_dg_strtod(nptr, endptr);
143 _Py_SET_53BIT_PRECISION_END;
144
145 if (*endptr == nptr)
146 /* string might represent an inf or nan */
147 result = _Py_parse_inf_or_nan(nptr, endptr);
148
149 return result;
150
151 }
152
153 #else
154
/*
   Fallback built on the system strtod().  Since strtod is locale aware, a
   '.' in the input may first have to be replaced by the current locale's
   decimal point, which is done on a temporary copy of the number.

   Note that unlike _Py_dg_strtod, the system strtod may not always give
   correctly rounded results.

   nptr must not be NULL and endptr is written unconditionally.
     - On success the value is returned and *endptr points just past the
       text that was consumed.
     - On invalid input *endptr is set to nptr, errno is set to EINVAL and
       -1.0 is returned.
     - If the temporary copy cannot be allocated, *endptr is set to nptr,
       errno is set to ENOMEM and -1.0 is returned.
*/

static double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos = NULL;
    double val;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p;
    const char *decimal_point_pos = NULL;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    /* Parse infinities and nans */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr) {
        return val;
    }
    /* From here on val holds the -1.0 failure value until strtod runs. */

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725) */
    p = nptr;
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X')) {
        goto invalid_string;
    }

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.') {
        goto invalid_string;
    }

    digits_pos = p;
    if (decimal_point[0] != '.' || decimal_point[1] != 0) {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p)) {
            p++;
        }

        if (*p == '.') {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p)) {
                p++;
            }
            if (*p == 'e' || *p == 'E') {
                p++;
            }
            if (*p == '+' || *p == '-') {
                p++;
            }
            while (Py_ISDIGIT(*p)) {
                p++;
            }
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0) {
            /* Python bug #1417699 */
            goto invalid_string;
        }
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos) {
            /* Translate the stop position from the copy back into the
               caller's string.  fail_pos points into `copy` while
               decimal_point_pos points into the original input, so the
               two must be compared as offsets from the start of their
               own buffers, never as raw pointers.  If strtod got past
               the decimal point, the copy is (decimal_point_len - 1)
               bytes longer than the original up to that position. */
            if (fail_pos - copy > decimal_point_pos - digits_pos) {
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            }
            else {
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
            }
        }

        PyMem_FREE(copy);
    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    if (fail_pos == digits_pos) {
        goto invalid_string;
    }

    if (negate && fail_pos != nptr) {
        val = -val;
    }
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
309
310 #endif
311
312 /* PyOS_string_to_double converts a null-terminated byte string s (interpreted
313 as a string of ASCII characters) to a float. The string should not have
314 leading or trailing whitespace. The conversion is independent of the
315 current locale.
316
317 If endptr is NULL, try to convert the whole string. Raise ValueError and
318 return -1.0 if the string is not a valid representation of a floating-point
319 number.
320
321 If endptr is non-NULL, try to convert as much of the string as possible.
322 If no initial segment of the string is the valid representation of a
323 floating-point number then *endptr is set to point to the beginning of the
324 string, -1.0 is returned and again ValueError is raised.
325
326 On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
327 if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
328 exception is raised. Otherwise, overflow_exception should point to
329 a Python exception, this exception will be raised, -1.0 will be returned,
330 and *endptr will point just past the end of the converted value.
331
332 If any other failure occurs (for example lack of memory), -1.0 is returned
333 and the appropriate Python exception will have been set.
334 */
335
336 double
PyOS_string_to_double(const char * s,char ** endptr,PyObject * overflow_exception)337 PyOS_string_to_double(const char *s,
338 char **endptr,
339 PyObject *overflow_exception)
340 {
341 double x, result=-1.0;
342 char *fail_pos;
343
344 errno = 0;
345 PyFPE_START_PROTECT("PyOS_string_to_double", return -1.0)
346 x = _PyOS_ascii_strtod(s, &fail_pos);
347 PyFPE_END_PROTECT(x)
348
349 if (errno == ENOMEM) {
350 PyErr_NoMemory();
351 fail_pos = (char *)s;
352 }
353 else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
354 PyErr_Format(PyExc_ValueError,
355 "could not convert string to float: "
356 "'%.200s'", s);
357 else if (fail_pos == s)
358 PyErr_Format(PyExc_ValueError,
359 "could not convert string to float: "
360 "'%.200s'", s);
361 else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
362 PyErr_Format(overflow_exception,
363 "value too large to convert to float: "
364 "'%.200s'", s);
365 else
366 result = x;
367
368 if (endptr != NULL)
369 *endptr = fail_pos;
370 return result;
371 }
372
373 /* Remove underscores that follow the underscore placement rule from
374 the string and then call the `innerfunc` function on the result.
375 It should return a new object or NULL on exception.
376
377 `what` is used for the error message emitted when underscores are detected
378 that don't follow the rule. `arg` is an opaque pointer passed to the inner
379 function.
380
381 This is used to implement underscore-agnostic conversion for floats
382 and complex numbers.
383 */
384 PyObject *
_Py_string_to_number_with_underscores(const char * s,Py_ssize_t orig_len,const char * what,PyObject * obj,void * arg,PyObject * (* innerfunc)(const char *,Py_ssize_t,void *))385 _Py_string_to_number_with_underscores(
386 const char *s, Py_ssize_t orig_len, const char *what, PyObject *obj, void *arg,
387 PyObject *(*innerfunc)(const char *, Py_ssize_t, void *))
388 {
389 char prev;
390 const char *p, *last;
391 char *dup, *end;
392 PyObject *result;
393
394 assert(s[orig_len] == '\0');
395
396 if (strchr(s, '_') == NULL) {
397 return innerfunc(s, orig_len, arg);
398 }
399
400 dup = PyMem_Malloc(orig_len + 1);
401 if (dup == NULL) {
402 return PyErr_NoMemory();
403 }
404 end = dup;
405 prev = '\0';
406 last = s + orig_len;
407 for (p = s; *p; p++) {
408 if (*p == '_') {
409 /* Underscores are only allowed after digits. */
410 if (!(prev >= '0' && prev <= '9')) {
411 goto error;
412 }
413 }
414 else {
415 *end++ = *p;
416 /* Underscores are only allowed before digits. */
417 if (prev == '_' && !(*p >= '0' && *p <= '9')) {
418 goto error;
419 }
420 }
421 prev = *p;
422 }
423 /* Underscores are not allowed at the end. */
424 if (prev == '_') {
425 goto error;
426 }
427 /* No embedded NULs allowed. */
428 if (p != last) {
429 goto error;
430 }
431 *end = '\0';
432 result = innerfunc(dup, end - dup, arg);
433 PyMem_Free(dup);
434 return result;
435
436 error:
437 PyMem_Free(dup);
438 PyErr_Format(PyExc_ValueError,
439 "could not convert string to %s: "
440 "%R", what, obj);
441 return NULL;
442 }
443
444 #ifdef PY_NO_SHORT_FLOAT_REPR
445
446 /* Given a string that may have a decimal point in the current
447 locale, change it back to a dot. Since the string cannot get
448 longer, no need for a maximum buffer size parameter. */
449 Py_LOCAL_INLINE(void)
change_decimal_from_locale_to_dot(char * buffer)450 change_decimal_from_locale_to_dot(char* buffer)
451 {
452 struct lconv *locale_data = localeconv();
453 const char *decimal_point = locale_data->decimal_point;
454
455 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
456 size_t decimal_point_len = strlen(decimal_point);
457
458 if (*buffer == '+' || *buffer == '-')
459 buffer++;
460 while (Py_ISDIGIT(*buffer))
461 buffer++;
462 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
463 *buffer = '.';
464 buffer++;
465 if (decimal_point_len > 1) {
466 /* buffer needs to get smaller */
467 size_t rest_len = strlen(buffer +
468 (decimal_point_len - 1));
469 memmove(buffer,
470 buffer + (decimal_point_len - 1),
471 rest_len);
472 buffer[rest_len] = 0;
473 }
474 }
475 }
476 }
477
478
479 /* From the C99 standard, section 7.19.6:
480 The exponent always contains at least two digits, and only as many more digits
481 as necessary to represent the exponent.
482 */
483 #define MIN_EXPONENT_DIGITS 2
484
485 /* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
486 in length. */
487 Py_LOCAL_INLINE(void)
ensure_minimum_exponent_length(char * buffer,size_t buf_size)488 ensure_minimum_exponent_length(char* buffer, size_t buf_size)
489 {
490 char *p = strpbrk(buffer, "eE");
491 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
492 char *start = p + 2;
493 int exponent_digit_cnt = 0;
494 int leading_zero_cnt = 0;
495 int in_leading_zeros = 1;
496 int significant_digit_cnt;
497
498 /* Skip over the exponent and the sign. */
499 p += 2;
500
501 /* Find the end of the exponent, keeping track of leading
502 zeros. */
503 while (*p && Py_ISDIGIT(*p)) {
504 if (in_leading_zeros && *p == '0')
505 ++leading_zero_cnt;
506 if (*p != '0')
507 in_leading_zeros = 0;
508 ++p;
509 ++exponent_digit_cnt;
510 }
511
512 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
513 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
514 /* If there are 2 exactly digits, we're done,
515 regardless of what they contain */
516 }
517 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
518 int extra_zeros_cnt;
519
520 /* There are more than 2 digits in the exponent. See
521 if we can delete some of the leading zeros */
522 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
523 significant_digit_cnt = MIN_EXPONENT_DIGITS;
524 extra_zeros_cnt = exponent_digit_cnt -
525 significant_digit_cnt;
526
527 /* Delete extra_zeros_cnt worth of characters from the
528 front of the exponent */
529 assert(extra_zeros_cnt >= 0);
530
531 /* Add one to significant_digit_cnt to copy the
532 trailing 0 byte, thus setting the length */
533 memmove(start,
534 start + extra_zeros_cnt,
535 significant_digit_cnt + 1);
536 }
537 else {
538 /* If there are fewer than 2 digits, add zeros
539 until there are 2, if there's enough room */
540 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
541 if (start + zeros + exponent_digit_cnt + 1
542 < buffer + buf_size) {
543 memmove(start + zeros, start,
544 exponent_digit_cnt + 1);
545 memset(start, '0', zeros);
546 }
547 }
548 }
549 }
550
551 /* Remove trailing zeros after the decimal point from a numeric string; also
552 remove the decimal point if all digits following it are zero. The numeric
553 string must end in '\0', and should not have any leading or trailing
554 whitespace. Assumes that the decimal point is '.'. */
555 Py_LOCAL_INLINE(void)
remove_trailing_zeros(char * buffer)556 remove_trailing_zeros(char *buffer)
557 {
558 char *old_fraction_end, *new_fraction_end, *end, *p;
559
560 p = buffer;
561 if (*p == '-' || *p == '+')
562 /* Skip leading sign, if present */
563 ++p;
564 while (Py_ISDIGIT(*p))
565 ++p;
566
567 /* if there's no decimal point there's nothing to do */
568 if (*p++ != '.')
569 return;
570
571 /* scan any digits after the point */
572 while (Py_ISDIGIT(*p))
573 ++p;
574 old_fraction_end = p;
575
576 /* scan up to ending '\0' */
577 while (*p != '\0')
578 p++;
579 /* +1 to make sure that we move the null byte as well */
580 end = p+1;
581
582 /* scan back from fraction_end, looking for removable zeros */
583 p = old_fraction_end;
584 while (*(p-1) == '0')
585 --p;
586 /* and remove point if we've got that far */
587 if (*(p-1) == '.')
588 --p;
589 new_fraction_end = p;
590
591 memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
592 }
593
594 /* Ensure that buffer has a decimal point in it. The decimal point will not
595 be in the current locale, it will always be '.'. Don't add a decimal point
596 if an exponent is present. Also, convert to exponential notation where
597 adding a '.0' would produce too many significant digits (see issue 5864).
598
599 Returns a pointer to the fixed buffer, or NULL on failure.
600 */
601 Py_LOCAL_INLINE(char *)
ensure_decimal_point(char * buffer,size_t buf_size,int precision)602 ensure_decimal_point(char* buffer, size_t buf_size, int precision)
603 {
604 int digit_count, insert_count = 0, convert_to_exp = 0;
605 const char *chars_to_insert;
606 char *digits_start;
607
608 /* search for the first non-digit character */
609 char *p = buffer;
610 if (*p == '-' || *p == '+')
611 /* Skip leading sign, if present. I think this could only
612 ever be '-', but it can't hurt to check for both. */
613 ++p;
614 digits_start = p;
615 while (*p && Py_ISDIGIT(*p))
616 ++p;
617 digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
618
619 if (*p == '.') {
620 if (Py_ISDIGIT(*(p+1))) {
621 /* Nothing to do, we already have a decimal
622 point and a digit after it */
623 }
624 else {
625 /* We have a decimal point, but no following
626 digit. Insert a zero after the decimal. */
627 /* can't ever get here via PyOS_double_to_string */
628 assert(precision == -1);
629 ++p;
630 chars_to_insert = "0";
631 insert_count = 1;
632 }
633 }
634 else if (!(*p == 'e' || *p == 'E')) {
635 /* Don't add ".0" if we have an exponent. */
636 if (digit_count == precision) {
637 /* issue 5864: don't add a trailing .0 in the case
638 where the '%g'-formatted result already has as many
639 significant digits as were requested. Switch to
640 exponential notation instead. */
641 convert_to_exp = 1;
642 /* no exponent, no point, and we shouldn't land here
643 for infs and nans, so we must be at the end of the
644 string. */
645 assert(*p == '\0');
646 }
647 else {
648 assert(precision == -1 || digit_count < precision);
649 chars_to_insert = ".0";
650 insert_count = 2;
651 }
652 }
653 if (insert_count) {
654 size_t buf_len = strlen(buffer);
655 if (buf_len + insert_count + 1 >= buf_size) {
656 /* If there is not enough room in the buffer
657 for the additional text, just skip it. It's
658 not worth generating an error over. */
659 }
660 else {
661 memmove(p + insert_count, p,
662 buffer + strlen(buffer) - p + 1);
663 memcpy(p, chars_to_insert, insert_count);
664 }
665 }
666 if (convert_to_exp) {
667 int written;
668 size_t buf_avail;
669 p = digits_start;
670 /* insert decimal point */
671 assert(digit_count >= 1);
672 memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
673 p[1] = '.';
674 p += digit_count+1;
675 assert(p <= buf_size+buffer);
676 buf_avail = buf_size+buffer-p;
677 if (buf_avail == 0)
678 return NULL;
679 /* Add exponent. It's okay to use lower case 'e': we only
680 arrive here as a result of using the empty format code or
681 repr/str builtins and those never want an upper case 'E' */
682 written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
683 if (!(0 <= written &&
684 written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
685 /* output truncated, or something else bad happened */
686 return NULL;
687 remove_trailing_zeros(buffer);
688 }
689 return buffer;
690 }
691
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120

/**
 * _PyOS_ascii_formatd:
 * @buffer:    A buffer to place the resulting string in
 * @buf_size:  The length of the buffer.
 * @format:    The printf()-style format to use for converting.  It must
 *             start with '%' and end with the conversion character.
 * @d:         The double to convert
 * @precision: The precision to use when formatting.  Only consulted for
 *             the 'Z' conversion.
 *
 * Converts a double to a string, using the '.' as
 * decimal point.  To format the number you pass in
 * a printf()-style format string.  Allowed conversion
 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
 *
 * 'Z' is the same as 'g', except it always has a decimal and
 * at least one digit after the decimal.
 *
 * Return value: The pointer to the buffer with the converted string.
 * On failure returns NULL but does not set any Python exception.
 **/
static char *
_PyOS_ascii_formatd(char *buffer,
                    size_t buf_size,
                    const char *format,
                    double d,
                    int precision)
{
    char format_char;
    size_t format_len;

    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
       also with at least one character past the decimal. */
    char tmp_format[FLOAT_FORMATBUFLEN];

    /* Check the leading '%' before anything else.  This also rejects the
       empty string, so format_len is at least 1 below and indexing
       format[format_len - 1] cannot read before the start of the string. */
    if (format[0] != '%')
        return NULL;

    /* The last character in the format string must be the format char */
    format_len = strlen(format);
    format_char = format[format_len - 1];

    /* Reject formats that contain a single quote, a lowercase l, or a
       second percent sign after the leading '%'. */
    if (strpbrk(format + 1, "'l%"))
        return NULL;

    /* Also curious about this function is that it accepts format strings
       like "%xg", which are invalid for floats.  In general, the
       interface to this function is not very good, but changing it is
       difficult because it's a public API. */

    if (!(format_char == 'e' || format_char == 'E' ||
          format_char == 'f' || format_char == 'F' ||
          format_char == 'g' || format_char == 'G' ||
          format_char == 'Z'))
        return NULL;

    /* Map 'Z' format_char to 'g', by copying the format string and
       replacing the final char with a 'g' */
    if (format_char == 'Z') {
        if (format_len + 1 >= sizeof(tmp_format)) {
            /* The format won't fit in our copy.  Error out.  In
               practice, this will never happen and will be
               detected by returning NULL */
            return NULL;
        }
        strcpy(tmp_format, format);
        tmp_format[format_len - 1] = 'g';
        format = tmp_format;
    }

    /* Have PyOS_snprintf do the hard work */
    PyOS_snprintf(buffer, buf_size, format, d);

    /* Do various fixups on the return string */

    /* Get the current locale, and find the decimal point string.
       Convert that string back to a dot. */
    change_decimal_from_locale_to_dot(buffer);

    /* If an exponent exists, ensure that the exponent is at least
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
       for the extra zeros.  Also, if there are more than
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
       back to MIN_EXPONENT_DIGITS */
    ensure_minimum_exponent_length(buffer, buf_size);

    /* If format_char is 'Z', make sure we have at least one character
       after the decimal point (and make sure we have a decimal point);
       also switch to exponential notation in some edge cases where the
       extra character would produce more significant digits than we
       really want.  This step may fail and yield NULL. */
    if (format_char == 'Z')
        buffer = ensure_decimal_point(buffer, buf_size, precision);

    return buffer;
}
794
795 /* The fallback code to use if _Py_dg_dtoa is not available. */
796
PyOS_double_to_string(double val,char format_code,int precision,int flags,int * type)797 char * PyOS_double_to_string(double val,
798 char format_code,
799 int precision,
800 int flags,
801 int *type)
802 {
803 char format[32];
804 Py_ssize_t bufsize;
805 char *buf;
806 int t, exp;
807 int upper = 0;
808
809 /* Validate format_code, and map upper and lower case */
810 switch (format_code) {
811 case 'e': /* exponent */
812 case 'f': /* fixed */
813 case 'g': /* general */
814 break;
815 case 'E':
816 upper = 1;
817 format_code = 'e';
818 break;
819 case 'F':
820 upper = 1;
821 format_code = 'f';
822 break;
823 case 'G':
824 upper = 1;
825 format_code = 'g';
826 break;
827 case 'r': /* repr format */
828 /* Supplied precision is unused, must be 0. */
829 if (precision != 0) {
830 PyErr_BadInternalCall();
831 return NULL;
832 }
833 /* The repr() precision (17 significant decimal digits) is the
834 minimal number that is guaranteed to have enough precision
835 so that if the number is read back in the exact same binary
836 value is recreated. This is true for IEEE floating point
837 by design, and also happens to work for all other modern
838 hardware. */
839 precision = 17;
840 format_code = 'g';
841 break;
842 default:
843 PyErr_BadInternalCall();
844 return NULL;
845 }
846
847 /* Here's a quick-and-dirty calculation to figure out how big a buffer
848 we need. In general, for a finite float we need:
849
850 1 byte for each digit of the decimal significand, and
851
852 1 for a possible sign
853 1 for a possible decimal point
854 2 for a possible [eE][+-]
855 1 for each digit of the exponent; if we allow 19 digits
856 total then we're safe up to exponents of 2**63.
857 1 for the trailing nul byte
858
859 This gives a total of 24 + the number of digits in the significand,
860 and the number of digits in the significand is:
861
862 for 'g' format: at most precision, except possibly
863 when precision == 0, when it's 1.
864 for 'e' format: precision+1
865 for 'f' format: precision digits after the point, at least 1
866 before. To figure out how many digits appear before the point
867 we have to examine the size of the number. If fabs(val) < 1.0
868 then there will be only one digit before the point. If
869 fabs(val) >= 1.0, then there are at most
870
871 1+floor(log10(ceiling(fabs(val))))
872
873 digits before the point (where the 'ceiling' allows for the
874 possibility that the rounding rounds the integer part of val
875 up). A safe upper bound for the above quantity is
876 1+floor(exp/3), where exp is the unique integer such that 0.5
877 <= fabs(val)/2**exp < 1.0. This exp can be obtained from
878 frexp.
879
880 So we allow room for precision+1 digits for all formats, plus an
881 extra floor(exp/3) digits for 'f' format.
882
883 */
884
885 if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
886 /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
887 bufsize = 5;
888 else {
889 bufsize = 25 + precision;
890 if (format_code == 'f' && fabs(val) >= 1.0) {
891 frexp(val, &exp);
892 bufsize += exp/3;
893 }
894 }
895
896 buf = PyMem_Malloc(bufsize);
897 if (buf == NULL) {
898 PyErr_NoMemory();
899 return NULL;
900 }
901
902 /* Handle nan and inf. */
903 if (Py_IS_NAN(val)) {
904 strcpy(buf, "nan");
905 t = Py_DTST_NAN;
906 } else if (Py_IS_INFINITY(val)) {
907 if (copysign(1., val) == 1.)
908 strcpy(buf, "inf");
909 else
910 strcpy(buf, "-inf");
911 t = Py_DTST_INFINITE;
912 } else {
913 t = Py_DTST_FINITE;
914 if (flags & Py_DTSF_ADD_DOT_0)
915 format_code = 'Z';
916
917 PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
918 (flags & Py_DTSF_ALT ? "#" : ""), precision,
919 format_code);
920 _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
921 }
922
923 /* Add sign when requested. It's convenient (esp. when formatting
924 complex numbers) to include a sign even for inf and nan. */
925 if (flags & Py_DTSF_SIGN && buf[0] != '-') {
926 size_t len = strlen(buf);
927 /* the bufsize calculations above should ensure that we've got
928 space to add a sign */
929 assert((size_t)bufsize >= len+2);
930 memmove(buf+1, buf, len+1);
931 buf[0] = '+';
932 }
933 if (upper) {
934 /* Convert to upper case. */
935 char *p1;
936 for (p1 = buf; *p1; p1++)
937 *p1 = Py_TOUPPER(*p1);
938 }
939
940 if (type)
941 *type = t;
942 return buf;
943 }
944
945 #else
946
947 /* _Py_dg_dtoa is available. */
948
/* The special strings come from lookup tables, one per letter case, so that
   no non-locale-specific upper-casing routine has to be invented.  The
   OFS_* values are the indexes into either table. */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2

/* The lengths of these are known to the code below, so don't change them */
static const char * const lc_float_strings[] = {
    [OFS_INF] = "inf",
    [OFS_NAN] = "nan",
    [OFS_E]   = "e",
};
static const char * const uc_float_strings[] = {
    [OFS_INF] = "INF",
    [OFS_NAN] = "NAN",
    [OFS_E]   = "E",
};
966
967
968 /* Convert a double d to a string, and return a PyMem_Malloc'd block of
969 memory contain the resulting string.
970
971 Arguments:
972 d is the double to be converted
973 format_code is one of 'e', 'f', 'g', 'r'. 'e', 'f' and 'g'
974 correspond to '%e', '%f' and '%g'; 'r' corresponds to repr.
975 mode is one of '0', '2' or '3', and is completely determined by
976 format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
977 precision is the desired precision
978 always_add_sign is nonzero if a '+' sign should be included for positive
979 numbers
980 add_dot_0_if_integer is nonzero if integers in non-exponential form
981 should have ".0" added. Only applies to format codes 'r' and 'g'.
982 use_alt_formatting is nonzero if alternative formatting should be
983 used. Only applies to format codes 'e', 'f' and 'g'. For code 'g',
984 at most one of use_alt_formatting and add_dot_0_if_integer should
985 be nonzero.
986 type, if non-NULL, will be set to one of these constants to identify
987 the type of the 'd' argument:
988 Py_DTST_FINITE
989 Py_DTST_INFINITE
990 Py_DTST_NAN
991
992 Returns a PyMem_Malloc'd block of memory containing the resulting string,
993 or NULL on error. If NULL is returned, the Python error has been set.
994 */
995
996 static char *
format_float_short(double d,char format_code,int mode,int precision,int always_add_sign,int add_dot_0_if_integer,int use_alt_formatting,const char * const * float_strings,int * type)997 format_float_short(double d, char format_code,
998 int mode, int precision,
999 int always_add_sign, int add_dot_0_if_integer,
1000 int use_alt_formatting, const char * const *float_strings,
1001 int *type)
1002 {
1003 char *buf = NULL;
1004 char *p = NULL;
1005 Py_ssize_t bufsize = 0;
1006 char *digits, *digits_end;
1007 int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
1008 Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
1009 _Py_SET_53BIT_PRECISION_HEADER;
1010
1011 /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
1012 Must be matched by a call to _Py_dg_freedtoa. */
1013 _Py_SET_53BIT_PRECISION_START;
1014 digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
1015 &digits_end);
1016 _Py_SET_53BIT_PRECISION_END;
1017
1018 decpt = (Py_ssize_t)decpt_as_int;
1019 if (digits == NULL) {
1020 /* The only failure mode is no memory. */
1021 PyErr_NoMemory();
1022 goto exit;
1023 }
1024 assert(digits_end != NULL && digits_end >= digits);
1025 digits_len = digits_end - digits;
1026
1027 if (digits_len && !Py_ISDIGIT(digits[0])) {
1028 /* Infinities and nans here; adapt Gay's output,
1029 so convert Infinity to inf and NaN to nan, and
1030 ignore sign of nan. Then return. */
1031
1032 /* ignore the actual sign of a nan */
1033 if (digits[0] == 'n' || digits[0] == 'N')
1034 sign = 0;
1035
1036 /* We only need 5 bytes to hold the result "+inf\0" . */
1037 bufsize = 5; /* Used later in an assert. */
1038 buf = (char *)PyMem_Malloc(bufsize);
1039 if (buf == NULL) {
1040 PyErr_NoMemory();
1041 goto exit;
1042 }
1043 p = buf;
1044
1045 if (sign == 1) {
1046 *p++ = '-';
1047 }
1048 else if (always_add_sign) {
1049 *p++ = '+';
1050 }
1051 if (digits[0] == 'i' || digits[0] == 'I') {
1052 strncpy(p, float_strings[OFS_INF], 3);
1053 p += 3;
1054
1055 if (type)
1056 *type = Py_DTST_INFINITE;
1057 }
1058 else if (digits[0] == 'n' || digits[0] == 'N') {
1059 strncpy(p, float_strings[OFS_NAN], 3);
1060 p += 3;
1061
1062 if (type)
1063 *type = Py_DTST_NAN;
1064 }
1065 else {
1066 /* shouldn't get here: Gay's code should always return
1067 something starting with a digit, an 'I', or 'N' */
1068 Py_UNREACHABLE();
1069 }
1070 goto exit;
1071 }
1072
1073 /* The result must be finite (not inf or nan). */
1074 if (type)
1075 *type = Py_DTST_FINITE;
1076
1077
1078 /* We got digits back, format them. We may need to pad 'digits'
1079 either on the left or right (or both) with extra zeros, so in
1080 general the resulting string has the form
1081
1082 [<sign>]<zeros><digits><zeros>[<exponent>]
1083
1084 where either of the <zeros> pieces could be empty, and there's a
1085 decimal point that could appear either in <digits> or in the
1086 leading or trailing <zeros>.
1087
1088 Imagine an infinite 'virtual' string vdigits, consisting of the
1089 string 'digits' (starting at index 0) padded on both the left and
1090 right with infinite strings of zeros. We want to output a slice
1091
1092 vdigits[vdigits_start : vdigits_end]
1093
1094 of this virtual string. Thus if vdigits_start < 0 then we'll end
1095 up producing some leading zeros; if vdigits_end > digits_len there
1096 will be trailing zeros in the output. The next section of code
1097 determines whether to use an exponent or not, figures out the
1098 position 'decpt' of the decimal point, and computes 'vdigits_start'
1099 and 'vdigits_end'. */
1100 vdigits_end = digits_len;
1101 switch (format_code) {
1102 case 'e':
1103 use_exp = 1;
1104 vdigits_end = precision;
1105 break;
1106 case 'f':
1107 vdigits_end = decpt + precision;
1108 break;
1109 case 'g':
1110 if (decpt <= -4 || decpt >
1111 (add_dot_0_if_integer ? precision-1 : precision))
1112 use_exp = 1;
1113 if (use_alt_formatting)
1114 vdigits_end = precision;
1115 break;
1116 case 'r':
1117 /* convert to exponential format at 1e16. We used to convert
1118 at 1e17, but that gives odd-looking results for some values
1119 when a 16-digit 'shortest' repr is padded with bogus zeros.
1120 For example, repr(2e16+8) would give 20000000000000010.0;
1121 the true value is 20000000000000008.0. */
1122 if (decpt <= -4 || decpt > 16)
1123 use_exp = 1;
1124 break;
1125 default:
1126 PyErr_BadInternalCall();
1127 goto exit;
1128 }
1129
1130 /* if using an exponent, reset decimal point position to 1 and adjust
1131 exponent accordingly.*/
1132 if (use_exp) {
1133 exp = (int)decpt - 1;
1134 decpt = 1;
1135 }
1136 /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1137 decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1138 vdigits_start = decpt <= 0 ? decpt-1 : 0;
1139 if (!use_exp && add_dot_0_if_integer)
1140 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1141 else
1142 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
1143
1144 /* double check inequalities */
1145 assert(vdigits_start <= 0 &&
1146 0 <= digits_len &&
1147 digits_len <= vdigits_end);
1148 /* decimal point should be in (vdigits_start, vdigits_end] */
1149 assert(vdigits_start < decpt && decpt <= vdigits_end);
1150
1151 /* Compute an upper bound how much memory we need. This might be a few
1152 chars too long, but no big deal. */
1153 bufsize =
1154 /* sign, decimal point and trailing 0 byte */
1155 3 +
1156
1157 /* total digit count (including zero padding on both sides) */
1158 (vdigits_end - vdigits_start) +
1159
1160 /* exponent "e+100", max 3 numerical digits */
1161 (use_exp ? 5 : 0);
1162
1163 /* Now allocate the memory and initialize p to point to the start of
1164 it. */
1165 buf = (char *)PyMem_Malloc(bufsize);
1166 if (buf == NULL) {
1167 PyErr_NoMemory();
1168 goto exit;
1169 }
1170 p = buf;
1171
1172 /* Add a negative sign if negative, and a plus sign if non-negative
1173 and always_add_sign is true. */
1174 if (sign == 1)
1175 *p++ = '-';
1176 else if (always_add_sign)
1177 *p++ = '+';
1178
1179 /* note that exactly one of the three 'if' conditions is true,
1180 so we include exactly one decimal point */
1181 /* Zero padding on left of digit string */
1182 if (decpt <= 0) {
1183 memset(p, '0', decpt-vdigits_start);
1184 p += decpt - vdigits_start;
1185 *p++ = '.';
1186 memset(p, '0', 0-decpt);
1187 p += 0-decpt;
1188 }
1189 else {
1190 memset(p, '0', 0-vdigits_start);
1191 p += 0 - vdigits_start;
1192 }
1193
1194 /* Digits, with included decimal point */
1195 if (0 < decpt && decpt <= digits_len) {
1196 strncpy(p, digits, decpt-0);
1197 p += decpt-0;
1198 *p++ = '.';
1199 strncpy(p, digits+decpt, digits_len-decpt);
1200 p += digits_len-decpt;
1201 }
1202 else {
1203 strncpy(p, digits, digits_len);
1204 p += digits_len;
1205 }
1206
1207 /* And zeros on the right */
1208 if (digits_len < decpt) {
1209 memset(p, '0', decpt-digits_len);
1210 p += decpt-digits_len;
1211 *p++ = '.';
1212 memset(p, '0', vdigits_end-decpt);
1213 p += vdigits_end-decpt;
1214 }
1215 else {
1216 memset(p, '0', vdigits_end-digits_len);
1217 p += vdigits_end-digits_len;
1218 }
1219
1220 /* Delete a trailing decimal pt unless using alternative formatting. */
1221 if (p[-1] == '.' && !use_alt_formatting)
1222 p--;
1223
1224 /* Now that we've done zero padding, add an exponent if needed. */
1225 if (use_exp) {
1226 *p++ = float_strings[OFS_E][0];
1227 exp_len = sprintf(p, "%+.02d", exp);
1228 p += exp_len;
1229 }
1230 exit:
1231 if (buf) {
1232 *p = '\0';
1233 /* It's too late if this fails, as we've already stepped on
1234 memory that isn't ours. But it's an okay debugging test. */
1235 assert(p-buf < bufsize);
1236 }
1237 if (digits)
1238 _Py_dg_freedtoa(digits);
1239
1240 return buf;
1241 }
1242
1243
PyOS_double_to_string(double val,char format_code,int precision,int flags,int * type)1244 char * PyOS_double_to_string(double val,
1245 char format_code,
1246 int precision,
1247 int flags,
1248 int *type)
1249 {
1250 const char * const *float_strings = lc_float_strings;
1251 int mode;
1252
1253 /* Validate format_code, and map upper and lower case. Compute the
1254 mode and make any adjustments as needed. */
1255 switch (format_code) {
1256 /* exponent */
1257 case 'E':
1258 float_strings = uc_float_strings;
1259 format_code = 'e';
1260 /* Fall through. */
1261 case 'e':
1262 mode = 2;
1263 precision++;
1264 break;
1265
1266 /* fixed */
1267 case 'F':
1268 float_strings = uc_float_strings;
1269 format_code = 'f';
1270 /* Fall through. */
1271 case 'f':
1272 mode = 3;
1273 break;
1274
1275 /* general */
1276 case 'G':
1277 float_strings = uc_float_strings;
1278 format_code = 'g';
1279 /* Fall through. */
1280 case 'g':
1281 mode = 2;
1282 /* precision 0 makes no sense for 'g' format; interpret as 1 */
1283 if (precision == 0)
1284 precision = 1;
1285 break;
1286
1287 /* repr format */
1288 case 'r':
1289 mode = 0;
1290 /* Supplied precision is unused, must be 0. */
1291 if (precision != 0) {
1292 PyErr_BadInternalCall();
1293 return NULL;
1294 }
1295 break;
1296
1297 default:
1298 PyErr_BadInternalCall();
1299 return NULL;
1300 }
1301
1302 return format_float_short(val, format_code, mode, precision,
1303 flags & Py_DTSF_SIGN,
1304 flags & Py_DTSF_ADD_DOT_0,
1305 flags & Py_DTSF_ALT,
1306 float_strings, type);
1307 }
1308 #endif /* ifdef PY_NO_SHORT_FLOAT_REPR */
1309