/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4 
5 #include "Python.h"
6 #include "pycore_fileutils.h"
7 #include <locale.h>
8 
9 /* Raises an exception about an unknown presentation type for this
10  * type. */
11 
12 static void
unknown_presentation_type(Py_UCS4 presentation_type,const char * type_name)13 unknown_presentation_type(Py_UCS4 presentation_type,
14                           const char* type_name)
15 {
16     /* %c might be out-of-range, hence the two cases. */
17     if (presentation_type > 32 && presentation_type < 128)
18         PyErr_Format(PyExc_ValueError,
19                      "Unknown format code '%c' "
20                      "for object of type '%.200s'",
21                      (char)presentation_type,
22                      type_name);
23     else
24         PyErr_Format(PyExc_ValueError,
25                      "Unknown format code '\\x%x' "
26                      "for object of type '%.200s'",
27                      (unsigned int)presentation_type,
28                      type_name);
29 }
30 
31 static void
invalid_thousands_separator_type(char specifier,Py_UCS4 presentation_type)32 invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
33 {
34     assert(specifier == ',' || specifier == '_');
35     if (presentation_type > 32 && presentation_type < 128)
36         PyErr_Format(PyExc_ValueError,
37                      "Cannot specify '%c' with '%c'.",
38                      specifier, (char)presentation_type);
39     else
40         PyErr_Format(PyExc_ValueError,
41                      "Cannot specify '%c' with '\\x%x'.",
42                      specifier, (unsigned int)presentation_type);
43 }
44 
45 static void
invalid_comma_and_underscore(void)46 invalid_comma_and_underscore(void)
47 {
48     PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
49 }
50 
51 /*
52     get_integer consumes 0 or more decimal digit characters from an
53     input string, updates *result with the corresponding positive
54     integer, and returns the number of digits consumed.
55 
56     returns -1 on error.
57 */
58 static int
get_integer(PyObject * str,Py_ssize_t * ppos,Py_ssize_t end,Py_ssize_t * result)59 get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
60                   Py_ssize_t *result)
61 {
62     Py_ssize_t accumulator, digitval, pos = *ppos;
63     int numdigits;
64     int kind = PyUnicode_KIND(str);
65     void *data = PyUnicode_DATA(str);
66 
67     accumulator = numdigits = 0;
68     for (; pos < end; pos++, numdigits++) {
69         digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
70         if (digitval < 0)
71             break;
72         /*
73            Detect possible overflow before it happens:
74 
75               accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
76               accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
77         */
78         if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
79             PyErr_Format(PyExc_ValueError,
80                          "Too many decimal digits in format string");
81             *ppos = pos;
82             return -1;
83         }
84         accumulator = accumulator * 10 + digitval;
85     }
86     *ppos = pos;
87     *result = accumulator;
88     return numdigits;
89 }
90 
91 /************************************************************************/
92 /*********** standard format specifier parsing **************************/
93 /************************************************************************/
94 
95 /* returns true if this character is a specifier alignment token */
96 Py_LOCAL_INLINE(int)
is_alignment_token(Py_UCS4 c)97 is_alignment_token(Py_UCS4 c)
98 {
99     switch (c) {
100     case '<': case '>': case '=': case '^':
101         return 1;
102     default:
103         return 0;
104     }
105 }
106 
107 /* returns true if this character is a sign element */
108 Py_LOCAL_INLINE(int)
is_sign_element(Py_UCS4 c)109 is_sign_element(Py_UCS4 c)
110 {
111     switch (c) {
112     case ' ': case '+': case '-':
113         return 1;
114     default:
115         return 0;
116     }
117 }
118 
/* Locale type codes: where the grouping rules for a number come from.
   LT_NO_LOCALE must be zero, because the parsed value is also tested
   as a plain truth value. The two enumerators without an explicit
   value continue counting from LT_UNDERSCORE_LOCALE. */
enum LocaleType {
    LT_NO_LOCALE = 0,             /* no thousands separators */
    LT_DEFAULT_LOCALE = ',',      /* ',' given in the format spec */
    LT_UNDERSCORE_LOCALE = '_',   /* '_' given in the format spec */
    LT_UNDER_FOUR_LOCALE,         /* '_' with b/o/x/X: groups of four */
    LT_CURRENT_LOCALE             /* settings taken from localeconv() */
};
127 
128 typedef struct {
129     Py_UCS4 fill_char;
130     Py_UCS4 align;
131     int alternate;
132     Py_UCS4 sign;
133     Py_ssize_t width;
134     enum LocaleType thousands_separators;
135     Py_ssize_t precision;
136     Py_UCS4 type;
137 } InternalFormatSpec;
138 
#if 0
/* Occasionally useful for debugging. Should normally be commented out.
   Prints every field of *format to stdout, one field per line.
   The casts make each argument match its printf conversion: the
   Py_UCS4 fields and the enum are not guaranteed to have the types
   that %d and %c expect. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %u\n",
           (unsigned int)format->fill_char);
    printf("internal format spec: align %u\n",
           (unsigned int)format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %u\n",
           (unsigned int)format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           (int)format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", (int)format->type);
    printf("\n");
}
#endif
156 
157 
158 /*
159   ptr points to the start of the format_spec, end points just past its end.
160   fills in format with the parsed information.
161   returns 1 on success, 0 on failure.
162   if failure, sets the exception
163 */
164 static int
parse_internal_render_format_spec(PyObject * format_spec,Py_ssize_t start,Py_ssize_t end,InternalFormatSpec * format,char default_type,char default_align)165 parse_internal_render_format_spec(PyObject *format_spec,
166                                   Py_ssize_t start, Py_ssize_t end,
167                                   InternalFormatSpec *format,
168                                   char default_type,
169                                   char default_align)
170 {
171     Py_ssize_t pos = start;
172     int kind = PyUnicode_KIND(format_spec);
173     void *data = PyUnicode_DATA(format_spec);
174     /* end-pos is used throughout this code to specify the length of
175        the input string */
176 #define READ_spec(index) PyUnicode_READ(kind, data, index)
177 
178     Py_ssize_t consumed;
179     int align_specified = 0;
180     int fill_char_specified = 0;
181 
182     format->fill_char = ' ';
183     format->align = default_align;
184     format->alternate = 0;
185     format->sign = '\0';
186     format->width = -1;
187     format->thousands_separators = LT_NO_LOCALE;
188     format->precision = -1;
189     format->type = default_type;
190 
191     /* If the second char is an alignment token,
192        then parse the fill char */
193     if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
194         format->align = READ_spec(pos+1);
195         format->fill_char = READ_spec(pos);
196         fill_char_specified = 1;
197         align_specified = 1;
198         pos += 2;
199     }
200     else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
201         format->align = READ_spec(pos);
202         align_specified = 1;
203         ++pos;
204     }
205 
206     /* Parse the various sign options */
207     if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
208         format->sign = READ_spec(pos);
209         ++pos;
210     }
211 
212     /* If the next character is #, we're in alternate mode.  This only
213        applies to integers. */
214     if (end-pos >= 1 && READ_spec(pos) == '#') {
215         format->alternate = 1;
216         ++pos;
217     }
218 
219     /* The special case for 0-padding (backwards compat) */
220     if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
221         format->fill_char = '0';
222         if (!align_specified) {
223             format->align = '=';
224         }
225         ++pos;
226     }
227 
228     consumed = get_integer(format_spec, &pos, end, &format->width);
229     if (consumed == -1)
230         /* Overflow error. Exception already set. */
231         return 0;
232 
233     /* If consumed is 0, we didn't consume any characters for the
234        width. In that case, reset the width to -1, because
235        get_integer() will have set it to zero. -1 is how we record
236        that the width wasn't specified. */
237     if (consumed == 0)
238         format->width = -1;
239 
240     /* Comma signifies add thousands separators */
241     if (end-pos && READ_spec(pos) == ',') {
242         format->thousands_separators = LT_DEFAULT_LOCALE;
243         ++pos;
244     }
245     /* Underscore signifies add thousands separators */
246     if (end-pos && READ_spec(pos) == '_') {
247         if (format->thousands_separators != LT_NO_LOCALE) {
248             invalid_comma_and_underscore();
249             return 0;
250         }
251         format->thousands_separators = LT_UNDERSCORE_LOCALE;
252         ++pos;
253     }
254     if (end-pos && READ_spec(pos) == ',') {
255         invalid_comma_and_underscore();
256         return 0;
257     }
258 
259     /* Parse field precision */
260     if (end-pos && READ_spec(pos) == '.') {
261         ++pos;
262 
263         consumed = get_integer(format_spec, &pos, end, &format->precision);
264         if (consumed == -1)
265             /* Overflow error. Exception already set. */
266             return 0;
267 
268         /* Not having a precision after a dot is an error. */
269         if (consumed == 0) {
270             PyErr_Format(PyExc_ValueError,
271                          "Format specifier missing precision");
272             return 0;
273         }
274 
275     }
276 
277     /* Finally, parse the type field. */
278 
279     if (end-pos > 1) {
280         /* More than one char remain, invalid format specifier. */
281         PyErr_Format(PyExc_ValueError, "Invalid format specifier");
282         return 0;
283     }
284 
285     if (end-pos == 1) {
286         format->type = READ_spec(pos);
287         ++pos;
288     }
289 
290     /* Do as much validating as we can, just by looking at the format
291        specifier.  Do not take into account what type of formatting
292        we're doing (int, float, string). */
293 
294     if (format->thousands_separators) {
295         switch (format->type) {
296         case 'd':
297         case 'e':
298         case 'f':
299         case 'g':
300         case 'E':
301         case 'G':
302         case '%':
303         case 'F':
304         case '\0':
305             /* These are allowed. See PEP 378.*/
306             break;
307         case 'b':
308         case 'o':
309         case 'x':
310         case 'X':
311             /* Underscores are allowed in bin/oct/hex. See PEP 515. */
312             if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
313                 /* Every four digits, not every three, in bin/oct/hex. */
314                 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
315                 break;
316             }
317             /* fall through */
318         default:
319             invalid_thousands_separator_type(format->thousands_separators, format->type);
320             return 0;
321         }
322     }
323 
324     assert (format->align <= 127);
325     assert (format->sign <= 127);
326     return 1;
327 }
328 
329 /* Calculate the padding needed. */
330 static void
calc_padding(Py_ssize_t nchars,Py_ssize_t width,Py_UCS4 align,Py_ssize_t * n_lpadding,Py_ssize_t * n_rpadding,Py_ssize_t * n_total)331 calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
332              Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
333              Py_ssize_t *n_total)
334 {
335     if (width >= 0) {
336         if (nchars > width)
337             *n_total = nchars;
338         else
339             *n_total = width;
340     }
341     else {
342         /* not specified, use all of the chars and no more */
343         *n_total = nchars;
344     }
345 
346     /* Figure out how much leading space we need, based on the
347        aligning */
348     if (align == '>')
349         *n_lpadding = *n_total - nchars;
350     else if (align == '^')
351         *n_lpadding = (*n_total - nchars) / 2;
352     else if (align == '<' || align == '=')
353         *n_lpadding = 0;
354     else {
355         /* We should never have an unspecified alignment. */
356         Py_UNREACHABLE();
357     }
358 
359     *n_rpadding = *n_total - nchars - *n_lpadding;
360 }
361 
362 /* Do the padding, and return a pointer to where the caller-supplied
363    content goes. */
364 static int
fill_padding(_PyUnicodeWriter * writer,Py_ssize_t nchars,Py_UCS4 fill_char,Py_ssize_t n_lpadding,Py_ssize_t n_rpadding)365 fill_padding(_PyUnicodeWriter *writer,
366              Py_ssize_t nchars,
367              Py_UCS4 fill_char, Py_ssize_t n_lpadding,
368              Py_ssize_t n_rpadding)
369 {
370     Py_ssize_t pos;
371 
372     /* Pad on left. */
373     if (n_lpadding) {
374         pos = writer->pos;
375         _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
376     }
377 
378     /* Pad on right. */
379     if (n_rpadding) {
380         pos = writer->pos + nchars + n_lpadding;
381         _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
382     }
383 
384     /* Pointer to the user content. */
385     writer->pos += n_lpadding;
386     return 0;
387 }
388 
389 /************************************************************************/
390 /*********** common routines for numeric formatting *********************/
391 /************************************************************************/
392 
393 /* Locale info needed for formatting integers and the part of floats
394    before and including the decimal. Note that locales only support
395    8-bit chars, not unicode. */
396 typedef struct {
397     PyObject *decimal_point;
398     PyObject *thousands_sep;
399     const char *grouping;
400     char *grouping_buffer;
401 } LocaleInfo;
402 
403 #define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
404 
405 /* describes the layout for an integer, see the comment in
406    calc_number_widths() for details */
407 typedef struct {
408     Py_ssize_t n_lpadding;
409     Py_ssize_t n_prefix;
410     Py_ssize_t n_spadding;
411     Py_ssize_t n_rpadding;
412     char sign;
413     Py_ssize_t n_sign;      /* number of digits needed for sign (0/1) */
414     Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
415                                     any grouping chars. */
416     Py_ssize_t n_decimal;   /* 0 if only an integer */
417     Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
418                                excluding the decimal itself, if
419                                present. */
420 
421     /* These 2 are not the widths of fields, but are needed by
422        STRINGLIB_GROUPING. */
423     Py_ssize_t n_digits;    /* The number of digits before a decimal
424                                or exponent. */
425     Py_ssize_t n_min_width; /* The min_width we used when we computed
426                                the n_grouped_digits width. */
427 } NumberFieldWidths;
428 
429 
430 /* Given a number of the form:
431    digits[remainder]
432    where ptr points to the start and end points to the end, find where
433     the integer part ends. This could be a decimal, an exponent, both,
434     or neither.
435    If a decimal point is present, set *has_decimal and increment
436     remainder beyond it.
437    Results are undefined (but shouldn't crash) for improperly
438     formatted strings.
439 */
440 static void
parse_number(PyObject * s,Py_ssize_t pos,Py_ssize_t end,Py_ssize_t * n_remainder,int * has_decimal)441 parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
442              Py_ssize_t *n_remainder, int *has_decimal)
443 {
444     Py_ssize_t remainder;
445     int kind = PyUnicode_KIND(s);
446     void *data = PyUnicode_DATA(s);
447 
448     while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
449         ++pos;
450     remainder = pos;
451 
452     /* Does remainder start with a decimal point? */
453     *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
454 
455     /* Skip the decimal point. */
456     if (*has_decimal)
457         remainder++;
458 
459     *n_remainder = end - remainder;
460 }
461 
462 /* not all fields of format are used.  for example, precision is
463    unused.  should this take discrete params in order to be more clear
464    about what it does?  or is passing a single format parameter easier
465    and more efficient enough to justify a little obfuscation?
466    Return -1 on error. */
467 static Py_ssize_t
calc_number_widths(NumberFieldWidths * spec,Py_ssize_t n_prefix,Py_UCS4 sign_char,PyObject * number,Py_ssize_t n_start,Py_ssize_t n_end,Py_ssize_t n_remainder,int has_decimal,const LocaleInfo * locale,const InternalFormatSpec * format,Py_UCS4 * maxchar)468 calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
469                    Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
470                    Py_ssize_t n_end, Py_ssize_t n_remainder,
471                    int has_decimal, const LocaleInfo *locale,
472                    const InternalFormatSpec *format, Py_UCS4 *maxchar)
473 {
474     Py_ssize_t n_non_digit_non_padding;
475     Py_ssize_t n_padding;
476 
477     spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
478     spec->n_lpadding = 0;
479     spec->n_prefix = n_prefix;
480     spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
481     spec->n_remainder = n_remainder;
482     spec->n_spadding = 0;
483     spec->n_rpadding = 0;
484     spec->sign = '\0';
485     spec->n_sign = 0;
486 
487     /* the output will look like:
488        |                                                                                         |
489        | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
490        |                                                                                         |
491 
492        sign is computed from format->sign and the actual
493        sign of the number
494 
495        prefix is given (it's for the '0x' prefix)
496 
497        digits is already known
498 
499        the total width is either given, or computed from the
500        actual digits
501 
502        only one of lpadding, spadding, and rpadding can be non-zero,
503        and it's calculated from the width and other fields
504     */
505 
506     /* compute the various parts we're going to write */
507     switch (format->sign) {
508     case '+':
509         /* always put a + or - */
510         spec->n_sign = 1;
511         spec->sign = (sign_char == '-' ? '-' : '+');
512         break;
513     case ' ':
514         spec->n_sign = 1;
515         spec->sign = (sign_char == '-' ? '-' : ' ');
516         break;
517     default:
518         /* Not specified, or the default (-) */
519         if (sign_char == '-') {
520             spec->n_sign = 1;
521             spec->sign = '-';
522         }
523     }
524 
525     /* The number of chars used for non-digits and non-padding. */
526     n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
527         spec->n_remainder;
528 
529     /* min_width can go negative, that's okay. format->width == -1 means
530        we don't care. */
531     if (format->fill_char == '0' && format->align == '=')
532         spec->n_min_width = format->width - n_non_digit_non_padding;
533     else
534         spec->n_min_width = 0;
535 
536     if (spec->n_digits == 0)
537         /* This case only occurs when using 'c' formatting, we need
538            to special case it because the grouping code always wants
539            to have at least one character. */
540         spec->n_grouped_digits = 0;
541     else {
542         Py_UCS4 grouping_maxchar;
543         spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
544             NULL, 0,
545             NULL, 0, spec->n_digits,
546             spec->n_min_width,
547             locale->grouping, locale->thousands_sep, &grouping_maxchar);
548         if (spec->n_grouped_digits == -1) {
549             return -1;
550         }
551         *maxchar = Py_MAX(*maxchar, grouping_maxchar);
552     }
553 
554     /* Given the desired width and the total of digit and non-digit
555        space we consume, see if we need any padding. format->width can
556        be negative (meaning no padding), but this code still works in
557        that case. */
558     n_padding = format->width -
559                         (n_non_digit_non_padding + spec->n_grouped_digits);
560     if (n_padding > 0) {
561         /* Some padding is needed. Determine if it's left, space, or right. */
562         switch (format->align) {
563         case '<':
564             spec->n_rpadding = n_padding;
565             break;
566         case '^':
567             spec->n_lpadding = n_padding / 2;
568             spec->n_rpadding = n_padding - spec->n_lpadding;
569             break;
570         case '=':
571             spec->n_spadding = n_padding;
572             break;
573         case '>':
574             spec->n_lpadding = n_padding;
575             break;
576         default:
577             /* Shouldn't get here, but treat it as '>' */
578             Py_UNREACHABLE();
579         }
580     }
581 
582     if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
583         *maxchar = Py_MAX(*maxchar, format->fill_char);
584 
585     if (spec->n_decimal)
586         *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
587 
588     return spec->n_lpadding + spec->n_sign + spec->n_prefix +
589         spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
590         spec->n_remainder + spec->n_rpadding;
591 }
592 
593 /* Fill in the digit parts of a number's string representation,
594    as determined in calc_number_widths().
595    Return -1 on error, or 0 on success. */
596 static int
fill_number(_PyUnicodeWriter * writer,const NumberFieldWidths * spec,PyObject * digits,Py_ssize_t d_start,Py_ssize_t d_end,PyObject * prefix,Py_ssize_t p_start,Py_UCS4 fill_char,LocaleInfo * locale,int toupper)597 fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
598             PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
599             PyObject *prefix, Py_ssize_t p_start,
600             Py_UCS4 fill_char,
601             LocaleInfo *locale, int toupper)
602 {
603     /* Used to keep track of digits, decimal, and remainder. */
604     Py_ssize_t d_pos = d_start;
605     const unsigned int kind = writer->kind;
606     const void *data = writer->data;
607     Py_ssize_t r;
608 
609     if (spec->n_lpadding) {
610         _PyUnicode_FastFill(writer->buffer,
611                             writer->pos, spec->n_lpadding, fill_char);
612         writer->pos += spec->n_lpadding;
613     }
614     if (spec->n_sign == 1) {
615         PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
616         writer->pos++;
617     }
618     if (spec->n_prefix) {
619         _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
620                                       prefix, p_start,
621                                       spec->n_prefix);
622         if (toupper) {
623             Py_ssize_t t;
624             for (t = 0; t < spec->n_prefix; t++) {
625                 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
626                 c = Py_TOUPPER(c);
627                 assert (c <= 127);
628                 PyUnicode_WRITE(kind, data, writer->pos + t, c);
629             }
630         }
631         writer->pos += spec->n_prefix;
632     }
633     if (spec->n_spadding) {
634         _PyUnicode_FastFill(writer->buffer,
635                             writer->pos, spec->n_spadding, fill_char);
636         writer->pos += spec->n_spadding;
637     }
638 
639     /* Only for type 'c' special case, it has no digits. */
640     if (spec->n_digits != 0) {
641         /* Fill the digits with InsertThousandsGrouping. */
642         r = _PyUnicode_InsertThousandsGrouping(
643                 writer, spec->n_grouped_digits,
644                 digits, d_pos, spec->n_digits,
645                 spec->n_min_width,
646                 locale->grouping, locale->thousands_sep, NULL);
647         if (r == -1)
648             return -1;
649         assert(r == spec->n_grouped_digits);
650         d_pos += spec->n_digits;
651     }
652     if (toupper) {
653         Py_ssize_t t;
654         for (t = 0; t < spec->n_grouped_digits; t++) {
655             Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
656             c = Py_TOUPPER(c);
657             if (c > 127) {
658                 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
659                 return -1;
660             }
661             PyUnicode_WRITE(kind, data, writer->pos + t, c);
662         }
663     }
664     writer->pos += spec->n_grouped_digits;
665 
666     if (spec->n_decimal) {
667         _PyUnicode_FastCopyCharacters(
668             writer->buffer, writer->pos,
669             locale->decimal_point, 0, spec->n_decimal);
670         writer->pos += spec->n_decimal;
671         d_pos += 1;
672     }
673 
674     if (spec->n_remainder) {
675         _PyUnicode_FastCopyCharacters(
676             writer->buffer, writer->pos,
677             digits, d_pos, spec->n_remainder);
678         writer->pos += spec->n_remainder;
679         /* d_pos += spec->n_remainder; */
680     }
681 
682     if (spec->n_rpadding) {
683         _PyUnicode_FastFill(writer->buffer,
684                             writer->pos, spec->n_rpadding,
685                             fill_char);
686         writer->pos += spec->n_rpadding;
687     }
688     return 0;
689 }
690 
/* Grouping description used when no thousands separators are wanted:
   a single CHAR_MAX element. */
static const char no_grouping[1] = { CHAR_MAX };
692 
693 /* Find the decimal point character(s?), thousands_separator(s?), and
694    grouping description, either for the current locale if type is
695    LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
696    LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
697 static int
get_locale_info(enum LocaleType type,LocaleInfo * locale_info)698 get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
699 {
700     switch (type) {
701     case LT_CURRENT_LOCALE: {
702         struct lconv *lc = localeconv();
703         if (_Py_GetLocaleconvNumeric(lc,
704                                      &locale_info->decimal_point,
705                                      &locale_info->thousands_sep) < 0) {
706             return -1;
707         }
708 
709         /* localeconv() grouping can become a dangling pointer or point
710            to a different string if another thread calls localeconv() during
711            the string formatting. Copy the string to avoid this risk. */
712         locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
713         if (locale_info->grouping_buffer == NULL) {
714             PyErr_NoMemory();
715             return -1;
716         }
717         locale_info->grouping = locale_info->grouping_buffer;
718         break;
719     }
720     case LT_DEFAULT_LOCALE:
721     case LT_UNDERSCORE_LOCALE:
722     case LT_UNDER_FOUR_LOCALE:
723         locale_info->decimal_point = PyUnicode_FromOrdinal('.');
724         locale_info->thousands_sep = PyUnicode_FromOrdinal(
725             type == LT_DEFAULT_LOCALE ? ',' : '_');
726         if (!locale_info->decimal_point || !locale_info->thousands_sep)
727             return -1;
728         if (type != LT_UNDER_FOUR_LOCALE)
729             locale_info->grouping = "\3"; /* Group every 3 characters.  The
730                                          (implicit) trailing 0 means repeat
731                                          infinitely. */
732         else
733             locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
734         break;
735     case LT_NO_LOCALE:
736         locale_info->decimal_point = PyUnicode_FromOrdinal('.');
737         locale_info->thousands_sep = PyUnicode_New(0, 0);
738         if (!locale_info->decimal_point || !locale_info->thousands_sep)
739             return -1;
740         locale_info->grouping = no_grouping;
741         break;
742     }
743     return 0;
744 }
745 
746 static void
free_locale_info(LocaleInfo * locale_info)747 free_locale_info(LocaleInfo *locale_info)
748 {
749     Py_XDECREF(locale_info->decimal_point);
750     Py_XDECREF(locale_info->thousands_sep);
751     PyMem_Free(locale_info->grouping_buffer);
752 }
753 
754 /************************************************************************/
755 /*********** string formatting ******************************************/
756 /************************************************************************/
757 
758 static int
format_string_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)759 format_string_internal(PyObject *value, const InternalFormatSpec *format,
760                        _PyUnicodeWriter *writer)
761 {
762     Py_ssize_t lpad;
763     Py_ssize_t rpad;
764     Py_ssize_t total;
765     Py_ssize_t len;
766     int result = -1;
767     Py_UCS4 maxchar;
768 
769     assert(PyUnicode_IS_READY(value));
770     len = PyUnicode_GET_LENGTH(value);
771 
772     /* sign is not allowed on strings */
773     if (format->sign != '\0') {
774         PyErr_SetString(PyExc_ValueError,
775                         "Sign not allowed in string format specifier");
776         goto done;
777     }
778 
779     /* alternate is not allowed on strings */
780     if (format->alternate) {
781         PyErr_SetString(PyExc_ValueError,
782                         "Alternate form (#) not allowed in string format "
783                         "specifier");
784         goto done;
785     }
786 
787     /* '=' alignment not allowed on strings */
788     if (format->align == '=') {
789         PyErr_SetString(PyExc_ValueError,
790                         "'=' alignment not allowed "
791                         "in string format specifier");
792         goto done;
793     }
794 
795     if ((format->width == -1 || format->width <= len)
796         && (format->precision == -1 || format->precision >= len)) {
797         /* Fast path */
798         return _PyUnicodeWriter_WriteStr(writer, value);
799     }
800 
801     /* if precision is specified, output no more that format.precision
802        characters */
803     if (format->precision >= 0 && len >= format->precision) {
804         len = format->precision;
805     }
806 
807     calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
808 
809     maxchar = writer->maxchar;
810     if (lpad != 0 || rpad != 0)
811         maxchar = Py_MAX(maxchar, format->fill_char);
812     if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
813         Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
814         maxchar = Py_MAX(maxchar, valmaxchar);
815     }
816 
817     /* allocate the resulting string */
818     if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
819         goto done;
820 
821     /* Write into that space. First the padding. */
822     result = fill_padding(writer, len, format->fill_char, lpad, rpad);
823     if (result == -1)
824         goto done;
825 
826     /* Then the source string. */
827     if (len) {
828         _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
829                                       value, 0, len);
830     }
831     writer->pos += (len + rpad);
832     result = 0;
833 
834 done:
835     return result;
836 }
837 
838 
839 /************************************************************************/
840 /*********** long formatting ********************************************/
841 /************************************************************************/
842 
843 static int
format_long_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)844 format_long_internal(PyObject *value, const InternalFormatSpec *format,
845                      _PyUnicodeWriter *writer)
846 {
847     int result = -1;
848     Py_UCS4 maxchar = 127;
849     PyObject *tmp = NULL;
850     Py_ssize_t inumeric_chars;
851     Py_UCS4 sign_char = '\0';
852     Py_ssize_t n_digits;       /* count of digits need from the computed
853                                   string */
854     Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
855                                    produces non-digits */
856     Py_ssize_t n_prefix = 0;   /* Count of prefix chars, (e.g., '0x') */
857     Py_ssize_t n_total;
858     Py_ssize_t prefix = 0;
859     NumberFieldWidths spec;
860     long x;
861 
862     /* Locale settings, either from the actual locale or
863        from a hard-code pseudo-locale */
864     LocaleInfo locale = LocaleInfo_STATIC_INIT;
865 
866     /* no precision allowed on integers */
867     if (format->precision != -1) {
868         PyErr_SetString(PyExc_ValueError,
869                         "Precision not allowed in integer format specifier");
870         goto done;
871     }
872 
873     /* special case for character formatting */
874     if (format->type == 'c') {
875         /* error to specify a sign */
876         if (format->sign != '\0') {
877             PyErr_SetString(PyExc_ValueError,
878                             "Sign not allowed with integer"
879                             " format specifier 'c'");
880             goto done;
881         }
882         /* error to request alternate format */
883         if (format->alternate) {
884             PyErr_SetString(PyExc_ValueError,
885                             "Alternate form (#) not allowed with integer"
886                             " format specifier 'c'");
887             goto done;
888         }
889 
890         /* taken from unicodeobject.c formatchar() */
891         /* Integer input truncated to a character */
892         x = PyLong_AsLong(value);
893         if (x == -1 && PyErr_Occurred())
894             goto done;
895         if (x < 0 || x > 0x10ffff) {
896             PyErr_SetString(PyExc_OverflowError,
897                             "%c arg not in range(0x110000)");
898             goto done;
899         }
900         tmp = PyUnicode_FromOrdinal(x);
901         inumeric_chars = 0;
902         n_digits = 1;
903         maxchar = Py_MAX(maxchar, (Py_UCS4)x);
904 
905         /* As a sort-of hack, we tell calc_number_widths that we only
906            have "remainder" characters. calc_number_widths thinks
907            these are characters that don't get formatted, only copied
908            into the output string. We do this for 'c' formatting,
909            because the characters are likely to be non-digits. */
910         n_remainder = 1;
911     }
912     else {
913         int base;
914         int leading_chars_to_skip = 0;  /* Number of characters added by
915                                            PyNumber_ToBase that we want to
916                                            skip over. */
917 
918         /* Compute the base and how many characters will be added by
919            PyNumber_ToBase */
920         switch (format->type) {
921         case 'b':
922             base = 2;
923             leading_chars_to_skip = 2; /* 0b */
924             break;
925         case 'o':
926             base = 8;
927             leading_chars_to_skip = 2; /* 0o */
928             break;
929         case 'x':
930         case 'X':
931             base = 16;
932             leading_chars_to_skip = 2; /* 0x */
933             break;
934         default:  /* shouldn't be needed, but stops a compiler warning */
935         case 'd':
936         case 'n':
937             base = 10;
938             break;
939         }
940 
941         if (format->sign != '+' && format->sign != ' '
942             && format->width == -1
943             && format->type != 'X' && format->type != 'n'
944             && !format->thousands_separators
945             && PyLong_CheckExact(value))
946         {
947             /* Fast path */
948             return _PyLong_FormatWriter(writer, value, base, format->alternate);
949         }
950 
951         /* The number of prefix chars is the same as the leading
952            chars to skip */
953         if (format->alternate)
954             n_prefix = leading_chars_to_skip;
955 
956         /* Do the hard part, converting to a string in a given base */
957         tmp = _PyLong_Format(value, base);
958         if (tmp == NULL || PyUnicode_READY(tmp) == -1)
959             goto done;
960 
961         inumeric_chars = 0;
962         n_digits = PyUnicode_GET_LENGTH(tmp);
963 
964         prefix = inumeric_chars;
965 
966         /* Is a sign character present in the output?  If so, remember it
967            and skip it */
968         if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
969             sign_char = '-';
970             ++prefix;
971             ++leading_chars_to_skip;
972         }
973 
974         /* Skip over the leading chars (0x, 0b, etc.) */
975         n_digits -= leading_chars_to_skip;
976         inumeric_chars += leading_chars_to_skip;
977     }
978 
979     /* Determine the grouping, separator, and decimal point, if any. */
980     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
981                         format->thousands_separators,
982                         &locale) == -1)
983         goto done;
984 
985     /* Calculate how much memory we'll need. */
986     n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
987                                  inumeric_chars + n_digits, n_remainder, 0,
988                                  &locale, format, &maxchar);
989     if (n_total == -1) {
990         goto done;
991     }
992 
993     /* Allocate the memory. */
994     if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
995         goto done;
996 
997     /* Populate the memory. */
998     result = fill_number(writer, &spec,
999                          tmp, inumeric_chars, inumeric_chars + n_digits,
1000                          tmp, prefix, format->fill_char,
1001                          &locale, format->type == 'X');
1002 
1003 done:
1004     Py_XDECREF(tmp);
1005     free_locale_info(&locale);
1006     return result;
1007 }
1008 
1009 /************************************************************************/
1010 /*********** float formatting *******************************************/
1011 /************************************************************************/
1012 
1013 /* much of this is taken from unicodeobject.c */
1014 static int
format_float_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1015 format_float_internal(PyObject *value,
1016                       const InternalFormatSpec *format,
1017                       _PyUnicodeWriter *writer)
1018 {
1019     char *buf = NULL;       /* buffer returned from PyOS_double_to_string */
1020     Py_ssize_t n_digits;
1021     Py_ssize_t n_remainder;
1022     Py_ssize_t n_total;
1023     int has_decimal;
1024     double val;
1025     int precision, default_precision = 6;
1026     Py_UCS4 type = format->type;
1027     int add_pct = 0;
1028     Py_ssize_t index;
1029     NumberFieldWidths spec;
1030     int flags = 0;
1031     int result = -1;
1032     Py_UCS4 maxchar = 127;
1033     Py_UCS4 sign_char = '\0';
1034     int float_type; /* Used to see if we have a nan, inf, or regular float. */
1035     PyObject *unicode_tmp = NULL;
1036 
1037     /* Locale settings, either from the actual locale or
1038        from a hard-code pseudo-locale */
1039     LocaleInfo locale = LocaleInfo_STATIC_INIT;
1040 
1041     if (format->precision > INT_MAX) {
1042         PyErr_SetString(PyExc_ValueError, "precision too big");
1043         goto done;
1044     }
1045     precision = (int)format->precision;
1046 
1047     if (format->alternate)
1048         flags |= Py_DTSF_ALT;
1049 
1050     if (type == '\0') {
1051         /* Omitted type specifier.  Behaves in the same way as repr(x)
1052            and str(x) if no precision is given, else like 'g', but with
1053            at least one digit after the decimal point. */
1054         flags |= Py_DTSF_ADD_DOT_0;
1055         type = 'r';
1056         default_precision = 0;
1057     }
1058 
1059     if (type == 'n')
1060         /* 'n' is the same as 'g', except for the locale used to
1061            format the result. We take care of that later. */
1062         type = 'g';
1063 
1064     val = PyFloat_AsDouble(value);
1065     if (val == -1.0 && PyErr_Occurred())
1066         goto done;
1067 
1068     if (type == '%') {
1069         type = 'f';
1070         val *= 100;
1071         add_pct = 1;
1072     }
1073 
1074     if (precision < 0)
1075         precision = default_precision;
1076     else if (type == 'r')
1077         type = 'g';
1078 
1079     /* Cast "type", because if we're in unicode we need to pass an
1080        8-bit char. This is safe, because we've restricted what "type"
1081        can be. */
1082     buf = PyOS_double_to_string(val, (char)type, precision, flags,
1083                                 &float_type);
1084     if (buf == NULL)
1085         goto done;
1086     n_digits = strlen(buf);
1087 
1088     if (add_pct) {
1089         /* We know that buf has a trailing zero (since we just called
1090            strlen() on it), and we don't use that fact any more. So we
1091            can just write over the trailing zero. */
1092         buf[n_digits] = '%';
1093         n_digits += 1;
1094     }
1095 
1096     if (format->sign != '+' && format->sign != ' '
1097         && format->width == -1
1098         && format->type != 'n'
1099         && !format->thousands_separators)
1100     {
1101         /* Fast path */
1102         result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1103         PyMem_Free(buf);
1104         return result;
1105     }
1106 
1107     /* Since there is no unicode version of PyOS_double_to_string,
1108        just use the 8 bit version and then convert to unicode. */
1109     unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1110     PyMem_Free(buf);
1111     if (unicode_tmp == NULL)
1112         goto done;
1113 
1114     /* Is a sign character present in the output?  If so, remember it
1115        and skip it */
1116     index = 0;
1117     if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1118         sign_char = '-';
1119         ++index;
1120         --n_digits;
1121     }
1122 
1123     /* Determine if we have any "remainder" (after the digits, might include
1124        decimal or exponent or both (or neither)) */
1125     parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1126 
1127     /* Determine the grouping, separator, and decimal point, if any. */
1128     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1129                         format->thousands_separators,
1130                         &locale) == -1)
1131         goto done;
1132 
1133     /* Calculate how much memory we'll need. */
1134     n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
1135                                  index + n_digits, n_remainder, has_decimal,
1136                                  &locale, format, &maxchar);
1137     if (n_total == -1) {
1138         goto done;
1139     }
1140 
1141     /* Allocate the memory. */
1142     if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1143         goto done;
1144 
1145     /* Populate the memory. */
1146     result = fill_number(writer, &spec,
1147                          unicode_tmp, index, index + n_digits,
1148                          NULL, 0, format->fill_char,
1149                          &locale, 0);
1150 
1151 done:
1152     Py_XDECREF(unicode_tmp);
1153     free_locale_info(&locale);
1154     return result;
1155 }
1156 
1157 /************************************************************************/
1158 /*********** complex formatting *****************************************/
1159 /************************************************************************/
1160 
1161 static int
format_complex_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1162 format_complex_internal(PyObject *value,
1163                         const InternalFormatSpec *format,
1164                         _PyUnicodeWriter *writer)
1165 {
1166     double re;
1167     double im;
1168     char *re_buf = NULL;       /* buffer returned from PyOS_double_to_string */
1169     char *im_buf = NULL;       /* buffer returned from PyOS_double_to_string */
1170 
1171     InternalFormatSpec tmp_format = *format;
1172     Py_ssize_t n_re_digits;
1173     Py_ssize_t n_im_digits;
1174     Py_ssize_t n_re_remainder;
1175     Py_ssize_t n_im_remainder;
1176     Py_ssize_t n_re_total;
1177     Py_ssize_t n_im_total;
1178     int re_has_decimal;
1179     int im_has_decimal;
1180     int precision, default_precision = 6;
1181     Py_UCS4 type = format->type;
1182     Py_ssize_t i_re;
1183     Py_ssize_t i_im;
1184     NumberFieldWidths re_spec;
1185     NumberFieldWidths im_spec;
1186     int flags = 0;
1187     int result = -1;
1188     Py_UCS4 maxchar = 127;
1189     enum PyUnicode_Kind rkind;
1190     void *rdata;
1191     Py_UCS4 re_sign_char = '\0';
1192     Py_UCS4 im_sign_char = '\0';
1193     int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1194     int im_float_type;
1195     int add_parens = 0;
1196     int skip_re = 0;
1197     Py_ssize_t lpad;
1198     Py_ssize_t rpad;
1199     Py_ssize_t total;
1200     PyObject *re_unicode_tmp = NULL;
1201     PyObject *im_unicode_tmp = NULL;
1202 
1203     /* Locale settings, either from the actual locale or
1204        from a hard-code pseudo-locale */
1205     LocaleInfo locale = LocaleInfo_STATIC_INIT;
1206 
1207     if (format->precision > INT_MAX) {
1208         PyErr_SetString(PyExc_ValueError, "precision too big");
1209         goto done;
1210     }
1211     precision = (int)format->precision;
1212 
1213     /* Zero padding is not allowed. */
1214     if (format->fill_char == '0') {
1215         PyErr_SetString(PyExc_ValueError,
1216                         "Zero padding is not allowed in complex format "
1217                         "specifier");
1218         goto done;
1219     }
1220 
1221     /* Neither is '=' alignment . */
1222     if (format->align == '=') {
1223         PyErr_SetString(PyExc_ValueError,
1224                         "'=' alignment flag is not allowed in complex format "
1225                         "specifier");
1226         goto done;
1227     }
1228 
1229     re = PyComplex_RealAsDouble(value);
1230     if (re == -1.0 && PyErr_Occurred())
1231         goto done;
1232     im = PyComplex_ImagAsDouble(value);
1233     if (im == -1.0 && PyErr_Occurred())
1234         goto done;
1235 
1236     if (format->alternate)
1237         flags |= Py_DTSF_ALT;
1238 
1239     if (type == '\0') {
1240         /* Omitted type specifier. Should be like str(self). */
1241         type = 'r';
1242         default_precision = 0;
1243         if (re == 0.0 && copysign(1.0, re) == 1.0)
1244             skip_re = 1;
1245         else
1246             add_parens = 1;
1247     }
1248 
1249     if (type == 'n')
1250         /* 'n' is the same as 'g', except for the locale used to
1251            format the result. We take care of that later. */
1252         type = 'g';
1253 
1254     if (precision < 0)
1255         precision = default_precision;
1256     else if (type == 'r')
1257         type = 'g';
1258 
1259     /* Cast "type", because if we're in unicode we need to pass an
1260        8-bit char. This is safe, because we've restricted what "type"
1261        can be. */
1262     re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1263                                    &re_float_type);
1264     if (re_buf == NULL)
1265         goto done;
1266     im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1267                                    &im_float_type);
1268     if (im_buf == NULL)
1269         goto done;
1270 
1271     n_re_digits = strlen(re_buf);
1272     n_im_digits = strlen(im_buf);
1273 
1274     /* Since there is no unicode version of PyOS_double_to_string,
1275        just use the 8 bit version and then convert to unicode. */
1276     re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
1277     if (re_unicode_tmp == NULL)
1278         goto done;
1279     i_re = 0;
1280 
1281     im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
1282     if (im_unicode_tmp == NULL)
1283         goto done;
1284     i_im = 0;
1285 
1286     /* Is a sign character present in the output?  If so, remember it
1287        and skip it */
1288     if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1289         re_sign_char = '-';
1290         ++i_re;
1291         --n_re_digits;
1292     }
1293     if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1294         im_sign_char = '-';
1295         ++i_im;
1296         --n_im_digits;
1297     }
1298 
1299     /* Determine if we have any "remainder" (after the digits, might include
1300        decimal or exponent or both (or neither)) */
1301     parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
1302                  &n_re_remainder, &re_has_decimal);
1303     parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
1304                  &n_im_remainder, &im_has_decimal);
1305 
1306     /* Determine the grouping, separator, and decimal point, if any. */
1307     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1308                         format->thousands_separators,
1309                         &locale) == -1)
1310         goto done;
1311 
1312     /* Turn off any padding. We'll do it later after we've composed
1313        the numbers without padding. */
1314     tmp_format.fill_char = '\0';
1315     tmp_format.align = '<';
1316     tmp_format.width = -1;
1317 
1318     /* Calculate how much memory we'll need. */
1319     n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1320                                     i_re, i_re + n_re_digits, n_re_remainder,
1321                                     re_has_decimal, &locale, &tmp_format,
1322                                     &maxchar);
1323     if (n_re_total == -1) {
1324         goto done;
1325     }
1326 
1327     /* Same formatting, but always include a sign, unless the real part is
1328      * going to be omitted, in which case we use whatever sign convention was
1329      * requested by the original format. */
1330     if (!skip_re)
1331         tmp_format.sign = '+';
1332     n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1333                                     i_im, i_im + n_im_digits, n_im_remainder,
1334                                     im_has_decimal, &locale, &tmp_format,
1335                                     &maxchar);
1336     if (n_im_total == -1) {
1337         goto done;
1338     }
1339 
1340     if (skip_re)
1341         n_re_total = 0;
1342 
1343     /* Add 1 for the 'j', and optionally 2 for parens. */
1344     calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1345                  format->width, format->align, &lpad, &rpad, &total);
1346 
1347     if (lpad || rpad)
1348         maxchar = Py_MAX(maxchar, format->fill_char);
1349 
1350     if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
1351         goto done;
1352     rkind = writer->kind;
1353     rdata = writer->data;
1354 
1355     /* Populate the memory. First, the padding. */
1356     result = fill_padding(writer,
1357                           n_re_total + n_im_total + 1 + add_parens * 2,
1358                           format->fill_char, lpad, rpad);
1359     if (result == -1)
1360         goto done;
1361 
1362     if (add_parens) {
1363         PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1364         writer->pos++;
1365     }
1366 
1367     if (!skip_re) {
1368         result = fill_number(writer, &re_spec,
1369                              re_unicode_tmp, i_re, i_re + n_re_digits,
1370                              NULL, 0,
1371                              0,
1372                              &locale, 0);
1373         if (result == -1)
1374             goto done;
1375     }
1376     result = fill_number(writer, &im_spec,
1377                          im_unicode_tmp, i_im, i_im + n_im_digits,
1378                          NULL, 0,
1379                          0,
1380                          &locale, 0);
1381     if (result == -1)
1382         goto done;
1383     PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1384     writer->pos++;
1385 
1386     if (add_parens) {
1387         PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1388         writer->pos++;
1389     }
1390 
1391     writer->pos += rpad;
1392 
1393 done:
1394     PyMem_Free(re_buf);
1395     PyMem_Free(im_buf);
1396     Py_XDECREF(re_unicode_tmp);
1397     Py_XDECREF(im_unicode_tmp);
1398     free_locale_info(&locale);
1399     return result;
1400 }
1401 
1402 /************************************************************************/
1403 /*********** built in formatters ****************************************/
1404 /************************************************************************/
1405 static int
format_obj(PyObject * obj,_PyUnicodeWriter * writer)1406 format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1407 {
1408     PyObject *str;
1409     int err;
1410 
1411     str = PyObject_Str(obj);
1412     if (str == NULL)
1413         return -1;
1414     err = _PyUnicodeWriter_WriteStr(writer, str);
1415     Py_DECREF(str);
1416     return err;
1417 }
1418 
1419 int
_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1420 _PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1421                                 PyObject *obj,
1422                                 PyObject *format_spec,
1423                                 Py_ssize_t start, Py_ssize_t end)
1424 {
1425     InternalFormatSpec format;
1426 
1427     assert(PyUnicode_Check(obj));
1428 
1429     /* check for the special case of zero length format spec, make
1430        it equivalent to str(obj) */
1431     if (start == end) {
1432         if (PyUnicode_CheckExact(obj))
1433             return _PyUnicodeWriter_WriteStr(writer, obj);
1434         else
1435             return format_obj(obj, writer);
1436     }
1437 
1438     /* parse the format_spec */
1439     if (!parse_internal_render_format_spec(format_spec, start, end,
1440                                            &format, 's', '<'))
1441         return -1;
1442 
1443     /* type conversion? */
1444     switch (format.type) {
1445     case 's':
1446         /* no type conversion needed, already a string.  do the formatting */
1447         return format_string_internal(obj, &format, writer);
1448     default:
1449         /* unknown */
1450         unknown_presentation_type(format.type, obj->ob_type->tp_name);
1451         return -1;
1452     }
1453 }
1454 
1455 int
_PyLong_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1456 _PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1457                              PyObject *obj,
1458                              PyObject *format_spec,
1459                              Py_ssize_t start, Py_ssize_t end)
1460 {
1461     PyObject *tmp = NULL, *str = NULL;
1462     InternalFormatSpec format;
1463     int result = -1;
1464 
1465     /* check for the special case of zero length format spec, make
1466        it equivalent to str(obj) */
1467     if (start == end) {
1468         if (PyLong_CheckExact(obj))
1469             return _PyLong_FormatWriter(writer, obj, 10, 0);
1470         else
1471             return format_obj(obj, writer);
1472     }
1473 
1474     /* parse the format_spec */
1475     if (!parse_internal_render_format_spec(format_spec, start, end,
1476                                            &format, 'd', '>'))
1477         goto done;
1478 
1479     /* type conversion? */
1480     switch (format.type) {
1481     case 'b':
1482     case 'c':
1483     case 'd':
1484     case 'o':
1485     case 'x':
1486     case 'X':
1487     case 'n':
1488         /* no type conversion needed, already an int.  do the formatting */
1489         result = format_long_internal(obj, &format, writer);
1490         break;
1491 
1492     case 'e':
1493     case 'E':
1494     case 'f':
1495     case 'F':
1496     case 'g':
1497     case 'G':
1498     case '%':
1499         /* convert to float */
1500         tmp = PyNumber_Float(obj);
1501         if (tmp == NULL)
1502             goto done;
1503         result = format_float_internal(tmp, &format, writer);
1504         break;
1505 
1506     default:
1507         /* unknown */
1508         unknown_presentation_type(format.type, obj->ob_type->tp_name);
1509         goto done;
1510     }
1511 
1512 done:
1513     Py_XDECREF(tmp);
1514     Py_XDECREF(str);
1515     return result;
1516 }
1517 
1518 int
_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1519 _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1520                               PyObject *obj,
1521                               PyObject *format_spec,
1522                               Py_ssize_t start, Py_ssize_t end)
1523 {
1524     InternalFormatSpec format;
1525 
1526     /* check for the special case of zero length format spec, make
1527        it equivalent to str(obj) */
1528     if (start == end)
1529         return format_obj(obj, writer);
1530 
1531     /* parse the format_spec */
1532     if (!parse_internal_render_format_spec(format_spec, start, end,
1533                                            &format, '\0', '>'))
1534         return -1;
1535 
1536     /* type conversion? */
1537     switch (format.type) {
1538     case '\0': /* No format code: like 'g', but with at least one decimal. */
1539     case 'e':
1540     case 'E':
1541     case 'f':
1542     case 'F':
1543     case 'g':
1544     case 'G':
1545     case 'n':
1546     case '%':
1547         /* no conversion, already a float.  do the formatting */
1548         return format_float_internal(obj, &format, writer);
1549 
1550     default:
1551         /* unknown */
1552         unknown_presentation_type(format.type, obj->ob_type->tp_name);
1553         return -1;
1554     }
1555 }
1556 
1557 int
_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1558 _PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1559                                 PyObject *obj,
1560                                 PyObject *format_spec,
1561                                 Py_ssize_t start, Py_ssize_t end)
1562 {
1563     InternalFormatSpec format;
1564 
1565     /* check for the special case of zero length format spec, make
1566        it equivalent to str(obj) */
1567     if (start == end)
1568         return format_obj(obj, writer);
1569 
1570     /* parse the format_spec */
1571     if (!parse_internal_render_format_spec(format_spec, start, end,
1572                                            &format, '\0', '>'))
1573         return -1;
1574 
1575     /* type conversion? */
1576     switch (format.type) {
1577     case '\0': /* No format code: like 'g', but with at least one decimal. */
1578     case 'e':
1579     case 'E':
1580     case 'f':
1581     case 'F':
1582     case 'g':
1583     case 'G':
1584     case 'n':
1585         /* no conversion, already a complex.  do the formatting */
1586         return format_complex_internal(obj, &format, writer);
1587 
1588     default:
1589         /* unknown */
1590         unknown_presentation_type(format.type, obj->ob_type->tp_name);
1591         return -1;
1592     }
1593 }
1594