1 /* Formatted output to strings.
2    Copyright (C) 1999-2000, 2002-2003, 2006-2021 Free Software Foundation, Inc.
3 
4    This file is free software: you can redistribute it and/or modify
5    it under the terms of the GNU Lesser General Public License as
6    published by the Free Software Foundation; either version 2.1 of the
7    License, or (at your option) any later version.
8 
9    This file is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU Lesser General Public License for more details.
13 
14    You should have received a copy of the GNU Lesser General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 /* This file can be parametrized with the following macros:
18      CHAR_T             The element type of the format string.
19      CHAR_T_ONLY_ASCII  Set to 1 to enable verification that all characters
20                         in the format string are ASCII.
21      DIRECTIVE          Structure denoting a format directive.
22                         Depends on CHAR_T.
23      DIRECTIVES         Structure denoting the set of format directives of a
24                         format string.  Depends on CHAR_T.
25      PRINTF_PARSE       Function that parses a format string.
26                         Depends on CHAR_T.
27      STATIC             Set to 'static' to declare the function static.
28      ENABLE_UNISTDIO    Set to 1 to enable the unistdio extensions.  */
29 
30 #ifndef PRINTF_PARSE
31 # include <config.h>
32 #endif
33 
34 /* Specification.  */
35 #ifndef PRINTF_PARSE
36 # include "printf-parse.h"
37 #endif
38 
39 /* Default parameters.  */
40 #ifndef PRINTF_PARSE
41 # define PRINTF_PARSE printf_parse
42 # define CHAR_T char
43 # define DIRECTIVE char_directive
44 # define DIRECTIVES char_directives
45 #endif
46 
47 /* Get size_t, NULL.  */
48 #include <stddef.h>
49 
50 /* Get intmax_t.  */
51 #include <stdint.h>
52 
53 /* malloc(), realloc(), free().  */
54 #include <stdlib.h>
55 
56 /* memcpy().  */
57 #include <string.h>
58 
59 /* errno.  */
60 #include <errno.h>
61 
62 /* Checked size_t computations.  */
63 #include "xsize.h"
64 
65 #if CHAR_T_ONLY_ASCII
66 /* c_isascii().  */
67 # include "c-ctype.h"
68 #endif
69 
70 #ifdef STATIC
71 STATIC
72 #endif
73 int
PRINTF_PARSE(const CHAR_T * format,DIRECTIVES * d,arguments * a)74 PRINTF_PARSE (const CHAR_T *format, DIRECTIVES *d, arguments *a)
75 {
76   const CHAR_T *cp = format;    /* pointer into format */
77   size_t arg_posn = 0;          /* number of regular arguments consumed */
78   size_t d_allocated;           /* allocated elements of d->dir */
79   size_t a_allocated;           /* allocated elements of a->arg */
80   size_t max_width_length = 0;
81   size_t max_precision_length = 0;
82 
83   d->count = 0;
84   d_allocated = N_DIRECT_ALLOC_DIRECTIVES;
85   d->dir = d->direct_alloc_dir;
86 
87   a->count = 0;
88   a_allocated = N_DIRECT_ALLOC_ARGUMENTS;
89   a->arg = a->direct_alloc_arg;
90 
91 #define REGISTER_ARG(_index_,_type_) \
92   {                                                                     \
93     size_t n = (_index_);                                               \
94     if (n >= a_allocated)                                               \
95       {                                                                 \
96         size_t memory_size;                                             \
97         argument *memory;                                               \
98                                                                         \
99         a_allocated = xtimes (a_allocated, 2);                          \
100         if (a_allocated <= n)                                           \
101           a_allocated = xsum (n, 1);                                    \
102         memory_size = xtimes (a_allocated, sizeof (argument));          \
103         if (size_overflow_p (memory_size))                              \
104           /* Overflow, would lead to out of memory.  */                 \
105           goto out_of_memory;                                           \
106         memory = (argument *) (a->arg != a->direct_alloc_arg            \
107                                ? realloc (a->arg, memory_size)          \
108                                : malloc (memory_size));                 \
109         if (memory == NULL)                                             \
110           /* Out of memory.  */                                         \
111           goto out_of_memory;                                           \
112         if (a->arg == a->direct_alloc_arg)                              \
113           memcpy (memory, a->arg, a->count * sizeof (argument));        \
114         a->arg = memory;                                                \
115       }                                                                 \
116     while (a->count <= n)                                               \
117       a->arg[a->count++].type = TYPE_NONE;                              \
118     if (a->arg[n].type == TYPE_NONE)                                    \
119       a->arg[n].type = (_type_);                                        \
120     else if (a->arg[n].type != (_type_))                                \
121       /* Ambiguous type for positional argument.  */                    \
122       goto error;                                                       \
123   }
124 
125   while (*cp != '\0')
126     {
127       CHAR_T c = *cp++;
128       if (c == '%')
129         {
130           size_t arg_index = ARG_NONE;
131           DIRECTIVE *dp = &d->dir[d->count]; /* pointer to next directive */
132 
133           /* Initialize the next directive.  */
134           dp->dir_start = cp - 1;
135           dp->flags = 0;
136           dp->width_start = NULL;
137           dp->width_end = NULL;
138           dp->width_arg_index = ARG_NONE;
139           dp->precision_start = NULL;
140           dp->precision_end = NULL;
141           dp->precision_arg_index = ARG_NONE;
142           dp->arg_index = ARG_NONE;
143 
144           /* Test for positional argument.  */
145           if (*cp >= '0' && *cp <= '9')
146             {
147               const CHAR_T *np;
148 
149               for (np = cp; *np >= '0' && *np <= '9'; np++)
150                 ;
151               if (*np == '$')
152                 {
153                   size_t n = 0;
154 
155                   for (np = cp; *np >= '0' && *np <= '9'; np++)
156                     n = xsum (xtimes (n, 10), *np - '0');
157                   if (n == 0)
158                     /* Positional argument 0.  */
159                     goto error;
160                   if (size_overflow_p (n))
161                     /* n too large, would lead to out of memory later.  */
162                     goto error;
163                   arg_index = n - 1;
164                   cp = np + 1;
165                 }
166             }
167 
168           /* Read the flags.  */
169           for (;;)
170             {
171               if (*cp == '\'')
172                 {
173                   dp->flags |= FLAG_GROUP;
174                   cp++;
175                 }
176               else if (*cp == '-')
177                 {
178                   dp->flags |= FLAG_LEFT;
179                   cp++;
180                 }
181               else if (*cp == '+')
182                 {
183                   dp->flags |= FLAG_SHOWSIGN;
184                   cp++;
185                 }
186               else if (*cp == ' ')
187                 {
188                   dp->flags |= FLAG_SPACE;
189                   cp++;
190                 }
191               else if (*cp == '#')
192                 {
193                   dp->flags |= FLAG_ALT;
194                   cp++;
195                 }
196               else if (*cp == '0')
197                 {
198                   dp->flags |= FLAG_ZERO;
199                   cp++;
200                 }
201 #if __GLIBC__ >= 2 && !defined __UCLIBC__
202               else if (*cp == 'I')
203                 {
204                   dp->flags |= FLAG_LOCALIZED;
205                   cp++;
206                 }
207 #endif
208               else
209                 break;
210             }
211 
212           /* Parse the field width.  */
213           if (*cp == '*')
214             {
215               dp->width_start = cp;
216               cp++;
217               dp->width_end = cp;
218               if (max_width_length < 1)
219                 max_width_length = 1;
220 
221               /* Test for positional argument.  */
222               if (*cp >= '0' && *cp <= '9')
223                 {
224                   const CHAR_T *np;
225 
226                   for (np = cp; *np >= '0' && *np <= '9'; np++)
227                     ;
228                   if (*np == '$')
229                     {
230                       size_t n = 0;
231 
232                       for (np = cp; *np >= '0' && *np <= '9'; np++)
233                         n = xsum (xtimes (n, 10), *np - '0');
234                       if (n == 0)
235                         /* Positional argument 0.  */
236                         goto error;
237                       if (size_overflow_p (n))
238                         /* n too large, would lead to out of memory later.  */
239                         goto error;
240                       dp->width_arg_index = n - 1;
241                       cp = np + 1;
242                     }
243                 }
244               if (dp->width_arg_index == ARG_NONE)
245                 {
246                   dp->width_arg_index = arg_posn++;
247                   if (dp->width_arg_index == ARG_NONE)
248                     /* arg_posn wrapped around.  */
249                     goto error;
250                 }
251               REGISTER_ARG (dp->width_arg_index, TYPE_INT);
252             }
253           else if (*cp >= '0' && *cp <= '9')
254             {
255               size_t width_length;
256 
257               dp->width_start = cp;
258               for (; *cp >= '0' && *cp <= '9'; cp++)
259                 ;
260               dp->width_end = cp;
261               width_length = dp->width_end - dp->width_start;
262               if (max_width_length < width_length)
263                 max_width_length = width_length;
264             }
265 
266           /* Parse the precision.  */
267           if (*cp == '.')
268             {
269               cp++;
270               if (*cp == '*')
271                 {
272                   dp->precision_start = cp - 1;
273                   cp++;
274                   dp->precision_end = cp;
275                   if (max_precision_length < 2)
276                     max_precision_length = 2;
277 
278                   /* Test for positional argument.  */
279                   if (*cp >= '0' && *cp <= '9')
280                     {
281                       const CHAR_T *np;
282 
283                       for (np = cp; *np >= '0' && *np <= '9'; np++)
284                         ;
285                       if (*np == '$')
286                         {
287                           size_t n = 0;
288 
289                           for (np = cp; *np >= '0' && *np <= '9'; np++)
290                             n = xsum (xtimes (n, 10), *np - '0');
291                           if (n == 0)
292                             /* Positional argument 0.  */
293                             goto error;
294                           if (size_overflow_p (n))
295                             /* n too large, would lead to out of memory
296                                later.  */
297                             goto error;
298                           dp->precision_arg_index = n - 1;
299                           cp = np + 1;
300                         }
301                     }
302                   if (dp->precision_arg_index == ARG_NONE)
303                     {
304                       dp->precision_arg_index = arg_posn++;
305                       if (dp->precision_arg_index == ARG_NONE)
306                         /* arg_posn wrapped around.  */
307                         goto error;
308                     }
309                   REGISTER_ARG (dp->precision_arg_index, TYPE_INT);
310                 }
311               else
312                 {
313                   size_t precision_length;
314 
315                   dp->precision_start = cp - 1;
316                   for (; *cp >= '0' && *cp <= '9'; cp++)
317                     ;
318                   dp->precision_end = cp;
319                   precision_length = dp->precision_end - dp->precision_start;
320                   if (max_precision_length < precision_length)
321                     max_precision_length = precision_length;
322                 }
323             }
324 
325           {
326             arg_type type;
327 
328             /* Parse argument type/size specifiers.  */
329             {
330               int flags = 0;
331 
332               for (;;)
333                 {
334                   if (*cp == 'h')
335                     {
336                       flags |= (1 << (flags & 1));
337                       cp++;
338                     }
339                   else if (*cp == 'L')
340                     {
341                       flags |= 4;
342                       cp++;
343                     }
344                   else if (*cp == 'l')
345                     {
346                       flags += 8;
347                       cp++;
348                     }
349                   else if (*cp == 'j')
350                     {
351                       if (sizeof (intmax_t) > sizeof (long))
352                         {
353                           /* intmax_t = long long */
354                           flags += 16;
355                         }
356                       else if (sizeof (intmax_t) > sizeof (int))
357                         {
358                           /* intmax_t = long */
359                           flags += 8;
360                         }
361                       cp++;
362                     }
363                   else if (*cp == 'z' || *cp == 'Z')
364                     {
365                       /* 'z' is standardized in ISO C 99, but glibc uses 'Z'
366                          because the warning facility in gcc-2.95.2 understands
367                          only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784).  */
368                       if (sizeof (size_t) > sizeof (long))
369                         {
370                           /* size_t = long long */
371                           flags += 16;
372                         }
373                       else if (sizeof (size_t) > sizeof (int))
374                         {
375                           /* size_t = long */
376                           flags += 8;
377                         }
378                       cp++;
379                     }
380                   else if (*cp == 't')
381                     {
382                       if (sizeof (ptrdiff_t) > sizeof (long))
383                         {
384                           /* ptrdiff_t = long long */
385                           flags += 16;
386                         }
387                       else if (sizeof (ptrdiff_t) > sizeof (int))
388                         {
389                           /* ptrdiff_t = long */
390                           flags += 8;
391                         }
392                       cp++;
393                     }
394 #if defined __APPLE__ && defined __MACH__
395                   /* On Mac OS X 10.3, PRIdMAX is defined as "qd".
396                      We cannot change it to "lld" because PRIdMAX must also
397                      be understood by the system's printf routines.  */
398                   else if (*cp == 'q')
399                     {
400                       if (64 / 8 > sizeof (long))
401                         {
402                           /* int64_t = long long */
403                           flags += 16;
404                         }
405                       else
406                         {
407                           /* int64_t = long */
408                           flags += 8;
409                         }
410                       cp++;
411                     }
412 #endif
413 #if defined _WIN32 && ! defined __CYGWIN__
414                   /* On native Windows, PRIdMAX is defined as "I64d".
415                      We cannot change it to "lld" because PRIdMAX must also
416                      be understood by the system's printf routines.  */
417                   else if (*cp == 'I' && cp[1] == '6' && cp[2] == '4')
418                     {
419                       if (64 / 8 > sizeof (long))
420                         {
421                           /* __int64 = long long */
422                           flags += 16;
423                         }
424                       else
425                         {
426                           /* __int64 = long */
427                           flags += 8;
428                         }
429                       cp += 3;
430                     }
431 #endif
432                   else
433                     break;
434                 }
435 
436               /* Read the conversion character.  */
437               c = *cp++;
438               switch (c)
439                 {
440                 case 'd': case 'i':
441                   /* If 'long long' is larger than 'long':  */
442                   if (flags >= 16 || (flags & 4))
443                     type = TYPE_LONGLONGINT;
444                   else
445                   /* If 'long long' is the same as 'long', we parse "lld" into
446                      TYPE_LONGINT.  */
447                   if (flags >= 8)
448                     type = TYPE_LONGINT;
449                   else if (flags & 2)
450                     type = TYPE_SCHAR;
451                   else if (flags & 1)
452                     type = TYPE_SHORT;
453                   else
454                     type = TYPE_INT;
455                   break;
456                 case 'o': case 'u': case 'x': case 'X':
457                   /* If 'unsigned long long' is larger than 'unsigned long':  */
458                   if (flags >= 16 || (flags & 4))
459                     type = TYPE_ULONGLONGINT;
460                   else
461                   /* If 'unsigned long long' is the same as 'unsigned long', we
462                      parse "llu" into TYPE_ULONGINT.  */
463                   if (flags >= 8)
464                     type = TYPE_ULONGINT;
465                   else if (flags & 2)
466                     type = TYPE_UCHAR;
467                   else if (flags & 1)
468                     type = TYPE_USHORT;
469                   else
470                     type = TYPE_UINT;
471                   break;
472                 case 'f': case 'F': case 'e': case 'E': case 'g': case 'G':
473                 case 'a': case 'A':
474                   if (flags >= 16 || (flags & 4))
475                     type = TYPE_LONGDOUBLE;
476                   else
477                     type = TYPE_DOUBLE;
478                   break;
479                 case 'c':
480                   if (flags >= 8)
481 #if HAVE_WINT_T
482                     type = TYPE_WIDE_CHAR;
483 #else
484                     goto error;
485 #endif
486                   else
487                     type = TYPE_CHAR;
488                   break;
489 #if HAVE_WINT_T
490                 case 'C':
491                   type = TYPE_WIDE_CHAR;
492                   c = 'c';
493                   break;
494 #endif
495                 case 's':
496                   if (flags >= 8)
497 #if HAVE_WCHAR_T
498                     type = TYPE_WIDE_STRING;
499 #else
500                     goto error;
501 #endif
502                   else
503                     type = TYPE_STRING;
504                   break;
505 #if HAVE_WCHAR_T
506                 case 'S':
507                   type = TYPE_WIDE_STRING;
508                   c = 's';
509                   break;
510 #endif
511                 case 'p':
512                   type = TYPE_POINTER;
513                   break;
514                 case 'n':
515                   /* If 'long long' is larger than 'long':  */
516                   if (flags >= 16 || (flags & 4))
517                     type = TYPE_COUNT_LONGLONGINT_POINTER;
518                   else
519                   /* If 'long long' is the same as 'long', we parse "lln" into
520                      TYPE_COUNT_LONGINT_POINTER.  */
521                   if (flags >= 8)
522                     type = TYPE_COUNT_LONGINT_POINTER;
523                   else if (flags & 2)
524                     type = TYPE_COUNT_SCHAR_POINTER;
525                   else if (flags & 1)
526                     type = TYPE_COUNT_SHORT_POINTER;
527                   else
528                     type = TYPE_COUNT_INT_POINTER;
529                   break;
530 #if ENABLE_UNISTDIO
531                 /* The unistdio extensions.  */
532                 case 'U':
533                   if (flags >= 16)
534                     type = TYPE_U32_STRING;
535                   else if (flags >= 8)
536                     type = TYPE_U16_STRING;
537                   else
538                     type = TYPE_U8_STRING;
539                   break;
540 #endif
541                 case '%':
542                   type = TYPE_NONE;
543                   break;
544                 default:
545                   /* Unknown conversion character.  */
546                   goto error;
547                 }
548             }
549 
550             if (type != TYPE_NONE)
551               {
552                 dp->arg_index = arg_index;
553                 if (dp->arg_index == ARG_NONE)
554                   {
555                     dp->arg_index = arg_posn++;
556                     if (dp->arg_index == ARG_NONE)
557                       /* arg_posn wrapped around.  */
558                       goto error;
559                   }
560                 REGISTER_ARG (dp->arg_index, type);
561               }
562             dp->conversion = c;
563             dp->dir_end = cp;
564           }
565 
566           d->count++;
567           if (d->count >= d_allocated)
568             {
569               size_t memory_size;
570               DIRECTIVE *memory;
571 
572               d_allocated = xtimes (d_allocated, 2);
573               memory_size = xtimes (d_allocated, sizeof (DIRECTIVE));
574               if (size_overflow_p (memory_size))
575                 /* Overflow, would lead to out of memory.  */
576                 goto out_of_memory;
577               memory = (DIRECTIVE *) (d->dir != d->direct_alloc_dir
578                                       ? realloc (d->dir, memory_size)
579                                       : malloc (memory_size));
580               if (memory == NULL)
581                 /* Out of memory.  */
582                 goto out_of_memory;
583               if (d->dir == d->direct_alloc_dir)
584                 memcpy (memory, d->dir, d->count * sizeof (DIRECTIVE));
585               d->dir = memory;
586             }
587         }
588 #if CHAR_T_ONLY_ASCII
589       else if (!c_isascii (c))
590         {
591           /* Non-ASCII character.  Not supported.  */
592           goto error;
593         }
594 #endif
595     }
596   d->dir[d->count].dir_start = cp;
597 
598   d->max_width_length = max_width_length;
599   d->max_precision_length = max_precision_length;
600   return 0;
601 
602 error:
603   if (a->arg != a->direct_alloc_arg)
604     free (a->arg);
605   if (d->dir != d->direct_alloc_dir)
606     free (d->dir);
607   errno = EINVAL;
608   return -1;
609 
610 out_of_memory:
611   if (a->arg != a->direct_alloc_arg)
612     free (a->arg);
613   if (d->dir != d->direct_alloc_dir)
614     free (d->dir);
615   errno = ENOMEM;
616   return -1;
617 }
618 
619 #undef PRINTF_PARSE
620 #undef DIRECTIVES
621 #undef DIRECTIVE
622 #undef CHAR_T_ONLY_ASCII
623 #undef CHAR_T
624