1 /* Python format strings.
2    Copyright (C) 2001-2004, 2006-2009, 2019-2020 Free Software Foundation, Inc.
3    Written by Bruno Haible <haible@clisp.cons.org>, 2001.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17 
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21 
22 #include <stdbool.h>
23 #include <stdlib.h>
24 #include <string.h>
25 
26 #include "format.h"
27 #include "c-ctype.h"
28 #include "xalloc.h"
29 #include "xvasprintf.h"
30 #include "format-invalid.h"
31 #include "gettext.h"
32 
33 #define _(str) gettext (str)
34 
35 /* Python format strings are described in
36      Python Library reference
37      2. Built-in Types, Exceptions and Functions
38      2.1. Built-in Types
39      2.1.5. Sequence Types
40      2.1.5.2. String Formatting Operations
41    Any string or Unicode string can act as format string via the '%' operator,
42    implemented in stringobject.c and unicodeobject.c.
43    A directive
44    - starts with '%'
45    - is optionally followed by '(ident)' where ident is any sequence of
46      characters with balanced left and right parentheses,
47    - is optionally followed by any of the characters '-' (left justification),
48      '+' (sign), ' ' (blank), '#' (alt), '0' (zero), each of which acts as a
49      flag,
50    - is optionally followed by a width specification: '*' (reads an argument)
51      or a nonempty digit sequence,
52    - is optionally followed by '.' and a precision specification: '*' (reads
53      an argument) or a nonempty digit sequence,
54    - is optionally followed by a size specifier, one of 'h' 'l' 'L'.
55    - is finished by a specifier
56        - '%', that needs no argument,
57        - 'c', that needs a character argument,
58        - 's', 'r', that need a string argument (or, when a precision of 0 is
59          given, an argument of any type),
60        - 'i', 'd', 'u', 'o', 'x', 'X', that need an integer argument,
61        - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument.
62    Use of '(ident)' and use of unnamed argument specifications are exclusive,
63    because the first requires a mapping as argument, while the second requires
64    a tuple as argument. When unnamed arguments are used, the number of
65    arguments in the format string and the number of elements in the argument
66    tuple (to the right of the '%' operator) must be the same.
67  */
68 
/* Kind of argument that a single directive consumes.  */
enum format_arg_type
{
  /* No argument at all; assigned to the '%%' directive, and also used to
     mark a named argument whose uses turned out to be incompatible.  */
  FAT_NONE,
  /* Argument of any type; assigned to 's' and 'r' with a precision that
     consists only of zeros.  */
  FAT_ANY,
  /* Assigned to the 'c' directive.  */
  FAT_CHARACTER,
  /* Assigned to 's' and 'r' otherwise.  */
  FAT_STRING,
  /* Assigned to 'i', 'd', 'u', 'o', 'x', 'X', and to every '*' width or
     precision.  */
  FAT_INTEGER,
  /* Assigned to 'e', 'E', 'f', 'g', 'G'.  */
  FAT_FLOAT
};
78 
79 struct named_arg
80 {
81   char *name;
82   enum format_arg_type type;
83 };
84 
85 struct unnamed_arg
86 {
87   enum format_arg_type type;
88 };
89 
/* Result of parsing one format string.  format_parse never produces a
   description in which both argument counts are nonzero.  */
struct spec
{
  /* Number of '%' directives seen, including '%%'.  */
  unsigned int directives;
  /* Number of valid entries in NAMED.  */
  unsigned int named_arg_count;
  /* Number of valid entries in UNNAMED.  */
  unsigned int unnamed_arg_count;
  /* Named arguments; once parsing succeeded they are sorted by name and
     free of duplicates.  NULL if there are none.  */
  struct named_arg *named;
  /* Positional arguments in order of appearance.  NULL if there are
     none.  */
  struct unnamed_arg *unnamed;
};
98 
/* Test for an ASCII decimal digit without consulting the locale.
   Unlike <ctype.h> isdigit, whose argument must be an 'unsigned char',
   this one accepts a plain 'char' as well: subtracting '0' and converting
   to unsigned maps every non-digit to a value above 9.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') <= 9)
104 
105 
106 static int
named_arg_compare(const void * p1,const void * p2)107 named_arg_compare (const void *p1, const void *p2)
108 {
109   return strcmp (((const struct named_arg *) p1)->name,
110                  ((const struct named_arg *) p2)->name);
111 }
112 
/* Expands to a freshly allocated (xstrdup) copy of the translated message
   reported when one format string uses both '%(ident)' directives and
   positional ones.  */
#define INVALID_MIXES_NAMED_UNNAMED() \
  xstrdup (_("The string refers to arguments both through argument names and through unnamed argument specifications."))
115 
116 static void *
format_parse(const char * format,bool translated,char * fdi,char ** invalid_reason)117 format_parse (const char *format, bool translated, char *fdi,
118               char **invalid_reason)
119 {
120   const char *const format_start = format;
121   struct spec spec;
122   unsigned int allocated;
123   struct spec *result;
124 
125   spec.directives = 0;
126   spec.named_arg_count = 0;
127   spec.unnamed_arg_count = 0;
128   spec.named = NULL;
129   spec.unnamed = NULL;
130   allocated = 0;
131 
132   for (; *format != '\0';)
133     if (*format++ == '%')
134       {
135         /* A directive.  */
136         char *name = NULL;
137         bool zero_precision = false;
138         enum format_arg_type type;
139 
140         FDI_SET (format - 1, FMTDIR_START);
141         spec.directives++;
142 
143         if (*format == '(')
144           {
145             unsigned int depth;
146             const char *name_start;
147             const char *name_end;
148             size_t n;
149 
150             name_start = ++format;
151             depth = 0;
152             for (; *format != '\0'; format++)
153               {
154                 if (*format == '(')
155                   depth++;
156                 else if (*format == ')')
157                   {
158                     if (depth == 0)
159                       break;
160                     else
161                       depth--;
162                   }
163               }
164             if (*format == '\0')
165               {
166                 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
167                 FDI_SET (format - 1, FMTDIR_ERROR);
168                 goto bad_format;
169               }
170             name_end = format++;
171 
172             n = name_end - name_start;
173             name = XNMALLOC (n + 1, char);
174             memcpy (name, name_start, n);
175             name[n] = '\0';
176           }
177 
178         while (*format == '-' || *format == '+' || *format == ' '
179                || *format == '#' || *format == '0')
180           format++;
181 
182         if (*format == '*')
183           {
184             format++;
185 
186             /* Named and unnamed specifications are exclusive.  */
187             if (spec.named_arg_count > 0)
188               {
189                 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
190                 FDI_SET (format - 1, FMTDIR_ERROR);
191                 goto bad_format;
192               }
193 
194             if (allocated == spec.unnamed_arg_count)
195               {
196                 allocated = 2 * allocated + 1;
197                 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, allocated * sizeof (struct unnamed_arg));
198               }
199             spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
200             spec.unnamed_arg_count++;
201           }
202         else if (isdigit (*format))
203           {
204             do format++; while (isdigit (*format));
205           }
206 
207         if (*format == '.')
208           {
209             format++;
210 
211             if (*format == '*')
212               {
213                 format++;
214 
215                 /* Named and unnamed specifications are exclusive.  */
216                 if (spec.named_arg_count > 0)
217                   {
218                     *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
219                     FDI_SET (format - 1, FMTDIR_ERROR);
220                     goto bad_format;
221                   }
222 
223                 if (allocated == spec.unnamed_arg_count)
224                   {
225                     allocated = 2 * allocated + 1;
226                     spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, allocated * sizeof (struct unnamed_arg));
227                   }
228                 spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
229                 spec.unnamed_arg_count++;
230               }
231             else if (isdigit (*format))
232               {
233                 zero_precision = true;
234                 do
235                   {
236                     if (*format != '0')
237                       zero_precision = false;
238                     format++;
239                   }
240                 while (isdigit (*format));
241               }
242           }
243 
244         if (*format == 'h' || *format == 'l' || *format == 'L')
245           format++;
246 
247         switch (*format)
248           {
249           case '%':
250             type = FAT_NONE;
251             break;
252           case 'c':
253             type = FAT_CHARACTER;
254             break;
255           case 's': case 'r':
256             type = (zero_precision ? FAT_ANY : FAT_STRING);
257             break;
258           case 'i': case 'd': case 'u': case 'o': case 'x': case 'X':
259             type = FAT_INTEGER;
260             break;
261           case 'e': case 'E': case 'f': case 'g': case 'G':
262             type = FAT_FLOAT;
263             break;
264           default:
265             if (*format == '\0')
266               {
267                 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
268                 FDI_SET (format - 1, FMTDIR_ERROR);
269               }
270             else
271               {
272                 *invalid_reason =
273                   INVALID_CONVERSION_SPECIFIER (spec.directives, *format);
274                 FDI_SET (format, FMTDIR_ERROR);
275               }
276             goto bad_format;
277           }
278 
279         if (name != NULL)
280           {
281             /* Named argument.  */
282 
283             /* Named and unnamed specifications are exclusive.  */
284             if (spec.unnamed_arg_count > 0)
285               {
286                 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
287                 FDI_SET (format, FMTDIR_ERROR);
288                 goto bad_format;
289               }
290 
291             if (allocated == spec.named_arg_count)
292               {
293                 allocated = 2 * allocated + 1;
294                 spec.named = (struct named_arg *) xrealloc (spec.named, allocated * sizeof (struct named_arg));
295               }
296             spec.named[spec.named_arg_count].name = name;
297             spec.named[spec.named_arg_count].type = type;
298             spec.named_arg_count++;
299           }
300         else if (*format != '%')
301           {
302             /* Unnamed argument.  */
303 
304             /* Named and unnamed specifications are exclusive.  */
305             if (spec.named_arg_count > 0)
306               {
307                 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
308                 FDI_SET (format, FMTDIR_ERROR);
309                 goto bad_format;
310               }
311 
312             if (allocated == spec.unnamed_arg_count)
313               {
314                 allocated = 2 * allocated + 1;
315                 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, allocated * sizeof (struct unnamed_arg));
316               }
317             spec.unnamed[spec.unnamed_arg_count].type = type;
318             spec.unnamed_arg_count++;
319           }
320 
321         FDI_SET (format, FMTDIR_END);
322 
323         format++;
324       }
325 
326   /* Sort the named argument array, and eliminate duplicates.  */
327   if (spec.named_arg_count > 1)
328     {
329       unsigned int i, j;
330       bool err;
331 
332       qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
333              named_arg_compare);
334 
335       /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
336       err = false;
337       for (i = j = 0; i < spec.named_arg_count; i++)
338         if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
339           {
340             enum format_arg_type type1 = spec.named[i].type;
341             enum format_arg_type type2 = spec.named[j-1].type;
342             enum format_arg_type type_both;
343 
344             if (type1 == type2 || type2 == FAT_ANY)
345               type_both = type1;
346             else if (type1 == FAT_ANY)
347               type_both = type2;
348             else
349               {
350                 /* Incompatible types.  */
351                 type_both = FAT_NONE;
352                 if (!err)
353                   *invalid_reason =
354                     xasprintf (_("The string refers to the argument named '%s' in incompatible ways."), spec.named[i].name);
355                 err = true;
356               }
357 
358             spec.named[j-1].type = type_both;
359             free (spec.named[i].name);
360           }
361         else
362           {
363             if (j < i)
364               {
365                 spec.named[j].name = spec.named[i].name;
366                 spec.named[j].type = spec.named[i].type;
367               }
368             j++;
369           }
370       spec.named_arg_count = j;
371       if (err)
372         /* *invalid_reason has already been set above.  */
373         goto bad_format;
374     }
375 
376   result = XMALLOC (struct spec);
377   *result = spec;
378   return result;
379 
380  bad_format:
381   if (spec.named != NULL)
382     {
383       unsigned int i;
384       for (i = 0; i < spec.named_arg_count; i++)
385         free (spec.named[i].name);
386       free (spec.named);
387     }
388   if (spec.unnamed != NULL)
389     free (spec.unnamed);
390   return NULL;
391 }
392 
393 static void
format_free(void * descr)394 format_free (void *descr)
395 {
396   struct spec *spec = (struct spec *) descr;
397 
398   if (spec->named != NULL)
399     {
400       unsigned int i;
401       for (i = 0; i < spec->named_arg_count; i++)
402         free (spec->named[i].name);
403       free (spec->named);
404     }
405   if (spec->unnamed != NULL)
406     free (spec->unnamed);
407   free (spec);
408 }
409 
410 static int
format_get_number_of_directives(void * descr)411 format_get_number_of_directives (void *descr)
412 {
413   struct spec *spec = (struct spec *) descr;
414 
415   return spec->directives;
416 }
417 
418 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgid,const char * pretty_msgstr)419 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
420               formatstring_error_logger_t error_logger,
421               const char *pretty_msgid, const char *pretty_msgstr)
422 {
423   struct spec *spec1 = (struct spec *) msgid_descr;
424   struct spec *spec2 = (struct spec *) msgstr_descr;
425   bool err = false;
426 
427   if (spec1->named_arg_count > 0 && spec2->unnamed_arg_count > 0)
428     {
429       if (error_logger)
430         error_logger (_("format specifications in '%s' expect a mapping, those in '%s' expect a tuple"),
431                       pretty_msgid, pretty_msgstr);
432       err = true;
433     }
434   else if (spec1->unnamed_arg_count > 0 && spec2->named_arg_count > 0)
435     {
436       if (error_logger)
437         error_logger (_("format specifications in '%s' expect a tuple, those in '%s' expect a mapping"),
438                       pretty_msgid, pretty_msgstr);
439       err = true;
440     }
441   else
442     {
443       if (spec1->named_arg_count + spec2->named_arg_count > 0)
444         {
445           unsigned int i, j;
446           unsigned int n1 = spec1->named_arg_count;
447           unsigned int n2 = spec2->named_arg_count;
448 
449           /* Check the argument names are the same.
450              Both arrays are sorted.  We search for the first difference.  */
451           for (i = 0, j = 0; i < n1 || j < n2; )
452             {
453               int cmp = (i >= n1 ? 1 :
454                          j >= n2 ? -1 :
455                          strcmp (spec1->named[i].name, spec2->named[j].name));
456 
457               if (cmp > 0)
458                 {
459                   if (error_logger)
460                     error_logger (_("a format specification for argument '%s', as in '%s', doesn't exist in '%s'"),
461                                   spec2->named[j].name, pretty_msgstr,
462                                   pretty_msgid);
463                   err = true;
464                   break;
465                 }
466               else if (cmp < 0)
467                 {
468                   if (equality)
469                     {
470                       if (error_logger)
471                         error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
472                                       spec1->named[i].name, pretty_msgstr);
473                       err = true;
474                       break;
475                     }
476                   else
477                     i++;
478                 }
479               else
480                 j++, i++;
481             }
482           /* Check the argument types are the same.  */
483           if (!err)
484             for (i = 0, j = 0; j < n2; )
485               {
486                 if (strcmp (spec1->named[i].name, spec2->named[j].name) == 0)
487                   {
488                     if (!(spec1->named[i].type == spec2->named[j].type
489                           || (!equality
490                               && (spec1->named[i].type == FAT_ANY
491                                   || spec2->named[j].type == FAT_ANY))))
492                       {
493                         if (error_logger)
494                           error_logger (_("format specifications in '%s' and '%s' for argument '%s' are not the same"),
495                                         pretty_msgid, pretty_msgstr,
496                                         spec2->named[j].name);
497                         err = true;
498                         break;
499                       }
500                     j++, i++;
501                   }
502                 else
503                   i++;
504               }
505         }
506 
507       if (spec1->unnamed_arg_count + spec2->unnamed_arg_count > 0)
508         {
509           unsigned int i;
510 
511           /* Check the argument types are the same.  */
512           if (spec1->unnamed_arg_count != spec2->unnamed_arg_count)
513             {
514               if (error_logger)
515                 error_logger (_("number of format specifications in '%s' and '%s' does not match"),
516                               pretty_msgid, pretty_msgstr);
517               err = true;
518             }
519           else
520             for (i = 0; i < spec2->unnamed_arg_count; i++)
521               if (!(spec1->unnamed[i].type == spec2->unnamed[i].type
522                     || (!equality
523                         && (spec1->unnamed[i].type == FAT_ANY
524                             || spec2->unnamed[i].type == FAT_ANY))))
525                 {
526                   if (error_logger)
527                     error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
528                                   pretty_msgid, pretty_msgstr, i + 1);
529                   err = true;
530                 }
531         }
532     }
533 
534   return err;
535 }
536 
537 
538 struct formatstring_parser formatstring_python =
539 {
540   format_parse,
541   format_free,
542   format_get_number_of_directives,
543   NULL,
544   format_check
545 };
546 
547 
548 unsigned int
get_python_format_unnamed_arg_count(const char * string)549 get_python_format_unnamed_arg_count (const char *string)
550 {
551   /* Parse the format string.  */
552   char *invalid_reason = NULL;
553   struct spec *descr =
554     (struct spec *) format_parse (string, false, NULL, &invalid_reason);
555 
556   if (descr != NULL)
557     {
558       unsigned int result = descr->unnamed_arg_count;
559 
560       format_free (descr);
561       return result;
562     }
563   else
564     {
565       free (invalid_reason);
566       return 0;
567     }
568 }
569 
570 
571 #ifdef TEST
572 
573 /* Test program: Print the argument list specification returned by
574    format_parse for strings read from standard input.  */
575 
576 #include <stdio.h>
577 
578 static void
format_print(void * descr)579 format_print (void *descr)
580 {
581   struct spec *spec = (struct spec *) descr;
582   unsigned int i;
583 
584   if (spec == NULL)
585     {
586       printf ("INVALID");
587       return;
588     }
589 
590   if (spec->named_arg_count > 0)
591     {
592       if (spec->unnamed_arg_count > 0)
593         abort ();
594 
595       printf ("{");
596       for (i = 0; i < spec->named_arg_count; i++)
597         {
598           if (i > 0)
599             printf (", ");
600           printf ("'%s':", spec->named[i].name);
601           switch (spec->named[i].type)
602             {
603             case FAT_ANY:
604               printf ("*");
605               break;
606             case FAT_CHARACTER:
607               printf ("c");
608               break;
609             case FAT_STRING:
610               printf ("s");
611               break;
612             case FAT_INTEGER:
613               printf ("i");
614               break;
615             case FAT_FLOAT:
616               printf ("f");
617               break;
618             default:
619               abort ();
620             }
621         }
622       printf ("}");
623     }
624   else
625     {
626       printf ("(");
627       for (i = 0; i < spec->unnamed_arg_count; i++)
628         {
629           if (i > 0)
630             printf (" ");
631           switch (spec->unnamed[i].type)
632             {
633             case FAT_ANY:
634               printf ("*");
635               break;
636             case FAT_CHARACTER:
637               printf ("c");
638               break;
639             case FAT_STRING:
640               printf ("s");
641               break;
642             case FAT_INTEGER:
643               printf ("i");
644               break;
645             case FAT_FLOAT:
646               printf ("f");
647               break;
648             default:
649               abort ();
650             }
651         }
652       printf (")");
653     }
654 }
655 
656 int
main()657 main ()
658 {
659   for (;;)
660     {
661       char *line = NULL;
662       size_t line_size = 0;
663       int line_len;
664       char *invalid_reason;
665       void *descr;
666 
667       line_len = getline (&line, &line_size, stdin);
668       if (line_len < 0)
669         break;
670       if (line_len > 0 && line[line_len - 1] == '\n')
671         line[--line_len] = '\0';
672 
673       invalid_reason = NULL;
674       descr = format_parse (line, false, NULL, &invalid_reason);
675 
676       format_print (descr);
677       printf ("\n");
678       if (descr == NULL)
679         printf ("%s\n", invalid_reason);
680 
681       free (invalid_reason);
682       free (line);
683     }
684 
685   return 0;
686 }
687 
688 /*
689  * For Emacs M-x compile
690  * Local Variables:
691  * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-python.c ../gnulib-lib/libgettextlib.la"
692  * End:
693  */
694 
695 #endif /* TEST */
696