1 /* Boost format strings.
2    Copyright (C) 2001-2004, 2006-2007, 2009, 2019-2020 Free Software Foundation, Inc.
3    Written by Bruno Haible <haible@clisp.cons.org>, 2006.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17 
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21 
22 #include <stdbool.h>
23 #include <stdlib.h>
24 
25 #include "format.h"
26 #include "c-ctype.h"
27 #include "xalloc.h"
28 #include "xvasprintf.h"
29 #include "format-invalid.h"
30 #include "gettext.h"
31 
32 #define _(str) gettext (str)
33 
34 /* Boost format strings are described in
35      boost_1_33_1/libs/format/doc/format.html
36    and implemented in
37      boost_1_33_1/boost/format/parsing.hpp.
38    A directive (other than '%%')
39    - starts with '%' or '%|'; in the latter case it must end in '|',
40    - is continued either by
41        - 'm%' where m is a positive integer, starting with a nonzero digit;
42          in this case the directive must not have started with '%|'; or
43        - the following:
44            - optional: 'm$' where m is a positive integer, starting with a
45              nonzero digit,
46            - optional: any of the characters '#', '0', '-', ' ', '+', "'",
47              '_', '=', 'h', 'l',
48            - optional: a width specification: '*' (reads an argument) or '*m$'
49              or a nonempty digit sequence,
50            - optional: a '.' and a precision specification: '*' (reads an
51              argument) or '*m$' or a nonempty digit sequence,
52            - optional: any of the characters 'h', 'l', 'L',
53            - if the directive started with '%|':
54                an optional specifier and a final '|',
55              otherwise
56                a mandatory specifier.
57              If no specifier is given, it needs an argument of any type.
58              The possible specifiers are:
59                - 'c', 'C', that need a character argument,
60                - 's', 'S', that need an argument of any type,
61                - 'i', 'd', 'o', 'u', 'x', 'X', that need an integer argument,
62                - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument,
63                - 'p', that needs a 'void *' argument,
64                - 't', that doesn't need an argument,
65                - 'TX', where X is any character, that doesn't need an argument,
               - 'n', that nominally needs a pointer to integer; however, the
                 parser below records no argument for it,
67              The Boost format string interpreter doesn't actually care about
68              the argument types, but we do, because it increases the likelihood
69              of detecting translator mistakes.
70    Numbered ('%m%' or '%m$' or '*m$') and unnumbered argument specifications
71    cannot be used in the same string.
72  */
73 
/* Classification of the argument that a directive consumes.  */
enum format_arg_type
{
  /* The directive consumes no argument.  Also used as the merged type of
     two incompatible references to the same argument.  */
  FAT_NONE = 0,
  /* Basic types */
  /* Integer conversions, and '*' width/precision arguments.  */
  FAT_INTEGER = 1,
  /* Floating-point conversions.  */
  FAT_DOUBLE = 2,
  /* Character conversions.  */
  FAT_CHAR = 3,
  /* The 'p' conversion.  */
  FAT_POINTER = 4,
  /* An argument of any type; compatible with every other type when
     references to the same argument are merged.  */
  FAT_ANY = 5
};
84 
85 struct numbered_arg
86 {
87   unsigned int number;
88   enum format_arg_type type;
89 };
90 
/* Result of parsing one format string.  */
struct spec
{
  /* Number of directives seen, '%%' included.  */
  unsigned int directives;
  /* Number of valid entries in the 'numbered' array.  */
  unsigned int numbered_arg_count;
  /* Heap-allocated array of argument references, or NULL if the string
     consumes no arguments.  */
  struct numbered_arg *numbered;
};
97 
/* Locale independent test for an ASCII decimal digit.
   Unlike the <ctype.h> isdigit, whose argument must be an 'unsigned char',
   this one accepts a plain 'char' as well as an 'unsigned char'.
   The subtraction is done in unsigned arithmetic, so that every value below
   '0' wraps around to a large number and fails the single comparison.  */
#undef isdigit
#define isdigit(c) ((unsigned int) (c) - (unsigned int) '0' <= 9u)
103 
104 
105 static int
numbered_arg_compare(const void * p1,const void * p2)106 numbered_arg_compare (const void *p1, const void *p2)
107 {
108   unsigned int n1 = ((const struct numbered_arg *) p1)->number;
109   unsigned int n2 = ((const struct numbered_arg *) p2)->number;
110 
111   return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
112 }
113 
114 static void *
format_parse(const char * format,bool translated,char * fdi,char ** invalid_reason)115 format_parse (const char *format, bool translated, char *fdi,
116               char **invalid_reason)
117 {
118   const char *const format_start = format;
119   struct spec spec;
120   unsigned int numbered_allocated;
121   unsigned int unnumbered_arg_count;
122   struct spec *result;
123 
124   spec.directives = 0;
125   spec.numbered_arg_count = 0;
126   spec.numbered = NULL;
127   numbered_allocated = 0;
128   unnumbered_arg_count = 0;
129 
130   for (; *format != '\0';)
131     if (*format++ == '%')
132       {
133         /* A directive.  */
134         FDI_SET (format - 1, FMTDIR_START);
135         spec.directives++;
136 
137         if (*format == '%')
138           format++;
139         else
140           {
141             bool brackets = false;
142             bool done = false;
143             unsigned int number = 0;
144             enum format_arg_type type = FAT_NONE;
145 
146             if (*format == '|')
147               {
148                 format++;
149                 brackets = true;
150               }
151 
152             if (isdigit (*format) && *format != '0')
153               {
154                 const char *f = format;
155                 unsigned int m = 0;
156 
157                 do
158                   {
159                     m = 10 * m + (*f - '0');
160                     f++;
161                   }
162                 while (isdigit (*f));
163 
164                 if ((!brackets && *f == '%') || *f == '$')
165                   {
166                     if (m == 0) /* can happen if m overflows */
167                       {
168                         *invalid_reason = INVALID_ARGNO_0 (spec.directives);
169                         FDI_SET (f, FMTDIR_ERROR);
170                         goto bad_format;
171                       }
172                     number = m;
173                     if (*f == '%')
174                       {
175                         type = FAT_ANY;
176                         done = true;
177                       }
178                     format = ++f;
179                   }
180               }
181 
182             if (!done)
183               {
184                 /* Parse flags.  */
185                 for (;;)
186                   {
187                     if (*format == ' ' || *format == '+' || *format == '-'
188                         || *format == '#' || *format == '0' || *format == '\''
189                         || *format == '_' || *format == '=' || *format == 'h'
190                         || *format == 'l')
191                       format++;
192                     else
193                       break;
194                   }
195 
196                 /* Parse width.  */
197                 if (*format == '*')
198                   {
199                     unsigned int width_number = 0;
200 
201                     format++;
202 
203                     if (isdigit (*format))
204                       {
205                         const char *f = format;
206                         unsigned int m = 0;
207 
208                         do
209                           {
210                             m = 10 * m + (*f - '0');
211                             f++;
212                           }
213                         while (isdigit (*f));
214 
215                         if (*f == '$')
216                           {
217                             if (m == 0)
218                               {
219                                 *invalid_reason =
220                                   INVALID_WIDTH_ARGNO_0 (spec.directives);
221                                 FDI_SET (f, FMTDIR_ERROR);
222                                 goto bad_format;
223                               }
224                             width_number = m;
225                             format = ++f;
226                           }
227                       }
228 
229                     if (width_number)
230                       {
231                         /* Numbered argument.  */
232 
233                         /* Numbered and unnumbered specifications are
234                            exclusive.  */
235                         if (unnumbered_arg_count > 0)
236                           {
237                             *invalid_reason =
238                               INVALID_MIXES_NUMBERED_UNNUMBERED ();
239                             FDI_SET (format - 1, FMTDIR_ERROR);
240                             goto bad_format;
241                           }
242 
243                         if (numbered_allocated == spec.numbered_arg_count)
244                           {
245                             numbered_allocated = 2 * numbered_allocated + 1;
246                             spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
247                           }
248                         spec.numbered[spec.numbered_arg_count].number = width_number;
249                         spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
250                         spec.numbered_arg_count++;
251                       }
252                     else
253                       {
254                         /* Unnumbered argument.  */
255 
256                         /* Numbered and unnumbered specifications are
257                            exclusive.  */
258                         if (spec.numbered_arg_count > 0)
259                           {
260                             *invalid_reason =
261                               INVALID_MIXES_NUMBERED_UNNUMBERED ();
262                             FDI_SET (format - 1, FMTDIR_ERROR);
263                             goto bad_format;
264                           }
265 
266                         if (numbered_allocated == unnumbered_arg_count)
267                           {
268                             numbered_allocated = 2 * numbered_allocated + 1;
269                             spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
270                           }
271                         spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
272                         spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
273                         unnumbered_arg_count++;
274                       }
275                   }
276                 else if (isdigit (*format))
277                   {
278                     do format++; while (isdigit (*format));
279                   }
280 
281                 /* Parse precision.  */
282                 if (*format == '.')
283                   {
284                     format++;
285 
286                     if (*format == '*')
287                       {
288                         unsigned int precision_number = 0;
289 
290                         format++;
291 
292                         if (isdigit (*format))
293                           {
294                             const char *f = format;
295                             unsigned int m = 0;
296 
297                             do
298                               {
299                                 m = 10 * m + (*f - '0');
300                                 f++;
301                               }
302                             while (isdigit (*f));
303 
304                             if (*f == '$')
305                               {
306                                 if (m == 0)
307                                   {
308                                     *invalid_reason =
309                                       INVALID_PRECISION_ARGNO_0 (spec.directives);
310                                     FDI_SET (f, FMTDIR_ERROR);
311                                     goto bad_format;
312                                   }
313                                 precision_number = m;
314                                 format = ++f;
315                               }
316                           }
317 
318                         if (precision_number)
319                           {
320                             /* Numbered argument.  */
321 
322                             /* Numbered and unnumbered specifications are
323                                exclusive.  */
324                             if (unnumbered_arg_count > 0)
325                               {
326                                 *invalid_reason =
327                                   INVALID_MIXES_NUMBERED_UNNUMBERED ();
328                                 FDI_SET (format - 1, FMTDIR_ERROR);
329                                 goto bad_format;
330                               }
331 
332                             if (numbered_allocated == spec.numbered_arg_count)
333                               {
334                                 numbered_allocated = 2 * numbered_allocated + 1;
335                                 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
336                               }
337                             spec.numbered[spec.numbered_arg_count].number = precision_number;
338                             spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
339                             spec.numbered_arg_count++;
340                           }
341                         else
342                           {
343                             /* Unnumbered argument.  */
344 
345                             /* Numbered and unnumbered specifications are
346                                exclusive.  */
347                             if (spec.numbered_arg_count > 0)
348                               {
349                                 *invalid_reason =
350                                   INVALID_MIXES_NUMBERED_UNNUMBERED ();
351                                 FDI_SET (format - 1, FMTDIR_ERROR);
352                                 goto bad_format;
353                               }
354 
355                             if (numbered_allocated == unnumbered_arg_count)
356                               {
357                                 numbered_allocated = 2 * numbered_allocated + 1;
358                                 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated  * sizeof (struct numbered_arg));
359                               }
360                             spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
361                             spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
362                             unnumbered_arg_count++;
363                           }
364                       }
365                     else if (isdigit (*format))
366                       {
367                         do format++; while (isdigit (*format));
368                       }
369                   }
370 
371                 /* Parse size.  */
372                 for (;;)
373                   {
374                     if (*format == 'h' || *format == 'l' || *format == 'L')
375                       format++;
376                     else
377                       break;
378                   }
379 
380                 switch (*format++)
381                   {
382                   case 'c': case 'C':
383                     type = FAT_CHAR;
384                     break;
385                   case 's': case 'S':
386                     type = FAT_ANY;
387                     break;
388                   case 'i': case 'd': case 'o': case 'u': case 'x': case 'X':
389                     type = FAT_INTEGER;
390                     break;
391                   case 'e': case 'E': case 'f': case 'g': case 'G':
392                     type = FAT_DOUBLE;
393                     break;
394                   case 'p':
395                     type = FAT_POINTER;
396                     break;
397                   case 't':
398                     type = FAT_NONE;
399                     break;
400                   case 'T':
401                     if (*format == '\0')
402                       {
403                         *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
404                         FDI_SET (format - 1, FMTDIR_ERROR);
405                         goto bad_format;
406                       }
407                     format++;
408                     type = FAT_NONE;
409                     break;
410                   case 'n':
411                     type = FAT_NONE;
412                     break;
413                   case '|':
414                     if (brackets)
415                       {
416                         --format;
417                         type = FAT_ANY;
418                         break;
419                       }
420                     /*FALLTHROUGH*/
421                   default:
422                     --format;
423                     if (*format == '\0')
424                       {
425                         *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
426                         FDI_SET (format - 1, FMTDIR_ERROR);
427                       }
428                     else
429                       {
430                         *invalid_reason =
431                           INVALID_CONVERSION_SPECIFIER (spec.directives,
432                                                         *format);
433                         FDI_SET (format, FMTDIR_ERROR);
434                       }
435                     goto bad_format;
436                   }
437                 if (brackets)
438                   {
439                     if (*format != '|')
440                       {
441                         if (*format == '\0')
442                           {
443                             *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
444                             FDI_SET (format - 1, FMTDIR_ERROR);
445                           }
446                         else
447                           {
448                             *invalid_reason =
449                               xasprintf (_("The directive number %u starts with | but does not end with |."),
450                                          spec.directives);
451                             FDI_SET (format, FMTDIR_ERROR);
452                           }
453                         goto bad_format;
454                       }
455                     format++;
456                   }
457               }
458 
459             if (type != FAT_NONE)
460               {
461                 if (number)
462                   {
463                     /* Numbered argument.  */
464 
465                     /* Numbered and unnumbered specifications are exclusive.  */
466                     if (unnumbered_arg_count > 0)
467                       {
468                         *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
469                         FDI_SET (format - 1, FMTDIR_ERROR);
470                         goto bad_format;
471                       }
472 
473                     if (numbered_allocated == spec.numbered_arg_count)
474                       {
475                         numbered_allocated = 2 * numbered_allocated + 1;
476                         spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
477                       }
478                     spec.numbered[spec.numbered_arg_count].number = number;
479                     spec.numbered[spec.numbered_arg_count].type = type;
480                     spec.numbered_arg_count++;
481                   }
482                 else
483                   {
484                     /* Unnumbered argument.  */
485 
486                     /* Numbered and unnumbered specifications are exclusive.  */
487                     if (spec.numbered_arg_count > 0)
488                       {
489                         *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
490                         FDI_SET (format - 1, FMTDIR_ERROR);
491                         goto bad_format;
492                       }
493 
494                     if (numbered_allocated == unnumbered_arg_count)
495                       {
496                         numbered_allocated = 2 * numbered_allocated + 1;
497                         spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
498                       }
499                     spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
500                     spec.numbered[unnumbered_arg_count].type = type;
501                     unnumbered_arg_count++;
502                   }
503               }
504           }
505 
506         FDI_SET (format - 1, FMTDIR_END);
507       }
508 
509   /* Convert the unnumbered argument array to numbered arguments.  */
510   if (unnumbered_arg_count > 0)
511     spec.numbered_arg_count = unnumbered_arg_count;
512   /* Sort the numbered argument array, and eliminate duplicates.  */
513   else if (spec.numbered_arg_count > 1)
514     {
515       unsigned int i, j;
516       bool err;
517 
518       qsort (spec.numbered, spec.numbered_arg_count,
519              sizeof (struct numbered_arg), numbered_arg_compare);
520 
521       /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
522       err = false;
523       for (i = j = 0; i < spec.numbered_arg_count; i++)
524         if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
525           {
526             enum format_arg_type type1 = spec.numbered[i].type;
527             enum format_arg_type type2 = spec.numbered[j-1].type;
528             enum format_arg_type type_both;
529 
530             if (type1 == type2 || type2 == FAT_ANY)
531               type_both = type1;
532             else if (type1 == FAT_ANY)
533               type_both = type2;
534             else
535               {
536                 /* Incompatible types.  */
537                 type_both = FAT_NONE;
538                 if (!err)
539                   *invalid_reason =
540                     INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number);
541                 err = true;
542               }
543 
544             spec.numbered[j-1].type = type_both;
545           }
546         else
547           {
548             if (j < i)
549               {
550                 spec.numbered[j].number = spec.numbered[i].number;
551                 spec.numbered[j].type = spec.numbered[i].type;
552               }
553             j++;
554           }
555       spec.numbered_arg_count = j;
556       if (err)
557         /* *invalid_reason has already been set above.  */
558         goto bad_format;
559     }
560 
561   result = XMALLOC (struct spec);
562   *result = spec;
563   return result;
564 
565  bad_format:
566   if (spec.numbered != NULL)
567     free (spec.numbered);
568   return NULL;
569 }
570 
571 static void
format_free(void * descr)572 format_free (void *descr)
573 {
574   struct spec *spec = (struct spec *) descr;
575 
576   if (spec->numbered != NULL)
577     free (spec->numbered);
578   free (spec);
579 }
580 
581 static int
format_get_number_of_directives(void * descr)582 format_get_number_of_directives (void *descr)
583 {
584   struct spec *spec = (struct spec *) descr;
585 
586   return spec->directives;
587 }
588 
589 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgid,const char * pretty_msgstr)590 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
591               formatstring_error_logger_t error_logger,
592               const char *pretty_msgid, const char *pretty_msgstr)
593 {
594   struct spec *spec1 = (struct spec *) msgid_descr;
595   struct spec *spec2 = (struct spec *) msgstr_descr;
596   bool err = false;
597 
598   if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
599     {
600       unsigned int i, j;
601       unsigned int n1 = spec1->numbered_arg_count;
602       unsigned int n2 = spec2->numbered_arg_count;
603 
604       /* Check the argument names are the same.
605          Both arrays are sorted.  We search for the first difference.  */
606       for (i = 0, j = 0; i < n1 || j < n2; )
607         {
608           int cmp = (i >= n1 ? 1 :
609                      j >= n2 ? -1 :
610                      spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
611                      spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
612                      0);
613 
614           if (cmp > 0)
615             {
616               if (error_logger)
617                 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in '%s'"),
618                               spec2->numbered[j].number, pretty_msgstr,
619                               pretty_msgid);
620               err = true;
621               break;
622             }
623           else if (cmp < 0)
624             {
625               if (equality)
626                 {
627                   if (error_logger)
628                     error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
629                                   spec1->numbered[i].number, pretty_msgstr);
630                   err = true;
631                   break;
632                 }
633               else
634                 i++;
635             }
636           else
637             j++, i++;
638         }
639       /* Check the argument types are the same.  */
640       if (!err)
641         for (i = 0, j = 0; j < n2; )
642           {
643             if (spec1->numbered[i].number == spec2->numbered[j].number)
644               {
645                 if (spec1->numbered[i].type != spec2->numbered[j].type)
646                   {
647                     if (error_logger)
648                       error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
649                                     pretty_msgid, pretty_msgstr,
650                                     spec2->numbered[j].number);
651                     err = true;
652                     break;
653                   }
654                 j++, i++;
655               }
656             else
657               i++;
658           }
659     }
660 
661   return err;
662 }
663 
664 
665 struct formatstring_parser formatstring_boost =
666 {
667   format_parse,
668   format_free,
669   format_get_number_of_directives,
670   NULL,
671   format_check
672 };
673 
674 
675 #ifdef TEST
676 
677 /* Test program: Print the argument list specification returned by
678    format_parse for strings read from standard input.  */
679 
680 #include <stdio.h>
681 
682 static void
format_print(void * descr)683 format_print (void *descr)
684 {
685   struct spec *spec = (struct spec *) descr;
686   unsigned int last;
687   unsigned int i;
688 
689   if (spec == NULL)
690     {
691       printf ("INVALID");
692       return;
693     }
694 
695   printf ("(");
696   last = 1;
697   for (i = 0; i < spec->numbered_arg_count; i++)
698     {
699       unsigned int number = spec->numbered[i].number;
700 
701       if (i > 0)
702         printf (" ");
703       if (number < last)
704         abort ();
705       for (; last < number; last++)
706         printf ("_ ");
707       switch (spec->numbered[i].type)
708         {
709         case FAT_INTEGER:
710           printf ("i");
711           break;
712         case FAT_DOUBLE:
713           printf ("f");
714           break;
715         case FAT_CHAR:
716           printf ("c");
717           break;
718         case FAT_POINTER:
719           printf ("p");
720           break;
721         case FAT_ANY:
722           printf ("*");
723           break;
724         default:
725           abort ();
726         }
727       last = number + 1;
728     }
729   printf (")");
730 }
731 
/* Reads format strings from standard input, one per line, and prints for
   each the argument list found by format_parse, followed by the error
   message if the string is invalid.  Stops at end of input.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated a buffer even though it failed.  */
          free (line);
          break;
        }
      /* Strip the trailing newline.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, NULL, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        /* Release the descriptor; otherwise it leaks on every line.  */
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
763 
764 /*
765  * For Emacs M-x compile
766  * Local Variables:
767  * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-boost.c ../gnulib-lib/libgettextlib.la"
768  * End:
769  */
770 
771 #endif /* TEST */
772 
773