1 /* awk format strings.
2    Copyright (C) 2001-2004, 2006-2007, 2009, 2019-2020 Free Software Foundation, Inc.
3    Written by Bruno Haible <haible@clisp.cons.org>, 2002.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17 
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>

#include "format.h"
#include "c-ctype.h"
#include "xalloc.h"
#include "xvasprintf.h"
#include "format-invalid.h"
#include "gettext.h"
31 
32 #define _(str) gettext (str)
33 
34 /* awk format strings are described in the gawk-3.1 documentation and
35    implemented in gawk-3.1.0/builtin.c: format_tree().
36    A directive
37    - starts with '%' or '%m$' where m is a positive integer,
38    - is optionally followed by any of the characters '#', '0', '-', ' ', '+',
39      each of which acts as a flag,
40    - is optionally followed by a width specification: '*' (reads an argument)
41      or '*m$' or a nonempty digit sequence,
42    - is optionally followed by '.' and a precision specification: '*' (reads
43      an argument) or '*m$' or a nonempty digit sequence,
44    - is finished by a specifier
45        - '%', that needs no argument,
       - 'c', that needs a character argument,
       - 's', that needs a string argument,
48        - 'i', 'd', that need a signed integer argument,
49        - 'o', 'u', 'x', 'X', that need an unsigned integer argument,
50        - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument.
51    Numbered ('%m$' or '*m$') and unnumbered argument specifications cannot
52    be used in the same string.
53  */
54 
/* The kind of argument that a directive consumes.  */
enum format_arg_type
{
  FAT_NONE             = 0,     /* no argument, as for '%%' */
  FAT_CHARACTER        = 1,     /* 'c' */
  FAT_STRING           = 2,     /* 's' */
  FAT_INTEGER          = 3,     /* 'i', 'd', and '*' width/precision */
  FAT_UNSIGNED_INTEGER = 4,     /* 'u', 'o', 'x', 'X' */
  FAT_FLOAT            = 5      /* 'e', 'E', 'f', 'g', 'G' */
};
64 
65 struct numbered_arg
66 {
67   unsigned int number;
68   enum format_arg_type type;
69 };
70 
/* The result of parsing one format string.  */
struct spec
{
  /* Number of directives seen, including '%%'.  */
  unsigned int directives;
  /* Number of valid entries in the array below.  */
  unsigned int numbered_arg_count;
  /* Heap-allocated array of the arguments, or NULL if there are none.  */
  struct numbered_arg *numbered;
};
77 
/* Locale independent test for a decimal digit.
   The argument may be a 'char' or an 'unsigned char'.  (The <ctype.h>
   isdigit, in contrast, requires an 'unsigned char' value.)
   The argument is evaluated exactly once: values below '0' wrap around to
   large unsigned numbers, so a single comparison covers both bounds.  */
#undef isdigit
#define isdigit(c) ((unsigned int) (c) - (unsigned int) '0' < 10u)
83 
84 
85 static int
numbered_arg_compare(const void * p1,const void * p2)86 numbered_arg_compare (const void *p1, const void *p2)
87 {
88   unsigned int n1 = ((const struct numbered_arg *) p1)->number;
89   unsigned int n2 = ((const struct numbered_arg *) p2)->number;
90 
91   return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
92 }
93 
94 static void *
format_parse(const char * format,bool translated,char * fdi,char ** invalid_reason)95 format_parse (const char *format, bool translated, char *fdi,
96               char **invalid_reason)
97 {
98   const char *const format_start = format;
99   struct spec spec;
100   unsigned int numbered_allocated;
101   unsigned int unnumbered_arg_count;
102   struct spec *result;
103 
104   spec.directives = 0;
105   spec.numbered_arg_count = 0;
106   spec.numbered = NULL;
107   numbered_allocated = 0;
108   unnumbered_arg_count = 0;
109 
110   for (; *format != '\0';)
111     if (*format++ == '%')
112       {
113         /* A directive.  */
114         unsigned int number = 0;
115         enum format_arg_type type;
116 
117         FDI_SET (format - 1, FMTDIR_START);
118         spec.directives++;
119 
120         if (isdigit (*format))
121           {
122             const char *f = format;
123             unsigned int m = 0;
124 
125             do
126               {
127                 m = 10 * m + (*f - '0');
128                 f++;
129               }
130             while (isdigit (*f));
131 
132             if (*f == '$')
133               {
134                 if (m == 0)
135                   {
136                     *invalid_reason = INVALID_ARGNO_0 (spec.directives);
137                     FDI_SET (f, FMTDIR_ERROR);
138                     goto bad_format;
139                   }
140                 number = m;
141                 format = ++f;
142               }
143           }
144 
145         /* Parse flags.  */
146         while (*format == ' ' || *format == '+' || *format == '-'
147                || *format == '#' || *format == '0')
148           format++;
149 
150         /* Parse width.  */
151         if (*format == '*')
152           {
153             unsigned int width_number = 0;
154 
155             format++;
156 
157             if (isdigit (*format))
158               {
159                 const char *f = format;
160                 unsigned int m = 0;
161 
162                 do
163                   {
164                     m = 10 * m + (*f - '0');
165                     f++;
166                   }
167                 while (isdigit (*f));
168 
169                 if (*f == '$')
170                   {
171                     if (m == 0)
172                       {
173                         *invalid_reason =
174                           INVALID_WIDTH_ARGNO_0 (spec.directives);
175                         FDI_SET (f, FMTDIR_ERROR);
176                         goto bad_format;
177                       }
178                     width_number = m;
179                     format = ++f;
180                   }
181               }
182 
183             if (width_number)
184               {
185                 /* Numbered argument.  */
186 
187                 /* Numbered and unnumbered specifications are exclusive.  */
188                 if (unnumbered_arg_count > 0)
189                   {
190                     *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
191                     FDI_SET (format - 1, FMTDIR_ERROR);
192                     goto bad_format;
193                   }
194 
195                 if (numbered_allocated == spec.numbered_arg_count)
196                   {
197                     numbered_allocated = 2 * numbered_allocated + 1;
198                     spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
199                   }
200                 spec.numbered[spec.numbered_arg_count].number = width_number;
201                 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
202                 spec.numbered_arg_count++;
203               }
204             else
205               {
206                 /* Unnumbered argument.  */
207 
208                 /* Numbered and unnumbered specifications are exclusive.  */
209                 if (spec.numbered_arg_count > 0)
210                   {
211                     *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
212                     FDI_SET (format - 1, FMTDIR_ERROR);
213                     goto bad_format;
214                   }
215 
216                 if (numbered_allocated == unnumbered_arg_count)
217                   {
218                     numbered_allocated = 2 * numbered_allocated + 1;
219                     spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
220                   }
221                 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
222                 spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
223                 unnumbered_arg_count++;
224               }
225           }
226         else if (isdigit (*format))
227           {
228             do format++; while (isdigit (*format));
229           }
230 
231         /* Parse precision.  */
232         if (*format == '.')
233           {
234             format++;
235 
236             if (*format == '*')
237               {
238                 unsigned int precision_number = 0;
239 
240                 format++;
241 
242                 if (isdigit (*format))
243                   {
244                     const char *f = format;
245                     unsigned int m = 0;
246 
247                     do
248                       {
249                         m = 10 * m + (*f - '0');
250                         f++;
251                       }
252                     while (isdigit (*f));
253 
254                     if (*f == '$')
255                       {
256                         if (m == 0)
257                           {
258                             *invalid_reason =
259                               INVALID_PRECISION_ARGNO_0 (spec.directives);
260                             FDI_SET (f, FMTDIR_ERROR);
261                             goto bad_format;
262                           }
263                         precision_number = m;
264                         format = ++f;
265                       }
266                   }
267 
268                 if (precision_number)
269                   {
270                     /* Numbered argument.  */
271 
272                     /* Numbered and unnumbered specifications are exclusive.  */
273                     if (unnumbered_arg_count > 0)
274                       {
275                         *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
276                         FDI_SET (format - 1, FMTDIR_ERROR);
277                         goto bad_format;
278                       }
279 
280                     if (numbered_allocated == spec.numbered_arg_count)
281                       {
282                         numbered_allocated = 2 * numbered_allocated + 1;
283                         spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
284                       }
285                     spec.numbered[spec.numbered_arg_count].number = precision_number;
286                     spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
287                     spec.numbered_arg_count++;
288                   }
289                 else
290                   {
291                     /* Unnumbered argument.  */
292 
293                     /* Numbered and unnumbered specifications are exclusive.  */
294                     if (spec.numbered_arg_count > 0)
295                       {
296                         *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
297                         FDI_SET (format - 1, FMTDIR_ERROR);
298                         goto bad_format;
299                       }
300 
301                     if (numbered_allocated == unnumbered_arg_count)
302                       {
303                         numbered_allocated = 2 * numbered_allocated + 1;
304                         spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
305                       }
306                     spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
307                     spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
308                     unnumbered_arg_count++;
309                   }
310               }
311             else if (isdigit (*format))
312               {
313                 do format++; while (isdigit (*format));
314               }
315           }
316 
317         switch (*format)
318           {
319           case '%':
320             type = FAT_NONE;
321             break;
322           case 'c':
323             type = FAT_CHARACTER;
324             break;
325           case 's':
326             type = FAT_STRING;
327             break;
328           case 'i': case 'd':
329             type = FAT_INTEGER;
330             break;
331           case 'u': case 'o': case 'x': case 'X':
332             type = FAT_UNSIGNED_INTEGER;
333             break;
334           case 'e': case 'E': case 'f': case 'g': case 'G':
335             type = FAT_FLOAT;
336             break;
337           default:
338             if (*format == '\0')
339               {
340                 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
341                 FDI_SET (format - 1, FMTDIR_ERROR);
342               }
343             else
344               {
345                 *invalid_reason =
346                   INVALID_CONVERSION_SPECIFIER (spec.directives, *format);
347                 FDI_SET (format, FMTDIR_ERROR);
348               }
349             goto bad_format;
350           }
351 
352         if (type != FAT_NONE)
353           {
354             if (number)
355               {
356                 /* Numbered argument.  */
357 
358                 /* Numbered and unnumbered specifications are exclusive.  */
359                 if (unnumbered_arg_count > 0)
360                   {
361                     *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
362                     FDI_SET (format, FMTDIR_ERROR);
363                     goto bad_format;
364                   }
365 
366                 if (numbered_allocated == spec.numbered_arg_count)
367                   {
368                     numbered_allocated = 2 * numbered_allocated + 1;
369                     spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
370                   }
371                 spec.numbered[spec.numbered_arg_count].number = number;
372                 spec.numbered[spec.numbered_arg_count].type = type;
373                 spec.numbered_arg_count++;
374               }
375             else
376               {
377                 /* Unnumbered argument.  */
378 
379                 /* Numbered and unnumbered specifications are exclusive.  */
380                 if (spec.numbered_arg_count > 0)
381                   {
382                     *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
383                     FDI_SET (format, FMTDIR_ERROR);
384                     goto bad_format;
385                   }
386 
387                 if (numbered_allocated == unnumbered_arg_count)
388                   {
389                     numbered_allocated = 2 * numbered_allocated + 1;
390                     spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
391                   }
392                 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
393                 spec.numbered[unnumbered_arg_count].type = type;
394                 unnumbered_arg_count++;
395               }
396           }
397 
398         FDI_SET (format, FMTDIR_END);
399 
400         format++;
401       }
402 
403   /* Convert the unnumbered argument array to numbered arguments.  */
404   if (unnumbered_arg_count > 0)
405     spec.numbered_arg_count = unnumbered_arg_count;
406   /* Sort the numbered argument array, and eliminate duplicates.  */
407   else if (spec.numbered_arg_count > 1)
408     {
409       unsigned int i, j;
410       bool err;
411 
412       qsort (spec.numbered, spec.numbered_arg_count,
413              sizeof (struct numbered_arg), numbered_arg_compare);
414 
415       /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
416       err = false;
417       for (i = j = 0; i < spec.numbered_arg_count; i++)
418         if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
419           {
420             enum format_arg_type type1 = spec.numbered[i].type;
421             enum format_arg_type type2 = spec.numbered[j-1].type;
422             enum format_arg_type type_both;
423 
424             if (type1 == type2)
425               type_both = type1;
426             else
427               {
428                 /* Incompatible types.  */
429                 type_both = FAT_NONE;
430                 if (!err)
431                   *invalid_reason =
432                     INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number);
433                 err = true;
434               }
435 
436             spec.numbered[j-1].type = type_both;
437           }
438         else
439           {
440             if (j < i)
441               {
442                 spec.numbered[j].number = spec.numbered[i].number;
443                 spec.numbered[j].type = spec.numbered[i].type;
444               }
445             j++;
446           }
447       spec.numbered_arg_count = j;
448       if (err)
449         /* *invalid_reason has already been set above.  */
450         goto bad_format;
451     }
452 
453   result = XMALLOC (struct spec);
454   *result = spec;
455   return result;
456 
457  bad_format:
458   if (spec.numbered != NULL)
459     free (spec.numbered);
460   return NULL;
461 }
462 
463 static void
format_free(void * descr)464 format_free (void *descr)
465 {
466   struct spec *spec = (struct spec *) descr;
467 
468   if (spec->numbered != NULL)
469     free (spec->numbered);
470   free (spec);
471 }
472 
473 static int
format_get_number_of_directives(void * descr)474 format_get_number_of_directives (void *descr)
475 {
476   struct spec *spec = (struct spec *) descr;
477 
478   return spec->directives;
479 }
480 
481 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgid,const char * pretty_msgstr)482 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
483               formatstring_error_logger_t error_logger,
484               const char *pretty_msgid, const char *pretty_msgstr)
485 {
486   struct spec *spec1 = (struct spec *) msgid_descr;
487   struct spec *spec2 = (struct spec *) msgstr_descr;
488   bool err = false;
489 
490   if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
491     {
492       unsigned int i, j;
493       unsigned int n1 = spec1->numbered_arg_count;
494       unsigned int n2 = spec2->numbered_arg_count;
495 
496       /* Check the argument names are the same.
497          Both arrays are sorted.  We search for the first difference.  */
498       for (i = 0, j = 0; i < n1 || j < n2; )
499         {
500           int cmp = (i >= n1 ? 1 :
501                      j >= n2 ? -1 :
502                      spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
503                      spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
504                      0);
505 
506           if (cmp > 0)
507             {
508               if (error_logger)
509                 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in '%s'"),
510                               spec2->numbered[j].number, pretty_msgstr,
511                               pretty_msgid);
512               err = true;
513               break;
514             }
515           else if (cmp < 0)
516             {
517               if (equality)
518                 {
519                   if (error_logger)
520                     error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
521                                   spec1->numbered[i].number, pretty_msgstr);
522                   err = true;
523                   break;
524                 }
525               else
526                 i++;
527             }
528           else
529             j++, i++;
530         }
531       /* Check the argument types are the same.  */
532       if (!err)
533         for (i = 0, j = 0; j < n2; )
534           {
535             if (spec1->numbered[i].number == spec2->numbered[j].number)
536               {
537                 if (spec1->numbered[i].type != spec2->numbered[j].type)
538                   {
539                     if (error_logger)
540                       error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
541                                     pretty_msgid, pretty_msgstr,
542                                     spec2->numbered[j].number);
543                     err = true;
544                     break;
545                   }
546                 j++, i++;
547               }
548             else
549               i++;
550           }
551     }
552 
553   return err;
554 }
555 
556 
557 struct formatstring_parser formatstring_awk =
558 {
559   format_parse,
560   format_free,
561   format_get_number_of_directives,
562   NULL,
563   format_check
564 };
565 
566 
567 #ifdef TEST
568 
569 /* Test program: Print the argument list specification returned by
570    format_parse for strings read from standard input.  */
571 
572 #include <stdio.h>
573 
574 static void
format_print(void * descr)575 format_print (void *descr)
576 {
577   struct spec *spec = (struct spec *) descr;
578   unsigned int last;
579   unsigned int i;
580 
581   if (spec == NULL)
582     {
583       printf ("INVALID");
584       return;
585     }
586 
587   printf ("(");
588   last = 1;
589   for (i = 0; i < spec->numbered_arg_count; i++)
590     {
591       unsigned int number = spec->numbered[i].number;
592 
593       if (i > 0)
594         printf (" ");
595       if (number < last)
596         abort ();
597       for (; last < number; last++)
598         printf ("_ ");
599       switch (spec->numbered[i].type)
600         {
601         case FAT_CHARACTER:
602           printf ("c");
603           break;
604         case FAT_STRING:
605           printf ("s");
606           break;
607         case FAT_INTEGER:
608           printf ("i");
609           break;
610         case FAT_UNSIGNED_INTEGER:
611           printf ("[unsigned]i");
612           break;
613         case FAT_FLOAT:
614           printf ("f");
615           break;
616         default:
617           abort ();
618         }
619       last = number + 1;
620     }
621   printf (")");
622 }
623 
624 int
main()625 main ()
626 {
627   for (;;)
628     {
629       char *line = NULL;
630       size_t line_size = 0;
631       int line_len;
632       char *invalid_reason;
633       void *descr;
634 
635       line_len = getline (&line, &line_size, stdin);
636       if (line_len < 0)
637         break;
638       if (line_len > 0 && line[line_len - 1] == '\n')
639         line[--line_len] = '\0';
640 
641       invalid_reason = NULL;
642       descr = format_parse (line, false, NULL, &invalid_reason);
643 
644       format_print (descr);
645       printf ("\n");
646       if (descr == NULL)
647         printf ("%s\n", invalid_reason);
648 
649       free (invalid_reason);
650       free (line);
651     }
652 
653   return 0;
654 }
655 
656 /*
657  * For Emacs M-x compile
658  * Local Variables:
659  * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-awk.c ../gnulib-lib/libgettextlib.la"
660  * End:
661  */
662 
663 #endif /* TEST */
664