1 /* Object Pascal format strings.
2    Copyright (C) 2001-2004, 2006-2007, 2009-2010, 2018-2020 Free Software
3    Foundation, Inc.
4    Written by Bruno Haible <haible@clisp.cons.org>, 2001.
5 
6    This program is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 
23 #include <stdbool.h>
24 #include <stdlib.h>
25 
26 #include "format.h"
27 #include "c-ctype.h"
28 #include "xalloc.h"
29 #include "xvasprintf.h"
30 #include "format-invalid.h"
31 #include "gettext.h"
32 
33 #define _(str) gettext (str)
34 
35 /* Object Pascal format strings are usable with the "format" function in the
36    "sysutils" unit.  They are described in
37    <https://www.freepascal.org/docs-html/rtl/sysutils/format.html>
38    and are implemented in fpc-2.4.0/rtl/objpas/sysutils/sysformt.inc.
39    Another implementation exists in Borland Delphi.  The GNU Pascal's
40    "sysutils" doesn't (yet?) have the "format" function.
41 
42    A directive
43    - starts with '%',
44    - either
45      - is finished with '%', or
46      - - is optionally followed by an index specification: '*' (reads an
47          argument, must be of type integer) or a nonempty digit sequence
48          or nothing (equivalent to 0), followed by ':',
49        - is optionally followed by '-', which acts as a flag,
50        - is optionally followed by a width specification: '*' (reads an
51          argument, must be of type integer) or a nonempty digit sequence,
52        - is optionally followed by '.' and a precision specification: '*'
53          (reads an argument, must be of type integer) or a nonempty digit
54          sequence,
       - is finished by a case-insensitive specifier. If no index was
         specified, it reads an argument; otherwise it uses the index-th
         argument, 0-based.
58          - 'd', 'u', 'x', needs an 'integer' or 'int64' or 'qword' argument,
59          - 'e', 'f', 'g', 'n', 'm', need an 'extended' or 'currency' floating-
60            point argument,
61          - 's', needs a 'string', 'char', 'pchar', 'widestring', 'widechar',
62            'pwidechar' or 'ansistring' argument,
63          - 'p', needs a 'pointer' argument.
64    Numbered and unnumbered argument specifications can be used in the same
65    string.  Numbered argument specifications have no influence on the
66    "current argument index", that is incremented each time an argument is read.
67  */
68 
/* Kinds of argument that a directive can consume.  The comment next to
   each enumerator lists the Object Pascal types that are accepted for it.  */
enum format_arg_type
{
  FAT_INTEGER,         /* integer, int64, qword */
  FAT_FLOAT,           /* extended, currency */
  FAT_STRING,          /* string, char, pchar, widestring, widechar, pwidechar,
                          ansistring */
  FAT_POINTER          /* pointer */
};
77 
/* Describes one argument consumed by a format string.  */
struct numbered_arg
{
  unsigned int number;          /* 0-based position of the argument */
  enum format_arg_type type;    /* kind of value expected at that position */
};
83 
/* Result of parsing one format string.  */
struct spec
{
  unsigned int directives;          /* number of '%' directives seen,
                                       "%%" included */
  unsigned int numbered_arg_count;  /* number of entries in 'numbered' */
  struct numbered_arg *numbered;    /* heap array ordered by 'number', one
                                       entry per distinct argument; NULL
                                       when no argument is consumed */
};
90 
/* Locale independent test for a decimal digit.
   Argument can be 'char' or 'unsigned char'.  (Whereas the argument of
   <ctype.h> isdigit must be an 'unsigned char'.)
   The difference (c) - '0' is converted to 'unsigned int', so that
   characters below '0' wrap around to large values and fail the "< 10"
   comparison just like characters above '9'.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') < 10)
96 
97 
98 static int
numbered_arg_compare(const void * p1,const void * p2)99 numbered_arg_compare (const void *p1, const void *p2)
100 {
101   unsigned int n1 = ((const struct numbered_arg *) p1)->number;
102   unsigned int n2 = ((const struct numbered_arg *) p2)->number;
103 
104   return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
105 }
106 
107 static void *
format_parse(const char * format,bool translated,char * fdi,char ** invalid_reason)108 format_parse (const char *format, bool translated, char *fdi,
109               char **invalid_reason)
110 {
111   const char *const format_start = format;
112   unsigned int directives;
113   unsigned int numbered_arg_count;
114   struct numbered_arg *numbered;
115   unsigned int numbered_allocated;
116   unsigned int unnumbered_arg_count;
117   struct spec *result;
118 
119   enum arg_index
120   {
121     index_numbered,     /* index given by a fixed integer */
122     index_unnumbered,   /* index given by unnumbered_arg_count++ */
123     index_unknown       /* index is only known at run time */
124   };
125 
126   directives = 0;
127   numbered_arg_count = 0;
128   numbered = NULL;
129   numbered_allocated = 0;
130   unnumbered_arg_count = 0;
131 
132   for (; *format != '\0';)
133     if (*format++ == '%')
134       {
135         /* A directive.  */
136         FDI_SET (format - 1, FMTDIR_START);
137         directives++;
138 
139         if (*format != '%')
140           {
141             /* A complex directive.  */
142             enum arg_index main_arg = index_unnumbered;
143             unsigned int main_number = 0;
144             enum format_arg_type type;
145 
146             if (isdigit (*format) || *format == ':')
147               {
148                 const char *f = format;
149                 unsigned int m = 0;
150 
151                 while (isdigit (*f))
152                   {
153                     m = 10 * m + (*f - '0');
154                     f++;
155                   }
156 
157                 if (*f == ':')
158                   {
159                     main_number = m;
160                     main_arg = index_numbered;
161                     format = ++f;
162                   }
163               }
164             else if (*format == '*')
165               {
166                 if (format[1] == ':')
167                   {
168                     main_arg = index_unknown;
169                     format += 2;
170                   }
171               }
172 
173             /* Parse flags.  */
174             if (*format == '-')
175               format++;
176 
177             /* Parse width.  */
178             if (isdigit (*format))
179               {
180                 do
181                   format++;
182                 while (isdigit (*format));
183               }
184             else if (*format == '*')
185               {
186                 /* Unnumbered argument of type FAT_INTEGER.   */
187                 if (numbered_allocated == numbered_arg_count)
188                   {
189                     numbered_allocated = 2 * numbered_allocated + 1;
190                     numbered = (struct numbered_arg *) xrealloc (numbered, numbered_allocated * sizeof (struct numbered_arg));
191                   }
192                 numbered[numbered_arg_count].number = unnumbered_arg_count;
193                 numbered[numbered_arg_count].type = FAT_INTEGER;
194                 numbered_arg_count++;
195                 unnumbered_arg_count++;
196 
197                 format++;
198               }
199 
200             /* Parse precision.  */
201             if (*format == '.')
202               {
203                 format++;
204 
205                 if (isdigit (*format))
206                   {
207                     do
208                       format++;
209                     while (isdigit (*format));
210                   }
211                 else if (*format == '*')
212                   {
213                     /* Unnumbered argument of type FAT_INTEGER.   */
214                     if (numbered_allocated == unnumbered_arg_count)
215                       {
216                         numbered_allocated = 2 * numbered_allocated + 1;
217                         numbered = (struct numbered_arg *) xrealloc (numbered, numbered_allocated * sizeof (struct numbered_arg));
218                       }
219                     numbered[numbered_arg_count].number = unnumbered_arg_count;
220                     numbered[numbered_arg_count].type = FAT_INTEGER;
221                     numbered_arg_count++;
222                     unnumbered_arg_count++;
223 
224                     format++;
225                   }
226                 else
227                   --format;     /* will jump to bad_format */
228               }
229 
230             switch (c_tolower (*format))
231               {
232               case 'd': case 'u': case 'x':
233                 type = FAT_INTEGER;
234                 break;
235               case 'e': case 'f': case 'g': case 'n': case 'm':
236                 type = FAT_FLOAT;
237                 break;
238               case 's':
239                 type = FAT_STRING;
240                 break;
241               case 'p':
242                 type = FAT_POINTER;
243                 break;
244               default:
245                 if (*format == '\0')
246                   {
247                     *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
248                     FDI_SET (format - 1, FMTDIR_ERROR);
249                   }
250                 else
251                   {
252                     *invalid_reason =
253                       INVALID_CONVERSION_SPECIFIER (directives, *format);
254                     FDI_SET (format, FMTDIR_ERROR);
255                   }
256                 goto bad_format;
257               }
258 
259             if (numbered_allocated == numbered_arg_count)
260               {
261                 numbered_allocated = 2 * numbered_allocated + 1;
262                 numbered = (struct numbered_arg *) xrealloc (numbered, numbered_allocated * sizeof (struct numbered_arg));
263               }
264             switch (main_arg)
265               {
266               case index_unnumbered:
267                 numbered[numbered_arg_count].number = unnumbered_arg_count;
268                 numbered[numbered_arg_count].type = type;
269                 unnumbered_arg_count++;
270                 break;
271               case index_numbered:
272                 numbered[numbered_arg_count].number = main_number;
273                 numbered[numbered_arg_count].type = type;
274                 break;
275               case index_unknown:
276                 numbered[numbered_arg_count].number = unnumbered_arg_count;
277                 numbered[numbered_arg_count].type = FAT_INTEGER;
278                 unnumbered_arg_count++;
279                 break;
280               default:
281                 abort ();
282               }
283             numbered_arg_count++;
284           }
285 
286         FDI_SET (format, FMTDIR_END);
287 
288         format++;
289       }
290 
291   /* Sort the numbered argument array, and eliminate duplicates.  */
292   if (numbered_arg_count > 1)
293     {
294       unsigned int i, j;
295       bool err;
296 
297       qsort (numbered, numbered_arg_count,
298              sizeof (struct numbered_arg), numbered_arg_compare);
299 
300       /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
301       err = false;
302       for (i = j = 0; i < numbered_arg_count; i++)
303         if (j > 0 && numbered[i].number == numbered[j-1].number)
304           {
305             enum format_arg_type type1 = numbered[i].type;
306             enum format_arg_type type2 = numbered[j-1].type;
307             enum format_arg_type type_both;
308 
309             if (type1 == type2)
310               type_both = type1;
311             else
312               {
313                 /* Incompatible types.  */
314                 type_both = type1;
315                 if (!err)
316                   *invalid_reason =
317                     INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number);
318                 err = true;
319               }
320 
321             numbered[j-1].type = type_both;
322           }
323         else
324           {
325             if (j < i)
326               {
327                 numbered[j].number = numbered[i].number;
328                 numbered[j].type = numbered[i].type;
329               }
330             j++;
331           }
332       numbered_arg_count = j;
333       if (err)
334         /* *invalid_reason has already been set above.  */
335         goto bad_format;
336     }
337 
338   result = XMALLOC (struct spec);
339   result->directives = directives;
340   result->numbered_arg_count = numbered_arg_count;
341   result->numbered = numbered;
342   return result;
343 
344  bad_format:
345   if (numbered != NULL)
346     free (numbered);
347   return NULL;
348 }
349 
350 static void
format_free(void * descr)351 format_free (void *descr)
352 {
353   struct spec *spec = (struct spec *) descr;
354 
355   if (spec->numbered != NULL)
356     free (spec->numbered);
357   free (spec);
358 }
359 
360 static int
format_get_number_of_directives(void * descr)361 format_get_number_of_directives (void *descr)
362 {
363   struct spec *spec = (struct spec *) descr;
364 
365   return spec->directives;
366 }
367 
368 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgid,const char * pretty_msgstr)369 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
370               formatstring_error_logger_t error_logger,
371               const char *pretty_msgid, const char *pretty_msgstr)
372 {
373   struct spec *spec1 = (struct spec *) msgid_descr;
374   struct spec *spec2 = (struct spec *) msgstr_descr;
375   bool err = false;
376 
377   if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
378     {
379       unsigned int i, j;
380       unsigned int n1 = spec1->numbered_arg_count;
381       unsigned int n2 = spec2->numbered_arg_count;
382 
383       /* Check the argument names are the same.
384          Both arrays are sorted.  We search for the first difference.  */
385       for (i = 0, j = 0; i < n1 || j < n2; )
386         {
387           int cmp = (i >= n1 ? 1 :
388                      j >= n2 ? -1 :
389                      spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
390                      spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
391                      0);
392 
393           if (cmp > 0)
394             {
395               if (error_logger)
396                 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in '%s'"),
397                               spec2->numbered[j].number, pretty_msgstr,
398                               pretty_msgid);
399               err = true;
400               break;
401             }
402           else if (cmp < 0)
403             {
404               if (equality)
405                 {
406                   if (error_logger)
407                     error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
408                                   spec1->numbered[i].number, pretty_msgstr);
409                   err = true;
410                   break;
411                 }
412               else
413                 i++;
414             }
415           else
416             j++, i++;
417         }
418       /* Check the argument types are the same.  */
419       if (!err)
420         for (i = 0, j = 0; j < n2; )
421           {
422             if (spec1->numbered[i].number == spec2->numbered[j].number)
423               {
424                 if (spec1->numbered[i].type != spec2->numbered[j].type)
425                   {
426                     if (error_logger)
427                       error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
428                                     pretty_msgid, pretty_msgstr,
429                                     spec2->numbered[j].number);
430                     err = true;
431                     break;
432                   }
433                 j++, i++;
434               }
435             else
436               i++;
437           }
438     }
439 
440   return err;
441 }
442 
443 
/* Table of entry points for Object Pascal format strings.  The initializers
   are positional, so their order has to match the member order of
   'struct formatstring_parser', which is declared outside this file
   (presumably in "format.h" -- confirm).  */
struct formatstring_parser formatstring_pascal =
{
  format_parse,
  format_free,
  format_get_number_of_directives,
  NULL,                 /* NOTE(review): no function supplied for this slot;
                           its meaning is defined by the struct declaration */
  format_check
};
452 
453 
454 #ifdef TEST
455 
456 /* Test program: Print the argument list specification returned by
457    format_parse for strings read from standard input.  */
458 
459 #include <stdio.h>
460 
461 static void
format_print(void * descr)462 format_print (void *descr)
463 {
464   struct spec *spec = (struct spec *) descr;
465   unsigned int last;
466   unsigned int i;
467 
468   if (spec == NULL)
469     {
470       printf ("INVALID");
471       return;
472     }
473 
474   printf ("(");
475   last = 0;
476   for (i = 0; i < spec->numbered_arg_count; i++)
477     {
478       unsigned int number = spec->numbered[i].number;
479 
480       if (i > 0)
481         printf (" ");
482       if (number < last)
483         abort ();
484       for (; last < number; last++)
485         printf ("_ ");
486       switch (spec->numbered[i].type)
487         {
488         case FAT_INTEGER:
489           printf ("i");
490           break;
491         case FAT_FLOAT:
492           printf ("f");
493           break;
494         case FAT_STRING:
495           printf ("s");
496           break;
497         case FAT_POINTER:
498           printf ("p");
499           break;
500         default:
501           abort ();
502         }
503       last = number + 1;
504     }
505   printf (")");
506 }
507 
/* Test driver: reads format strings from standard input, one per line, and
   prints for each the argument list found by format_parse, followed by the
   error message if the string is invalid.  */
int
main (void)
{
  /* The line buffer is owned by this function and reused by getline across
     iterations.  It is released once after the loop, which also covers the
     buffer that getline may have allocated on the failing call.  */
  char *line = NULL;
  size_t line_size = 0;

  for (;;)
    {
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        break;
      /* Strip the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, NULL, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        /* Release the descriptor; it is not needed after printing.  */
        format_free (descr);

      free (invalid_reason);
    }

  free (line);

  return 0;
}
539 
540 /*
541  * For Emacs M-x compile
542  * Local Variables:
543  * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-pascal.c ../gnulib-lib/libgettextlib.la"
544  * End:
545  */
546 
547 #endif /* TEST */
548