1 /* Python format strings.
2    Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc.
3    Written by Bruno Haible <haible@clisp.cons.org>, 2001.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software Foundation,
17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 
23 #include <stdbool.h>
24 #include <stdlib.h>
25 #include <string.h>
26 
27 #include "format.h"
28 #include "c-ctype.h"
29 #include "xalloc.h"
30 #include "xvasprintf.h"
31 #include "format-invalid.h"
32 #include "gettext.h"
33 
34 #define _(str) gettext (str)
35 
36 /* Python format strings are described in
37      Python Library reference
38      2. Built-in Types, Exceptions and Functions
39      2.1. Built-in Types
40      2.1.5. Sequence Types
41      2.1.5.2. String Formatting Operations
42    Any string or Unicode string can act as format string via the '%' operator,
43    implemented in stringobject.c and unicodeobject.c.
44    A directive
45    - starts with '%'
46    - is optionally followed by '(ident)' where ident is any sequence of
47      characters with balanced left and right parentheses,
48    - is optionally followed by any of the characters '-' (left justification),
49      '+' (sign), ' ' (blank), '#' (alt), '0' (zero), each of which acts as a
50      flag,
51    - is optionally followed by a width specification: '*' (reads an argument)
52      or a nonempty digit sequence,
53    - is optionally followed by '.' and a precision specification: '*' (reads
54      an argument) or a nonempty digit sequence,
55    - is optionally followed by a size specifier, one of 'h' 'l' 'L'.
56    - is finished by a specifier
57        - '%', that needs no argument,
58        - 'c', that needs a character argument,
59        - 's', 'r', that need a string argument,
60        - 'i', 'd', 'u', 'o', 'x', 'X', that need an integer argument,
61        - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument.
62    Use of '(ident)' and use of unnamed argument specifications are exclusive,
63    because the first requires a mapping as argument, while the second requires
64    a tuple as argument.
65  */
66 
/* Classification of the argument consumed by a directive.  */
enum format_arg_type
{
  /* Result of merging two uses of the same named argument whose types are
     incompatible; only produced while reporting that error.  */
  FAT_NONE,
  /* Assigned to the '%' conversion; compatible with every other type when
     duplicate named arguments are merged.  */
  FAT_ANY,
  /* 'c' conversion.  */
  FAT_CHARACTER,
  /* 's' and 'r' conversions.  */
  FAT_STRING,
  /* 'i', 'd', 'u', 'o', 'x', 'X' conversions, and '*' width/precision.  */
  FAT_INTEGER,
  /* 'e', 'E', 'f', 'g', 'G' conversions.  */
  FAT_FLOAT
};
76 
/* An argument referenced through '%(name)...'.  */
struct named_arg
{
  /* Heap-allocated, NUL-terminated copy of the text between the
     parentheses; freed by format_free or on parse failure.  */
  char *name;
  /* Type required by the conversion specifier(s) that use this name.  */
  enum format_arg_type type;
};
82 
/* A positional argument, consumed either by a conversion specifier or by
   a '*' width or precision.  */
struct unnamed_arg
{
  enum format_arg_type type;
};
87 
/* Result of parsing one format string.  */
struct spec
{
  /* Number of '%' directives seen, including '%%'.  */
  unsigned int directives;
  /* Number of valid entries in 'named'.  */
  unsigned int named_arg_count;
  /* Number of valid entries in 'unnamed'.  */
  unsigned int unnamed_arg_count;
  /* Capacity of whichever of the two arrays is in use; the parser rejects
     strings that mix named and unnamed arguments, so one counter serves
     both.  */
  unsigned int allocated;
  /* Named arguments; after a successful parse, sorted by name with
     duplicates merged.  NULL if there are none.  */
  struct named_arg *named;
  /* Unnamed arguments in order of appearance.  NULL if there are none.  */
  struct unnamed_arg *unnamed;
};
97 
/* Locale independent test for a decimal digit.
   Argument can be 'char' or 'unsigned char'.  (Whereas the argument of
   <ctype.h> isdigit must be an 'unsigned char'.)
   The subtraction result is converted to unsigned so that characters below
   '0' wrap around to large values and a single comparison suffices; the
   argument is evaluated exactly once.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') < 10)
103 
104 
105 static int
named_arg_compare(const void * p1,const void * p2)106 named_arg_compare (const void *p1, const void *p2)
107 {
108   return strcmp (((const struct named_arg *) p1)->name,
109 		 ((const struct named_arg *) p2)->name);
110 }
111 
/* Expands to a freshly allocated (xstrdup) copy of the translated error
   message used when a format string mixes '%(name)' directives with
   unnamed ones.  The caller owns the returned string.  */
#define INVALID_MIXES_NAMED_UNNAMED() \
  xstrdup (_("The string refers to arguments both through argument names and through unnamed argument specifications."))
114 
115 static void *
format_parse(const char * format,bool translated,char ** invalid_reason)116 format_parse (const char *format, bool translated, char **invalid_reason)
117 {
118   struct spec spec;
119   struct spec *result;
120 
121   spec.directives = 0;
122   spec.named_arg_count = 0;
123   spec.unnamed_arg_count = 0;
124   spec.allocated = 0;
125   spec.named = NULL;
126   spec.unnamed = NULL;
127 
128   for (; *format != '\0';)
129     if (*format++ == '%')
130       {
131 	/* A directive.  */
132 	char *name = NULL;
133 	enum format_arg_type type;
134 
135 	spec.directives++;
136 
137 	if (*format == '(')
138 	  {
139 	    unsigned int depth;
140 	    const char *name_start;
141 	    const char *name_end;
142 	    size_t n;
143 
144 	    name_start = ++format;
145 	    depth = 0;
146 	    for (; *format != '\0'; format++)
147 	      {
148 		if (*format == '(')
149 		  depth++;
150 		else if (*format == ')')
151 		  {
152 		    if (depth == 0)
153 		      break;
154 		    else
155 		      depth--;
156 		  }
157 	      }
158 	    if (*format == '\0')
159 	      {
160 		*invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
161 		goto bad_format;
162 	      }
163 	    name_end = format++;
164 
165 	    n = name_end - name_start;
166 	    name = (char *) xmalloc (n + 1);
167 	    memcpy (name, name_start, n);
168 	    name[n] = '\0';
169 	  }
170 
171 	while (*format == '-' || *format == '+' || *format == ' '
172 	       || *format == '#' || *format == '0')
173 	  format++;
174 
175 	if (*format == '*')
176 	  {
177 	    format++;
178 
179 	    /* Named and unnamed specifications are exclusive.  */
180 	    if (spec.named_arg_count > 0)
181 	      {
182 		*invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
183 		goto bad_format;
184 	      }
185 
186 	    if (spec.allocated == spec.unnamed_arg_count)
187 	      {
188 		spec.allocated = 2 * spec.allocated + 1;
189 		spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
190 	      }
191 	    spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
192 	    spec.unnamed_arg_count++;
193 	  }
194 	else if (isdigit (*format))
195 	  {
196 	    do format++; while (isdigit (*format));
197 	  }
198 
199 	if (*format == '.')
200 	  {
201 	    format++;
202 
203 	    if (*format == '*')
204 	      {
205 		format++;
206 
207 		/* Named and unnamed specifications are exclusive.  */
208 		if (spec.named_arg_count > 0)
209 		  {
210 		    *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
211 		    goto bad_format;
212 		  }
213 
214 		if (spec.allocated == spec.unnamed_arg_count)
215 		  {
216 		    spec.allocated = 2 * spec.allocated + 1;
217 		    spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
218 		  }
219 		spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
220 		spec.unnamed_arg_count++;
221 	      }
222 	    else if (isdigit (*format))
223 	      {
224 		do format++; while (isdigit (*format));
225 	      }
226 	  }
227 
228 	if (*format == 'h' || *format == 'l' || *format == 'L')
229 	  format++;
230 
231 	switch (*format)
232 	  {
233 	  case '%':
234 	    type = FAT_ANY;
235 	    break;
236 	  case 'c':
237 	    type = FAT_CHARACTER;
238 	    break;
239 	  case 's': case 'r':
240 	    type = FAT_STRING;
241 	    break;
242 	  case 'i': case 'd': case 'u': case 'o': case 'x': case 'X':
243 	    type = FAT_INTEGER;
244 	    break;
245 	  case 'e': case 'E': case 'f': case 'g': case 'G':
246 	    type = FAT_FLOAT;
247 	    break;
248 	  default:
249 	    *invalid_reason =
250 	      (*format == '\0'
251 	       ? INVALID_UNTERMINATED_DIRECTIVE ()
252 	       : INVALID_CONVERSION_SPECIFIER (spec.directives, *format));
253 	    goto bad_format;
254 	  }
255 
256 	if (name != NULL)
257 	  {
258 	    /* Named argument.  */
259 
260 	    /* Named and unnamed specifications are exclusive.  */
261 	    if (spec.unnamed_arg_count > 0)
262 	      {
263 		*invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
264 		goto bad_format;
265 	      }
266 
267 	    if (spec.allocated == spec.named_arg_count)
268 	      {
269 		spec.allocated = 2 * spec.allocated + 1;
270 		spec.named = (struct named_arg *) xrealloc (spec.named, spec.allocated * sizeof (struct named_arg));
271 	      }
272 	    spec.named[spec.named_arg_count].name = name;
273 	    spec.named[spec.named_arg_count].type = type;
274 	    spec.named_arg_count++;
275 	  }
276 	else if (*format != '%')
277 	  {
278 	    /* Unnamed argument.  */
279 
280 	    /* Named and unnamed specifications are exclusive.  */
281 	    if (spec.named_arg_count > 0)
282 	      {
283 		*invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
284 		goto bad_format;
285 	      }
286 
287 	    if (spec.allocated == spec.unnamed_arg_count)
288 	      {
289 		spec.allocated = 2 * spec.allocated + 1;
290 		spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
291 	      }
292 	    spec.unnamed[spec.unnamed_arg_count].type = type;
293 	    spec.unnamed_arg_count++;
294 	  }
295 
296 	format++;
297       }
298 
299   /* Sort the named argument array, and eliminate duplicates.  */
300   if (spec.named_arg_count > 1)
301     {
302       unsigned int i, j;
303       bool err;
304 
305       qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
306 	     named_arg_compare);
307 
308       /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
309       err = false;
310       for (i = j = 0; i < spec.named_arg_count; i++)
311 	if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
312 	  {
313 	    enum format_arg_type type1 = spec.named[i].type;
314 	    enum format_arg_type type2 = spec.named[j-1].type;
315 	    enum format_arg_type type_both;
316 
317 	    if (type1 == type2 || type2 == FAT_ANY)
318 	      type_both = type1;
319 	    else if (type1 == FAT_ANY)
320 	      type_both = type2;
321 	    else
322 	      {
323 		/* Incompatible types.  */
324 		type_both = FAT_NONE;
325 		if (!err)
326 		  *invalid_reason =
327 		    xasprintf (_("The string refers to the argument named '%s' in incompatible ways."), spec.named[i].name);
328 		err = true;
329 	      }
330 
331 	    spec.named[j-1].type = type_both;
332 	    free (spec.named[i].name);
333 	  }
334 	else
335 	  {
336 	    if (j < i)
337 	      {
338 		spec.named[j].name = spec.named[i].name;
339 		spec.named[j].type = spec.named[i].type;
340 	      }
341 	    j++;
342 	  }
343       spec.named_arg_count = j;
344       if (err)
345 	/* *invalid_reason has already been set above.  */
346 	goto bad_format;
347     }
348 
349   result = (struct spec *) xmalloc (sizeof (struct spec));
350   *result = spec;
351   return result;
352 
353  bad_format:
354   if (spec.named != NULL)
355     {
356       unsigned int i;
357       for (i = 0; i < spec.named_arg_count; i++)
358 	free (spec.named[i].name);
359       free (spec.named);
360     }
361   if (spec.unnamed != NULL)
362     free (spec.unnamed);
363   return NULL;
364 }
365 
366 static void
format_free(void * descr)367 format_free (void *descr)
368 {
369   struct spec *spec = (struct spec *) descr;
370 
371   if (spec->named != NULL)
372     {
373       unsigned int i;
374       for (i = 0; i < spec->named_arg_count; i++)
375 	free (spec->named[i].name);
376       free (spec->named);
377     }
378   if (spec->unnamed != NULL)
379     free (spec->unnamed);
380   free (spec);
381 }
382 
383 static int
format_get_number_of_directives(void * descr)384 format_get_number_of_directives (void *descr)
385 {
386   struct spec *spec = (struct spec *) descr;
387 
388   return spec->directives;
389 }
390 
391 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgstr)392 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
393 	      formatstring_error_logger_t error_logger,
394 	      const char *pretty_msgstr)
395 {
396   struct spec *spec1 = (struct spec *) msgid_descr;
397   struct spec *spec2 = (struct spec *) msgstr_descr;
398   bool err = false;
399 
400   if (spec1->named_arg_count > 0 && spec2->unnamed_arg_count > 0)
401     {
402       if (error_logger)
403 	error_logger (_("format specifications in 'msgid' expect a mapping, those in '%s' expect a tuple"),
404 		      pretty_msgstr);
405       err = true;
406     }
407   else if (spec1->unnamed_arg_count > 0 && spec2->named_arg_count > 0)
408     {
409       if (error_logger)
410 	error_logger (_("format specifications in 'msgid' expect a tuple, those in '%s' expect a mapping"),
411 		      pretty_msgstr);
412       err = true;
413     }
414   else
415     {
416       if (spec1->named_arg_count + spec2->named_arg_count > 0)
417 	{
418 	  unsigned int i, j;
419 	  unsigned int n1 = spec1->named_arg_count;
420 	  unsigned int n2 = spec2->named_arg_count;
421 
422 	  /* Check the argument names are the same.
423 	     Both arrays are sorted.  We search for the first difference.  */
424 	  for (i = 0, j = 0; i < n1 || j < n2; )
425 	    {
426 	      int cmp = (i >= n1 ? 1 :
427 			 j >= n2 ? -1 :
428 			 strcmp (spec1->named[i].name, spec2->named[j].name));
429 
430 	      if (cmp > 0)
431 		{
432 		  if (error_logger)
433 		    error_logger (_("a format specification for argument '%s', as in '%s', doesn't exist in 'msgid'"),
434 				  spec2->named[j].name, pretty_msgstr);
435 		  err = true;
436 		  break;
437 		}
438 	      else if (cmp < 0)
439 		{
440 		  if (equality)
441 		    {
442 		      if (error_logger)
443 			error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
444 				      spec1->named[i].name, pretty_msgstr);
445 		      err = true;
446 		      break;
447 		    }
448 		  else
449 		    i++;
450 		}
451 	      else
452 		j++, i++;
453 	    }
454 	  /* Check the argument types are the same.  */
455 	  if (!err)
456 	    for (i = 0, j = 0; j < n2; )
457 	      {
458 		if (strcmp (spec1->named[i].name, spec2->named[j].name) == 0)
459 		  {
460 		    if (spec1->named[i].type != spec2->named[j].type)
461 		      {
462 			if (error_logger)
463 			  error_logger (_("format specifications in 'msgid' and '%s' for argument '%s' are not the same"),
464 					pretty_msgstr, spec2->named[j].name);
465 			err = true;
466 			break;
467 		      }
468 		    j++, i++;
469 		  }
470 		else
471 		  i++;
472 	      }
473 	}
474 
475       if (spec1->unnamed_arg_count + spec2->unnamed_arg_count > 0)
476 	{
477 	  unsigned int i;
478 
479 	  /* Check the argument types are the same.  */
480 	  if (equality
481 	      ? spec1->unnamed_arg_count != spec2->unnamed_arg_count
482 	      : spec1->unnamed_arg_count < spec2->unnamed_arg_count)
483 	    {
484 	      if (error_logger)
485 		error_logger (_("number of format specifications in 'msgid' and '%s' does not match"),
486 			      pretty_msgstr);
487 	      err = true;
488 	    }
489 	  else
490 	    for (i = 0; i < spec2->unnamed_arg_count; i++)
491 	      if (spec1->unnamed[i].type != spec2->unnamed[i].type)
492 		{
493 		  if (error_logger)
494 		    error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"),
495 				  pretty_msgstr, i + 1);
496 		  err = true;
497 		}
498 	}
499     }
500 
501   return err;
502 }
503 
504 
/* Parser entry points for Python format strings.  The initializer is
   positional, so the order must follow the member order of
   'struct formatstring_parser' declared in format.h.  */
struct formatstring_parser formatstring_python =
{
  format_parse,
  format_free,
  format_get_number_of_directives,
  NULL, /* NOTE(review): optional member left unset; its meaning is defined in format.h — confirm there.  */
  format_check
};
513 
514 
515 unsigned int
get_python_format_unnamed_arg_count(const char * string)516 get_python_format_unnamed_arg_count (const char *string)
517 {
518   /* Parse the format string.  */
519   char *invalid_reason = NULL;
520   struct spec *descr =
521     (struct spec *) format_parse (string, false, &invalid_reason);
522 
523   if (descr != NULL)
524     {
525       unsigned int result = descr->unnamed_arg_count;
526 
527       format_free (descr);
528       return result;
529     }
530   else
531     {
532       free (invalid_reason);
533       return 0;
534     }
535 }
536 
537 
538 #ifdef TEST
539 
540 /* Test program: Print the argument list specification returned by
541    format_parse for strings read from standard input.  */
542 
543 #include <stdio.h>
544 #include "getline.h"
545 
546 static void
format_print(void * descr)547 format_print (void *descr)
548 {
549   struct spec *spec = (struct spec *) descr;
550   unsigned int i;
551 
552   if (spec == NULL)
553     {
554       printf ("INVALID");
555       return;
556     }
557 
558   if (spec->named_arg_count > 0)
559     {
560       if (spec->unnamed_arg_count > 0)
561 	abort ();
562 
563       printf ("{");
564       for (i = 0; i < spec->named_arg_count; i++)
565 	{
566 	  if (i > 0)
567 	    printf (", ");
568 	  printf ("'%s':", spec->named[i].name);
569 	  switch (spec->named[i].type)
570 	    {
571 	    case FAT_ANY:
572 	      printf ("*");
573 	      break;
574 	    case FAT_CHARACTER:
575 	      printf ("c");
576 	      break;
577 	    case FAT_STRING:
578 	      printf ("s");
579 	      break;
580 	    case FAT_INTEGER:
581 	      printf ("i");
582 	      break;
583 	    case FAT_FLOAT:
584 	      printf ("f");
585 	      break;
586 	    default:
587 	      abort ();
588 	    }
589 	}
590       printf ("}");
591     }
592   else
593     {
594       printf ("(");
595       for (i = 0; i < spec->unnamed_arg_count; i++)
596 	{
597 	  if (i > 0)
598 	    printf (" ");
599 	  switch (spec->unnamed[i].type)
600 	    {
601 	    case FAT_ANY:
602 	      printf ("*");
603 	      break;
604 	    case FAT_CHARACTER:
605 	      printf ("c");
606 	      break;
607 	    case FAT_STRING:
608 	      printf ("s");
609 	      break;
610 	    case FAT_INTEGER:
611 	      printf ("i");
612 	      break;
613 	    case FAT_FLOAT:
614 	      printf ("f");
615 	      break;
616 	    default:
617 	      abort ();
618 	    }
619 	}
620       printf (")");
621     }
622 }
623 
624 int
main()625 main ()
626 {
627   for (;;)
628     {
629       char *line = NULL;
630       size_t line_size = 0;
631       int line_len;
632       char *invalid_reason;
633       void *descr;
634 
635       line_len = getline (&line, &line_size, stdin);
636       if (line_len < 0)
637 	break;
638       if (line_len > 0 && line[line_len - 1] == '\n')
639 	line[--line_len] = '\0';
640 
641       invalid_reason = NULL;
642       descr = format_parse (line, false, &invalid_reason);
643 
644       format_print (descr);
645       printf ("\n");
646       if (descr == NULL)
647 	printf ("%s\n", invalid_reason);
648 
649       free (invalid_reason);
650       free (line);
651     }
652 
653   return 0;
654 }
655 
656 /*
657  * For Emacs M-x compile
658  * Local Variables:
659  * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-python.c ../lib/libgettextlib.la"
660  * End:
661  */
662 
663 #endif /* TEST */
664