1 /* Python format strings.
2 Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22
23 #include <stdbool.h>
24 #include <stdlib.h>
25 #include <string.h>
26
27 #include "format.h"
28 #include "c-ctype.h"
29 #include "xalloc.h"
30 #include "xvasprintf.h"
31 #include "format-invalid.h"
32 #include "gettext.h"
33
34 #define _(str) gettext (str)
35
36 /* Python format strings are described in
37 Python Library reference
38 2. Built-in Types, Exceptions and Functions
39 2.1. Built-in Types
40 2.1.5. Sequence Types
41 2.1.5.2. String Formatting Operations
42 Any string or Unicode string can act as format string via the '%' operator,
43 implemented in stringobject.c and unicodeobject.c.
44 A directive
45 - starts with '%'
46 - is optionally followed by '(ident)' where ident is any sequence of
47 characters with balanced left and right parentheses,
48 - is optionally followed by any of the characters '-' (left justification),
49 '+' (sign), ' ' (blank), '#' (alt), '0' (zero), each of which acts as a
50 flag,
51 - is optionally followed by a width specification: '*' (reads an argument)
52 or a nonempty digit sequence,
53 - is optionally followed by '.' and a precision specification: '*' (reads
54 an argument) or a nonempty digit sequence,
55 - is optionally followed by a size specifier, one of 'h' 'l' 'L'.
56 - is finished by a specifier
57 - '%', that needs no argument,
58 - 'c', that needs a character argument,
59 - 's', 'r', that need a string argument,
60 - 'i', 'd', 'u', 'o', 'x', 'X', that need an integer argument,
61 - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument.
62 Use of '(ident)' and use of unnamed argument specifications are exclusive,
63 because the first requires a mapping as argument, while the second requires
64 a tuple as argument.
65 */
66
/* Classification of the argument consumed by one format directive.  */
enum format_arg_type
{
  FAT_NONE,       /* Result of merging two incompatible types.  */
  FAT_ANY,        /* Produced by the '%' specifier; matches any type.  */
  FAT_CHARACTER,  /* 'c' */
  FAT_STRING,     /* 's', 'r' */
  FAT_INTEGER,    /* 'i', 'd', 'u', 'o', 'x', 'X', and '*' width/precision */
  FAT_FLOAT       /* 'e', 'E', 'f', 'g', 'G' */
};
76
77 struct named_arg
78 {
79 char *name;
80 enum format_arg_type type;
81 };
82
83 struct unnamed_arg
84 {
85 enum format_arg_type type;
86 };
87
/* Parsed description of a Python format string.  */
struct spec
{
  /* Number of '%' directives seen (a "%%" directive is counted too).  */
  unsigned int directives;
  /* Number of valid entries in 'named'.  */
  unsigned int named_arg_count;
  /* Number of valid entries in 'unnamed'.  */
  unsigned int unnamed_arg_count;
  /* Capacity, in elements, of whichever of the two arrays is being grown;
     the parser uses this single counter for both.  */
  unsigned int allocated;
  /* Named arguments; sorted by name and free of duplicates once parsing
     has succeeded.  NULL if there are none.  */
  struct named_arg *named;
  /* Unnamed arguments in order of appearance.  NULL if there are none.  */
  struct unnamed_arg *unnamed;
};
97
/* Locale independent test for a decimal digit.
   The argument may be a 'char' or an 'unsigned char'.  (The <ctype.h>
   isdigit, in contrast, requires an 'unsigned char' value.)
   Characters below '0' wrap around to large unsigned values, so a single
   comparison covers both ends of the range.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') <= 9)
103
104
105 static int
named_arg_compare(const void * p1,const void * p2)106 named_arg_compare (const void *p1, const void *p2)
107 {
108 return strcmp (((const struct named_arg *) p1)->name,
109 ((const struct named_arg *) p2)->name);
110 }
111
/* Expands to an xstrdup'ed copy of the translated message reported when a
   format string uses both '%(name)' directives and unnamed ones.  */
#define INVALID_MIXES_NAMED_UNNAMED() \
  xstrdup (_("The string refers to arguments both through argument names and through unnamed argument specifications."))
114
115 static void *
format_parse(const char * format,bool translated,char ** invalid_reason)116 format_parse (const char *format, bool translated, char **invalid_reason)
117 {
118 struct spec spec;
119 struct spec *result;
120
121 spec.directives = 0;
122 spec.named_arg_count = 0;
123 spec.unnamed_arg_count = 0;
124 spec.allocated = 0;
125 spec.named = NULL;
126 spec.unnamed = NULL;
127
128 for (; *format != '\0';)
129 if (*format++ == '%')
130 {
131 /* A directive. */
132 char *name = NULL;
133 enum format_arg_type type;
134
135 spec.directives++;
136
137 if (*format == '(')
138 {
139 unsigned int depth;
140 const char *name_start;
141 const char *name_end;
142 size_t n;
143
144 name_start = ++format;
145 depth = 0;
146 for (; *format != '\0'; format++)
147 {
148 if (*format == '(')
149 depth++;
150 else if (*format == ')')
151 {
152 if (depth == 0)
153 break;
154 else
155 depth--;
156 }
157 }
158 if (*format == '\0')
159 {
160 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
161 goto bad_format;
162 }
163 name_end = format++;
164
165 n = name_end - name_start;
166 name = (char *) xmalloc (n + 1);
167 memcpy (name, name_start, n);
168 name[n] = '\0';
169 }
170
171 while (*format == '-' || *format == '+' || *format == ' '
172 || *format == '#' || *format == '0')
173 format++;
174
175 if (*format == '*')
176 {
177 format++;
178
179 /* Named and unnamed specifications are exclusive. */
180 if (spec.named_arg_count > 0)
181 {
182 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
183 goto bad_format;
184 }
185
186 if (spec.allocated == spec.unnamed_arg_count)
187 {
188 spec.allocated = 2 * spec.allocated + 1;
189 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
190 }
191 spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
192 spec.unnamed_arg_count++;
193 }
194 else if (isdigit (*format))
195 {
196 do format++; while (isdigit (*format));
197 }
198
199 if (*format == '.')
200 {
201 format++;
202
203 if (*format == '*')
204 {
205 format++;
206
207 /* Named and unnamed specifications are exclusive. */
208 if (spec.named_arg_count > 0)
209 {
210 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
211 goto bad_format;
212 }
213
214 if (spec.allocated == spec.unnamed_arg_count)
215 {
216 spec.allocated = 2 * spec.allocated + 1;
217 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
218 }
219 spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
220 spec.unnamed_arg_count++;
221 }
222 else if (isdigit (*format))
223 {
224 do format++; while (isdigit (*format));
225 }
226 }
227
228 if (*format == 'h' || *format == 'l' || *format == 'L')
229 format++;
230
231 switch (*format)
232 {
233 case '%':
234 type = FAT_ANY;
235 break;
236 case 'c':
237 type = FAT_CHARACTER;
238 break;
239 case 's': case 'r':
240 type = FAT_STRING;
241 break;
242 case 'i': case 'd': case 'u': case 'o': case 'x': case 'X':
243 type = FAT_INTEGER;
244 break;
245 case 'e': case 'E': case 'f': case 'g': case 'G':
246 type = FAT_FLOAT;
247 break;
248 default:
249 *invalid_reason =
250 (*format == '\0'
251 ? INVALID_UNTERMINATED_DIRECTIVE ()
252 : INVALID_CONVERSION_SPECIFIER (spec.directives, *format));
253 goto bad_format;
254 }
255
256 if (name != NULL)
257 {
258 /* Named argument. */
259
260 /* Named and unnamed specifications are exclusive. */
261 if (spec.unnamed_arg_count > 0)
262 {
263 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
264 goto bad_format;
265 }
266
267 if (spec.allocated == spec.named_arg_count)
268 {
269 spec.allocated = 2 * spec.allocated + 1;
270 spec.named = (struct named_arg *) xrealloc (spec.named, spec.allocated * sizeof (struct named_arg));
271 }
272 spec.named[spec.named_arg_count].name = name;
273 spec.named[spec.named_arg_count].type = type;
274 spec.named_arg_count++;
275 }
276 else if (*format != '%')
277 {
278 /* Unnamed argument. */
279
280 /* Named and unnamed specifications are exclusive. */
281 if (spec.named_arg_count > 0)
282 {
283 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
284 goto bad_format;
285 }
286
287 if (spec.allocated == spec.unnamed_arg_count)
288 {
289 spec.allocated = 2 * spec.allocated + 1;
290 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
291 }
292 spec.unnamed[spec.unnamed_arg_count].type = type;
293 spec.unnamed_arg_count++;
294 }
295
296 format++;
297 }
298
299 /* Sort the named argument array, and eliminate duplicates. */
300 if (spec.named_arg_count > 1)
301 {
302 unsigned int i, j;
303 bool err;
304
305 qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
306 named_arg_compare);
307
308 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
309 err = false;
310 for (i = j = 0; i < spec.named_arg_count; i++)
311 if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
312 {
313 enum format_arg_type type1 = spec.named[i].type;
314 enum format_arg_type type2 = spec.named[j-1].type;
315 enum format_arg_type type_both;
316
317 if (type1 == type2 || type2 == FAT_ANY)
318 type_both = type1;
319 else if (type1 == FAT_ANY)
320 type_both = type2;
321 else
322 {
323 /* Incompatible types. */
324 type_both = FAT_NONE;
325 if (!err)
326 *invalid_reason =
327 xasprintf (_("The string refers to the argument named '%s' in incompatible ways."), spec.named[i].name);
328 err = true;
329 }
330
331 spec.named[j-1].type = type_both;
332 free (spec.named[i].name);
333 }
334 else
335 {
336 if (j < i)
337 {
338 spec.named[j].name = spec.named[i].name;
339 spec.named[j].type = spec.named[i].type;
340 }
341 j++;
342 }
343 spec.named_arg_count = j;
344 if (err)
345 /* *invalid_reason has already been set above. */
346 goto bad_format;
347 }
348
349 result = (struct spec *) xmalloc (sizeof (struct spec));
350 *result = spec;
351 return result;
352
353 bad_format:
354 if (spec.named != NULL)
355 {
356 unsigned int i;
357 for (i = 0; i < spec.named_arg_count; i++)
358 free (spec.named[i].name);
359 free (spec.named);
360 }
361 if (spec.unnamed != NULL)
362 free (spec.unnamed);
363 return NULL;
364 }
365
366 static void
format_free(void * descr)367 format_free (void *descr)
368 {
369 struct spec *spec = (struct spec *) descr;
370
371 if (spec->named != NULL)
372 {
373 unsigned int i;
374 for (i = 0; i < spec->named_arg_count; i++)
375 free (spec->named[i].name);
376 free (spec->named);
377 }
378 if (spec->unnamed != NULL)
379 free (spec->unnamed);
380 free (spec);
381 }
382
383 static int
format_get_number_of_directives(void * descr)384 format_get_number_of_directives (void *descr)
385 {
386 struct spec *spec = (struct spec *) descr;
387
388 return spec->directives;
389 }
390
391 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgstr)392 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
393 formatstring_error_logger_t error_logger,
394 const char *pretty_msgstr)
395 {
396 struct spec *spec1 = (struct spec *) msgid_descr;
397 struct spec *spec2 = (struct spec *) msgstr_descr;
398 bool err = false;
399
400 if (spec1->named_arg_count > 0 && spec2->unnamed_arg_count > 0)
401 {
402 if (error_logger)
403 error_logger (_("format specifications in 'msgid' expect a mapping, those in '%s' expect a tuple"),
404 pretty_msgstr);
405 err = true;
406 }
407 else if (spec1->unnamed_arg_count > 0 && spec2->named_arg_count > 0)
408 {
409 if (error_logger)
410 error_logger (_("format specifications in 'msgid' expect a tuple, those in '%s' expect a mapping"),
411 pretty_msgstr);
412 err = true;
413 }
414 else
415 {
416 if (spec1->named_arg_count + spec2->named_arg_count > 0)
417 {
418 unsigned int i, j;
419 unsigned int n1 = spec1->named_arg_count;
420 unsigned int n2 = spec2->named_arg_count;
421
422 /* Check the argument names are the same.
423 Both arrays are sorted. We search for the first difference. */
424 for (i = 0, j = 0; i < n1 || j < n2; )
425 {
426 int cmp = (i >= n1 ? 1 :
427 j >= n2 ? -1 :
428 strcmp (spec1->named[i].name, spec2->named[j].name));
429
430 if (cmp > 0)
431 {
432 if (error_logger)
433 error_logger (_("a format specification for argument '%s', as in '%s', doesn't exist in 'msgid'"),
434 spec2->named[j].name, pretty_msgstr);
435 err = true;
436 break;
437 }
438 else if (cmp < 0)
439 {
440 if (equality)
441 {
442 if (error_logger)
443 error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
444 spec1->named[i].name, pretty_msgstr);
445 err = true;
446 break;
447 }
448 else
449 i++;
450 }
451 else
452 j++, i++;
453 }
454 /* Check the argument types are the same. */
455 if (!err)
456 for (i = 0, j = 0; j < n2; )
457 {
458 if (strcmp (spec1->named[i].name, spec2->named[j].name) == 0)
459 {
460 if (spec1->named[i].type != spec2->named[j].type)
461 {
462 if (error_logger)
463 error_logger (_("format specifications in 'msgid' and '%s' for argument '%s' are not the same"),
464 pretty_msgstr, spec2->named[j].name);
465 err = true;
466 break;
467 }
468 j++, i++;
469 }
470 else
471 i++;
472 }
473 }
474
475 if (spec1->unnamed_arg_count + spec2->unnamed_arg_count > 0)
476 {
477 unsigned int i;
478
479 /* Check the argument types are the same. */
480 if (equality
481 ? spec1->unnamed_arg_count != spec2->unnamed_arg_count
482 : spec1->unnamed_arg_count < spec2->unnamed_arg_count)
483 {
484 if (error_logger)
485 error_logger (_("number of format specifications in 'msgid' and '%s' does not match"),
486 pretty_msgstr);
487 err = true;
488 }
489 else
490 for (i = 0; i < spec2->unnamed_arg_count; i++)
491 if (spec1->unnamed[i].type != spec2->unnamed[i].type)
492 {
493 if (error_logger)
494 error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"),
495 pretty_msgstr, i + 1);
496 err = true;
497 }
498 }
499 }
500
501 return err;
502 }
503
504
505 struct formatstring_parser formatstring_python =
506 {
507 format_parse,
508 format_free,
509 format_get_number_of_directives,
510 NULL,
511 format_check
512 };
513
514
515 unsigned int
get_python_format_unnamed_arg_count(const char * string)516 get_python_format_unnamed_arg_count (const char *string)
517 {
518 /* Parse the format string. */
519 char *invalid_reason = NULL;
520 struct spec *descr =
521 (struct spec *) format_parse (string, false, &invalid_reason);
522
523 if (descr != NULL)
524 {
525 unsigned int result = descr->unnamed_arg_count;
526
527 format_free (descr);
528 return result;
529 }
530 else
531 {
532 free (invalid_reason);
533 return 0;
534 }
535 }
536
537
538 #ifdef TEST
539
540 /* Test program: Print the argument list specification returned by
541 format_parse for strings read from standard input. */
542
543 #include <stdio.h>
544 #include "getline.h"
545
546 static void
format_print(void * descr)547 format_print (void *descr)
548 {
549 struct spec *spec = (struct spec *) descr;
550 unsigned int i;
551
552 if (spec == NULL)
553 {
554 printf ("INVALID");
555 return;
556 }
557
558 if (spec->named_arg_count > 0)
559 {
560 if (spec->unnamed_arg_count > 0)
561 abort ();
562
563 printf ("{");
564 for (i = 0; i < spec->named_arg_count; i++)
565 {
566 if (i > 0)
567 printf (", ");
568 printf ("'%s':", spec->named[i].name);
569 switch (spec->named[i].type)
570 {
571 case FAT_ANY:
572 printf ("*");
573 break;
574 case FAT_CHARACTER:
575 printf ("c");
576 break;
577 case FAT_STRING:
578 printf ("s");
579 break;
580 case FAT_INTEGER:
581 printf ("i");
582 break;
583 case FAT_FLOAT:
584 printf ("f");
585 break;
586 default:
587 abort ();
588 }
589 }
590 printf ("}");
591 }
592 else
593 {
594 printf ("(");
595 for (i = 0; i < spec->unnamed_arg_count; i++)
596 {
597 if (i > 0)
598 printf (" ");
599 switch (spec->unnamed[i].type)
600 {
601 case FAT_ANY:
602 printf ("*");
603 break;
604 case FAT_CHARACTER:
605 printf ("c");
606 break;
607 case FAT_STRING:
608 printf ("s");
609 break;
610 case FAT_INTEGER:
611 printf ("i");
612 break;
613 case FAT_FLOAT:
614 printf ("f");
615 break;
616 default:
617 abort ();
618 }
619 }
620 printf (")");
621 }
622 }
623
/* Test driver: read format strings from standard input, one per line, and
   print for each the parsed argument list, followed by the error message
   if the string is invalid.  */
int
main ()
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated a buffer even though it failed.  */
          free (line);
          break;
        }
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        /* Release the descriptor; format_free does not accept NULL.  */
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
655
656 /*
657 * For Emacs M-x compile
658 * Local Variables:
659 * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-python.c ../lib/libgettextlib.la"
660 * End:
661 */
662
663 #endif /* TEST */
664