/* Boost format strings.
   Copyright (C) 2001-2004, 2006-2007, 2009, 2019-2020 Free Software Foundation, Inc.
   Written by Bruno Haible <haible@clisp.cons.org>, 2006.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <stdbool.h>
#include <stdlib.h>

#include "format.h"
#include "c-ctype.h"
#include "xalloc.h"
#include "xvasprintf.h"
#include "format-invalid.h"
#include "gettext.h"

#define _(str) gettext (str)
33
/* Boost format strings are described in
     boost_1_33_1/libs/format/doc/format.html
   and implemented in
     boost_1_33_1/boost/format/parsing.hpp.
   A directive (other than '%%')
   - starts with '%' or '%|'; in the latter case it must end in '|',
   - is continued either by
       - 'm%' where m is a positive integer, starting with a nonzero digit;
         in this case the directive must not have started with '%|'; or
       - the following:
           - optional: 'm$' where m is a positive integer, starting with a
             nonzero digit,
           - optional: any of the characters '#', '0', '-', ' ', '+', "'",
             '_', '=', 'h', 'l',
           - optional: a width specification: '*' (reads an argument) or '*m$'
             or a nonempty digit sequence,
           - optional: a '.' and a precision specification: '*' (reads an
             argument) or '*m$' or a nonempty digit sequence,
           - optional: any of the characters 'h', 'l', 'L',
           - if the directive started with '%|':
               an optional specifier and a final '|',
             otherwise
               a mandatory specifier.
             If no specifier is given, it needs an argument of any type.
             The possible specifiers are:
               - 'c', 'C', that need a character argument,
               - 's', 'S', that need an argument of any type,
               - 'i', 'd', 'o', 'u', 'x', 'X', that need an integer argument,
               - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument,
               - 'p', that needs a 'void *' argument,
               - 't', that doesn't need an argument,
               - 'TX', where X is any character, that doesn't need an argument,
               - 'n', that in printf needs a pointer to integer, but for which
                 the parser below records no argument.
   The Boost format string interpreter doesn't actually care about
   the argument types, but we do, because it increases the likelihood
   of detecting translator mistakes.
   Numbered ('%m%' or '%m$' or '*m$') and unnumbered argument specifications
   cannot be used in the same string.
 */
73
/* The kind of argument that a directive consumes.
   FAT_NONE is the value given to directives for which no argument is
   recorded; FAT_ANY is used where an argument of any type is acceptable.  */
enum format_arg_type
{
  FAT_NONE      = 0,
  /* Basic types */
  FAT_INTEGER   = 1,
  FAT_DOUBLE    = 2,
  FAT_CHAR      = 3,
  FAT_POINTER   = 4,
  FAT_ANY       = 5
};
84
85 struct numbered_arg
86 {
87 unsigned int number;
88 enum format_arg_type type;
89 };
90
/* Result of parsing one format string.  */
struct spec
{
  /* Number of directives seen, including '%%'.  */
  unsigned int directives;
  /* Number of valid elements in NUMBERED.  */
  unsigned int numbered_arg_count;
  /* Heap-allocated array of the arguments the string refers to, or NULL
     when there are none.  */
  struct numbered_arg *numbered;
};
97
/* Locale independent test for a decimal digit.
   Argument can be 'char' or 'unsigned char'.  (Whereas the argument of
   <ctype.h> isdigit must be an 'unsigned char'.)
   The difference from '0' is converted to unsigned, so characters that
   sort before '0' become large values and a single '< 10' comparison
   rejects both sides of the range.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') < 10)
103
104
105 static int
numbered_arg_compare(const void * p1,const void * p2)106 numbered_arg_compare (const void *p1, const void *p2)
107 {
108 unsigned int n1 = ((const struct numbered_arg *) p1)->number;
109 unsigned int n2 = ((const struct numbered_arg *) p2)->number;
110
111 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
112 }
113
114 static void *
format_parse(const char * format,bool translated,char * fdi,char ** invalid_reason)115 format_parse (const char *format, bool translated, char *fdi,
116 char **invalid_reason)
117 {
118 const char *const format_start = format;
119 struct spec spec;
120 unsigned int numbered_allocated;
121 unsigned int unnumbered_arg_count;
122 struct spec *result;
123
124 spec.directives = 0;
125 spec.numbered_arg_count = 0;
126 spec.numbered = NULL;
127 numbered_allocated = 0;
128 unnumbered_arg_count = 0;
129
130 for (; *format != '\0';)
131 if (*format++ == '%')
132 {
133 /* A directive. */
134 FDI_SET (format - 1, FMTDIR_START);
135 spec.directives++;
136
137 if (*format == '%')
138 format++;
139 else
140 {
141 bool brackets = false;
142 bool done = false;
143 unsigned int number = 0;
144 enum format_arg_type type = FAT_NONE;
145
146 if (*format == '|')
147 {
148 format++;
149 brackets = true;
150 }
151
152 if (isdigit (*format) && *format != '0')
153 {
154 const char *f = format;
155 unsigned int m = 0;
156
157 do
158 {
159 m = 10 * m + (*f - '0');
160 f++;
161 }
162 while (isdigit (*f));
163
164 if ((!brackets && *f == '%') || *f == '$')
165 {
166 if (m == 0) /* can happen if m overflows */
167 {
168 *invalid_reason = INVALID_ARGNO_0 (spec.directives);
169 FDI_SET (f, FMTDIR_ERROR);
170 goto bad_format;
171 }
172 number = m;
173 if (*f == '%')
174 {
175 type = FAT_ANY;
176 done = true;
177 }
178 format = ++f;
179 }
180 }
181
182 if (!done)
183 {
184 /* Parse flags. */
185 for (;;)
186 {
187 if (*format == ' ' || *format == '+' || *format == '-'
188 || *format == '#' || *format == '0' || *format == '\''
189 || *format == '_' || *format == '=' || *format == 'h'
190 || *format == 'l')
191 format++;
192 else
193 break;
194 }
195
196 /* Parse width. */
197 if (*format == '*')
198 {
199 unsigned int width_number = 0;
200
201 format++;
202
203 if (isdigit (*format))
204 {
205 const char *f = format;
206 unsigned int m = 0;
207
208 do
209 {
210 m = 10 * m + (*f - '0');
211 f++;
212 }
213 while (isdigit (*f));
214
215 if (*f == '$')
216 {
217 if (m == 0)
218 {
219 *invalid_reason =
220 INVALID_WIDTH_ARGNO_0 (spec.directives);
221 FDI_SET (f, FMTDIR_ERROR);
222 goto bad_format;
223 }
224 width_number = m;
225 format = ++f;
226 }
227 }
228
229 if (width_number)
230 {
231 /* Numbered argument. */
232
233 /* Numbered and unnumbered specifications are
234 exclusive. */
235 if (unnumbered_arg_count > 0)
236 {
237 *invalid_reason =
238 INVALID_MIXES_NUMBERED_UNNUMBERED ();
239 FDI_SET (format - 1, FMTDIR_ERROR);
240 goto bad_format;
241 }
242
243 if (numbered_allocated == spec.numbered_arg_count)
244 {
245 numbered_allocated = 2 * numbered_allocated + 1;
246 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
247 }
248 spec.numbered[spec.numbered_arg_count].number = width_number;
249 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
250 spec.numbered_arg_count++;
251 }
252 else
253 {
254 /* Unnumbered argument. */
255
256 /* Numbered and unnumbered specifications are
257 exclusive. */
258 if (spec.numbered_arg_count > 0)
259 {
260 *invalid_reason =
261 INVALID_MIXES_NUMBERED_UNNUMBERED ();
262 FDI_SET (format - 1, FMTDIR_ERROR);
263 goto bad_format;
264 }
265
266 if (numbered_allocated == unnumbered_arg_count)
267 {
268 numbered_allocated = 2 * numbered_allocated + 1;
269 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
270 }
271 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
272 spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
273 unnumbered_arg_count++;
274 }
275 }
276 else if (isdigit (*format))
277 {
278 do format++; while (isdigit (*format));
279 }
280
281 /* Parse precision. */
282 if (*format == '.')
283 {
284 format++;
285
286 if (*format == '*')
287 {
288 unsigned int precision_number = 0;
289
290 format++;
291
292 if (isdigit (*format))
293 {
294 const char *f = format;
295 unsigned int m = 0;
296
297 do
298 {
299 m = 10 * m + (*f - '0');
300 f++;
301 }
302 while (isdigit (*f));
303
304 if (*f == '$')
305 {
306 if (m == 0)
307 {
308 *invalid_reason =
309 INVALID_PRECISION_ARGNO_0 (spec.directives);
310 FDI_SET (f, FMTDIR_ERROR);
311 goto bad_format;
312 }
313 precision_number = m;
314 format = ++f;
315 }
316 }
317
318 if (precision_number)
319 {
320 /* Numbered argument. */
321
322 /* Numbered and unnumbered specifications are
323 exclusive. */
324 if (unnumbered_arg_count > 0)
325 {
326 *invalid_reason =
327 INVALID_MIXES_NUMBERED_UNNUMBERED ();
328 FDI_SET (format - 1, FMTDIR_ERROR);
329 goto bad_format;
330 }
331
332 if (numbered_allocated == spec.numbered_arg_count)
333 {
334 numbered_allocated = 2 * numbered_allocated + 1;
335 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
336 }
337 spec.numbered[spec.numbered_arg_count].number = precision_number;
338 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
339 spec.numbered_arg_count++;
340 }
341 else
342 {
343 /* Unnumbered argument. */
344
345 /* Numbered and unnumbered specifications are
346 exclusive. */
347 if (spec.numbered_arg_count > 0)
348 {
349 *invalid_reason =
350 INVALID_MIXES_NUMBERED_UNNUMBERED ();
351 FDI_SET (format - 1, FMTDIR_ERROR);
352 goto bad_format;
353 }
354
355 if (numbered_allocated == unnumbered_arg_count)
356 {
357 numbered_allocated = 2 * numbered_allocated + 1;
358 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
359 }
360 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
361 spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
362 unnumbered_arg_count++;
363 }
364 }
365 else if (isdigit (*format))
366 {
367 do format++; while (isdigit (*format));
368 }
369 }
370
371 /* Parse size. */
372 for (;;)
373 {
374 if (*format == 'h' || *format == 'l' || *format == 'L')
375 format++;
376 else
377 break;
378 }
379
380 switch (*format++)
381 {
382 case 'c': case 'C':
383 type = FAT_CHAR;
384 break;
385 case 's': case 'S':
386 type = FAT_ANY;
387 break;
388 case 'i': case 'd': case 'o': case 'u': case 'x': case 'X':
389 type = FAT_INTEGER;
390 break;
391 case 'e': case 'E': case 'f': case 'g': case 'G':
392 type = FAT_DOUBLE;
393 break;
394 case 'p':
395 type = FAT_POINTER;
396 break;
397 case 't':
398 type = FAT_NONE;
399 break;
400 case 'T':
401 if (*format == '\0')
402 {
403 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
404 FDI_SET (format - 1, FMTDIR_ERROR);
405 goto bad_format;
406 }
407 format++;
408 type = FAT_NONE;
409 break;
410 case 'n':
411 type = FAT_NONE;
412 break;
413 case '|':
414 if (brackets)
415 {
416 --format;
417 type = FAT_ANY;
418 break;
419 }
420 /*FALLTHROUGH*/
421 default:
422 --format;
423 if (*format == '\0')
424 {
425 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
426 FDI_SET (format - 1, FMTDIR_ERROR);
427 }
428 else
429 {
430 *invalid_reason =
431 INVALID_CONVERSION_SPECIFIER (spec.directives,
432 *format);
433 FDI_SET (format, FMTDIR_ERROR);
434 }
435 goto bad_format;
436 }
437 if (brackets)
438 {
439 if (*format != '|')
440 {
441 if (*format == '\0')
442 {
443 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
444 FDI_SET (format - 1, FMTDIR_ERROR);
445 }
446 else
447 {
448 *invalid_reason =
449 xasprintf (_("The directive number %u starts with | but does not end with |."),
450 spec.directives);
451 FDI_SET (format, FMTDIR_ERROR);
452 }
453 goto bad_format;
454 }
455 format++;
456 }
457 }
458
459 if (type != FAT_NONE)
460 {
461 if (number)
462 {
463 /* Numbered argument. */
464
465 /* Numbered and unnumbered specifications are exclusive. */
466 if (unnumbered_arg_count > 0)
467 {
468 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
469 FDI_SET (format - 1, FMTDIR_ERROR);
470 goto bad_format;
471 }
472
473 if (numbered_allocated == spec.numbered_arg_count)
474 {
475 numbered_allocated = 2 * numbered_allocated + 1;
476 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
477 }
478 spec.numbered[spec.numbered_arg_count].number = number;
479 spec.numbered[spec.numbered_arg_count].type = type;
480 spec.numbered_arg_count++;
481 }
482 else
483 {
484 /* Unnumbered argument. */
485
486 /* Numbered and unnumbered specifications are exclusive. */
487 if (spec.numbered_arg_count > 0)
488 {
489 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
490 FDI_SET (format - 1, FMTDIR_ERROR);
491 goto bad_format;
492 }
493
494 if (numbered_allocated == unnumbered_arg_count)
495 {
496 numbered_allocated = 2 * numbered_allocated + 1;
497 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
498 }
499 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
500 spec.numbered[unnumbered_arg_count].type = type;
501 unnumbered_arg_count++;
502 }
503 }
504 }
505
506 FDI_SET (format - 1, FMTDIR_END);
507 }
508
509 /* Convert the unnumbered argument array to numbered arguments. */
510 if (unnumbered_arg_count > 0)
511 spec.numbered_arg_count = unnumbered_arg_count;
512 /* Sort the numbered argument array, and eliminate duplicates. */
513 else if (spec.numbered_arg_count > 1)
514 {
515 unsigned int i, j;
516 bool err;
517
518 qsort (spec.numbered, spec.numbered_arg_count,
519 sizeof (struct numbered_arg), numbered_arg_compare);
520
521 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
522 err = false;
523 for (i = j = 0; i < spec.numbered_arg_count; i++)
524 if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
525 {
526 enum format_arg_type type1 = spec.numbered[i].type;
527 enum format_arg_type type2 = spec.numbered[j-1].type;
528 enum format_arg_type type_both;
529
530 if (type1 == type2 || type2 == FAT_ANY)
531 type_both = type1;
532 else if (type1 == FAT_ANY)
533 type_both = type2;
534 else
535 {
536 /* Incompatible types. */
537 type_both = FAT_NONE;
538 if (!err)
539 *invalid_reason =
540 INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number);
541 err = true;
542 }
543
544 spec.numbered[j-1].type = type_both;
545 }
546 else
547 {
548 if (j < i)
549 {
550 spec.numbered[j].number = spec.numbered[i].number;
551 spec.numbered[j].type = spec.numbered[i].type;
552 }
553 j++;
554 }
555 spec.numbered_arg_count = j;
556 if (err)
557 /* *invalid_reason has already been set above. */
558 goto bad_format;
559 }
560
561 result = XMALLOC (struct spec);
562 *result = spec;
563 return result;
564
565 bad_format:
566 if (spec.numbered != NULL)
567 free (spec.numbered);
568 return NULL;
569 }
570
571 static void
format_free(void * descr)572 format_free (void *descr)
573 {
574 struct spec *spec = (struct spec *) descr;
575
576 if (spec->numbered != NULL)
577 free (spec->numbered);
578 free (spec);
579 }
580
581 static int
format_get_number_of_directives(void * descr)582 format_get_number_of_directives (void *descr)
583 {
584 struct spec *spec = (struct spec *) descr;
585
586 return spec->directives;
587 }
588
589 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgid,const char * pretty_msgstr)590 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
591 formatstring_error_logger_t error_logger,
592 const char *pretty_msgid, const char *pretty_msgstr)
593 {
594 struct spec *spec1 = (struct spec *) msgid_descr;
595 struct spec *spec2 = (struct spec *) msgstr_descr;
596 bool err = false;
597
598 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
599 {
600 unsigned int i, j;
601 unsigned int n1 = spec1->numbered_arg_count;
602 unsigned int n2 = spec2->numbered_arg_count;
603
604 /* Check the argument names are the same.
605 Both arrays are sorted. We search for the first difference. */
606 for (i = 0, j = 0; i < n1 || j < n2; )
607 {
608 int cmp = (i >= n1 ? 1 :
609 j >= n2 ? -1 :
610 spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
611 spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
612 0);
613
614 if (cmp > 0)
615 {
616 if (error_logger)
617 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in '%s'"),
618 spec2->numbered[j].number, pretty_msgstr,
619 pretty_msgid);
620 err = true;
621 break;
622 }
623 else if (cmp < 0)
624 {
625 if (equality)
626 {
627 if (error_logger)
628 error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
629 spec1->numbered[i].number, pretty_msgstr);
630 err = true;
631 break;
632 }
633 else
634 i++;
635 }
636 else
637 j++, i++;
638 }
639 /* Check the argument types are the same. */
640 if (!err)
641 for (i = 0, j = 0; j < n2; )
642 {
643 if (spec1->numbered[i].number == spec2->numbered[j].number)
644 {
645 if (spec1->numbered[i].type != spec2->numbered[j].type)
646 {
647 if (error_logger)
648 error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
649 pretty_msgid, pretty_msgstr,
650 spec2->numbered[j].number);
651 err = true;
652 break;
653 }
654 j++, i++;
655 }
656 else
657 i++;
658 }
659 }
660
661 return err;
662 }
663
664
665 struct formatstring_parser formatstring_boost =
666 {
667 format_parse,
668 format_free,
669 format_get_number_of_directives,
670 NULL,
671 format_check
672 };
673
674
#ifdef TEST

/* Test program: Print the argument list specification returned by
   format_parse for strings read from standard input.  */

#include <stdio.h>

/* Print the argument list of the descriptor DESCR on standard output,
   as a parenthesized list with one entry per argument position:
   'i' integer, 'f' floating-point, 'c' character, 'p' pointer, '*' any
   type, and '_' for a position that the string does not reference.
   Prints "INVALID" if DESCR is NULL.  */
static void
format_print (void *descr)
{
  struct spec *spec = (struct spec *) descr;
  unsigned int last;
  unsigned int i;

  if (spec == NULL)
    {
      printf ("INVALID");
      return;
    }

  printf ("(");
  /* LAST is the next argument number that has not been printed yet.  */
  last = 1;
  for (i = 0; i < spec->numbered_arg_count; i++)
    {
      unsigned int number = spec->numbered[i].number;

      if (i > 0)
        printf (" ");
      /* The array must be sorted and free of duplicates.  */
      if (number < last)
        abort ();
      for (; last < number; last++)
        printf ("_ ");
      switch (spec->numbered[i].type)
        {
        case FAT_INTEGER:
          printf ("i");
          break;
        case FAT_DOUBLE:
          printf ("f");
          break;
        case FAT_CHAR:
          printf ("c");
          break;
        case FAT_POINTER:
          printf ("p");
          break;
        case FAT_ANY:
          printf ("*");
          break;
        default:
          abort ();
        }
      last = number + 1;
    }
  printf (")");
}

/* Read format strings from standard input, one per line, and print the
   argument list of each, followed by the error message if the string is
   invalid.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      ssize_t line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated a buffer even though it failed.  */
          free (line);
          break;
        }
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, NULL, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}

/*
 * For Emacs M-x compile
 * Local Variables:
 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-boost.c ../gnulib-lib/libgettextlib.la"
 * End:
 */

#endif /* TEST */
772
773