1 /* Perl format strings.
2 Copyright (C) 2004, 2006 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2003.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22
23 #include <stdbool.h>
24 #include <stdlib.h>
25
26 #include "format.h"
27 #include "c-ctype.h"
28 #include "xalloc.h"
29 #include "xvasprintf.h"
30 #include "format-invalid.h"
31 #include "gettext.h"
32
33 #define _(str) gettext (str)
34
35 /* Perl format strings are implemented in function Perl_sv_vcatpvfn in
36 perl-5.8.0/sv.c.
37 A directive
38 - starts with '%' or '%m$' where m is a positive integer starting with a
39 nonzero digit,
40 - is optionally followed by any of the characters '#', '0', '-', ' ', '+',
41 each of which acts as a flag,
42 - is optionally followed by a vector specification: 'v' or '*v' (reads an
43 argument) or '*m$v' where m is a positive integer starting with a nonzero
44 digit,
45 - is optionally followed by a width specification: '*' (reads an argument)
46 or '*m$' where m is a positive integer starting with a nonzero digit or
47 a nonempty digit sequence starting with a nonzero digit,
48 - is optionally followed by '.' and a precision specification: '*' (reads
49 an argument) or '*m$' where m is a positive integer starting with a
50 nonzero digit or a digit sequence,
51 - is optionally followed by a size specifier, one of 'h' 'l' 'll' 'L' 'q'
52 'V' 'I32' 'I64' 'I',
53 - is finished by a specifier
54 - '%', that needs no argument,
55 - 'c', that needs a small integer argument,
56 - 's', that needs a string argument,
57 - '_', that needs a scalar vector argument,
58 - 'p', that needs a pointer argument,
59 - 'i', 'd', 'D', that need an integer argument,
60 - 'u', 'U', 'b', 'o', 'O', 'x', 'X', that need an unsigned integer
61 argument,
62 - 'e', 'E', 'f', 'F', 'g', 'G', that need a floating-point argument,
63 - 'n', that needs a pointer to integer.
64 So there can be numbered argument specifications:
65 - '%m$' for the format string,
66 - '*m$v' for the vector,
67 - '*m$' for the width,
68 - '.*m$' for the precision.
69 Numbered and unnumbered argument specifications can be used in the same
70 string. The effect of '%m$' is to take argument number m, without affecting
71 the current argument number. The current argument number is incremented
72 after processing a directive with an unnumbered argument specification.
73 */
74
/* Classification of one format argument.
   Bits 0..2 hold one of the basic types.  On top of that, the parser may
   OR in FAT_UNSIGNED (bit 3) and one FAT_SIZE_* value (bits 4..6).  */
enum format_arg_type
{
  FAT_NONE              = 0,
  /* Basic types.  */
  FAT_INTEGER           = 1,
  FAT_DOUBLE            = 2,
  FAT_CHAR              = 3,
  FAT_STRING            = 4,
  FAT_SCALAR_VECTOR     = 5,
  FAT_POINTER           = 6,
  FAT_COUNT_POINTER     = 7,
  /* Signedness flag.  */
  FAT_UNSIGNED          = 0x08,
  /* Size field.  These are enumerated values, not independent bits.  */
  FAT_SIZE_SHORT        = 0x10,
  FAT_SIZE_V            = 0x20,
  FAT_SIZE_PTR          = 0x30,
  FAT_SIZE_LONG         = 0x40,
  FAT_SIZE_LONGLONG     = 0x50,
  /* Bitmask covering every bit used by some FAT_SIZE_* value.  */
  FAT_SIZE_MASK         = (FAT_SIZE_SHORT | FAT_SIZE_V | FAT_SIZE_PTR
                           | FAT_SIZE_LONG | FAT_SIZE_LONGLONG)
};
97
98 struct numbered_arg
99 {
100 unsigned int number;
101 enum format_arg_type type;
102 };
103
/* Result of parsing one format string.  */
struct spec
{
  /* Number of directives ('%' sequences, including "%%") seen.  */
  unsigned int directives;
  /* Number of entries of NUMBERED that are in use.  */
  unsigned int numbered_arg_count;
  /* Number of entries for which NUMBERED has room.  */
  unsigned int allocated;
  /* Argument descriptions; NULL when the string consumes no argument.  */
  struct numbered_arg *numbered;
};
111
/* Locale independent test for a decimal digit, '0'..'9'.
   The argument is evaluated once and may be a 'char' or an 'unsigned char'
   (unlike <ctype.h> isdigit, which requires an 'unsigned char' value).
   Values below '0' wrap to large unsigned numbers and so fail the test.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') <= 9u)

/* Locale independent test for a nonzero decimal digit, '1'..'9'.  */
#define isnonzerodigit(c) ((unsigned int) ((c) - '1') <= 8u)
120
121
122 static int
numbered_arg_compare(const void * p1,const void * p2)123 numbered_arg_compare (const void *p1, const void *p2)
124 {
125 unsigned int n1 = ((const struct numbered_arg *) p1)->number;
126 unsigned int n2 = ((const struct numbered_arg *) p2)->number;
127
128 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
129 }
130
131 static void *
format_parse(const char * format,bool translated,char ** invalid_reason)132 format_parse (const char *format, bool translated, char **invalid_reason)
133 {
134 unsigned int directives;
135 unsigned int numbered_arg_count;
136 unsigned int allocated;
137 struct numbered_arg *numbered;
138 unsigned int unnumbered_arg_count;
139 struct spec *result;
140
141 directives = 0;
142 numbered_arg_count = 0;
143 unnumbered_arg_count = 0;
144 allocated = 0;
145 numbered = NULL;
146
147 for (; *format != '\0';)
148 if (*format++ == '%')
149 {
150 /* A directive. */
151 unsigned int number = 0;
152 bool vectorize = false;
153 enum format_arg_type type;
154 enum format_arg_type size;
155
156 directives++;
157
158 if (isnonzerodigit (*format))
159 {
160 const char *f = format;
161 unsigned int m = 0;
162
163 do
164 {
165 m = 10 * m + (*f - '0');
166 f++;
167 }
168 while (isdigit (*f));
169
170 if (*f == '$')
171 {
172 number = m;
173 format = ++f;
174 }
175 }
176
177 /* Parse flags. */
178 while (*format == ' ' || *format == '+' || *format == '-'
179 || *format == '#' || *format == '0')
180 format++;
181
182 /* Parse vector. */
183 if (*format == 'v')
184 {
185 format++;
186 vectorize = true;
187 }
188 else if (*format == '*')
189 {
190 const char *f = format;
191
192 f++;
193 if (*f == 'v')
194 {
195 format = ++f;
196 vectorize = true;
197
198 /* Unnumbered argument. */
199 if (allocated == numbered_arg_count)
200 {
201 allocated = 2 * allocated + 1;
202 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
203 }
204 numbered[numbered_arg_count].number = ++unnumbered_arg_count;
205 numbered[numbered_arg_count].type = FAT_SCALAR_VECTOR; /* or FAT_STRING? */
206 numbered_arg_count++;
207 }
208 else if (isnonzerodigit (*f))
209 {
210 unsigned int m = 0;
211
212 do
213 {
214 m = 10 * m + (*f - '0');
215 f++;
216 }
217 while (isdigit (*f));
218
219 if (*f == '$')
220 {
221 f++;
222 if (*f == 'v')
223 {
224 unsigned int vector_number = m;
225
226 format = ++f;
227 vectorize = true;
228
229 /* Numbered argument. */
230 /* Note: As of perl-5.8.0, this is not correctly
231 implemented in perl's sv.c. */
232 if (allocated == numbered_arg_count)
233 {
234 allocated = 2 * allocated + 1;
235 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
236 }
237 numbered[numbered_arg_count].number = vector_number;
238 numbered[numbered_arg_count].type = FAT_SCALAR_VECTOR; /* or FAT_STRING? */
239 numbered_arg_count++;
240 }
241 }
242 }
243 }
244
245 if (vectorize)
246 {
247 /* Numbered or unnumbered argument. */
248 if (allocated == numbered_arg_count)
249 {
250 allocated = 2 * allocated + 1;
251 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
252 }
253 numbered[numbered_arg_count].number = (number ? number : ++unnumbered_arg_count);
254 numbered[numbered_arg_count].type = FAT_SCALAR_VECTOR;
255 numbered_arg_count++;
256 }
257
258 /* Parse width. */
259 if (*format == '*')
260 {
261 unsigned int width_number = 0;
262
263 format++;
264
265 if (isnonzerodigit (*format))
266 {
267 const char *f = format;
268 unsigned int m = 0;
269
270 do
271 {
272 m = 10 * m + (*f - '0');
273 f++;
274 }
275 while (isdigit (*f));
276
277 if (*f == '$')
278 {
279 width_number = m;
280 format = ++f;
281 }
282 }
283
284 /* Numbered or unnumbered argument. */
285 /* Note: As of perl-5.8.0, this is not correctly
286 implemented in perl's sv.c. */
287 if (allocated == numbered_arg_count)
288 {
289 allocated = 2 * allocated + 1;
290 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
291 }
292 numbered[numbered_arg_count].number = (width_number ? width_number : ++unnumbered_arg_count);
293 numbered[numbered_arg_count].type = FAT_INTEGER;
294 numbered_arg_count++;
295 }
296 else if (isnonzerodigit (*format))
297 {
298 do format++; while (isdigit (*format));
299 }
300
301 /* Parse precision. */
302 if (*format == '.')
303 {
304 format++;
305
306 if (*format == '*')
307 {
308 unsigned int precision_number = 0;
309
310 format++;
311
312 if (isnonzerodigit (*format))
313 {
314 const char *f = format;
315 unsigned int m = 0;
316
317 do
318 {
319 m = 10 * m + (*f - '0');
320 f++;
321 }
322 while (isdigit (*f));
323
324 if (*f == '$')
325 {
326 precision_number = m;
327 format = ++f;
328 }
329 }
330
331 /* Numbered or unnumbered argument. */
332 if (allocated == numbered_arg_count)
333 {
334 allocated = 2 * allocated + 1;
335 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
336 }
337 numbered[numbered_arg_count].number = (precision_number ? precision_number : ++unnumbered_arg_count);
338 numbered[numbered_arg_count].type = FAT_INTEGER;
339 numbered_arg_count++;
340 }
341 else
342 {
343 while (isdigit (*format)) format++;
344 }
345 }
346
347 /* Parse size. */
348 size = 0;
349 if (*format == 'h')
350 {
351 size = FAT_SIZE_SHORT;
352 format++;
353 }
354 else if (*format == 'l')
355 {
356 if (format[1] == 'l')
357 {
358 size = FAT_SIZE_LONGLONG;
359 format += 2;
360 }
361 else
362 {
363 size = FAT_SIZE_LONG;
364 format++;
365 }
366 }
367 else if (*format == 'L' || *format == 'q')
368 {
369 size = FAT_SIZE_LONGLONG;
370 format++;
371 }
372 else if (*format == 'V')
373 {
374 size = FAT_SIZE_V;
375 format++;
376 }
377 else if (*format == 'I')
378 {
379 if (format[1] == '6' && format[2] == '4')
380 {
381 size = FAT_SIZE_LONGLONG;
382 format += 3;
383 }
384 else if (format[1] == '3' && format[2] == '2')
385 {
386 size = 0; /* FAT_SIZE_INT */
387 format += 3;
388 }
389 else
390 {
391 size = FAT_SIZE_PTR;
392 format++;
393 }
394 }
395
396 switch (*format)
397 {
398 case '%':
399 type = FAT_NONE;
400 break;
401 case 'c':
402 type = FAT_CHAR;
403 break;
404 case 's':
405 type = FAT_STRING;
406 break;
407 case '_':
408 type = FAT_SCALAR_VECTOR;
409 break;
410 case 'D':
411 type = FAT_INTEGER | FAT_SIZE_V;
412 break;
413 case 'i': case 'd':
414 type = FAT_INTEGER | size;
415 break;
416 case 'U': case 'O':
417 type = FAT_INTEGER | FAT_UNSIGNED | FAT_SIZE_V;
418 break;
419 case 'u': case 'b': case 'o': case 'x': case 'X':
420 type = FAT_INTEGER | FAT_UNSIGNED | size;
421 break;
422 case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
423 if (size == FAT_SIZE_SHORT || size == FAT_SIZE_LONG)
424 {
425 *invalid_reason =
426 xasprintf (_("In the directive number %u, the size specifier is incompatible with the conversion specifier '%c'."), directives, *format);
427 goto bad_format;
428 }
429 type = FAT_DOUBLE | size;
430 break;
431 case 'p':
432 type = FAT_POINTER;
433 break;
434 case 'n':
435 type = FAT_COUNT_POINTER | size;
436 break;
437 default:
438 *invalid_reason =
439 (*format == '\0'
440 ? INVALID_UNTERMINATED_DIRECTIVE ()
441 : INVALID_CONVERSION_SPECIFIER (directives, *format));
442 goto bad_format;
443 }
444
445 if (type != FAT_NONE && !vectorize)
446 {
447 /* Numbered or unnumbered argument. */
448 if (allocated == numbered_arg_count)
449 {
450 allocated = 2 * allocated + 1;
451 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
452 }
453 numbered[numbered_arg_count].number = (number ? number : ++unnumbered_arg_count);
454 numbered[numbered_arg_count].type = type;
455 numbered_arg_count++;
456 }
457
458 format++;
459 }
460
461 /* Sort the numbered argument array, and eliminate duplicates. */
462 if (numbered_arg_count > 1)
463 {
464 unsigned int i, j;
465 bool err;
466
467 qsort (numbered, numbered_arg_count,
468 sizeof (struct numbered_arg), numbered_arg_compare);
469
470 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
471 err = false;
472 for (i = j = 0; i < numbered_arg_count; i++)
473 if (j > 0 && numbered[i].number == numbered[j-1].number)
474 {
475 enum format_arg_type type1 = numbered[i].type;
476 enum format_arg_type type2 = numbered[j-1].type;
477 enum format_arg_type type_both;
478
479 if (type1 == type2)
480 type_both = type1;
481 else
482 {
483 /* Incompatible types. */
484 type_both = FAT_NONE;
485 if (!err)
486 *invalid_reason =
487 INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number);
488 err = true;
489 }
490
491 numbered[j-1].type = type_both;
492 }
493 else
494 {
495 if (j < i)
496 {
497 numbered[j].number = numbered[i].number;
498 numbered[j].type = numbered[i].type;
499 }
500 j++;
501 }
502 numbered_arg_count = j;
503 if (err)
504 /* *invalid_reason has already been set above. */
505 goto bad_format;
506 }
507
508 result = (struct spec *) xmalloc (sizeof (struct spec));
509 result->directives = directives;
510 result->numbered_arg_count = numbered_arg_count;
511 result->allocated = allocated;
512 result->numbered = numbered;
513 return result;
514
515 bad_format:
516 if (numbered != NULL)
517 free (numbered);
518 return NULL;
519 }
520
521 static void
format_free(void * descr)522 format_free (void *descr)
523 {
524 struct spec *spec = (struct spec *) descr;
525
526 if (spec->numbered != NULL)
527 free (spec->numbered);
528 free (spec);
529 }
530
531 static int
format_get_number_of_directives(void * descr)532 format_get_number_of_directives (void *descr)
533 {
534 struct spec *spec = (struct spec *) descr;
535
536 return spec->directives;
537 }
538
539 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgstr)540 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
541 formatstring_error_logger_t error_logger,
542 const char *pretty_msgstr)
543 {
544 struct spec *spec1 = (struct spec *) msgid_descr;
545 struct spec *spec2 = (struct spec *) msgstr_descr;
546 bool err = false;
547
548 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
549 {
550 unsigned int i, j;
551 unsigned int n1 = spec1->numbered_arg_count;
552 unsigned int n2 = spec2->numbered_arg_count;
553
554 /* Check the argument names are the same.
555 Both arrays are sorted. We search for the first difference. */
556 for (i = 0, j = 0; i < n1 || j < n2; )
557 {
558 int cmp = (i >= n1 ? 1 :
559 j >= n2 ? -1 :
560 spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
561 spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
562 0);
563
564 if (cmp > 0)
565 {
566 if (error_logger)
567 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"),
568 spec2->numbered[j].number, pretty_msgstr);
569 err = true;
570 break;
571 }
572 else if (cmp < 0)
573 {
574 if (equality)
575 {
576 if (error_logger)
577 error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
578 spec1->numbered[i].number, pretty_msgstr);
579 err = true;
580 break;
581 }
582 else
583 i++;
584 }
585 else
586 j++, i++;
587 }
588 /* Check the argument types are the same. */
589 if (!err)
590 for (i = 0, j = 0; j < n2; )
591 {
592 if (spec1->numbered[i].number == spec2->numbered[j].number)
593 {
594 if (spec1->numbered[i].type != spec2->numbered[j].type)
595 {
596 if (error_logger)
597 error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"),
598 pretty_msgstr, spec2->numbered[j].number);
599 err = true;
600 break;
601 }
602 j++, i++;
603 }
604 else
605 i++;
606 }
607 }
608
609 return err;
610 }
611
612
613 struct formatstring_parser formatstring_perl =
614 {
615 format_parse,
616 format_free,
617 format_get_number_of_directives,
618 NULL,
619 format_check
620 };
621
622
623 #ifdef TEST
624
625 /* Test program: Print the argument list specification returned by
626 format_parse for strings read from standard input. */
627
628 #include <stdio.h>
629 #include "getline.h"
630
631 static void
format_print(void * descr)632 format_print (void *descr)
633 {
634 struct spec *spec = (struct spec *) descr;
635 unsigned int last;
636 unsigned int i;
637
638 if (spec == NULL)
639 {
640 printf ("INVALID");
641 return;
642 }
643
644 printf ("(");
645 last = 1;
646 for (i = 0; i < spec->numbered_arg_count; i++)
647 {
648 unsigned int number = spec->numbered[i].number;
649
650 if (i > 0)
651 printf (" ");
652 if (number < last)
653 abort ();
654 for (; last < number; last++)
655 printf ("_ ");
656 if (spec->numbered[i].type & FAT_UNSIGNED)
657 printf ("[unsigned]");
658 switch (spec->numbered[i].type & FAT_SIZE_MASK)
659 {
660 case 0:
661 break;
662 case FAT_SIZE_SHORT:
663 printf ("[short]");
664 break;
665 case FAT_SIZE_V:
666 printf ("[IV]");
667 break;
668 case FAT_SIZE_PTR:
669 printf ("[PTR]");
670 break;
671 case FAT_SIZE_LONG:
672 printf ("[long]");
673 break;
674 case FAT_SIZE_LONGLONG:
675 printf ("[long long]");
676 break;
677 default:
678 abort ();
679 }
680 switch (spec->numbered[i].type & ~(FAT_UNSIGNED | FAT_SIZE_MASK))
681 {
682 case FAT_INTEGER:
683 printf ("i");
684 break;
685 case FAT_DOUBLE:
686 printf ("f");
687 break;
688 case FAT_CHAR:
689 printf ("c");
690 break;
691 case FAT_STRING:
692 printf ("s");
693 break;
694 case FAT_SCALAR_VECTOR:
695 printf ("sv");
696 break;
697 case FAT_POINTER:
698 printf ("p");
699 break;
700 case FAT_COUNT_POINTER:
701 printf ("n");
702 break;
703 default:
704 abort ();
705 }
706 last = number + 1;
707 }
708 printf (")");
709 }
710
/* Test driver: reads format strings from standard input, one per line,
   and prints for each the argument list found by format_parse, followed
   by the reason on a separate line when the string is invalid.
   Returns 0 once the input is exhausted.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated a buffer even though it failed.  */
          free (line);
          break;
        }
      /* Strip the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        /* Release the descriptor; format_free must not be given NULL.  */
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
742
743 /*
744 * For Emacs M-x compile
745 * Local Variables:
746 * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-perl.c ../lib/libgettextlib.la"
747 * End:
748 */
749
750 #endif /* TEST */
751