1 /* Perl format strings.
2    Copyright (C) 2004, 2006 Free Software Foundation, Inc.
3    Written by Bruno Haible <bruno@clisp.org>, 2003.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software Foundation,
17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 
23 #include <stdbool.h>
24 #include <stdlib.h>
25 
26 #include "format.h"
27 #include "c-ctype.h"
28 #include "xalloc.h"
29 #include "xvasprintf.h"
30 #include "format-invalid.h"
31 #include "gettext.h"
32 
33 #define _(str) gettext (str)
34 
35 /* Perl format strings are implemented in function Perl_sv_vcatpvfn in
36    perl-5.8.0/sv.c.
37    A directive
38    - starts with '%' or '%m$' where m is a positive integer starting with a
39      nonzero digit,
40    - is optionally followed by any of the characters '#', '0', '-', ' ', '+',
41      each of which acts as a flag,
42    - is optionally followed by a vector specification: 'v' or '*v' (reads an
43      argument) or '*m$v' where m is a positive integer starting with a nonzero
44      digit,
45    - is optionally followed by a width specification: '*' (reads an argument)
46      or '*m$' where m is a positive integer starting with a nonzero digit or
47      a nonempty digit sequence starting with a nonzero digit,
48    - is optionally followed by '.' and a precision specification: '*' (reads
49      an argument) or '*m$' where m is a positive integer starting with a
50      nonzero digit or a digit sequence,
51    - is optionally followed by a size specifier, one of 'h' 'l' 'll' 'L' 'q'
52      'V' 'I32' 'I64' 'I',
53    - is finished by a specifier
54        - '%', that needs no argument,
55        - 'c', that needs a small integer argument,
56        - 's', that needs a string argument,
57        - '_', that needs a scalar vector argument,
58        - 'p', that needs a pointer argument,
59        - 'i', 'd', 'D', that need an integer argument,
60        - 'u', 'U', 'b', 'o', 'O', 'x', 'X', that need an unsigned integer
61          argument,
62        - 'e', 'E', 'f', 'F', 'g', 'G', that need a floating-point argument,
63        - 'n', that needs a pointer to integer.
64    So there can be numbered argument specifications:
65    - '%m$' for the format string,
66    - '*m$v' for the vector,
67    - '*m$' for the width,
68    - '.*m$' for the precision.
69    Numbered and unnumbered argument specifications can be used in the same
70    string. The effect of '%m$' is to take argument number m, without affecting
71    the current argument number. The current argument number is incremented
72    after processing a directive with an unnumbered argument specification.
73  */
74 
/* Classification of one format argument.
   A value is a basic type (values 0..7, i.e. the low three bits), optionally
   or-ed with FAT_UNSIGNED and with at most one FAT_SIZE_* value.  */
enum format_arg_type
{
  /* No argument (used for '%%' and for conflicting duplicates).  */
  FAT_NONE              = 0,

  /* Basic types.  */
  FAT_INTEGER           = 1,
  FAT_DOUBLE            = 2,
  FAT_CHAR              = 3,
  FAT_STRING            = 4,
  FAT_SCALAR_VECTOR     = 5,
  FAT_POINTER           = 6,
  FAT_COUNT_POINTER     = 7,

  /* Signedness flag: a single bit above the basic type.  */
  FAT_UNSIGNED          = 0x08,

  /* Size field.  These are enumerated values stored in bits 4..6, not
     independent bits, so they must be compared after masking with
     FAT_SIZE_MASK.  */
  FAT_SIZE_SHORT        = 0x10,
  FAT_SIZE_V            = 0x20,
  FAT_SIZE_PTR          = 0x30,
  FAT_SIZE_LONG         = 0x40,
  FAT_SIZE_LONGLONG     = 0x50,

  /* Mask covering every bit used by the size field.  */
  FAT_SIZE_MASK         = 0x70
};
97 
98 struct numbered_arg
99 {
100   unsigned int number;
101   enum format_arg_type type;
102 };
103 
/* Result of parsing one format string.  */
struct spec
{
  /* Number of '%' directives seen, including '%%'.  */
  unsigned int directives;
  /* Number of valid entries in NUMBERED.  */
  unsigned int numbered_arg_count;
  /* Number of entries for which NUMBERED has room.  */
  unsigned int allocated;
  /* Heap-allocated argument array, or NULL if no argument was recorded.  */
  struct numbered_arg *numbered;
};
111 
/* Locale independent test for a decimal digit ('0'..'9').
   The argument may be a 'char' or an 'unsigned char': a value below '0'
   wraps around to a large unsigned number and is rejected.  (The argument
   of <ctype.h> isdigit, by contrast, must be an 'unsigned char'.)  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') <= 9)

/* Locale independent test for a nonzero decimal digit ('1'..'9').  */
#define isnonzerodigit(c) ((unsigned int) ((c) - '1') <= 8)
120 
121 
122 static int
numbered_arg_compare(const void * p1,const void * p2)123 numbered_arg_compare (const void *p1, const void *p2)
124 {
125   unsigned int n1 = ((const struct numbered_arg *) p1)->number;
126   unsigned int n2 = ((const struct numbered_arg *) p2)->number;
127 
128   return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
129 }
130 
131 static void *
format_parse(const char * format,bool translated,char ** invalid_reason)132 format_parse (const char *format, bool translated, char **invalid_reason)
133 {
134   unsigned int directives;
135   unsigned int numbered_arg_count;
136   unsigned int allocated;
137   struct numbered_arg *numbered;
138   unsigned int unnumbered_arg_count;
139   struct spec *result;
140 
141   directives = 0;
142   numbered_arg_count = 0;
143   unnumbered_arg_count = 0;
144   allocated = 0;
145   numbered = NULL;
146 
147   for (; *format != '\0';)
148     if (*format++ == '%')
149       {
150 	/* A directive.  */
151 	unsigned int number = 0;
152 	bool vectorize = false;
153 	enum format_arg_type type;
154 	enum format_arg_type size;
155 
156 	directives++;
157 
158 	if (isnonzerodigit (*format))
159 	  {
160 	    const char *f = format;
161 	    unsigned int m = 0;
162 
163 	    do
164 	      {
165 		m = 10 * m + (*f - '0');
166 		f++;
167 	      }
168 	    while (isdigit (*f));
169 
170 	    if (*f == '$')
171 	      {
172 		number = m;
173 		format = ++f;
174 	      }
175 	  }
176 
177 	/* Parse flags.  */
178 	while (*format == ' ' || *format == '+' || *format == '-'
179 	       || *format == '#' || *format == '0')
180 	  format++;
181 
182 	/* Parse vector.  */
183 	if (*format == 'v')
184 	  {
185 	    format++;
186 	    vectorize = true;
187 	  }
188 	else if (*format == '*')
189 	  {
190 	    const char *f = format;
191 
192 	    f++;
193 	    if (*f == 'v')
194 	      {
195 		format = ++f;
196 		vectorize = true;
197 
198 		/* Unnumbered argument.  */
199 		if (allocated == numbered_arg_count)
200 		  {
201 		    allocated = 2 * allocated + 1;
202 		    numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
203 		  }
204 		numbered[numbered_arg_count].number = ++unnumbered_arg_count;
205 		numbered[numbered_arg_count].type = FAT_SCALAR_VECTOR; /* or FAT_STRING? */
206 		numbered_arg_count++;
207 	      }
208 	    else if (isnonzerodigit (*f))
209 	      {
210 		unsigned int m = 0;
211 
212 		do
213 		  {
214 		    m = 10 * m + (*f - '0');
215 		    f++;
216 		  }
217 		while (isdigit (*f));
218 
219 		if (*f == '$')
220 		  {
221 		    f++;
222 		    if (*f == 'v')
223 		      {
224 			unsigned int vector_number = m;
225 
226 			format = ++f;
227 			vectorize = true;
228 
229 			/* Numbered argument.  */
230 			/* Note: As of perl-5.8.0, this is not correctly
231 			   implemented in perl's sv.c.  */
232 			if (allocated == numbered_arg_count)
233 			  {
234 			    allocated = 2 * allocated + 1;
235 			    numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
236 			  }
237 			numbered[numbered_arg_count].number = vector_number;
238 			numbered[numbered_arg_count].type = FAT_SCALAR_VECTOR; /* or FAT_STRING? */
239 			numbered_arg_count++;
240 		      }
241 		  }
242 	      }
243 	  }
244 
245 	if (vectorize)
246 	  {
247 	    /* Numbered or unnumbered argument.  */
248 	    if (allocated == numbered_arg_count)
249 	      {
250 		allocated = 2 * allocated + 1;
251 		numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
252 	      }
253 	    numbered[numbered_arg_count].number = (number ? number : ++unnumbered_arg_count);
254 	    numbered[numbered_arg_count].type = FAT_SCALAR_VECTOR;
255 	    numbered_arg_count++;
256 	  }
257 
258 	/* Parse width.  */
259 	if (*format == '*')
260 	  {
261 	    unsigned int width_number = 0;
262 
263 	    format++;
264 
265 	    if (isnonzerodigit (*format))
266 	      {
267 		const char *f = format;
268 		unsigned int m = 0;
269 
270 		do
271 		  {
272 		    m = 10 * m + (*f - '0');
273 		    f++;
274 		  }
275 		while (isdigit (*f));
276 
277 		if (*f == '$')
278 		  {
279 		    width_number = m;
280 		    format = ++f;
281 		  }
282 	      }
283 
284 	    /* Numbered or unnumbered argument.  */
285 	    /* Note: As of perl-5.8.0, this is not correctly
286 	       implemented in perl's sv.c.  */
287 	    if (allocated == numbered_arg_count)
288 	      {
289 		allocated = 2 * allocated + 1;
290 		numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
291 	      }
292 	    numbered[numbered_arg_count].number = (width_number ? width_number : ++unnumbered_arg_count);
293 	    numbered[numbered_arg_count].type = FAT_INTEGER;
294 	    numbered_arg_count++;
295 	  }
296 	else if (isnonzerodigit (*format))
297 	  {
298 	    do format++; while (isdigit (*format));
299 	  }
300 
301 	/* Parse precision.  */
302 	if (*format == '.')
303 	  {
304 	    format++;
305 
306 	    if (*format == '*')
307 	      {
308 		unsigned int precision_number = 0;
309 
310 		format++;
311 
312 		if (isnonzerodigit (*format))
313 		  {
314 		    const char *f = format;
315 		    unsigned int m = 0;
316 
317 		    do
318 		      {
319 			m = 10 * m + (*f - '0');
320 			f++;
321 		      }
322 		    while (isdigit (*f));
323 
324 		    if (*f == '$')
325 		      {
326 			precision_number = m;
327 			format = ++f;
328 		      }
329 		  }
330 
331 		/* Numbered or unnumbered argument.  */
332 		if (allocated == numbered_arg_count)
333 		  {
334 		    allocated = 2 * allocated + 1;
335 		    numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
336 		  }
337 		numbered[numbered_arg_count].number = (precision_number ? precision_number : ++unnumbered_arg_count);
338 		numbered[numbered_arg_count].type = FAT_INTEGER;
339 		numbered_arg_count++;
340 	      }
341 	    else
342 	      {
343 		while (isdigit (*format)) format++;
344 	      }
345 	  }
346 
347 	/* Parse size.  */
348 	size = 0;
349 	if (*format == 'h')
350 	  {
351 	    size = FAT_SIZE_SHORT;
352 	    format++;
353 	  }
354 	else if (*format == 'l')
355 	  {
356 	    if (format[1] == 'l')
357 	      {
358 		size = FAT_SIZE_LONGLONG;
359 		format += 2;
360 	      }
361 	    else
362 	      {
363 		size = FAT_SIZE_LONG;
364 		format++;
365 	      }
366 	  }
367 	else if (*format == 'L' || *format == 'q')
368 	  {
369 	    size = FAT_SIZE_LONGLONG;
370 	    format++;
371 	  }
372 	else if (*format == 'V')
373 	  {
374 	    size = FAT_SIZE_V;
375 	    format++;
376 	  }
377 	else if (*format == 'I')
378 	  {
379 	    if (format[1] == '6' && format[2] == '4')
380 	      {
381 		size = FAT_SIZE_LONGLONG;
382 		format += 3;
383 	      }
384 	    else if (format[1] == '3' && format[2] == '2')
385 	      {
386 		size = 0; /* FAT_SIZE_INT */
387 		format += 3;
388 	      }
389 	    else
390 	      {
391 		size = FAT_SIZE_PTR;
392 		format++;
393 	      }
394 	  }
395 
396 	switch (*format)
397 	  {
398 	  case '%':
399 	    type = FAT_NONE;
400 	    break;
401 	  case 'c':
402 	    type = FAT_CHAR;
403 	    break;
404 	  case 's':
405 	    type = FAT_STRING;
406 	    break;
407 	  case '_':
408 	    type = FAT_SCALAR_VECTOR;
409 	    break;
410 	  case 'D':
411 	    type = FAT_INTEGER | FAT_SIZE_V;
412 	    break;
413 	  case 'i': case 'd':
414 	    type = FAT_INTEGER | size;
415 	    break;
416 	  case 'U': case 'O':
417 	    type = FAT_INTEGER | FAT_UNSIGNED | FAT_SIZE_V;
418 	    break;
419 	  case 'u': case 'b': case 'o': case 'x': case 'X':
420 	    type = FAT_INTEGER | FAT_UNSIGNED | size;
421 	    break;
422 	  case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
423 	    if (size == FAT_SIZE_SHORT || size == FAT_SIZE_LONG)
424 	      {
425 		*invalid_reason =
426 		  xasprintf (_("In the directive number %u, the size specifier is incompatible with the conversion specifier '%c'."), directives, *format);
427 		goto bad_format;
428 	      }
429 	    type = FAT_DOUBLE | size;
430 	    break;
431 	  case 'p':
432 	    type = FAT_POINTER;
433 	    break;
434 	  case 'n':
435 	    type = FAT_COUNT_POINTER | size;
436 	    break;
437 	  default:
438 	    *invalid_reason =
439 	      (*format == '\0'
440 	       ? INVALID_UNTERMINATED_DIRECTIVE ()
441 	       : INVALID_CONVERSION_SPECIFIER (directives, *format));
442 	    goto bad_format;
443 	  }
444 
445 	if (type != FAT_NONE && !vectorize)
446 	  {
447 	    /* Numbered or unnumbered argument.  */
448 	    if (allocated == numbered_arg_count)
449 	      {
450 		allocated = 2 * allocated + 1;
451 		numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
452 	      }
453 	    numbered[numbered_arg_count].number = (number ? number : ++unnumbered_arg_count);
454 	    numbered[numbered_arg_count].type = type;
455 	    numbered_arg_count++;
456 	  }
457 
458 	format++;
459       }
460 
461   /* Sort the numbered argument array, and eliminate duplicates.  */
462   if (numbered_arg_count > 1)
463     {
464       unsigned int i, j;
465       bool err;
466 
467       qsort (numbered, numbered_arg_count,
468 	     sizeof (struct numbered_arg), numbered_arg_compare);
469 
470       /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
471       err = false;
472       for (i = j = 0; i < numbered_arg_count; i++)
473 	if (j > 0 && numbered[i].number == numbered[j-1].number)
474 	  {
475 	    enum format_arg_type type1 = numbered[i].type;
476 	    enum format_arg_type type2 = numbered[j-1].type;
477 	    enum format_arg_type type_both;
478 
479 	    if (type1 == type2)
480 	      type_both = type1;
481 	    else
482 	      {
483 		/* Incompatible types.  */
484 		type_both = FAT_NONE;
485 		if (!err)
486 		  *invalid_reason =
487 		    INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number);
488 		err = true;
489 	      }
490 
491 	    numbered[j-1].type = type_both;
492 	  }
493 	else
494 	  {
495 	    if (j < i)
496 	      {
497 		numbered[j].number = numbered[i].number;
498 		numbered[j].type = numbered[i].type;
499 	      }
500 	    j++;
501 	  }
502       numbered_arg_count = j;
503       if (err)
504 	/* *invalid_reason has already been set above.  */
505 	goto bad_format;
506     }
507 
508   result = (struct spec *) xmalloc (sizeof (struct spec));
509   result->directives = directives;
510   result->numbered_arg_count = numbered_arg_count;
511   result->allocated = allocated;
512   result->numbered = numbered;
513   return result;
514 
515  bad_format:
516   if (numbered != NULL)
517     free (numbered);
518   return NULL;
519 }
520 
521 static void
format_free(void * descr)522 format_free (void *descr)
523 {
524   struct spec *spec = (struct spec *) descr;
525 
526   if (spec->numbered != NULL)
527     free (spec->numbered);
528   free (spec);
529 }
530 
531 static int
format_get_number_of_directives(void * descr)532 format_get_number_of_directives (void *descr)
533 {
534   struct spec *spec = (struct spec *) descr;
535 
536   return spec->directives;
537 }
538 
539 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgstr)540 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
541 	      formatstring_error_logger_t error_logger,
542 	      const char *pretty_msgstr)
543 {
544   struct spec *spec1 = (struct spec *) msgid_descr;
545   struct spec *spec2 = (struct spec *) msgstr_descr;
546   bool err = false;
547 
548   if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
549     {
550       unsigned int i, j;
551       unsigned int n1 = spec1->numbered_arg_count;
552       unsigned int n2 = spec2->numbered_arg_count;
553 
554       /* Check the argument names are the same.
555 	 Both arrays are sorted.  We search for the first difference.  */
556       for (i = 0, j = 0; i < n1 || j < n2; )
557 	{
558 	  int cmp = (i >= n1 ? 1 :
559 		     j >= n2 ? -1 :
560 		     spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
561 		     spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
562 		     0);
563 
564 	  if (cmp > 0)
565 	    {
566 	      if (error_logger)
567 		error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"),
568 			      spec2->numbered[j].number, pretty_msgstr);
569 	      err = true;
570 	      break;
571 	    }
572 	  else if (cmp < 0)
573 	    {
574 	      if (equality)
575 		{
576 		  if (error_logger)
577 		    error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
578 				  spec1->numbered[i].number, pretty_msgstr);
579 		  err = true;
580 		  break;
581 		}
582 	      else
583 		i++;
584 	    }
585 	  else
586 	    j++, i++;
587 	}
588       /* Check the argument types are the same.  */
589       if (!err)
590 	for (i = 0, j = 0; j < n2; )
591 	  {
592 	    if (spec1->numbered[i].number == spec2->numbered[j].number)
593 	      {
594 		if (spec1->numbered[i].type != spec2->numbered[j].type)
595 		  {
596 		    if (error_logger)
597 		      error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"),
598 				    pretty_msgstr, spec2->numbered[j].number);
599 		    err = true;
600 		    break;
601 		  }
602 		j++, i++;
603 	      }
604 	    else
605 	      i++;
606 	  }
607     }
608 
609   return err;
610 }
611 
612 
613 struct formatstring_parser formatstring_perl =
614 {
615   format_parse,
616   format_free,
617   format_get_number_of_directives,
618   NULL,
619   format_check
620 };
621 
622 
623 #ifdef TEST
624 
625 /* Test program: Print the argument list specification returned by
626    format_parse for strings read from standard input.  */
627 
628 #include <stdio.h>
629 #include "getline.h"
630 
631 static void
format_print(void * descr)632 format_print (void *descr)
633 {
634   struct spec *spec = (struct spec *) descr;
635   unsigned int last;
636   unsigned int i;
637 
638   if (spec == NULL)
639     {
640       printf ("INVALID");
641       return;
642     }
643 
644   printf ("(");
645   last = 1;
646   for (i = 0; i < spec->numbered_arg_count; i++)
647     {
648       unsigned int number = spec->numbered[i].number;
649 
650       if (i > 0)
651 	printf (" ");
652       if (number < last)
653 	abort ();
654       for (; last < number; last++)
655 	printf ("_ ");
656       if (spec->numbered[i].type & FAT_UNSIGNED)
657 	printf ("[unsigned]");
658       switch (spec->numbered[i].type & FAT_SIZE_MASK)
659 	{
660 	case 0:
661 	  break;
662 	case FAT_SIZE_SHORT:
663 	  printf ("[short]");
664 	  break;
665 	case FAT_SIZE_V:
666 	  printf ("[IV]");
667 	  break;
668 	case FAT_SIZE_PTR:
669 	  printf ("[PTR]");
670 	  break;
671 	case FAT_SIZE_LONG:
672 	  printf ("[long]");
673 	  break;
674 	case FAT_SIZE_LONGLONG:
675 	  printf ("[long long]");
676 	  break;
677 	default:
678 	  abort ();
679 	}
680       switch (spec->numbered[i].type & ~(FAT_UNSIGNED | FAT_SIZE_MASK))
681 	{
682 	case FAT_INTEGER:
683 	  printf ("i");
684 	  break;
685 	case FAT_DOUBLE:
686 	  printf ("f");
687 	  break;
688 	case FAT_CHAR:
689 	  printf ("c");
690 	  break;
691 	case FAT_STRING:
692 	  printf ("s");
693 	  break;
694 	case FAT_SCALAR_VECTOR:
695 	  printf ("sv");
696 	  break;
697 	case FAT_POINTER:
698 	  printf ("p");
699 	  break;
700 	case FAT_COUNT_POINTER:
701 	  printf ("n");
702 	  break;
703 	default:
704 	  abort ();
705 	}
706       last = number + 1;
707     }
708   printf (")");
709 }
710 
/* Test driver: read format strings from standard input, one per line, and
   print for each the argument list computed by format_parse, followed by
   the reason for rejection if the string is invalid.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated a buffer even though it failed.  */
          free (line);
          break;
        }
      /* Strip the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        /* Release the parse result instead of leaking it on every line.  */
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
742 
743 /*
744  * For Emacs M-x compile
745  * Local Variables:
746  * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-perl.c ../lib/libgettextlib.la"
747  * End:
748  */
749 
750 #endif /* TEST */
751