1 /* PHP format strings.
2    Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc.
3    Written by Bruno Haible <bruno@clisp.org>, 2002.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software Foundation,
17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 
23 #include <stdbool.h>
24 #include <stdlib.h>
25 
26 #include "format.h"
27 #include "c-ctype.h"
28 #include "xalloc.h"
29 #include "xvasprintf.h"
30 #include "format-invalid.h"
31 #include "gettext.h"
32 
33 #define _(str) gettext (str)
34 
35 /* PHP format strings are described in phpdoc-4.0.6, file
36    phpdoc/manual/function.sprintf.html, and are implemented in
37    php-4.1.0/ext/standard/formatted_print.c.
38    A directive
39    - starts with '%' or '%m$' where m is a positive integer,
40    - is optionally followed by any of the characters '0', '-', ' ', or
41      "'<anychar>", each of which acts as a flag,
42    - is optionally followed by a width specification: a nonempty digit
43      sequence,
44    - is optionally followed by '.' and a precision specification: a nonempty
45      digit sequence,
46    - is optionally followed by a size specifier 'l', which is ignored,
47    - is finished by a specifier
48        - 's', that needs a string argument,
49        - 'b', 'd', 'u', 'o', 'x', 'X', that need an integer argument,
50        - 'e', 'f', that need a floating-point argument,
51        - 'c', that needs a character argument.
52    Additionally there is the directive '%%', which takes no argument.
53    Numbered and unnumbered argument specifications can be used in the same
54    string.  Numbered argument specifications have no influence on the
55    "current argument index", that is incremented each time an argument is read.
56  */
57 
/* Kind of argument that a directive's conversion specifier consumes.  */
enum format_arg_type
{
  FAT_INTEGER,		/* 'b', 'd', 'u', 'o', 'x', 'X' */
  FAT_FLOAT,		/* 'e', 'f' */
  FAT_CHARACTER,	/* 'c' */
  FAT_STRING		/* 's' */
};
65 
66 struct numbered_arg
67 {
68   unsigned int number;
69   enum format_arg_type type;
70 };
71 
/* Parsed description of a PHP format string, as built by format_parse.  */
struct spec
{
  /* Total number of directives seen, '%%' included.  */
  unsigned int directives;
  /* Number of valid entries in NUMBERED.  */
  unsigned int numbered_arg_count;
  /* Number of entries for which NUMBERED has room.  */
  unsigned int allocated;
  /* Argument references, sorted by ascending number, without duplicates.
     NULL when no directive takes an argument.  */
  struct numbered_arg *numbered;
};
79 
/* Locale independent test for an ASCII decimal digit.
   The argument may be a 'char' or an 'unsigned char' (unlike <ctype.h>
   isdigit, whose argument must be an 'unsigned char' value).  Characters
   below '0' wrap around to large unsigned values and are thus rejected by
   the single comparison.  The argument is evaluated exactly once.  */
#undef isdigit
#define isdigit(c) (((unsigned int) ((c) - '0')) <= 9u)
85 
86 
87 static int
numbered_arg_compare(const void * p1,const void * p2)88 numbered_arg_compare (const void *p1, const void *p2)
89 {
90   unsigned int n1 = ((const struct numbered_arg *) p1)->number;
91   unsigned int n2 = ((const struct numbered_arg *) p2)->number;
92 
93   return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
94 }
95 
96 static void *
format_parse(const char * format,bool translated,char ** invalid_reason)97 format_parse (const char *format, bool translated, char **invalid_reason)
98 {
99   unsigned int directives;
100   unsigned int numbered_arg_count;
101   unsigned int allocated;
102   struct numbered_arg *numbered;
103   unsigned int unnumbered_arg_count;
104   struct spec *result;
105 
106   directives = 0;
107   numbered_arg_count = 0;
108   allocated = 0;
109   numbered = NULL;
110   unnumbered_arg_count = 0;
111 
112   for (; *format != '\0';)
113     if (*format++ == '%')
114       {
115 	/* A directive.  */
116 	directives++;
117 
118 	if (*format != '%')
119 	  {
120 	    /* A complex directive.  */
121 	    unsigned int number;
122 	    enum format_arg_type type;
123 
124 	    number = ++unnumbered_arg_count;
125 	    if (isdigit (*format))
126 	      {
127 		const char *f = format;
128 		unsigned int m = 0;
129 
130 		do
131 		  {
132 		    m = 10 * m + (*f - '0');
133 		    f++;
134 		  }
135 		while (isdigit (*f));
136 
137 		if (*f == '$')
138 		  {
139 		    if (m == 0)
140 		      {
141 			*invalid_reason = INVALID_ARGNO_0 (directives);
142 			goto bad_format;
143 		      }
144 		    number = m;
145 		    format = ++f;
146 		    --unnumbered_arg_count;
147 		  }
148 	      }
149 
150 	    /* Parse flags.  */
151 	    for (;;)
152 	      {
153 		if (*format == '0' || *format == '-' || *format == ' ')
154 		  format++;
155 		else if (*format == '\'')
156 		  {
157 		    format++;
158 		    if (*format == '\0')
159 		      {
160 			*invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
161 			goto bad_format;
162 		      }
163 		    format++;
164 		  }
165 		else
166 		  break;
167 	      }
168 
169 	    /* Parse width.  */
170 	    if (isdigit (*format))
171 	      {
172 		do
173 		  format++;
174 		while (isdigit (*format));
175 	      }
176 
177 	    /* Parse precision.  */
178 	    if (*format == '.')
179 	      {
180 		format++;
181 
182 		if (isdigit (*format))
183 		  {
184 		    do
185 		      format++;
186 		    while (isdigit (*format));
187 		  }
188 		else
189 		  --format;	/* will jump to bad_format */
190 	      }
191 
192 	    /* Parse size.  */
193 	    if (*format == 'l')
194 	      format++;
195 
196 	    switch (*format)
197 	      {
198 	      case 'b': case 'd': case 'u': case 'o': case 'x': case 'X':
199 		type = FAT_INTEGER;
200 		break;
201 	      case 'e': case 'f':
202 		type = FAT_FLOAT;
203 		break;
204 	      case 'c':
205 		type = FAT_CHARACTER;
206 		break;
207 	      case 's':
208 		type = FAT_STRING;
209 		break;
210 	      default:
211 		*invalid_reason =
212 		  (*format == '\0'
213 		   ? INVALID_UNTERMINATED_DIRECTIVE ()
214 		   : INVALID_CONVERSION_SPECIFIER (directives, *format));
215 		goto bad_format;
216 	      }
217 
218 	    if (allocated == numbered_arg_count)
219 	      {
220 		allocated = 2 * allocated + 1;
221 		numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
222 	      }
223 	    numbered[numbered_arg_count].number = number;
224 	    numbered[numbered_arg_count].type = type;
225 	    numbered_arg_count++;
226 	  }
227 
228 	format++;
229       }
230 
231   /* Sort the numbered argument array, and eliminate duplicates.  */
232   if (numbered_arg_count > 1)
233     {
234       unsigned int i, j;
235       bool err;
236 
237       qsort (numbered, numbered_arg_count,
238 	     sizeof (struct numbered_arg), numbered_arg_compare);
239 
240       /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
241       err = false;
242       for (i = j = 0; i < numbered_arg_count; i++)
243 	if (j > 0 && numbered[i].number == numbered[j-1].number)
244 	  {
245 	    enum format_arg_type type1 = numbered[i].type;
246 	    enum format_arg_type type2 = numbered[j-1].type;
247 	    enum format_arg_type type_both;
248 
249 	    if (type1 == type2)
250 	      type_both = type1;
251 	    else
252 	      {
253 		/* Incompatible types.  */
254 		type_both = type1;
255 		if (!err)
256 		  *invalid_reason =
257 		    INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number);
258 		err = true;
259 	      }
260 
261 	    numbered[j-1].type = type_both;
262 	  }
263 	else
264 	  {
265 	    if (j < i)
266 	      {
267 		numbered[j].number = numbered[i].number;
268 		numbered[j].type = numbered[i].type;
269 	      }
270 	    j++;
271 	  }
272       numbered_arg_count = j;
273       if (err)
274 	/* *invalid_reason has already been set above.  */
275 	goto bad_format;
276     }
277 
278   result = (struct spec *) xmalloc (sizeof (struct spec));
279   result->directives = directives;
280   result->numbered_arg_count = numbered_arg_count;
281   result->allocated = allocated;
282   result->numbered = numbered;
283   return result;
284 
285  bad_format:
286   if (numbered != NULL)
287     free (numbered);
288   return NULL;
289 }
290 
291 static void
format_free(void * descr)292 format_free (void *descr)
293 {
294   struct spec *spec = (struct spec *) descr;
295 
296   if (spec->numbered != NULL)
297     free (spec->numbered);
298   free (spec);
299 }
300 
301 static int
format_get_number_of_directives(void * descr)302 format_get_number_of_directives (void *descr)
303 {
304   struct spec *spec = (struct spec *) descr;
305 
306   return spec->directives;
307 }
308 
309 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgstr)310 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
311 	      formatstring_error_logger_t error_logger,
312 	      const char *pretty_msgstr)
313 {
314   struct spec *spec1 = (struct spec *) msgid_descr;
315   struct spec *spec2 = (struct spec *) msgstr_descr;
316   bool err = false;
317 
318   if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
319     {
320       unsigned int i, j;
321       unsigned int n1 = spec1->numbered_arg_count;
322       unsigned int n2 = spec2->numbered_arg_count;
323 
324       /* Check the argument names are the same.
325 	 Both arrays are sorted.  We search for the first difference.  */
326       for (i = 0, j = 0; i < n1 || j < n2; )
327 	{
328 	  int cmp = (i >= n1 ? 1 :
329 		     j >= n2 ? -1 :
330 		     spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
331 		     spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
332 		     0);
333 
334 	  if (cmp > 0)
335 	    {
336 	      if (error_logger)
337 		error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"),
338 			      spec2->numbered[j].number, pretty_msgstr);
339 	      err = true;
340 	      break;
341 	    }
342 	  else if (cmp < 0)
343 	    {
344 	      if (equality)
345 		{
346 		  if (error_logger)
347 		    error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
348 				  spec1->numbered[i].number, pretty_msgstr);
349 		  err = true;
350 		  break;
351 		}
352 	      else
353 		i++;
354 	    }
355 	  else
356 	    j++, i++;
357 	}
358       /* Check the argument types are the same.  */
359       if (!err)
360 	for (i = 0, j = 0; j < n2; )
361 	  {
362 	    if (spec1->numbered[i].number == spec2->numbered[j].number)
363 	      {
364 		if (spec1->numbered[i].type != spec2->numbered[j].type)
365 		  {
366 		    if (error_logger)
367 		      error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"),
368 				    pretty_msgstr, spec2->numbered[j].number);
369 		    err = true;
370 		    break;
371 		  }
372 		j++, i++;
373 	      }
374 	    else
375 	      i++;
376 	  }
377     }
378 
379   return err;
380 }
381 
382 
383 struct formatstring_parser formatstring_php =
384 {
385   format_parse,
386   format_free,
387   format_get_number_of_directives,
388   NULL,
389   format_check
390 };
391 
392 
393 #ifdef TEST
394 
395 /* Test program: Print the argument list specification returned by
396    format_parse for strings read from standard input.  */
397 
398 #include <stdio.h>
399 #include "getline.h"
400 
401 static void
format_print(void * descr)402 format_print (void *descr)
403 {
404   struct spec *spec = (struct spec *) descr;
405   unsigned int last;
406   unsigned int i;
407 
408   if (spec == NULL)
409     {
410       printf ("INVALID");
411       return;
412     }
413 
414   printf ("(");
415   last = 1;
416   for (i = 0; i < spec->numbered_arg_count; i++)
417     {
418       unsigned int number = spec->numbered[i].number;
419 
420       if (i > 0)
421 	printf (" ");
422       if (number < last)
423 	abort ();
424       for (; last < number; last++)
425 	printf ("_ ");
426       switch (spec->numbered[i].type)
427 	{
428 	case FAT_INTEGER:
429 	  printf ("i");
430 	  break;
431 	case FAT_FLOAT:
432 	  printf ("f");
433 	  break;
434 	case FAT_CHARACTER:
435 	  printf ("c");
436 	  break;
437 	case FAT_STRING:
438 	  printf ("s");
439 	  break;
440 	default:
441 	  abort ();
442 	}
443       last = number + 1;
444     }
445   printf (")");
446 }
447 
/* Test driver: read format strings from standard input, one per line, and
   for each print the argument list computed by format_parse, followed by
   the reason for rejection when the string is invalid.  Returns 0 once
   the input is exhausted.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated the buffer even though it failed;
             release it before leaving the loop.  */
          free (line);
          break;
        }
      /* Strip the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        /* The descriptor is owned by us; don't leak one per input line.  */
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
479 
480 /*
481  * For Emacs M-x compile
482  * Local Variables:
483  * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-php.c ../lib/libgettextlib.la"
484  * End:
485  */
486 
487 #endif /* TEST */
488