1 /* PHP format strings.
2 Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2002.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22
23 #include <stdbool.h>
24 #include <stdlib.h>
25
26 #include "format.h"
27 #include "c-ctype.h"
28 #include "xalloc.h"
29 #include "xvasprintf.h"
30 #include "format-invalid.h"
31 #include "gettext.h"
32
33 #define _(str) gettext (str)
34
35 /* PHP format strings are described in phpdoc-4.0.6, file
36 phpdoc/manual/function.sprintf.html, and are implemented in
37 php-4.1.0/ext/standard/formatted_print.c.
38 A directive
39 - starts with '%' or '%m$' where m is a positive integer,
40 - is optionally followed by any of the characters '0', '-', ' ', or
41 "'<anychar>", each of which acts as a flag,
42 - is optionally followed by a width specification: a nonempty digit
43 sequence,
44 - is optionally followed by '.' and a precision specification: a nonempty
45 digit sequence,
46 - is optionally followed by a size specifier 'l', which is ignored,
47 - is finished by a specifier
48 - 's', that needs a string argument,
49 - 'b', 'd', 'u', 'o', 'x', 'X', that need an integer argument,
50 - 'e', 'f', that need a floating-point argument,
51 - 'c', that needs a character argument.
52 Additionally there is the directive '%%', which takes no argument.
53 Numbered and unnumbered argument specifications can be used in the same
54 string. Numbered argument specifications have no influence on the
55 "current argument index", that is incremented each time an argument is read.
56 */
57
/* Kinds of argument that a PHP format directive can consume.  The
   conversion characters listed are the ones format_parse maps to each
   kind.  */
enum format_arg_type
{
  FAT_INTEGER,          /* 'b', 'd', 'u', 'o', 'x', 'X' */
  FAT_FLOAT,            /* 'e', 'f' */
  FAT_CHARACTER,        /* 'c' */
  FAT_STRING            /* 's' */
};
65
66 struct numbered_arg
67 {
68 unsigned int number;
69 enum format_arg_type type;
70 };
71
/* Description of a parsed format string, as built by format_parse.  */
struct spec
{
  /* Number of '%' directives encountered; "%%" is counted too.  */
  unsigned int directives;
  /* Number of entries in use in 'numbered'.  */
  unsigned int numbered_arg_count;
  /* Number of entries for which 'numbered' has room.  */
  unsigned int allocated;
  /* Referenced arguments, sorted by argument number with duplicates
     merged.  NULL when the format string consumes no argument.  */
  struct numbered_arg *numbered;
};
79
/* Decimal digit test that does not depend on the locale.
   Unlike <ctype.h> isdigit, whose argument must be an 'unsigned char',
   this macro accepts both 'char' and 'unsigned char' arguments: values
   below '0' wrap to large unsigned numbers and fail the comparison.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') <= 9)
85
86
87 static int
numbered_arg_compare(const void * p1,const void * p2)88 numbered_arg_compare (const void *p1, const void *p2)
89 {
90 unsigned int n1 = ((const struct numbered_arg *) p1)->number;
91 unsigned int n2 = ((const struct numbered_arg *) p2)->number;
92
93 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
94 }
95
96 static void *
format_parse(const char * format,bool translated,char ** invalid_reason)97 format_parse (const char *format, bool translated, char **invalid_reason)
98 {
99 unsigned int directives;
100 unsigned int numbered_arg_count;
101 unsigned int allocated;
102 struct numbered_arg *numbered;
103 unsigned int unnumbered_arg_count;
104 struct spec *result;
105
106 directives = 0;
107 numbered_arg_count = 0;
108 allocated = 0;
109 numbered = NULL;
110 unnumbered_arg_count = 0;
111
112 for (; *format != '\0';)
113 if (*format++ == '%')
114 {
115 /* A directive. */
116 directives++;
117
118 if (*format != '%')
119 {
120 /* A complex directive. */
121 unsigned int number;
122 enum format_arg_type type;
123
124 number = ++unnumbered_arg_count;
125 if (isdigit (*format))
126 {
127 const char *f = format;
128 unsigned int m = 0;
129
130 do
131 {
132 m = 10 * m + (*f - '0');
133 f++;
134 }
135 while (isdigit (*f));
136
137 if (*f == '$')
138 {
139 if (m == 0)
140 {
141 *invalid_reason = INVALID_ARGNO_0 (directives);
142 goto bad_format;
143 }
144 number = m;
145 format = ++f;
146 --unnumbered_arg_count;
147 }
148 }
149
150 /* Parse flags. */
151 for (;;)
152 {
153 if (*format == '0' || *format == '-' || *format == ' ')
154 format++;
155 else if (*format == '\'')
156 {
157 format++;
158 if (*format == '\0')
159 {
160 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
161 goto bad_format;
162 }
163 format++;
164 }
165 else
166 break;
167 }
168
169 /* Parse width. */
170 if (isdigit (*format))
171 {
172 do
173 format++;
174 while (isdigit (*format));
175 }
176
177 /* Parse precision. */
178 if (*format == '.')
179 {
180 format++;
181
182 if (isdigit (*format))
183 {
184 do
185 format++;
186 while (isdigit (*format));
187 }
188 else
189 --format; /* will jump to bad_format */
190 }
191
192 /* Parse size. */
193 if (*format == 'l')
194 format++;
195
196 switch (*format)
197 {
198 case 'b': case 'd': case 'u': case 'o': case 'x': case 'X':
199 type = FAT_INTEGER;
200 break;
201 case 'e': case 'f':
202 type = FAT_FLOAT;
203 break;
204 case 'c':
205 type = FAT_CHARACTER;
206 break;
207 case 's':
208 type = FAT_STRING;
209 break;
210 default:
211 *invalid_reason =
212 (*format == '\0'
213 ? INVALID_UNTERMINATED_DIRECTIVE ()
214 : INVALID_CONVERSION_SPECIFIER (directives, *format));
215 goto bad_format;
216 }
217
218 if (allocated == numbered_arg_count)
219 {
220 allocated = 2 * allocated + 1;
221 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
222 }
223 numbered[numbered_arg_count].number = number;
224 numbered[numbered_arg_count].type = type;
225 numbered_arg_count++;
226 }
227
228 format++;
229 }
230
231 /* Sort the numbered argument array, and eliminate duplicates. */
232 if (numbered_arg_count > 1)
233 {
234 unsigned int i, j;
235 bool err;
236
237 qsort (numbered, numbered_arg_count,
238 sizeof (struct numbered_arg), numbered_arg_compare);
239
240 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
241 err = false;
242 for (i = j = 0; i < numbered_arg_count; i++)
243 if (j > 0 && numbered[i].number == numbered[j-1].number)
244 {
245 enum format_arg_type type1 = numbered[i].type;
246 enum format_arg_type type2 = numbered[j-1].type;
247 enum format_arg_type type_both;
248
249 if (type1 == type2)
250 type_both = type1;
251 else
252 {
253 /* Incompatible types. */
254 type_both = type1;
255 if (!err)
256 *invalid_reason =
257 INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number);
258 err = true;
259 }
260
261 numbered[j-1].type = type_both;
262 }
263 else
264 {
265 if (j < i)
266 {
267 numbered[j].number = numbered[i].number;
268 numbered[j].type = numbered[i].type;
269 }
270 j++;
271 }
272 numbered_arg_count = j;
273 if (err)
274 /* *invalid_reason has already been set above. */
275 goto bad_format;
276 }
277
278 result = (struct spec *) xmalloc (sizeof (struct spec));
279 result->directives = directives;
280 result->numbered_arg_count = numbered_arg_count;
281 result->allocated = allocated;
282 result->numbered = numbered;
283 return result;
284
285 bad_format:
286 if (numbered != NULL)
287 free (numbered);
288 return NULL;
289 }
290
291 static void
format_free(void * descr)292 format_free (void *descr)
293 {
294 struct spec *spec = (struct spec *) descr;
295
296 if (spec->numbered != NULL)
297 free (spec->numbered);
298 free (spec);
299 }
300
301 static int
format_get_number_of_directives(void * descr)302 format_get_number_of_directives (void *descr)
303 {
304 struct spec *spec = (struct spec *) descr;
305
306 return spec->directives;
307 }
308
309 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgstr)310 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
311 formatstring_error_logger_t error_logger,
312 const char *pretty_msgstr)
313 {
314 struct spec *spec1 = (struct spec *) msgid_descr;
315 struct spec *spec2 = (struct spec *) msgstr_descr;
316 bool err = false;
317
318 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
319 {
320 unsigned int i, j;
321 unsigned int n1 = spec1->numbered_arg_count;
322 unsigned int n2 = spec2->numbered_arg_count;
323
324 /* Check the argument names are the same.
325 Both arrays are sorted. We search for the first difference. */
326 for (i = 0, j = 0; i < n1 || j < n2; )
327 {
328 int cmp = (i >= n1 ? 1 :
329 j >= n2 ? -1 :
330 spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
331 spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
332 0);
333
334 if (cmp > 0)
335 {
336 if (error_logger)
337 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"),
338 spec2->numbered[j].number, pretty_msgstr);
339 err = true;
340 break;
341 }
342 else if (cmp < 0)
343 {
344 if (equality)
345 {
346 if (error_logger)
347 error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
348 spec1->numbered[i].number, pretty_msgstr);
349 err = true;
350 break;
351 }
352 else
353 i++;
354 }
355 else
356 j++, i++;
357 }
358 /* Check the argument types are the same. */
359 if (!err)
360 for (i = 0, j = 0; j < n2; )
361 {
362 if (spec1->numbered[i].number == spec2->numbered[j].number)
363 {
364 if (spec1->numbered[i].type != spec2->numbered[j].type)
365 {
366 if (error_logger)
367 error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"),
368 pretty_msgstr, spec2->numbered[j].number);
369 err = true;
370 break;
371 }
372 j++, i++;
373 }
374 else
375 i++;
376 }
377 }
378
379 return err;
380 }
381
382
383 struct formatstring_parser formatstring_php =
384 {
385 format_parse,
386 format_free,
387 format_get_number_of_directives,
388 NULL,
389 format_check
390 };
391
392
393 #ifdef TEST
394
395 /* Test program: Print the argument list specification returned by
396 format_parse for strings read from standard input. */
397
398 #include <stdio.h>
399 #include "getline.h"
400
401 static void
format_print(void * descr)402 format_print (void *descr)
403 {
404 struct spec *spec = (struct spec *) descr;
405 unsigned int last;
406 unsigned int i;
407
408 if (spec == NULL)
409 {
410 printf ("INVALID");
411 return;
412 }
413
414 printf ("(");
415 last = 1;
416 for (i = 0; i < spec->numbered_arg_count; i++)
417 {
418 unsigned int number = spec->numbered[i].number;
419
420 if (i > 0)
421 printf (" ");
422 if (number < last)
423 abort ();
424 for (; last < number; last++)
425 printf ("_ ");
426 switch (spec->numbered[i].type)
427 {
428 case FAT_INTEGER:
429 printf ("i");
430 break;
431 case FAT_FLOAT:
432 printf ("f");
433 break;
434 case FAT_CHARACTER:
435 printf ("c");
436 break;
437 case FAT_STRING:
438 printf ("s");
439 break;
440 default:
441 abort ();
442 }
443 last = number + 1;
444 }
445 printf (")");
446 }
447
/* Test driver: read format strings from standard input, one per line,
   and print for each the argument list computed by format_parse,
   followed by the reason when the string is invalid.  Returns 0 once the
   input is exhausted.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated a buffer even though it failed;
             release it before leaving the loop.  */
          free (line);
          break;
        }
      /* Drop the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        /* The descriptor is owned by this loop iteration.  */
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
479
480 /*
481 * For Emacs M-x compile
482 * Local Variables:
483 * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-php.c ../lib/libgettextlib.la"
484 * End:
485 */
486
487 #endif /* TEST */
488