1 /* awk format strings.
2 Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2002.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22
23 #include <stdbool.h>
24 #include <stdlib.h>
25
26 #include "format.h"
27 #include "c-ctype.h"
28 #include "xalloc.h"
29 #include "xvasprintf.h"
30 #include "format-invalid.h"
31 #include "gettext.h"
32
33 #define _(str) gettext (str)
34
/* awk format strings are described in the gawk-3.1 documentation and
   implemented in gawk-3.1.0/builtin.c: format_tree().
   A directive
   - starts with '%' or '%m$' where m is a positive integer,
   - is optionally followed by any of the characters '#', '0', '-', ' ', '+',
     each of which acts as a flag,
   - is optionally followed by a width specification: '*' (reads an argument)
     or '*m$' or a nonempty digit sequence,
   - is optionally followed by '.' and a precision specification: '*' (reads
     an argument) or '*m$' or a nonempty digit sequence,
   - is finished by a specifier
       - '%', that needs no argument,
       - 'c', that needs a character argument,
       - 's', that needs a string argument,
       - 'i', 'd', that need a signed integer argument,
       - 'o', 'u', 'x', 'X', that need an unsigned integer argument,
       - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument.
   Numbered ('%m$' or '*m$') and unnumbered argument specifications cannot
   be used in the same string.
 */
55
/* Kind of argument consumed by a directive.  The enumerator order is kept
   as-is; FAT_NONE is also used as the "incompatible types" marker when
   duplicate numbered arguments are merged in format_parse.  */
enum format_arg_type
{
  FAT_NONE,              /* '%%': consumes no argument */
  FAT_CHARACTER,         /* 'c' */
  FAT_STRING,            /* 's' */
  FAT_INTEGER,           /* 'i', 'd', and '*' width/precision arguments */
  FAT_UNSIGNED_INTEGER,  /* 'o', 'u', 'x', 'X' */
  FAT_FLOAT              /* 'e', 'E', 'f', 'g', 'G' */
};
65
66 struct numbered_arg
67 {
68 unsigned int number;
69 enum format_arg_type type;
70 };
71
/* Result of parsing one format string.  */
struct spec
{
  /* Number of '%' directives seen, including '%%'.  */
  unsigned int directives;
  /* Number of valid entries in NUMBERED.  */
  unsigned int numbered_arg_count;
  /* Number of entries NUMBERED has room for.  */
  unsigned int allocated;
  /* Heap-allocated array of argument references, or NULL if none.  */
  struct numbered_arg *numbered;
};
79
/* Locale-independent decimal digit test that accepts both 'char' and
   'unsigned char' arguments (the <ctype.h> isdigit requires a value
   representable as 'unsigned char').  Characters below '0' wrap around to
   a huge unsigned value, so a single comparison covers both bounds and the
   argument is evaluated only once.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') <= 9)
85
86
87 static int
numbered_arg_compare(const void * p1,const void * p2)88 numbered_arg_compare (const void *p1, const void *p2)
89 {
90 unsigned int n1 = ((const struct numbered_arg *) p1)->number;
91 unsigned int n2 = ((const struct numbered_arg *) p2)->number;
92
93 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
94 }
95
96 static void *
format_parse(const char * format,bool translated,char ** invalid_reason)97 format_parse (const char *format, bool translated, char **invalid_reason)
98 {
99 struct spec spec;
100 unsigned int unnumbered_arg_count;
101 struct spec *result;
102
103 spec.directives = 0;
104 spec.numbered_arg_count = 0;
105 spec.allocated = 0;
106 spec.numbered = NULL;
107 unnumbered_arg_count = 0;
108
109 for (; *format != '\0';)
110 if (*format++ == '%')
111 {
112 /* A directive. */
113 unsigned int number = 0;
114 enum format_arg_type type;
115
116 spec.directives++;
117
118 if (isdigit (*format))
119 {
120 const char *f = format;
121 unsigned int m = 0;
122
123 do
124 {
125 m = 10 * m + (*f - '0');
126 f++;
127 }
128 while (isdigit (*f));
129
130 if (*f == '$')
131 {
132 if (m == 0)
133 {
134 *invalid_reason = INVALID_ARGNO_0 (spec.directives);
135 goto bad_format;
136 }
137 number = m;
138 format = ++f;
139 }
140 }
141
142 /* Parse flags. */
143 while (*format == ' ' || *format == '+' || *format == '-'
144 || *format == '#' || *format == '0')
145 format++;
146
147 /* Parse width. */
148 if (*format == '*')
149 {
150 unsigned int width_number = 0;
151
152 format++;
153
154 if (isdigit (*format))
155 {
156 const char *f = format;
157 unsigned int m = 0;
158
159 do
160 {
161 m = 10 * m + (*f - '0');
162 f++;
163 }
164 while (isdigit (*f));
165
166 if (*f == '$')
167 {
168 if (m == 0)
169 {
170 *invalid_reason =
171 INVALID_WIDTH_ARGNO_0 (spec.directives);
172 goto bad_format;
173 }
174 width_number = m;
175 format = ++f;
176 }
177 }
178
179 if (width_number)
180 {
181 /* Numbered argument. */
182
183 /* Numbered and unnumbered specifications are exclusive. */
184 if (unnumbered_arg_count > 0)
185 {
186 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
187 goto bad_format;
188 }
189
190 if (spec.allocated == spec.numbered_arg_count)
191 {
192 spec.allocated = 2 * spec.allocated + 1;
193 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
194 }
195 spec.numbered[spec.numbered_arg_count].number = width_number;
196 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
197 spec.numbered_arg_count++;
198 }
199 else
200 {
201 /* Unnumbered argument. */
202
203 /* Numbered and unnumbered specifications are exclusive. */
204 if (spec.numbered_arg_count > 0)
205 {
206 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
207 goto bad_format;
208 }
209
210 if (spec.allocated == unnumbered_arg_count)
211 {
212 spec.allocated = 2 * spec.allocated + 1;
213 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
214 }
215 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
216 spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
217 unnumbered_arg_count++;
218 }
219 }
220 else if (isdigit (*format))
221 {
222 do format++; while (isdigit (*format));
223 }
224
225 /* Parse precision. */
226 if (*format == '.')
227 {
228 format++;
229
230 if (*format == '*')
231 {
232 unsigned int precision_number = 0;
233
234 format++;
235
236 if (isdigit (*format))
237 {
238 const char *f = format;
239 unsigned int m = 0;
240
241 do
242 {
243 m = 10 * m + (*f - '0');
244 f++;
245 }
246 while (isdigit (*f));
247
248 if (*f == '$')
249 {
250 if (m == 0)
251 {
252 *invalid_reason =
253 INVALID_PRECISION_ARGNO_0 (spec.directives);
254 goto bad_format;
255 }
256 precision_number = m;
257 format = ++f;
258 }
259 }
260
261 if (precision_number)
262 {
263 /* Numbered argument. */
264
265 /* Numbered and unnumbered specifications are exclusive. */
266 if (unnumbered_arg_count > 0)
267 {
268 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
269 goto bad_format;
270 }
271
272 if (spec.allocated == spec.numbered_arg_count)
273 {
274 spec.allocated = 2 * spec.allocated + 1;
275 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
276 }
277 spec.numbered[spec.numbered_arg_count].number = precision_number;
278 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
279 spec.numbered_arg_count++;
280 }
281 else
282 {
283 /* Unnumbered argument. */
284
285 /* Numbered and unnumbered specifications are exclusive. */
286 if (spec.numbered_arg_count > 0)
287 {
288 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
289 goto bad_format;
290 }
291
292 if (spec.allocated == unnumbered_arg_count)
293 {
294 spec.allocated = 2 * spec.allocated + 1;
295 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
296 }
297 spec.numbered[unnumbered_arg_count].type = unnumbered_arg_count + 1;
298 spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
299 unnumbered_arg_count++;
300 }
301 }
302 else if (isdigit (*format))
303 {
304 do format++; while (isdigit (*format));
305 }
306 }
307
308 switch (*format)
309 {
310 case '%':
311 type = FAT_NONE;
312 break;
313 case 'c':
314 type = FAT_CHARACTER;
315 break;
316 case 's':
317 type = FAT_STRING;
318 break;
319 case 'i': case 'd':
320 type = FAT_INTEGER;
321 break;
322 case 'u': case 'o': case 'x': case 'X':
323 type = FAT_UNSIGNED_INTEGER;
324 break;
325 case 'e': case 'E': case 'f': case 'g': case 'G':
326 type = FAT_FLOAT;
327 break;
328 default:
329 *invalid_reason =
330 (*format == '\0'
331 ? INVALID_UNTERMINATED_DIRECTIVE ()
332 : INVALID_CONVERSION_SPECIFIER (spec.directives, *format));
333 goto bad_format;
334 }
335
336 if (type != FAT_NONE)
337 {
338 if (number)
339 {
340 /* Numbered argument. */
341
342 /* Numbered and unnumbered specifications are exclusive. */
343 if (unnumbered_arg_count > 0)
344 {
345 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
346 goto bad_format;
347 }
348
349 if (spec.allocated == spec.numbered_arg_count)
350 {
351 spec.allocated = 2 * spec.allocated + 1;
352 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
353 }
354 spec.numbered[spec.numbered_arg_count].number = number;
355 spec.numbered[spec.numbered_arg_count].type = type;
356 spec.numbered_arg_count++;
357 }
358 else
359 {
360 /* Unnumbered argument. */
361
362 /* Numbered and unnumbered specifications are exclusive. */
363 if (spec.numbered_arg_count > 0)
364 {
365 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
366 goto bad_format;
367 }
368
369 if (spec.allocated == unnumbered_arg_count)
370 {
371 spec.allocated = 2 * spec.allocated + 1;
372 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
373 }
374 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
375 spec.numbered[unnumbered_arg_count].type = type;
376 unnumbered_arg_count++;
377 }
378 }
379
380 format++;
381 }
382
383 /* Convert the unnumbered argument array to numbered arguments. */
384 if (unnumbered_arg_count > 0)
385 spec.numbered_arg_count = unnumbered_arg_count;
386 /* Sort the numbered argument array, and eliminate duplicates. */
387 else if (spec.numbered_arg_count > 1)
388 {
389 unsigned int i, j;
390 bool err;
391
392 qsort (spec.numbered, spec.numbered_arg_count,
393 sizeof (struct numbered_arg), numbered_arg_compare);
394
395 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
396 err = false;
397 for (i = j = 0; i < spec.numbered_arg_count; i++)
398 if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
399 {
400 enum format_arg_type type1 = spec.numbered[i].type;
401 enum format_arg_type type2 = spec.numbered[j-1].type;
402 enum format_arg_type type_both;
403
404 if (type1 == type2)
405 type_both = type1;
406 else
407 {
408 /* Incompatible types. */
409 type_both = FAT_NONE;
410 if (!err)
411 *invalid_reason =
412 INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number);
413 err = true;
414 }
415
416 spec.numbered[j-1].type = type_both;
417 }
418 else
419 {
420 if (j < i)
421 {
422 spec.numbered[j].number = spec.numbered[i].number;
423 spec.numbered[j].type = spec.numbered[i].type;
424 }
425 j++;
426 }
427 spec.numbered_arg_count = j;
428 if (err)
429 /* *invalid_reason has already been set above. */
430 goto bad_format;
431 }
432
433 result = (struct spec *) xmalloc (sizeof (struct spec));
434 *result = spec;
435 return result;
436
437 bad_format:
438 if (spec.numbered != NULL)
439 free (spec.numbered);
440 return NULL;
441 }
442
443 static void
format_free(void * descr)444 format_free (void *descr)
445 {
446 struct spec *spec = (struct spec *) descr;
447
448 if (spec->numbered != NULL)
449 free (spec->numbered);
450 free (spec);
451 }
452
453 static int
format_get_number_of_directives(void * descr)454 format_get_number_of_directives (void *descr)
455 {
456 struct spec *spec = (struct spec *) descr;
457
458 return spec->directives;
459 }
460
461 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgstr)462 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
463 formatstring_error_logger_t error_logger,
464 const char *pretty_msgstr)
465 {
466 struct spec *spec1 = (struct spec *) msgid_descr;
467 struct spec *spec2 = (struct spec *) msgstr_descr;
468 bool err = false;
469
470 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
471 {
472 unsigned int i, j;
473 unsigned int n1 = spec1->numbered_arg_count;
474 unsigned int n2 = spec2->numbered_arg_count;
475
476 /* Check the argument names are the same.
477 Both arrays are sorted. We search for the first difference. */
478 for (i = 0, j = 0; i < n1 || j < n2; )
479 {
480 int cmp = (i >= n1 ? 1 :
481 j >= n2 ? -1 :
482 spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
483 spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
484 0);
485
486 if (cmp > 0)
487 {
488 if (error_logger)
489 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"),
490 spec2->numbered[j].number, pretty_msgstr);
491 err = true;
492 break;
493 }
494 else if (cmp < 0)
495 {
496 if (equality)
497 {
498 if (error_logger)
499 error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
500 spec1->numbered[i].number, pretty_msgstr);
501 err = true;
502 break;
503 }
504 else
505 i++;
506 }
507 else
508 j++, i++;
509 }
510 /* Check the argument types are the same. */
511 if (!err)
512 for (i = 0, j = 0; j < n2; )
513 {
514 if (spec1->numbered[i].number == spec2->numbered[j].number)
515 {
516 if (spec1->numbered[i].type != spec2->numbered[j].type)
517 {
518 if (error_logger)
519 error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"),
520 pretty_msgstr, spec2->numbered[j].number);
521 err = true;
522 break;
523 }
524 j++, i++;
525 }
526 else
527 i++;
528 }
529 }
530
531 return err;
532 }
533
534
535 struct formatstring_parser formatstring_awk =
536 {
537 format_parse,
538 format_free,
539 format_get_number_of_directives,
540 NULL,
541 format_check
542 };
543
544
545 #ifdef TEST
546
547 /* Test program: Print the argument list specification returned by
548 format_parse for strings read from standard input. */
549
550 #include <stdio.h>
551 #include "getline.h"
552
553 static void
format_print(void * descr)554 format_print (void *descr)
555 {
556 struct spec *spec = (struct spec *) descr;
557 unsigned int last;
558 unsigned int i;
559
560 if (spec == NULL)
561 {
562 printf ("INVALID");
563 return;
564 }
565
566 printf ("(");
567 last = 1;
568 for (i = 0; i < spec->numbered_arg_count; i++)
569 {
570 unsigned int number = spec->numbered[i].number;
571
572 if (i > 0)
573 printf (" ");
574 if (number < last)
575 abort ();
576 for (; last < number; last++)
577 printf ("_ ");
578 switch (spec->numbered[i].type)
579 {
580 case FAT_CHARACTER:
581 printf ("c");
582 break;
583 case FAT_STRING:
584 printf ("s");
585 break;
586 case FAT_INTEGER:
587 printf ("i");
588 break;
589 case FAT_UNSIGNED_INTEGER:
590 printf ("[unsigned]i");
591 break;
592 case FAT_FLOAT:
593 printf ("f");
594 break;
595 default:
596 abort ();
597 }
598 last = number + 1;
599 }
600 printf (")");
601 }
602
/* Test driver: read format strings from standard input, one per line, and
   print the argument list parsed from each, followed by the reason on a
   separate line when the string is invalid.  Stops at end of input.  */
int
main ()
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      char *invalid_reason = NULL;
      void *descr;
      int line_len = getline (&line, &line_size, stdin);

      if (line_len < 0)
        break;

      /* Drop the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      descr = format_parse (line, false, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
634
635 /*
636 * For Emacs M-x compile
637 * Local Variables:
638 * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-awk.c ../lib/libgettextlib.la"
639 * End:
640 */
641
642 #endif /* TEST */
643