1 /* awk format strings.
2 Copyright (C) 2001-2004, 2006-2007, 2009, 2019-2020 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2002.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21
22 #include <stdbool.h>
23 #include <stdlib.h>
24
25 #include "format.h"
26 #include "c-ctype.h"
27 #include "xalloc.h"
28 #include "xvasprintf.h"
29 #include "format-invalid.h"
30 #include "gettext.h"
31
32 #define _(str) gettext (str)
33
/* awk format strings are described in the gawk-3.1 documentation and
   implemented in gawk-3.1.0/builtin.c: format_tree().
   A directive
   - starts with '%' or '%m$' where m is a positive integer,
   - is optionally followed by any of the characters '#', '0', '-', ' ', '+',
     each of which acts as a flag,
   - is optionally followed by a width specification: '*' (reads an argument)
     or '*m$' or a nonempty digit sequence,
   - is optionally followed by '.' and a precision specification: '*' (reads
     an argument) or '*m$' or a nonempty digit sequence,
   - is finished by a specifier
       - '%', that needs no argument,
       - 'c', that needs a character argument,
       - 's', that needs a string argument,
       - 'i', 'd', that need a signed integer argument,
       - 'o', 'u', 'x', 'X', that need an unsigned integer argument,
       - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument.
   Numbered ('%m$' or '*m$') and unnumbered argument specifications cannot
   be used in the same string.
 */
54
/* The kind of argument that a directive (or a '*' width/precision)
   consumes.  The specifier characters named below are the ones mapped to
   each constant by the parser in this file.  */
enum format_arg_type
{
  FAT_NONE = 0,             /* '%': no argument; also marks a type clash */
  FAT_CHARACTER = 1,        /* 'c' */
  FAT_STRING = 2,           /* 's' */
  FAT_INTEGER = 3,          /* 'i', 'd', and every '*' width/precision */
  FAT_UNSIGNED_INTEGER = 4, /* 'u', 'o', 'x', 'X' */
  FAT_FLOAT = 5             /* 'e', 'E', 'f', 'g', 'G' */
};
64
/* One argument reference collected from a format string.  */
struct numbered_arg
{
  /* Argument position.  The parser rejects 0 for explicit 'm$' numbers and
     assigns 1, 2, ... to unnumbered arguments, so stored values start
     at 1.  */
  unsigned int number;
  /* Type the directive requires for this argument.  */
  enum format_arg_type type;
};
70
/* Result of parsing one format string.  */
struct spec
{
  /* Number of directives seen; bumped once per '%' encountered.  */
  unsigned int directives;
  /* Number of valid entries in NUMBERED.  */
  unsigned int numbered_arg_count;
  /* Heap-allocated array of argument references (NULL when no argument was
     recorded).  After a successful parse it is ordered by ascending
     argument number with duplicates merged.  */
  struct numbered_arg *numbered;
};
77
/* Locale-independent test for an ASCII decimal digit.
   Unlike the <ctype.h> isdigit, whose argument must be representable as
   'unsigned char', this one accepts a plain 'char' as well: the subtraction
   maps '0'..'9' to 0..9 and the unsigned conversion pushes everything else
   (including negative values) out of that range.  The argument is evaluated
   exactly once.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') <= 9u)
83
84
85 static int
numbered_arg_compare(const void * p1,const void * p2)86 numbered_arg_compare (const void *p1, const void *p2)
87 {
88 unsigned int n1 = ((const struct numbered_arg *) p1)->number;
89 unsigned int n2 = ((const struct numbered_arg *) p2)->number;
90
91 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
92 }
93
94 static void *
format_parse(const char * format,bool translated,char * fdi,char ** invalid_reason)95 format_parse (const char *format, bool translated, char *fdi,
96 char **invalid_reason)
97 {
98 const char *const format_start = format;
99 struct spec spec;
100 unsigned int numbered_allocated;
101 unsigned int unnumbered_arg_count;
102 struct spec *result;
103
104 spec.directives = 0;
105 spec.numbered_arg_count = 0;
106 spec.numbered = NULL;
107 numbered_allocated = 0;
108 unnumbered_arg_count = 0;
109
110 for (; *format != '\0';)
111 if (*format++ == '%')
112 {
113 /* A directive. */
114 unsigned int number = 0;
115 enum format_arg_type type;
116
117 FDI_SET (format - 1, FMTDIR_START);
118 spec.directives++;
119
120 if (isdigit (*format))
121 {
122 const char *f = format;
123 unsigned int m = 0;
124
125 do
126 {
127 m = 10 * m + (*f - '0');
128 f++;
129 }
130 while (isdigit (*f));
131
132 if (*f == '$')
133 {
134 if (m == 0)
135 {
136 *invalid_reason = INVALID_ARGNO_0 (spec.directives);
137 FDI_SET (f, FMTDIR_ERROR);
138 goto bad_format;
139 }
140 number = m;
141 format = ++f;
142 }
143 }
144
145 /* Parse flags. */
146 while (*format == ' ' || *format == '+' || *format == '-'
147 || *format == '#' || *format == '0')
148 format++;
149
150 /* Parse width. */
151 if (*format == '*')
152 {
153 unsigned int width_number = 0;
154
155 format++;
156
157 if (isdigit (*format))
158 {
159 const char *f = format;
160 unsigned int m = 0;
161
162 do
163 {
164 m = 10 * m + (*f - '0');
165 f++;
166 }
167 while (isdigit (*f));
168
169 if (*f == '$')
170 {
171 if (m == 0)
172 {
173 *invalid_reason =
174 INVALID_WIDTH_ARGNO_0 (spec.directives);
175 FDI_SET (f, FMTDIR_ERROR);
176 goto bad_format;
177 }
178 width_number = m;
179 format = ++f;
180 }
181 }
182
183 if (width_number)
184 {
185 /* Numbered argument. */
186
187 /* Numbered and unnumbered specifications are exclusive. */
188 if (unnumbered_arg_count > 0)
189 {
190 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
191 FDI_SET (format - 1, FMTDIR_ERROR);
192 goto bad_format;
193 }
194
195 if (numbered_allocated == spec.numbered_arg_count)
196 {
197 numbered_allocated = 2 * numbered_allocated + 1;
198 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
199 }
200 spec.numbered[spec.numbered_arg_count].number = width_number;
201 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
202 spec.numbered_arg_count++;
203 }
204 else
205 {
206 /* Unnumbered argument. */
207
208 /* Numbered and unnumbered specifications are exclusive. */
209 if (spec.numbered_arg_count > 0)
210 {
211 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
212 FDI_SET (format - 1, FMTDIR_ERROR);
213 goto bad_format;
214 }
215
216 if (numbered_allocated == unnumbered_arg_count)
217 {
218 numbered_allocated = 2 * numbered_allocated + 1;
219 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
220 }
221 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
222 spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
223 unnumbered_arg_count++;
224 }
225 }
226 else if (isdigit (*format))
227 {
228 do format++; while (isdigit (*format));
229 }
230
231 /* Parse precision. */
232 if (*format == '.')
233 {
234 format++;
235
236 if (*format == '*')
237 {
238 unsigned int precision_number = 0;
239
240 format++;
241
242 if (isdigit (*format))
243 {
244 const char *f = format;
245 unsigned int m = 0;
246
247 do
248 {
249 m = 10 * m + (*f - '0');
250 f++;
251 }
252 while (isdigit (*f));
253
254 if (*f == '$')
255 {
256 if (m == 0)
257 {
258 *invalid_reason =
259 INVALID_PRECISION_ARGNO_0 (spec.directives);
260 FDI_SET (f, FMTDIR_ERROR);
261 goto bad_format;
262 }
263 precision_number = m;
264 format = ++f;
265 }
266 }
267
268 if (precision_number)
269 {
270 /* Numbered argument. */
271
272 /* Numbered and unnumbered specifications are exclusive. */
273 if (unnumbered_arg_count > 0)
274 {
275 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
276 FDI_SET (format - 1, FMTDIR_ERROR);
277 goto bad_format;
278 }
279
280 if (numbered_allocated == spec.numbered_arg_count)
281 {
282 numbered_allocated = 2 * numbered_allocated + 1;
283 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
284 }
285 spec.numbered[spec.numbered_arg_count].number = precision_number;
286 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
287 spec.numbered_arg_count++;
288 }
289 else
290 {
291 /* Unnumbered argument. */
292
293 /* Numbered and unnumbered specifications are exclusive. */
294 if (spec.numbered_arg_count > 0)
295 {
296 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
297 FDI_SET (format - 1, FMTDIR_ERROR);
298 goto bad_format;
299 }
300
301 if (numbered_allocated == unnumbered_arg_count)
302 {
303 numbered_allocated = 2 * numbered_allocated + 1;
304 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
305 }
306 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
307 spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
308 unnumbered_arg_count++;
309 }
310 }
311 else if (isdigit (*format))
312 {
313 do format++; while (isdigit (*format));
314 }
315 }
316
317 switch (*format)
318 {
319 case '%':
320 type = FAT_NONE;
321 break;
322 case 'c':
323 type = FAT_CHARACTER;
324 break;
325 case 's':
326 type = FAT_STRING;
327 break;
328 case 'i': case 'd':
329 type = FAT_INTEGER;
330 break;
331 case 'u': case 'o': case 'x': case 'X':
332 type = FAT_UNSIGNED_INTEGER;
333 break;
334 case 'e': case 'E': case 'f': case 'g': case 'G':
335 type = FAT_FLOAT;
336 break;
337 default:
338 if (*format == '\0')
339 {
340 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
341 FDI_SET (format - 1, FMTDIR_ERROR);
342 }
343 else
344 {
345 *invalid_reason =
346 INVALID_CONVERSION_SPECIFIER (spec.directives, *format);
347 FDI_SET (format, FMTDIR_ERROR);
348 }
349 goto bad_format;
350 }
351
352 if (type != FAT_NONE)
353 {
354 if (number)
355 {
356 /* Numbered argument. */
357
358 /* Numbered and unnumbered specifications are exclusive. */
359 if (unnumbered_arg_count > 0)
360 {
361 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
362 FDI_SET (format, FMTDIR_ERROR);
363 goto bad_format;
364 }
365
366 if (numbered_allocated == spec.numbered_arg_count)
367 {
368 numbered_allocated = 2 * numbered_allocated + 1;
369 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
370 }
371 spec.numbered[spec.numbered_arg_count].number = number;
372 spec.numbered[spec.numbered_arg_count].type = type;
373 spec.numbered_arg_count++;
374 }
375 else
376 {
377 /* Unnumbered argument. */
378
379 /* Numbered and unnumbered specifications are exclusive. */
380 if (spec.numbered_arg_count > 0)
381 {
382 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
383 FDI_SET (format, FMTDIR_ERROR);
384 goto bad_format;
385 }
386
387 if (numbered_allocated == unnumbered_arg_count)
388 {
389 numbered_allocated = 2 * numbered_allocated + 1;
390 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
391 }
392 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
393 spec.numbered[unnumbered_arg_count].type = type;
394 unnumbered_arg_count++;
395 }
396 }
397
398 FDI_SET (format, FMTDIR_END);
399
400 format++;
401 }
402
403 /* Convert the unnumbered argument array to numbered arguments. */
404 if (unnumbered_arg_count > 0)
405 spec.numbered_arg_count = unnumbered_arg_count;
406 /* Sort the numbered argument array, and eliminate duplicates. */
407 else if (spec.numbered_arg_count > 1)
408 {
409 unsigned int i, j;
410 bool err;
411
412 qsort (spec.numbered, spec.numbered_arg_count,
413 sizeof (struct numbered_arg), numbered_arg_compare);
414
415 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
416 err = false;
417 for (i = j = 0; i < spec.numbered_arg_count; i++)
418 if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
419 {
420 enum format_arg_type type1 = spec.numbered[i].type;
421 enum format_arg_type type2 = spec.numbered[j-1].type;
422 enum format_arg_type type_both;
423
424 if (type1 == type2)
425 type_both = type1;
426 else
427 {
428 /* Incompatible types. */
429 type_both = FAT_NONE;
430 if (!err)
431 *invalid_reason =
432 INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number);
433 err = true;
434 }
435
436 spec.numbered[j-1].type = type_both;
437 }
438 else
439 {
440 if (j < i)
441 {
442 spec.numbered[j].number = spec.numbered[i].number;
443 spec.numbered[j].type = spec.numbered[i].type;
444 }
445 j++;
446 }
447 spec.numbered_arg_count = j;
448 if (err)
449 /* *invalid_reason has already been set above. */
450 goto bad_format;
451 }
452
453 result = XMALLOC (struct spec);
454 *result = spec;
455 return result;
456
457 bad_format:
458 if (spec.numbered != NULL)
459 free (spec.numbered);
460 return NULL;
461 }
462
463 static void
format_free(void * descr)464 format_free (void *descr)
465 {
466 struct spec *spec = (struct spec *) descr;
467
468 if (spec->numbered != NULL)
469 free (spec->numbered);
470 free (spec);
471 }
472
473 static int
format_get_number_of_directives(void * descr)474 format_get_number_of_directives (void *descr)
475 {
476 struct spec *spec = (struct spec *) descr;
477
478 return spec->directives;
479 }
480
481 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgid,const char * pretty_msgstr)482 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
483 formatstring_error_logger_t error_logger,
484 const char *pretty_msgid, const char *pretty_msgstr)
485 {
486 struct spec *spec1 = (struct spec *) msgid_descr;
487 struct spec *spec2 = (struct spec *) msgstr_descr;
488 bool err = false;
489
490 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
491 {
492 unsigned int i, j;
493 unsigned int n1 = spec1->numbered_arg_count;
494 unsigned int n2 = spec2->numbered_arg_count;
495
496 /* Check the argument names are the same.
497 Both arrays are sorted. We search for the first difference. */
498 for (i = 0, j = 0; i < n1 || j < n2; )
499 {
500 int cmp = (i >= n1 ? 1 :
501 j >= n2 ? -1 :
502 spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
503 spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
504 0);
505
506 if (cmp > 0)
507 {
508 if (error_logger)
509 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in '%s'"),
510 spec2->numbered[j].number, pretty_msgstr,
511 pretty_msgid);
512 err = true;
513 break;
514 }
515 else if (cmp < 0)
516 {
517 if (equality)
518 {
519 if (error_logger)
520 error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
521 spec1->numbered[i].number, pretty_msgstr);
522 err = true;
523 break;
524 }
525 else
526 i++;
527 }
528 else
529 j++, i++;
530 }
531 /* Check the argument types are the same. */
532 if (!err)
533 for (i = 0, j = 0; j < n2; )
534 {
535 if (spec1->numbered[i].number == spec2->numbered[j].number)
536 {
537 if (spec1->numbered[i].type != spec2->numbered[j].type)
538 {
539 if (error_logger)
540 error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
541 pretty_msgid, pretty_msgstr,
542 spec2->numbered[j].number);
543 err = true;
544 break;
545 }
546 j++, i++;
547 }
548 else
549 i++;
550 }
551 }
552
553 return err;
554 }
555
556
/* Parser descriptor for awk format strings, filled in positionally with the
   callbacks defined in this file.  The meaning of each slot is fixed by the
   declaration of 'struct formatstring_parser', which is not in this file
   (presumably in format.h -- confirm there before reordering).  */
struct formatstring_parser formatstring_awk =
{
  format_parse,
  format_free,
  format_get_number_of_directives,
  /* NOTE(review): no callback is supplied for the fourth slot; see the
     struct declaration for what that slot is.  */
  NULL,
  format_check
};
565
566
567 #ifdef TEST
568
569 /* Test program: Print the argument list specification returned by
570 format_parse for strings read from standard input. */
571
572 #include <stdio.h>
573
574 static void
format_print(void * descr)575 format_print (void *descr)
576 {
577 struct spec *spec = (struct spec *) descr;
578 unsigned int last;
579 unsigned int i;
580
581 if (spec == NULL)
582 {
583 printf ("INVALID");
584 return;
585 }
586
587 printf ("(");
588 last = 1;
589 for (i = 0; i < spec->numbered_arg_count; i++)
590 {
591 unsigned int number = spec->numbered[i].number;
592
593 if (i > 0)
594 printf (" ");
595 if (number < last)
596 abort ();
597 for (; last < number; last++)
598 printf ("_ ");
599 switch (spec->numbered[i].type)
600 {
601 case FAT_CHARACTER:
602 printf ("c");
603 break;
604 case FAT_STRING:
605 printf ("s");
606 break;
607 case FAT_INTEGER:
608 printf ("i");
609 break;
610 case FAT_UNSIGNED_INTEGER:
611 printf ("[unsigned]i");
612 break;
613 case FAT_FLOAT:
614 printf ("f");
615 break;
616 default:
617 abort ();
618 }
619 last = number + 1;
620 }
621 printf (")");
622 }
623
/* Test driver: read format strings from standard input, one per line, and
   for each print the argument list computed by format_parse, followed by
   the reason when the string is invalid.  Stops at end of input or on a
   read error.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated a buffer even though it failed;
             release it before leaving the loop.  */
          free (line);
          break;
        }
      /* Strip the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, NULL, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
655
656 /*
657 * For Emacs M-x compile
658 * Local Variables:
659 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-awk.c ../gnulib-lib/libgettextlib.la"
660 * End:
661 */
662
663 #endif /* TEST */
664