1 /* awk format strings.
2    Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc.
3    Written by Bruno Haible <haible@clisp.cons.org>, 2002.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software Foundation,
17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 
23 #include <stdbool.h>
24 #include <stdlib.h>
25 
26 #include "format.h"
27 #include "c-ctype.h"
28 #include "xalloc.h"
29 #include "xvasprintf.h"
30 #include "format-invalid.h"
31 #include "gettext.h"
32 
33 #define _(str) gettext (str)
34 
35 /* awk format strings are described in the gawk-3.1 documentation and
36    implemented in gawk-3.1.0/builtin.c: format_tree().
37    A directive
38    - starts with '%' or '%m$' where m is a positive integer,
39    - is optionally followed by any of the characters '#', '0', '-', ' ', '+',
40      each of which acts as a flag,
41    - is optionally followed by a width specification: '*' (reads an argument)
42      or '*m$' or a nonempty digit sequence,
43    - is optionally followed by '.' and a precision specification: '*' (reads
44      an argument) or '*m$' or a nonempty digit sequence,
45    - is finished by a specifier
46        - '%', that needs no argument,
       - 'c', that needs a character argument,
       - 's', that needs a string argument,
49        - 'i', 'd', that need a signed integer argument,
50        - 'o', 'u', 'x', 'X', that need an unsigned integer argument,
51        - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument.
52    Numbered ('%m$' or '*m$') and unnumbered argument specifications cannot
53    be used in the same string.
54  */
55 
/* Kind of value a directive consumes.  FAT_NONE marks a directive that
   takes no argument ('%%'); it is also used as the merged type when one
   argument number is referenced with conflicting types.  */
enum format_arg_type
{
  FAT_NONE = 0,                 /* no argument */
  FAT_CHARACTER = 1,            /* 'c' */
  FAT_STRING = 2,               /* 's' */
  FAT_INTEGER = 3,              /* 'i', 'd', and '*' width/precision */
  FAT_UNSIGNED_INTEGER = 4,     /* 'o', 'u', 'x', 'X' */
  FAT_FLOAT = 5                 /* 'e', 'E', 'f', 'g', 'G' */
};
65 
66 struct numbered_arg
67 {
68   unsigned int number;
69   enum format_arg_type type;
70 };
71 
/* Parsed description of a format string, as produced by format_parse.  */
struct spec
{
  /* Number of directives seen; every '%' that starts a directive counts,
     including '%%'.  */
  unsigned int directives;
  /* Number of valid entries in NUMBERED.  */
  unsigned int numbered_arg_count;
  /* Number of entries NUMBERED has room for.  */
  unsigned int allocated;
  /* Heap-allocated array of referenced arguments, or NULL if none was
     ever recorded.  */
  struct numbered_arg *numbered;
};
79 
/* Decimal-digit test that does not depend on the locale.
   Unlike <ctype.h> isdigit, whose argument must be an 'unsigned char',
   this accepts both 'char' and 'unsigned char' values.  The distance from
   '0' is converted to 'unsigned int', so characters below '0' wrap around
   to huge values and a single comparison covers both bounds.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') <= 9)
85 
86 
87 static int
numbered_arg_compare(const void * p1,const void * p2)88 numbered_arg_compare (const void *p1, const void *p2)
89 {
90   unsigned int n1 = ((const struct numbered_arg *) p1)->number;
91   unsigned int n2 = ((const struct numbered_arg *) p2)->number;
92 
93   return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
94 }
95 
96 static void *
format_parse(const char * format,bool translated,char ** invalid_reason)97 format_parse (const char *format, bool translated, char **invalid_reason)
98 {
99   struct spec spec;
100   unsigned int unnumbered_arg_count;
101   struct spec *result;
102 
103   spec.directives = 0;
104   spec.numbered_arg_count = 0;
105   spec.allocated = 0;
106   spec.numbered = NULL;
107   unnumbered_arg_count = 0;
108 
109   for (; *format != '\0';)
110     if (*format++ == '%')
111       {
112 	/* A directive.  */
113 	unsigned int number = 0;
114 	enum format_arg_type type;
115 
116 	spec.directives++;
117 
118 	if (isdigit (*format))
119 	  {
120 	    const char *f = format;
121 	    unsigned int m = 0;
122 
123 	    do
124 	      {
125 		m = 10 * m + (*f - '0');
126 		f++;
127 	      }
128 	    while (isdigit (*f));
129 
130 	    if (*f == '$')
131 	      {
132 		if (m == 0)
133 		  {
134 		    *invalid_reason = INVALID_ARGNO_0 (spec.directives);
135 		    goto bad_format;
136 		  }
137 		number = m;
138 		format = ++f;
139 	      }
140 	  }
141 
142 	/* Parse flags.  */
143 	while (*format == ' ' || *format == '+' || *format == '-'
144 	       || *format == '#' || *format == '0')
145 	  format++;
146 
147 	/* Parse width.  */
148 	if (*format == '*')
149 	  {
150 	    unsigned int width_number = 0;
151 
152 	    format++;
153 
154 	    if (isdigit (*format))
155 	      {
156 		const char *f = format;
157 		unsigned int m = 0;
158 
159 		do
160 		  {
161 		    m = 10 * m + (*f - '0');
162 		    f++;
163 		  }
164 		while (isdigit (*f));
165 
166 		if (*f == '$')
167 		  {
168 		    if (m == 0)
169 		      {
170 			*invalid_reason =
171 			  INVALID_WIDTH_ARGNO_0 (spec.directives);
172 			goto bad_format;
173 		      }
174 		    width_number = m;
175 		    format = ++f;
176 		  }
177 	      }
178 
179 	    if (width_number)
180 	      {
181 		/* Numbered argument.  */
182 
183 		/* Numbered and unnumbered specifications are exclusive.  */
184 		if (unnumbered_arg_count > 0)
185 		  {
186 		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
187 		    goto bad_format;
188 		  }
189 
190 		if (spec.allocated == spec.numbered_arg_count)
191 		  {
192 		    spec.allocated = 2 * spec.allocated + 1;
193 		    spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
194 		  }
195 		spec.numbered[spec.numbered_arg_count].number = width_number;
196 		spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
197 		spec.numbered_arg_count++;
198 	      }
199 	    else
200 	      {
201 		/* Unnumbered argument.  */
202 
203 		/* Numbered and unnumbered specifications are exclusive.  */
204 		if (spec.numbered_arg_count > 0)
205 		  {
206 		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
207 		    goto bad_format;
208 		  }
209 
210 		if (spec.allocated == unnumbered_arg_count)
211 		  {
212 		    spec.allocated = 2 * spec.allocated + 1;
213 		    spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
214 		  }
215 		spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
216 		spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
217 		unnumbered_arg_count++;
218 	      }
219 	  }
220 	else if (isdigit (*format))
221 	  {
222 	    do format++; while (isdigit (*format));
223 	  }
224 
225 	/* Parse precision.  */
226 	if (*format == '.')
227 	  {
228 	    format++;
229 
230 	    if (*format == '*')
231 	      {
232 		unsigned int precision_number = 0;
233 
234 		format++;
235 
236 		if (isdigit (*format))
237 		  {
238 		    const char *f = format;
239 		    unsigned int m = 0;
240 
241 		    do
242 		      {
243 			m = 10 * m + (*f - '0');
244 			f++;
245 		      }
246 		    while (isdigit (*f));
247 
248 		    if (*f == '$')
249 		      {
250 			if (m == 0)
251 			  {
252 			    *invalid_reason =
253 			      INVALID_PRECISION_ARGNO_0 (spec.directives);
254 			    goto bad_format;
255 			  }
256 			precision_number = m;
257 			format = ++f;
258 		      }
259 		  }
260 
261 		if (precision_number)
262 		  {
263 		    /* Numbered argument.  */
264 
265 		    /* Numbered and unnumbered specifications are exclusive.  */
266 		    if (unnumbered_arg_count > 0)
267 		      {
268 			*invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
269 			goto bad_format;
270 		      }
271 
272 		    if (spec.allocated == spec.numbered_arg_count)
273 		      {
274 			spec.allocated = 2 * spec.allocated + 1;
275 			spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
276 		      }
277 		    spec.numbered[spec.numbered_arg_count].number = precision_number;
278 		    spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
279 		    spec.numbered_arg_count++;
280 		  }
281 		else
282 		  {
283 		    /* Unnumbered argument.  */
284 
285 		    /* Numbered and unnumbered specifications are exclusive.  */
286 		    if (spec.numbered_arg_count > 0)
287 		      {
288 			*invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
289 			goto bad_format;
290 		      }
291 
292 		    if (spec.allocated == unnumbered_arg_count)
293 		      {
294 			spec.allocated = 2 * spec.allocated + 1;
295 			spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
296 		      }
297 		    spec.numbered[unnumbered_arg_count].type = unnumbered_arg_count + 1;
298 		    spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
299 		    unnumbered_arg_count++;
300 		  }
301 	      }
302 	    else if (isdigit (*format))
303 	      {
304 		do format++; while (isdigit (*format));
305 	      }
306 	  }
307 
308 	switch (*format)
309 	  {
310 	  case '%':
311 	    type = FAT_NONE;
312 	    break;
313 	  case 'c':
314 	    type = FAT_CHARACTER;
315 	    break;
316 	  case 's':
317 	    type = FAT_STRING;
318 	    break;
319 	  case 'i': case 'd':
320 	    type = FAT_INTEGER;
321 	    break;
322 	  case 'u': case 'o': case 'x': case 'X':
323 	    type = FAT_UNSIGNED_INTEGER;
324 	    break;
325 	  case 'e': case 'E': case 'f': case 'g': case 'G':
326 	    type = FAT_FLOAT;
327 	    break;
328 	  default:
329 	    *invalid_reason =
330 	      (*format == '\0'
331 	       ? INVALID_UNTERMINATED_DIRECTIVE ()
332 	       : INVALID_CONVERSION_SPECIFIER (spec.directives, *format));
333 	    goto bad_format;
334 	  }
335 
336 	if (type != FAT_NONE)
337 	  {
338 	    if (number)
339 	      {
340 		/* Numbered argument.  */
341 
342 		/* Numbered and unnumbered specifications are exclusive.  */
343 		if (unnumbered_arg_count > 0)
344 		  {
345 		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
346 		    goto bad_format;
347 		  }
348 
349 		if (spec.allocated == spec.numbered_arg_count)
350 		  {
351 		    spec.allocated = 2 * spec.allocated + 1;
352 		    spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
353 		  }
354 		spec.numbered[spec.numbered_arg_count].number = number;
355 		spec.numbered[spec.numbered_arg_count].type = type;
356 		spec.numbered_arg_count++;
357 	      }
358 	    else
359 	      {
360 		/* Unnumbered argument.  */
361 
362 		/* Numbered and unnumbered specifications are exclusive.  */
363 		if (spec.numbered_arg_count > 0)
364 		  {
365 		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
366 		    goto bad_format;
367 		  }
368 
369 		if (spec.allocated == unnumbered_arg_count)
370 		  {
371 		    spec.allocated = 2 * spec.allocated + 1;
372 		    spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
373 		  }
374 		spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
375 		spec.numbered[unnumbered_arg_count].type = type;
376 		unnumbered_arg_count++;
377 	      }
378 	  }
379 
380 	format++;
381       }
382 
383   /* Convert the unnumbered argument array to numbered arguments.  */
384   if (unnumbered_arg_count > 0)
385     spec.numbered_arg_count = unnumbered_arg_count;
386   /* Sort the numbered argument array, and eliminate duplicates.  */
387   else if (spec.numbered_arg_count > 1)
388     {
389       unsigned int i, j;
390       bool err;
391 
392       qsort (spec.numbered, spec.numbered_arg_count,
393 	     sizeof (struct numbered_arg), numbered_arg_compare);
394 
395       /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
396       err = false;
397       for (i = j = 0; i < spec.numbered_arg_count; i++)
398 	if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
399 	  {
400 	    enum format_arg_type type1 = spec.numbered[i].type;
401 	    enum format_arg_type type2 = spec.numbered[j-1].type;
402 	    enum format_arg_type type_both;
403 
404 	    if (type1 == type2)
405 	      type_both = type1;
406 	    else
407 	      {
408 		/* Incompatible types.  */
409 		type_both = FAT_NONE;
410 		if (!err)
411 		  *invalid_reason =
412 		    INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number);
413 		err = true;
414 	      }
415 
416 	    spec.numbered[j-1].type = type_both;
417 	  }
418 	else
419 	  {
420 	    if (j < i)
421 	      {
422 		spec.numbered[j].number = spec.numbered[i].number;
423 		spec.numbered[j].type = spec.numbered[i].type;
424 	      }
425 	    j++;
426 	  }
427       spec.numbered_arg_count = j;
428       if (err)
429 	/* *invalid_reason has already been set above.  */
430 	goto bad_format;
431     }
432 
433   result = (struct spec *) xmalloc (sizeof (struct spec));
434   *result = spec;
435   return result;
436 
437  bad_format:
438   if (spec.numbered != NULL)
439     free (spec.numbered);
440   return NULL;
441 }
442 
443 static void
format_free(void * descr)444 format_free (void *descr)
445 {
446   struct spec *spec = (struct spec *) descr;
447 
448   if (spec->numbered != NULL)
449     free (spec->numbered);
450   free (spec);
451 }
452 
453 static int
format_get_number_of_directives(void * descr)454 format_get_number_of_directives (void *descr)
455 {
456   struct spec *spec = (struct spec *) descr;
457 
458   return spec->directives;
459 }
460 
461 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgstr)462 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
463 	      formatstring_error_logger_t error_logger,
464 	      const char *pretty_msgstr)
465 {
466   struct spec *spec1 = (struct spec *) msgid_descr;
467   struct spec *spec2 = (struct spec *) msgstr_descr;
468   bool err = false;
469 
470   if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
471     {
472       unsigned int i, j;
473       unsigned int n1 = spec1->numbered_arg_count;
474       unsigned int n2 = spec2->numbered_arg_count;
475 
476       /* Check the argument names are the same.
477 	 Both arrays are sorted.  We search for the first difference.  */
478       for (i = 0, j = 0; i < n1 || j < n2; )
479 	{
480 	  int cmp = (i >= n1 ? 1 :
481 		     j >= n2 ? -1 :
482 		     spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
483 		     spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
484 		     0);
485 
486 	  if (cmp > 0)
487 	    {
488 	      if (error_logger)
489 		error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"),
490 			      spec2->numbered[j].number, pretty_msgstr);
491 	      err = true;
492 	      break;
493 	    }
494 	  else if (cmp < 0)
495 	    {
496 	      if (equality)
497 		{
498 		  if (error_logger)
499 		    error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
500 				  spec1->numbered[i].number, pretty_msgstr);
501 		  err = true;
502 		  break;
503 		}
504 	      else
505 		i++;
506 	    }
507 	  else
508 	    j++, i++;
509 	}
510       /* Check the argument types are the same.  */
511       if (!err)
512 	for (i = 0, j = 0; j < n2; )
513 	  {
514 	    if (spec1->numbered[i].number == spec2->numbered[j].number)
515 	      {
516 		if (spec1->numbered[i].type != spec2->numbered[j].type)
517 		  {
518 		    if (error_logger)
519 		      error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"),
520 				    pretty_msgstr, spec2->numbered[j].number);
521 		    err = true;
522 		    break;
523 		  }
524 		j++, i++;
525 	      }
526 	    else
527 	      i++;
528 	  }
529     }
530 
531   return err;
532 }
533 
534 
535 struct formatstring_parser formatstring_awk =
536 {
537   format_parse,
538   format_free,
539   format_get_number_of_directives,
540   NULL,
541   format_check
542 };
543 
544 
545 #ifdef TEST
546 
547 /* Test program: Print the argument list specification returned by
548    format_parse for strings read from standard input.  */
549 
550 #include <stdio.h>
551 #include "getline.h"
552 
553 static void
format_print(void * descr)554 format_print (void *descr)
555 {
556   struct spec *spec = (struct spec *) descr;
557   unsigned int last;
558   unsigned int i;
559 
560   if (spec == NULL)
561     {
562       printf ("INVALID");
563       return;
564     }
565 
566   printf ("(");
567   last = 1;
568   for (i = 0; i < spec->numbered_arg_count; i++)
569     {
570       unsigned int number = spec->numbered[i].number;
571 
572       if (i > 0)
573 	printf (" ");
574       if (number < last)
575 	abort ();
576       for (; last < number; last++)
577 	printf ("_ ");
578       switch (spec->numbered[i].type)
579 	{
580 	case FAT_CHARACTER:
581 	  printf ("c");
582 	  break;
583 	case FAT_STRING:
584 	  printf ("s");
585 	  break;
586 	case FAT_INTEGER:
587 	  printf ("i");
588 	  break;
589 	case FAT_UNSIGNED_INTEGER:
590 	  printf ("[unsigned]i");
591 	  break;
592 	case FAT_FLOAT:
593 	  printf ("f");
594 	  break;
595 	default:
596 	  abort ();
597 	}
598       last = number + 1;
599     }
600   printf (")");
601 }
602 
603 int
main()604 main ()
605 {
606   for (;;)
607     {
608       char *line = NULL;
609       size_t line_size = 0;
610       int line_len;
611       char *invalid_reason;
612       void *descr;
613 
614       line_len = getline (&line, &line_size, stdin);
615       if (line_len < 0)
616 	break;
617       if (line_len > 0 && line[line_len - 1] == '\n')
618 	line[--line_len] = '\0';
619 
620       invalid_reason = NULL;
621       descr = format_parse (line, false, &invalid_reason);
622 
623       format_print (descr);
624       printf ("\n");
625       if (descr == NULL)
626 	printf ("%s\n", invalid_reason);
627 
628       free (invalid_reason);
629       free (line);
630     }
631 
632   return 0;
633 }
634 
635 /*
636  * For Emacs M-x compile
637  * Local Variables:
638  * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-awk.c ../lib/libgettextlib.la"
639  * End:
640  */
641 
642 #endif /* TEST */
643