1 /* GNU m4 -- A simple macro processor
2
3 Copyright (C) 1989-1994, 2006-2014, 2016-2017, 2020-2021 Free
4 Software Foundation, Inc.
5
6 This file is part of GNU M4.
7
8 GNU M4 is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 GNU M4 is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <https://www.gnu.org/licenses/>.
20 */
21
22 /* printf like formatting for m4. */
23
#include "m4.h"
#include "xvasprintf.h"

#include <limits.h>
26
27 /* Simple varargs substitute. We assume int and unsigned int are the
28 same size; likewise for long and unsigned long. */
29
30 /* Parse STR as an integer, reporting warnings. */
31 static int
arg_int(const char * str)32 arg_int (const char *str)
33 {
34 char *endp;
35 long value;
36 size_t len = strlen (str);
37
38 if (!len)
39 {
40 M4ERROR ((warning_status, 0, _("empty string treated as 0")));
41 return 0;
42 }
43 errno = 0;
44 value = strtol (str, &endp, 10);
45 if (endp - str - len)
46 M4ERROR ((warning_status, 0, _("non-numeric argument %s"), str));
47 else if (c_isspace (*str))
48 M4ERROR ((warning_status, 0, _("leading whitespace ignored")));
49 else if (errno == ERANGE || (int) value != value)
50 M4ERROR ((warning_status, 0, _("numeric overflow detected")));
51 return value;
52 }
53
54 /* Parse STR as a long, reporting warnings. */
55 static long
arg_long(const char * str)56 arg_long (const char *str)
57 {
58 char *endp;
59 long value;
60 size_t len = strlen (str);
61
62 if (!len)
63 {
64 M4ERROR ((warning_status, 0, _("empty string treated as 0")));
65 return 0L;
66 }
67 errno = 0;
68 value = strtol (str, &endp, 10);
69 if (endp - str - len)
70 M4ERROR ((warning_status, 0, _("non-numeric argument %s"), str));
71 else if (c_isspace (*str))
72 M4ERROR ((warning_status, 0, _("leading whitespace ignored")));
73 else if (errno == ERANGE)
74 M4ERROR ((warning_status, 0, _("numeric overflow detected")));
75 return value;
76 }
77
78 /* Parse STR as a double, reporting warnings. */
79 static double
arg_double(const char * str)80 arg_double (const char *str)
81 {
82 char *endp;
83 double value;
84 size_t len = strlen (str);
85
86 if (!len)
87 {
88 M4ERROR ((warning_status, 0, _("empty string treated as 0")));
89 return 0.0;
90 }
91 errno = 0;
92 value = strtod (str, &endp);
93 if (endp - str - len)
94 M4ERROR ((warning_status, 0, _("non-numeric argument %s"), str));
95 else if (c_isspace (*str))
96 M4ERROR ((warning_status, 0, _("leading whitespace ignored")));
97 else if (errno == ERANGE)
98 M4ERROR ((warning_status, 0, _("numeric overflow detected")));
99 return value;
100 }
101
/* Argument fetchers.  ARGC and ARGV must be modifiable lvalues.
   While ARGC is nonzero, each macro steps ARGV forward by one entry,
   decrements ARGC, and yields the text of the entry just passed
   (converted by the matching arg_* function, or unconverted for
   ARG_STR).  Once ARGC is 0, ARGC and ARGV are left untouched and
   the macro yields 0, or "" for ARG_STR.  */

#define ARG_INT(argc, argv)                                             \
  (((argc) == 0)                                                        \
   ? 0                                                                  \
   : ((argv)++, --(argc), arg_int (TOKEN_DATA_TEXT ((argv)[-1]))))

#define ARG_LONG(argc, argv)                                            \
  (((argc) == 0)                                                        \
   ? 0                                                                  \
   : ((argv)++, --(argc), arg_long (TOKEN_DATA_TEXT ((argv)[-1]))))

#define ARG_STR(argc, argv)                                             \
  (((argc) == 0)                                                        \
   ? ""                                                                 \
   : ((argv)++, --(argc), TOKEN_DATA_TEXT ((argv)[-1])))

#define ARG_DOUBLE(argc, argv)                                          \
  (((argc) == 0)                                                        \
   ? 0                                                                  \
   : ((argv)++, --(argc), arg_double (TOKEN_DATA_TEXT ((argv)[-1]))))
117
118
119 /*------------------------------------------------------------------.
120 | The main formatting function. Output is placed on the obstack |
121 | OBS, the first argument in ARGV is the formatting string, and the |
122 | rest is arguments for the string. Warn rather than invoke |
123 | unspecified behavior in the underlying printf when we do not |
124 | recognize a format. |
125 `------------------------------------------------------------------*/
126
127 void
expand_format(struct obstack * obs,int argc,token_data ** argv)128 expand_format (struct obstack *obs, int argc, token_data **argv)
129 {
130 const char *f; /* format control string */
131 const char *fmt; /* position within f */
132 char fstart[] = "%'+- 0#*.*hhd"; /* current format spec */
133 char *p; /* position within fstart */
134 unsigned char c; /* a simple character */
135
136 /* Flags. */
137 char flags; /* flags to use in fstart */
138 enum {
139 THOUSANDS = 0x01, /* ' */
140 PLUS = 0x02, /* + */
141 MINUS = 0x04, /* - */
142 SPACE = 0x08, /* */
143 ZERO = 0x10, /* 0 */
144 ALT = 0x20, /* # */
145 DONE = 0x40 /* no more flags */
146 };
147
148 /* Precision specifiers. */
149 int width; /* minimum field width */
150 int prec; /* precision */
151 char lflag; /* long flag */
152
153 /* Specifiers we are willing to accept. ok['x'] implies %x is ok.
154 Various modifiers reduce the set, in order to avoid undefined
155 behavior in printf. */
156 char ok[128];
157
158 /* Buffer and stuff. */
159 char *str; /* malloc'd buffer of formatted text */
160 enum {CHAR, INT, LONG, DOUBLE, STR} datatype;
161
162 f = fmt = ARG_STR (argc, argv);
163 memset (ok, 0, sizeof ok);
164 while (1)
165 {
166 const char *percent = strchr (fmt, '%');
167 if (!percent)
168 {
169 obstack_grow (obs, fmt, strlen (fmt));
170 return;
171 }
172 obstack_grow (obs, fmt, percent - fmt);
173 fmt = percent + 1;
174
175 if (*fmt == '%')
176 {
177 obstack_1grow (obs, '%');
178 fmt++;
179 continue;
180 }
181
182 p = fstart + 1; /* % */
183 lflag = 0;
184 ok['a'] = ok['A'] = ok['c'] = ok['d'] = ok['e'] = ok['E']
185 = ok['f'] = ok['F'] = ok['g'] = ok['G'] = ok['i'] = ok['o']
186 = ok['s'] = ok['u'] = ok['x'] = ok['X'] = 1;
187
188 /* Parse flags. */
189 flags = 0;
190 do
191 {
192 switch (*fmt)
193 {
194 case '\'': /* thousands separator */
195 ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E']
196 = ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0;
197 flags |= THOUSANDS;
198 break;
199
200 case '+': /* mandatory sign */
201 ok['c'] = ok['o'] = ok['s'] = ok['u'] = ok['x'] = ok['X'] = 0;
202 flags |= PLUS;
203 break;
204
205 case ' ': /* space instead of positive sign */
206 ok['c'] = ok['o'] = ok['s'] = ok['u'] = ok['x'] = ok['X'] = 0;
207 flags |= SPACE;
208 break;
209
210 case '0': /* zero padding */
211 ok['c'] = ok['s'] = 0;
212 flags |= ZERO;
213 break;
214
215 case '#': /* alternate output */
216 ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0;
217 flags |= ALT;
218 break;
219
220 case '-': /* left justification */
221 flags |= MINUS;
222 break;
223
224 default:
225 flags |= DONE;
226 break;
227 }
228 }
229 while (!(flags & DONE) && fmt++);
230 if (flags & THOUSANDS)
231 *p++ = '\'';
232 if (flags & PLUS)
233 *p++ = '+';
234 if (flags & MINUS)
235 *p++ = '-';
236 if (flags & SPACE)
237 *p++ = ' ';
238 if (flags & ZERO)
239 *p++ = '0';
240 if (flags & ALT)
241 *p++ = '#';
242
243 /* Minimum field width; an explicit 0 is the same as not giving
244 the width. */
245 width = 0;
246 *p++ = '*';
247 if (*fmt == '*')
248 {
249 width = ARG_INT (argc, argv);
250 fmt++;
251 }
252 else
253 while (c_isdigit (*fmt))
254 {
255 width = 10 * width + *fmt - '0';
256 fmt++;
257 }
258
259 /* Maximum precision; an explicit negative precision is the same
260 as not giving the precision. A lone '.' is a precision of 0. */
261 prec = -1;
262 *p++ = '.';
263 *p++ = '*';
264 if (*fmt == '.')
265 {
266 ok['c'] = 0;
267 if (*(++fmt) == '*')
268 {
269 prec = ARG_INT (argc, argv);
270 ++fmt;
271 }
272 else
273 {
274 prec = 0;
275 while (c_isdigit (*fmt))
276 {
277 prec = 10 * prec + *fmt - '0';
278 fmt++;
279 }
280 }
281 }
282
283 /* Length modifiers. We don't yet recognize ll, j, t, or z. */
284 if (*fmt == 'l')
285 {
286 *p++ = 'l';
287 lflag = 1;
288 fmt++;
289 ok['c'] = ok['s'] = 0;
290 }
291 else if (*fmt == 'h')
292 {
293 *p++ = 'h';
294 fmt++;
295 if (*fmt == 'h')
296 {
297 *p++ = 'h';
298 fmt++;
299 }
300 ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] = ok['f'] = ok['F']
301 = ok['g'] = ok['G'] = ok['s'] = 0;
302 }
303
304 c = *fmt++;
305 if (sizeof ok <= c || !ok[c])
306 {
307 M4ERROR ((warning_status, 0,
308 _("Warning: unrecognized specifier in `%s'"), f));
309 if (c == '\0')
310 fmt--;
311 continue;
312 }
313
314 /* Specifiers. We don't yet recognize C, S, n, or p. */
315 switch (c)
316 {
317 case 'c':
318 datatype = CHAR;
319 p -= 2; /* %.*c is undefined, so undo the '.*'. */
320 break;
321
322 case 's':
323 datatype = STR;
324 break;
325
326 case 'd':
327 case 'i':
328 case 'o':
329 case 'x':
330 case 'X':
331 case 'u':
332 datatype = lflag ? LONG : INT;
333 break;
334
335 case 'a':
336 case 'A':
337 case 'e':
338 case 'E':
339 case 'f':
340 case 'F':
341 case 'g':
342 case 'G':
343 datatype = DOUBLE;
344 break;
345
346 default:
347 abort ();
348 }
349 *p++ = c;
350 *p = '\0';
351
352 /* Our constructed format string in fstart is safe. */
353 #if 4 < __GNUC__ + (6 <= __GNUC_MINOR__)
354 # pragma GCC diagnostic push
355 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
356 #endif
357
358 switch (datatype)
359 {
360 case CHAR:
361 str = xasprintf (fstart, width, ARG_INT(argc, argv));
362 break;
363
364 case INT:
365 str = xasprintf (fstart, width, prec, ARG_INT(argc, argv));
366 break;
367
368 case LONG:
369 str = xasprintf (fstart, width, prec, ARG_LONG(argc, argv));
370 break;
371
372 case DOUBLE:
373 str = xasprintf (fstart, width, prec, ARG_DOUBLE(argc, argv));
374 break;
375
376 case STR:
377 str = xasprintf (fstart, width, prec, ARG_STR(argc, argv));
378 break;
379
380 default:
381 abort();
382 }
383 #if 4 < __GNUC__ + (6 <= __GNUC_MINOR__)
384 # pragma GCC diagnostic pop
385 #endif
386
387 /* NULL was returned on failure, such as invalid format string. For
388 now, just silently ignore that bad specifier. */
389 if (str == NULL)
390 continue;
391
392 obstack_grow (obs, str, strlen (str));
393 free (str);
394 }
395 }
396