1 /* Output like sprintf to a buffer of specified size. -*- coding: utf-8 -*-
2 Also takes args differently: pass one pointer to the end
3 of the format string in addition to the format string itself.
4 Copyright (C) 1985, 2001-2021 Free Software Foundation, Inc.
5
6 This file is part of GNU Emacs.
7
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or (at
11 your option) any later version.
12
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
20
21 /* If you think about replacing this with some similar standard C function of
22 the printf family (such as vsnprintf), please note that this function
23 supports the following Emacs-specific features:
24
25 . For %c conversions, it produces a string with the multibyte representation
26 of the (`int') argument, suitable for display in an Emacs buffer.
27
28 . For %s and %c, when field width is specified (e.g., %25s), it accounts for
29 the display width of each character, according to char-width-table. That
30 is, it does not assume that each character takes one column on display.
31 Nor does it assume that each character is a single byte.
32
33 . If the size of the buffer is not enough to produce the formatted string in
34 its entirety, it makes sure that truncation does not chop the last
35 character in the middle of its multibyte sequence, producing an invalid
36 sequence.
37
38 . It accepts a pointer to the end of the format string, so the format string
39 could include embedded null characters.
40
41 . It signals an error if the length of the formatted string is about to
42 overflow ptrdiff_t or size_t, to avoid producing strings longer than what
43 Emacs can handle.
44
45 OTOH, this function supports only a small subset of the standard C formatted
46 output facilities. E.g., %u is not supported, precision is ignored
47 in %s and %c conversions, and %lld does not necessarily work and
48 code should use something like %"pM"d with intmax_t instead.
49 (See below for the detailed documentation of what is supported.)
50 However, this is okay, as this function is supposed to be called
51 from 'error' and similar C functions, and thus does not need to
52 support all the features of 'Fformat_message', which is used by the
53 Lisp 'error' function. */
54
/* In the FORMAT argument this function supports ` and ' as directives
that output left and right quotes as per ‘text-quoting-style’. It
also supports the following %-sequences:
58
59 %s means print a string argument.
60 %S is treated as %s, for loose compatibility with `Fformat_message'.
61 %d means print a `signed int' argument in decimal.
62 %o means print an `unsigned int' argument in octal.
63 %x means print an `unsigned int' argument in hex.
64 %e means print a `double' argument in exponential notation.
65 %f means print a `double' argument in decimal-point notation.
66 %g means print a `double' argument in exponential notation
67 or in decimal-point notation, depending on the value;
68 this is often (though not always) the shorter of the two notations.
69 %c means print a `signed int' argument as a single character.
70 %% means produce a literal % character.
71
72 A %-sequence other than %% may contain optional flags, width, precision,
73 and length, as follows:
74
75 %<flags><width><precision><length>character
76
77 where flags is [+ -0], width is [0-9]+, precision is .[0-9]+, and length
78 is empty or l or the value of the pD or pI or PRIdMAX (sans "d") macros.
79 A % that does not introduce a valid %-sequence causes undefined behavior.
80 Bytes in FORMAT other than % are copied through as-is.
81
82 The + flag character inserts a + before any positive number, while a space
83 inserts a space before any positive number; these flags only affect %d, %o,
84 %x, %e, %f, and %g sequences. The - and 0 flags affect the width specifier,
85 as described below. For signed numerical arguments only, the ` ' (space)
86 flag causes the result to be prefixed with a space character if it does not
87 start with a sign (+ or -).
88
89 The l (lower-case letter ell) length modifier is a `long' data type
90 modifier: it is supported for %d, %o, and %x conversions of integral
91 arguments, must immediately precede the conversion specifier, and means that
92 the respective argument is to be treated as `long int' or `unsigned long
93 int'. Similarly, the value of the pD macro means to use ptrdiff_t,
94 the value of the pI macro means to use EMACS_INT or EMACS_UINT, the
95 value of the PRIdMAX etc. macros means to use intmax_t or uintmax_t,
96 and the empty length modifier means `int' or `unsigned int'.
97
98 The width specifier supplies a lower limit for the length of the printed
99 representation. The padding, if any, normally goes on the left, but it goes
100 on the right if the - flag is present. The padding character is normally a
101 space, but (for numerical arguments only) it is 0 if the 0 flag is present.
102 The - flag takes precedence over the 0 flag.
103
104 For %e, %f, and %g sequences, the number after the "." in the precision
105 specifier says how many decimal places to show; if zero, the decimal point
106 itself is omitted. For %d, %o, and %x sequences, the precision specifies
107 the minimum number of digits to appear. Precision specifiers are
108 not supported for other %-sequences. */
109
110 #include <config.h>
111 #include <stdio.h>
112 #include <stdlib.h>
113 #include <float.h>
114 #include <unistd.h>
115 #include <limits.h>
116
117 #include "lisp.h"
118
/* Since we use the macro CHAR_HEAD_P, we have to include this, but
we don't have to include other headers because CHAR_HEAD_P does not
depend on any other macro. */
122 #include "character.h"
123
/* Number of bytes of slack added to a directive's width or precision
   when sizing the sprintf scratch buffer.  Enough to handle floating
   point formats with large numbers.  */
enum
{
  SIZE_BOUND_EXTRA = DBL_MAX_10_EXP + 50
};
126
127 /* Parse FMT as an unsigned decimal integer, putting its value into *VALUE.
128 Return the address of the first byte after the integer.
129 If FMT is not an integer, return FMT and store zero into *VALUE. */
130 static char const *
parse_format_integer(char const * fmt,int * value)131 parse_format_integer (char const *fmt, int *value)
132 {
133 int n = 0;
134 bool overflow = false;
135 for (; '0' <= *fmt && *fmt <= '9'; fmt++)
136 {
137 overflow |= INT_MULTIPLY_WRAPV (n, 10, &n);
138 overflow |= INT_ADD_WRAPV (n, *fmt - '0', &n);
139 }
140 if (overflow || min (PTRDIFF_MAX, SIZE_MAX) - SIZE_BOUND_EXTRA < n)
141 error ("Format width or precision too large");
142 *value = n;
143 return fmt;
144 }
145
146 /* Like doprnt, except FORMAT_END must be non-null.
147 Although this function is never exercised in current Emacs,
148 it is retained in case some future Emacs version
149 contains doprnt callers that need such formats.
150 Having a separate function helps GCC optimize doprnt better. */
151 static ptrdiff_t
doprnt_non_null_end(char * buffer,ptrdiff_t bufsize,char const * format,char const * format_end,va_list ap)152 doprnt_non_null_end (char *buffer, ptrdiff_t bufsize, char const *format,
153 char const *format_end, va_list ap)
154 {
155 USE_SAFE_ALLOCA;
156 ptrdiff_t fmtlen = format_end - format;
157 char *fmt = SAFE_ALLOCA (fmtlen + 1);
158 memcpy (fmt, format, fmtlen);
159 fmt[fmtlen] = 0;
160 ptrdiff_t nbytes = doprnt (buffer, bufsize, fmt, NULL, ap);
161 SAFE_FREE ();
162 return nbytes;
163 }
164
165 /* Generate output from a format-spec FORMAT,
166 terminated at either the first NUL or (if FORMAT_END is non-null
167 and there are no NUL bytes between FORMAT and FORMAT_END)
168 terminated at position FORMAT_END.
169 (*FORMAT_END is not part of the format, but must exist and be readable.)
170 Output goes in BUFFER, which has room for BUFSIZE chars.
171 BUFSIZE must be positive. If the output does not fit, truncate it
172 to fit and return BUFSIZE - 1; if this truncates a multibyte
173 sequence, store '\0' into the sequence's first byte.
174 Returns the number of bytes stored into BUFFER, excluding
175 the terminating null byte. Output is always null-terminated.
176 String arguments are passed as C strings.
177 Integers are passed as C integers.
178
179 FIXME: If FORMAT_END is not at a character boundary
180 doprnt_non_null_end will cut the string in the middle of the
181 character and the returned string will have an incomplete character
182 sequence at the end. We may prefer to cut at a character
183 boundary. */
184
185 ptrdiff_t
doprnt(char * buffer,ptrdiff_t bufsize,const char * format,const char * format_end,va_list ap)186 doprnt (char *buffer, ptrdiff_t bufsize, const char *format,
187 const char *format_end, va_list ap)
188 {
189 if (format_end)
190 return doprnt_non_null_end (buffer, bufsize, format, format_end, ap);
191
192 const char *fmt = format; /* Pointer into format string. */
193 char *bufptr = buffer; /* Pointer into output buffer. */
194
195 /* Use this for sprintf unless we need something really big. */
196 char tembuf[SIZE_BOUND_EXTRA + 50];
197
198 /* Size of sprintf_buffer. */
199 ptrdiff_t size_allocated = sizeof (tembuf);
200
201 /* Buffer to use for sprintf. Either tembuf or same as BIG_BUFFER. */
202 char *sprintf_buffer = tembuf;
203
204 /* Buffer we have got with malloc. */
205 char *big_buffer = NULL;
206
207 Lisp_Object quoting_style = Ftext_quoting_style ();
208
209 bufsize--;
210
211 /* Loop until end of format string or buffer full. */
212 while (*fmt && bufsize > 0)
213 {
214 char const *fmt0 = fmt;
215 char fmtchar = *fmt++;
216 if (fmtchar == '%')
217 {
218 ptrdiff_t width; /* Columns occupied by STRING on display. */
219 enum {
220 pDlen = sizeof pD - 1,
221 pIlen = sizeof pI - 1,
222 pMlen = sizeof PRIdMAX - 2,
223 maxmlen = max (max (1, pDlen), max (pIlen, pMlen))
224 };
225 enum {
226 no_modifier, long_modifier, pD_modifier, pI_modifier, pM_modifier
227 } length_modifier = no_modifier;
228 static char const modifier_len[] = { 0, 1, pDlen, pIlen, pMlen };
229 int mlen;
230 char charbuf[MAX_MULTIBYTE_LENGTH + 1]; /* Used for %c. */
231
232 /* Width and precision specified by this %-sequence. */
233 int wid = 0, prec = -1;
234
235 /* FMTSTAR will be a "%*.*X"-like version of this %-sequence.
236 Start by putting '%' into FMTSTAR. */
237 char fmtstar[sizeof "%-+ 0*.*d" + maxmlen];
238 char *string = fmtstar;
239 *string++ = '%';
240
241 /* Copy at most one instance of each flag into FMTSTAR. */
242 bool minusflag = false, plusflag = false, zeroflag = false,
243 spaceflag = false;
244 for (;; fmt++)
245 {
246 *string = *fmt;
247 switch (*fmt)
248 {
249 case '-': string += !minusflag; minusflag = true; continue;
250 case '+': string += !plusflag; plusflag = true; continue;
251 case ' ': string += !spaceflag; spaceflag = true; continue;
252 case '0': string += !zeroflag; zeroflag = true; continue;
253 }
254 break;
255 }
256
257 /* Parse width and precision, putting "*.*" into FMTSTAR. */
258 if ('1' <= *fmt && *fmt <= '9')
259 fmt = parse_format_integer (fmt, &wid);
260 if (*fmt == '.')
261 fmt = parse_format_integer (fmt + 1, &prec);
262 *string++ = '*';
263 *string++ = '.';
264 *string++ = '*';
265
266 /* Check for the length modifiers in textual length order, so
267 that longer modifiers override shorter ones. */
268 for (mlen = 1; mlen <= maxmlen; mlen++)
269 {
270 if (mlen == 1 && *fmt == 'l')
271 length_modifier = long_modifier;
272 if (mlen == pDlen && strncmp (fmt, pD, pDlen) == 0)
273 length_modifier = pD_modifier;
274 if (mlen == pIlen && strncmp (fmt, pI, pIlen) == 0)
275 length_modifier = pI_modifier;
276 if (mlen == pMlen && strncmp (fmt, PRIdMAX, pMlen) == 0)
277 length_modifier = pM_modifier;
278 }
279
280 /* Copy optional length modifier and conversion specifier
281 character into FMTSTAR, and append a NUL. */
282 mlen = modifier_len[length_modifier];
283 string = mempcpy (string, fmt, mlen + 1);
284 fmt += mlen;
285 *string = 0;
286
287 /* An idea of how much space we might need.
288 This might be a field width or a precision; e.g.
289 %1.1000f and %1000.1f both might need 1000+ bytes.
290 Make it large enough to handle floating point formats
291 with large numbers. */
292 ptrdiff_t size_bound = max (wid, prec) + SIZE_BOUND_EXTRA;
293
294 /* Make sure we have that much. */
295 if (size_bound > size_allocated)
296 {
297 if (big_buffer)
298 xfree (big_buffer);
299 big_buffer = xmalloc (size_bound);
300 sprintf_buffer = big_buffer;
301 size_allocated = size_bound;
302 }
303 int minlen = 0;
304 ptrdiff_t tem;
305 switch (*fmt++)
306 {
307 default:
308 error ("Invalid format operation %s", fmt0);
309
310 case 'd':
311 switch (length_modifier)
312 {
313 case no_modifier:
314 {
315 int v = va_arg (ap, int);
316 tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
317 }
318 break;
319 case long_modifier:
320 {
321 long v = va_arg (ap, long);
322 tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
323 }
324 break;
325 case pD_modifier:
326 signed_pD_modifier:
327 {
328 ptrdiff_t v = va_arg (ap, ptrdiff_t);
329 tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
330 }
331 break;
332 case pI_modifier:
333 {
334 EMACS_INT v = va_arg (ap, EMACS_INT);
335 tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
336 }
337 break;
338 case pM_modifier:
339 {
340 intmax_t v = va_arg (ap, intmax_t);
341 tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
342 }
343 break;
344 default:
345 eassume (false);
346 }
347 /* Now copy into final output, truncating as necessary. */
348 string = sprintf_buffer;
349 goto doit;
350
351 case 'o':
352 case 'x':
353 switch (length_modifier)
354 {
355 case no_modifier:
356 {
357 unsigned v = va_arg (ap, unsigned);
358 tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
359 }
360 break;
361 case long_modifier:
362 {
363 unsigned long v = va_arg (ap, unsigned long);
364 tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
365 }
366 break;
367 case pD_modifier:
368 goto signed_pD_modifier;
369 case pI_modifier:
370 {
371 EMACS_UINT v = va_arg (ap, EMACS_UINT);
372 tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
373 }
374 break;
375 case pM_modifier:
376 {
377 uintmax_t v = va_arg (ap, uintmax_t);
378 tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
379 }
380 break;
381 default:
382 eassume (false);
383 }
384 /* Now copy into final output, truncating as necessary. */
385 string = sprintf_buffer;
386 goto doit;
387
388 case 'f':
389 case 'e':
390 case 'g':
391 {
392 double d = va_arg (ap, double);
393 tem = sprintf (sprintf_buffer, fmtstar, wid, prec, d);
394 /* Now copy into final output, truncating as necessary. */
395 string = sprintf_buffer;
396 goto doit;
397 }
398
399 case 'S':
400 case 's':
401 minlen = minusflag ? -wid : wid;
402 string = va_arg (ap, char *);
403 tem = strnlen (string, STRING_BYTES_BOUND + 1);
404 if (tem == STRING_BYTES_BOUND + 1)
405 error ("String for %%s or %%S format is too long");
406 width = strwidth (string, tem);
407 goto doit1;
408
409 /* Copy string into final output, truncating if no room. */
410 doit:
411 eassert (0 <= tem);
412 /* Coming here means STRING contains ASCII only. */
413 if (STRING_BYTES_BOUND < tem)
414 error ("Format width or precision too large");
415 width = tem;
416 doit1:
417 /* We have already calculated:
418 TEM -- length of STRING,
419 WIDTH -- columns occupied by STRING when displayed, and
420 MINLEN -- minimum columns of the output. */
421 if (minlen > 0)
422 {
423 while (minlen > width && bufsize > 0)
424 {
425 *bufptr++ = ' ';
426 bufsize--;
427 minlen--;
428 }
429 minlen = 0;
430 }
431 if (tem > bufsize)
432 {
433 /* Truncate the string at character boundary. */
434 tem = bufsize;
435 do
436 {
437 tem--;
438 if (CHAR_HEAD_P (string[tem]))
439 {
440 if (BYTES_BY_CHAR_HEAD (string[tem]) <= bufsize - tem)
441 tem = bufsize;
442 break;
443 }
444 }
445 while (tem != 0);
446
447 memcpy (bufptr, string, tem);
448 bufptr[tem] = 0;
449 /* Trigger exit from the loop, but make sure we
450 return to the caller a value which will indicate
451 that the buffer was too small. */
452 bufptr += bufsize;
453 bufsize = 0;
454 continue;
455 }
456 memcpy (bufptr, string, tem);
457 bufptr += tem;
458 bufsize -= tem;
459 if (minlen < 0)
460 {
461 while (minlen < - width && bufsize > 0)
462 {
463 *bufptr++ = ' ';
464 bufsize--;
465 minlen++;
466 }
467 minlen = 0;
468 }
469 continue;
470
471 case 'c':
472 {
473 int chr = va_arg (ap, int);
474 tem = CHAR_STRING (chr, (unsigned char *) charbuf);
475 string = charbuf;
476 string[tem] = 0;
477 width = strwidth (string, tem);
478 minlen = minusflag ? -wid : wid;
479 goto doit1;
480 }
481
482 case '%':
483 /* Treat this '%' as normal. */
484 break;
485 }
486 }
487
488 char const *src;
489 ptrdiff_t srclen;
490 if (EQ (quoting_style, Qcurve) && fmtchar == '`')
491 src = uLSQM, srclen = sizeof uLSQM - 1;
492 else if (EQ (quoting_style, Qcurve) && fmtchar == '\'')
493 src = uRSQM, srclen = sizeof uRSQM - 1;
494 else if (! LEADING_CODE_P (fmtchar))
495 {
496 if (EQ (quoting_style, Qstraight) && fmtchar == '`')
497 fmtchar = '\'';
498
499 *bufptr++ = fmtchar;
500 continue;
501 }
502 else
503 {
504 int charlen = BYTES_BY_CHAR_HEAD (fmtchar);
505 src = fmt0;
506
507 /* If the format string ends in the middle of a multibyte
508 character we don't want to skip over the NUL byte. */
509 for (srclen = 1 ; *(src + srclen) != 0 && srclen < charlen ; srclen++);
510
511 fmt = src + srclen;
512 }
513
514 if (bufsize < srclen)
515 {
516 /* Truncate, but return value that will signal to caller
517 that the buffer was too small. */
518 do
519 *bufptr++ = '\0';
520 while (--bufsize != 0);
521 }
522 else
523 {
524 do
525 *bufptr++ = *src++;
526 while (--srclen != 0);
527 }
528 }
529
530 /* If we had to malloc something, free it. */
531 xfree (big_buffer);
532
533 *bufptr = 0; /* Make sure our string ends with a '\0' */
534 return bufptr - buffer;
535 }
536
537 /* Format to an unbounded buffer BUF. This is like sprintf, except it
538 is not limited to returning an 'int' so it doesn't have a silly 2
539 GiB limit on typical 64-bit hosts. However, it is limited to the
540 Emacs-style formats that doprnt supports, and it requotes ` and '
541 as per ‘text-quoting-style’.
542
543 Return the number of bytes put into BUF, excluding the terminating
544 '\0'. */
545 ptrdiff_t
esprintf(char * buf,char const * format,...)546 esprintf (char *buf, char const *format, ...)
547 {
548 ptrdiff_t nbytes;
549 va_list ap;
550 va_start (ap, format);
551 nbytes = doprnt (buf, TYPE_MAXIMUM (ptrdiff_t), format, 0, ap);
552 va_end (ap);
553 return nbytes;
554 }
555
#if defined HAVE_X_WINDOWS && defined USE_X_TOOLKIT

/* Format to buffer *BUF of positive size *BUFSIZE, reallocating *BUF
   and updating *BUFSIZE if the buffer is too small, and otherwise
   behaving like esprintf.  When reallocating, free *BUF unless it is
   equal to NONHEAPBUF, and if BUFSIZE_MAX is nonnegative then signal
   memory exhaustion instead of growing the buffer size past
   BUFSIZE_MAX.

   This is the variadic front end; the work is done by evxprintf.  */
ptrdiff_t
exprintf (char **buf, ptrdiff_t *bufsize,
          char *nonheapbuf, ptrdiff_t bufsize_max,
          char const *format, ...)
{
  va_list args;
  va_start (args, format);
  ptrdiff_t written
    = evxprintf (buf, bufsize, nonheapbuf, bufsize_max, format, args);
  va_end (args);
  return written;
}

#endif
578
/* Act like exprintf, except take a va_list.

   Format into *BUF (of size *BUFSIZE) with doprnt.  If the result
   leaves no spare byte, which is also what doprnt returns on
   truncation, replace *BUF with a larger buffer obtained from xpalloc
   and try again.  The old buffer is freed unless it is NONHEAPBUF.
   Return the number of bytes stored, excluding the terminating
   '\0'.  */
ptrdiff_t
evxprintf (char **buf, ptrdiff_t *bufsize,
           char *nonheapbuf, ptrdiff_t bufsize_max,
           char const *format, va_list ap)
{
  while (true)
    {
      /* Each attempt formats from a fresh copy of AP, so the arguments
         can be read again if a retry is needed.  */
      va_list attempt;
      va_copy (attempt, ap);
      ptrdiff_t written = doprnt (*buf, *bufsize, format, NULL, attempt);
      va_end (attempt);

      if (written < *bufsize - 1)
        return written;

      if (*buf != nonheapbuf)
        {
          xfree (*buf);
          /* Presumably cleared so that *BUF is not left dangling if
             xpalloc below does not return.  */
          *buf = NULL;
        }
      *buf = xpalloc (NULL, bufsize, 1, bufsize_max, 1);
    }
}
602