1 /* Output like sprintf to a buffer of specified size.    -*- coding: utf-8 -*-
2    Also takes args differently: pass one pointer to the end
3    of the format string in addition to the format string itself.
4    Copyright (C) 1985, 2001-2021 Free Software Foundation, Inc.
5 
6 This file is part of GNU Emacs.
7 
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or (at
11 your option) any later version.
12 
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
20 
21 /* If you think about replacing this with some similar standard C function of
22    the printf family (such as vsnprintf), please note that this function
23    supports the following Emacs-specific features:
24 
25    . For %c conversions, it produces a string with the multibyte representation
26      of the (`int') argument, suitable for display in an Emacs buffer.
27 
28    . For %s and %c, when field width is specified (e.g., %25s), it accounts for
29      the display width of each character, according to char-width-table.  That
30      is, it does not assume that each character takes one column on display.
31 
32    . If the size of the buffer is not enough to produce the formatted string in
33      its entirety, it makes sure that truncation does not chop the last
34      character in the middle of its multibyte sequence, producing an invalid
35      sequence.
36 
37    . It accepts a pointer to the end of the format string, so the format string
38      could include embedded NUL characters.
39 
40    . It signals an error if the length of the formatted string is about to
41      overflow ptrdiff_t or size_t, to avoid producing strings longer than what
42      Emacs can handle.
43 
44    OTOH, this function supports only a small subset of the standard C formatted
45    output facilities.  E.g., %u and %ll are not supported, and precision is
   ignored for %s and %c conversions.  (See below for the detailed
   documentation of what is supported.)  However, this is okay, as this
   function is supposed to be called from `error' and similar functions,
   and thus does not need to support features beyond those in
   `Fformat_message', which is used by `error' on the Lisp level.  */
51 
52 /* In the FORMAT argument this function supports ` and ' as directives
   that output left and right quotes as per ‘text-quoting-style’.  It
54    also supports the following %-sequences:
55 
56    %s means print a string argument.
57    %S is treated as %s, for loose compatibility with `Fformat_message'.
58    %d means print a `signed int' argument in decimal.
59    %o means print an `unsigned int' argument in octal.
60    %x means print an `unsigned int' argument in hex.
61    %e means print a `double' argument in exponential notation.
62    %f means print a `double' argument in decimal-point notation.
63    %g means print a `double' argument in exponential notation
64       or in decimal-point notation, whichever uses fewer characters.
65    %c means print a `signed int' argument as a single character.
66    %% means produce a literal % character.
67 
68    A %-sequence may contain optional flag, width, and precision specifiers, and
69    a length modifier, as follows:
70 
71      %<flags><width><precision><length>character
72 
73    where flags is [+ -0], width is [0-9]+, precision is .[0-9]+, and length
74    is empty or l or the value of the pD or pI or PRIdMAX (sans "d") macros.
75    Also, %% in a format stands for a single % in the output.  A % that
76    does not introduce a valid %-sequence causes undefined behavior.
77 
78    The + flag character inserts a + before any positive number, while a space
79    inserts a space before any positive number; these flags only affect %d, %o,
80    %x, %e, %f, and %g sequences.  The - and 0 flags affect the width specifier,
81    as described below.  For signed numerical arguments only, the ` ' (space)
82    flag causes the result to be prefixed with a space character if it does not
83    start with a sign (+ or -).
84 
85    The l (lower-case letter ell) length modifier is a `long' data type
86    modifier: it is supported for %d, %o, and %x conversions of integral
87    arguments, must immediately precede the conversion specifier, and means that
88    the respective argument is to be treated as `long int' or `unsigned long
89    int'.  Similarly, the value of the pD macro means to use ptrdiff_t,
90    the value of the pI macro means to use EMACS_INT or EMACS_UINT, the
91    value of the PRIdMAX etc. macros means to use intmax_t or uintmax_t,
92    and the empty length modifier means `int' or `unsigned int'.
93 
94    The width specifier supplies a lower limit for the length of the printed
95    representation.  The padding, if any, normally goes on the left, but it goes
96    on the right if the - flag is present.  The padding character is normally a
97    space, but (for numerical arguments only) it is 0 if the 0 flag is present.
98    The - flag takes precedence over the 0 flag.
99 
100    For %e, %f, and %g sequences, the number after the "." in the precision
101    specifier says how many decimal places to show; if zero, the decimal point
102    itself is omitted.  For %s and %S, the precision specifier is ignored.  */
103 
104 #include <config.h>
105 #include <stdio.h>
106 #include <stdlib.h>
107 #include <float.h>
108 #include <unistd.h>
109 #include <limits.h>
110 
111 #include "lisp.h"
112 
113 /* Since we use the macro CHAR_HEAD_P, we have to include this, but
   don't have to include others because CHAR_HEAD_P does not contain
115    another macro.  */
116 #include "character.h"
117 
118 /* Generate output from a format-spec FORMAT,
119    terminated at position FORMAT_END.
120    (*FORMAT_END is not part of the format, but must exist and be readable.)
121    Output goes in BUFFER, which has room for BUFSIZE chars.
122    BUFSIZE must be positive.  If the output does not fit, truncate it
123    to fit and return BUFSIZE - 1; if this truncates a multibyte
124    sequence, store '\0' into the sequence's first byte.
125    Returns the number of bytes stored into BUFFER, excluding
126    the terminating NUL byte.  Output is always NUL-terminated.
127    String arguments are passed as C strings.
128    Integers are passed as C integers.  */
129 
130 ptrdiff_t
doprnt(char * buffer,ptrdiff_t bufsize,const char * format,const char * format_end,va_list ap)131 doprnt (char *buffer, ptrdiff_t bufsize, const char *format,
132 	const char *format_end, va_list ap)
133 {
134   const char *fmt = format;	/* Pointer into format string.  */
135   char *bufptr = buffer;	/* Pointer into output buffer.  */
136 
137   /* Enough to handle floating point formats with large numbers.  */
138   enum { SIZE_BOUND_EXTRA = DBL_MAX_10_EXP + 50 };
139 
140   /* Use this for sprintf unless we need something really big.  */
141   char tembuf[SIZE_BOUND_EXTRA + 50];
142 
143   /* Size of sprintf_buffer.  */
144   ptrdiff_t size_allocated = sizeof (tembuf);
145 
146   /* Buffer to use for sprintf.  Either tembuf or same as BIG_BUFFER.  */
147   char *sprintf_buffer = tembuf;
148 
149   /* Buffer we have got with malloc.  */
150   char *big_buffer = NULL;
151 
152   enum text_quoting_style quoting_style = text_quoting_style ();
153   ptrdiff_t tem = -1;
154   char *string;
155   char fixed_buffer[20];	/* Default buffer for small formatting. */
156   char *fmtcpy;
157   int minlen;
158   char charbuf[MAX_MULTIBYTE_LENGTH + 1];	/* Used for %c.  */
159   USE_SAFE_ALLOCA;
160 
161   if (format_end == 0)
162     format_end = format + strlen (format);
163 
164   fmtcpy = (format_end - format < sizeof (fixed_buffer) - 1
165 	    ? fixed_buffer
166 	    : SAFE_ALLOCA (format_end - format + 1));
167 
168   bufsize--;
169 
170   /* Loop until end of format string or buffer full. */
171   while (fmt < format_end && bufsize > 0)
172     {
173       char const *fmt0 = fmt;
174       char fmtchar = *fmt++;
175       if (fmtchar == '%')
176 	{
177 	  ptrdiff_t size_bound = 0;
178 	  ptrdiff_t width;  /* Columns occupied by STRING on display.  */
179 	  enum {
180 	    pDlen = sizeof pD - 1,
181 	    pIlen = sizeof pI - 1,
182 	    pMlen = sizeof PRIdMAX - 2
183 	  };
184 	  enum {
185 	    no_modifier, long_modifier, pD_modifier, pI_modifier, pM_modifier
186 	  } length_modifier = no_modifier;
187 	  static char const modifier_len[] = { 0, 1, pDlen, pIlen, pMlen };
188 	  int maxmlen = max (max (1, pDlen), max (pIlen, pMlen));
189 	  int mlen;
190 
191 	  /* Copy this one %-spec into fmtcpy.  */
192 	  string = fmtcpy;
193 	  *string++ = '%';
194 	  while (fmt < format_end)
195 	    {
196 	      *string++ = *fmt;
197 	      if ('0' <= *fmt && *fmt <= '9')
198 		{
199 		  /* Get an idea of how much space we might need.
200 		     This might be a field width or a precision; e.g.
201 		     %1.1000f and %1000.1f both might need 1000+ bytes.
202 		     Parse the width or precision, checking for overflow.  */
203 		  int n = *fmt - '0';
204 		  bool overflow = false;
205 		  while (fmt + 1 < format_end
206 			 && '0' <= fmt[1] && fmt[1] <= '9')
207 		    {
208 		      overflow |= INT_MULTIPLY_WRAPV (n, 10, &n);
209 		      overflow |= INT_ADD_WRAPV (n, fmt[1] - '0', &n);
210 		      *string++ = *++fmt;
211 		    }
212 
213 		  if (overflow
214 		      || min (PTRDIFF_MAX, SIZE_MAX) - SIZE_BOUND_EXTRA < n)
215 		    error ("Format width or precision too large");
216 		  if (size_bound < n)
217 		    size_bound = n;
218 		}
219 	      else if (! (*fmt == '-' || *fmt == ' ' || *fmt == '.'
220 			  || *fmt == '+'))
221 		break;
222 	      fmt++;
223 	    }
224 
225 	  /* Check for the length modifiers in textual length order, so
226 	     that longer modifiers override shorter ones.  */
227 	  for (mlen = 1; mlen <= maxmlen; mlen++)
228 	    {
229 	      if (format_end - fmt < mlen)
230 		break;
231 	      if (mlen == 1 && *fmt == 'l')
232 		length_modifier = long_modifier;
233 	      if (mlen == pDlen && memcmp (fmt, pD, pDlen) == 0)
234 		length_modifier = pD_modifier;
235 	      if (mlen == pIlen && memcmp (fmt, pI, pIlen) == 0)
236 		length_modifier = pI_modifier;
237 	      if (mlen == pMlen && memcmp (fmt, PRIdMAX, pMlen) == 0)
238 		length_modifier = pM_modifier;
239 	    }
240 
241 	  mlen = modifier_len[length_modifier];
242 	  memcpy (string, fmt + 1, mlen);
243 	  string += mlen;
244 	  fmt += mlen;
245 	  *string = 0;
246 
247 	  /* Make the size bound large enough to handle floating point formats
248 	     with large numbers.  */
249 	  size_bound += SIZE_BOUND_EXTRA;
250 
251 	  /* Make sure we have that much.  */
252 	  if (size_bound > size_allocated)
253 	    {
254 	      if (big_buffer)
255 		xfree (big_buffer);
256 	      big_buffer = xmalloc (size_bound);
257 	      sprintf_buffer = big_buffer;
258 	      size_allocated = size_bound;
259 	    }
260 	  minlen = 0;
261 	  switch (*fmt++)
262 	    {
263 	    default:
264 	      error ("Invalid format operation %s", fmtcpy);
265 
266 /*	    case 'b': */
267 	    case 'l':
268 	    case 'd':
269 	      switch (length_modifier)
270 		{
271 		case no_modifier:
272 		  {
273 		    int v = va_arg (ap, int);
274 		    tem = sprintf (sprintf_buffer, fmtcpy, v);
275 		  }
276 		  break;
277 		case long_modifier:
278 		  {
279 		    long v = va_arg (ap, long);
280 		    tem = sprintf (sprintf_buffer, fmtcpy, v);
281 		  }
282 		  break;
283 		case pD_modifier:
284 		signed_pD_modifier:
285 		  {
286 		    ptrdiff_t v = va_arg (ap, ptrdiff_t);
287 		    tem = sprintf (sprintf_buffer, fmtcpy, v);
288 		  }
289 		  break;
290 		case pI_modifier:
291 		  {
292 		    EMACS_INT v = va_arg (ap, EMACS_INT);
293 		    tem = sprintf (sprintf_buffer, fmtcpy, v);
294 		  }
295 		  break;
296 		case pM_modifier:
297 		  {
298 		    intmax_t v = va_arg (ap, intmax_t);
299 		    tem = sprintf (sprintf_buffer, fmtcpy, v);
300 		  }
301 		  break;
302 		}
303 	      /* Now copy into final output, truncating as necessary.  */
304 	      string = sprintf_buffer;
305 	      goto doit;
306 
307 	    case 'o':
308 	    case 'x':
309 	      switch (length_modifier)
310 		{
311 		case no_modifier:
312 		  {
313 		    unsigned v = va_arg (ap, unsigned);
314 		    tem = sprintf (sprintf_buffer, fmtcpy, v);
315 		  }
316 		  break;
317 		case long_modifier:
318 		  {
319 		    unsigned long v = va_arg (ap, unsigned long);
320 		    tem = sprintf (sprintf_buffer, fmtcpy, v);
321 		  }
322 		  break;
323 		case pD_modifier:
324 		  goto signed_pD_modifier;
325 		case pI_modifier:
326 		  {
327 		    EMACS_UINT v = va_arg (ap, EMACS_UINT);
328 		    tem = sprintf (sprintf_buffer, fmtcpy, v);
329 		  }
330 		  break;
331 		case pM_modifier:
332 		  {
333 		    uintmax_t v = va_arg (ap, uintmax_t);
334 		    tem = sprintf (sprintf_buffer, fmtcpy, v);
335 		  }
336 		  break;
337 		}
338 	      /* Now copy into final output, truncating as necessary.  */
339 	      string = sprintf_buffer;
340 	      goto doit;
341 
342 	    case 'f':
343 	    case 'e':
344 	    case 'g':
345 	      {
346 		double d = va_arg (ap, double);
347 		tem = sprintf (sprintf_buffer, fmtcpy, d);
348 		/* Now copy into final output, truncating as necessary.  */
349 		string = sprintf_buffer;
350 		goto doit;
351 	      }
352 
353 	    case 'S':
354 	      string[-1] = 's';
355 	      FALLTHROUGH;
356 	    case 's':
357 	      if (fmtcpy[1] != 's')
358 		minlen = atoi (&fmtcpy[1]);
359 	      string = va_arg (ap, char *);
360 	      tem = strnlen (string, STRING_BYTES_BOUND + 1);
361 	      if (tem == STRING_BYTES_BOUND + 1)
362 		error ("String for %%s or %%S format is too long");
363 	      width = strwidth (string, tem);
364 	      goto doit1;
365 
366 	      /* Copy string into final output, truncating if no room.  */
367 	    doit:
368 	      eassert (0 <= tem);
369 	      /* Coming here means STRING contains ASCII only.  */
370 	      if (STRING_BYTES_BOUND < tem)
371 		error ("Format width or precision too large");
372 	      width = tem;
373 	    doit1:
374 	      /* We have already calculated:
375 		 TEM -- length of STRING,
376 		 WIDTH -- columns occupied by STRING when displayed, and
377 		 MINLEN -- minimum columns of the output.  */
378 	      if (minlen > 0)
379 		{
380 		  while (minlen > width && bufsize > 0)
381 		    {
382 		      *bufptr++ = ' ';
383 		      bufsize--;
384 		      minlen--;
385 		    }
386 		  minlen = 0;
387 		}
388 	      if (tem > bufsize)
389 		{
390 		  /* Truncate the string at character boundary.  */
391 		  tem = bufsize;
392 		  do
393 		    {
394 		      tem--;
395 		      if (CHAR_HEAD_P (string[tem]))
396 			{
397 			  if (BYTES_BY_CHAR_HEAD (string[tem]) <= bufsize - tem)
398 			    tem = bufsize;
399 			  break;
400 			}
401 		    }
402 		  while (tem != 0);
403 
404 		  memcpy (bufptr, string, tem);
405 		  bufptr[tem] = 0;
406 		  /* Trigger exit from the loop, but make sure we
407 		     return to the caller a value which will indicate
408 		     that the buffer was too small.  */
409 		  bufptr += bufsize;
410 		  bufsize = 0;
411 		  continue;
412 		}
413 	      memcpy (bufptr, string, tem);
414 	      bufptr += tem;
415 	      bufsize -= tem;
416 	      if (minlen < 0)
417 		{
418 		  while (minlen < - width && bufsize > 0)
419 		    {
420 		      *bufptr++ = ' ';
421 		      bufsize--;
422 		      minlen++;
423 		    }
424 		  minlen = 0;
425 		}
426 	      continue;
427 
428 	    case 'c':
429 	      {
430 		int chr = va_arg (ap, int);
431 		tem = CHAR_STRING (chr, (unsigned char *) charbuf);
432 		string = charbuf;
433 		string[tem] = 0;
434 		width = strwidth (string, tem);
435 		if (fmtcpy[1] != 'c')
436 		  minlen = atoi (&fmtcpy[1]);
437 		goto doit1;
438 	      }
439 
440 	    case '%':
441 	      /* Treat this '%' as normal.  */
442 	      fmt0 = fmt - 1;
443 	      break;
444 	    }
445 	}
446 
447       char const *src;
448       ptrdiff_t srclen;
449       if (quoting_style == CURVE_QUOTING_STYLE && fmtchar == '`')
450 	src = uLSQM, srclen = sizeof uLSQM - 1;
451       else if (quoting_style == CURVE_QUOTING_STYLE && fmtchar == '\'')
452 	src = uRSQM, srclen = sizeof uRSQM - 1;
453       else if (quoting_style == STRAIGHT_QUOTING_STYLE && fmtchar == '`')
454 	src = "'", srclen = 1;
455       else
456 	{
457 	  while (fmt < format_end && !CHAR_HEAD_P (*fmt))
458 	    fmt++;
459 	  src = fmt0, srclen = fmt - fmt0;
460 	}
461 
462       if (bufsize < srclen)
463 	{
464 	  /* Truncate, but return value that will signal to caller
465 	     that the buffer was too small.  */
466 	  do
467 	    *bufptr++ = '\0';
468 	  while (--bufsize != 0);
469 	}
470       else
471 	{
472 	  do
473 	    *bufptr++ = *src++;
474 	  while (--srclen != 0);
475 	}
476     }
477 
478   /* If we had to malloc something, free it.  */
479   xfree (big_buffer);
480 
481   *bufptr = 0;		/* Make sure our string ends with a '\0' */
482 
483   SAFE_FREE ();
484   return bufptr - buffer;
485 }
486 
487 /* Format to an unbounded buffer BUF.  This is like sprintf, except it
488    is not limited to returning an 'int' so it doesn't have a silly 2
489    GiB limit on typical 64-bit hosts.  However, it is limited to the
490    Emacs-style formats that doprnt supports, and it requotes ` and '
491    as per ‘text-quoting-style’.
492 
493    Return the number of bytes put into BUF, excluding the terminating
494    '\0'.  */
495 ptrdiff_t
esprintf(char * buf,char const * format,...)496 esprintf (char *buf, char const *format, ...)
497 {
498   ptrdiff_t nbytes;
499   va_list ap;
500   va_start (ap, format);
501   nbytes = doprnt (buf, TYPE_MAXIMUM (ptrdiff_t), format, 0, ap);
502   va_end (ap);
503   return nbytes;
504 }
505 
506 #if defined HAVE_X_WINDOWS && defined USE_X_TOOLKIT
507 
508 /* Format to buffer *BUF of positive size *BUFSIZE, reallocating *BUF
509    and updating *BUFSIZE if the buffer is too small, and otherwise
   behaving like esprintf.  When reallocating, free *BUF unless it is
511    equal to NONHEAPBUF, and if BUFSIZE_MAX is nonnegative then signal
512    memory exhaustion instead of growing the buffer size past
513    BUFSIZE_MAX.  */
ptrdiff_t
exprintf (char **buf, ptrdiff_t *bufsize,
          char const *nonheapbuf, ptrdiff_t bufsize_max,
          char const *format, ...)
{
  /* Collect the variadic arguments and hand everything over to the
     va_list flavor, which does the formatting and buffer growth.  */
  va_list args;
  va_start (args, format);
  ptrdiff_t written
    = evxprintf (buf, bufsize, nonheapbuf, bufsize_max, format, args);
  va_end (args);
  return written;
}
526 
527 #endif
528 
529 /* Act like exprintf, except take a va_list.  */
ptrdiff_t
evxprintf (char **buf, ptrdiff_t *bufsize,
           char const *nonheapbuf, ptrdiff_t bufsize_max,
           char const *format, va_list ap)
{
  while (true)
    {
      /* doprnt fetches arguments from the list it is given, so every
         attempt formats from a fresh copy of AP.  */
      va_list attempt_args;
      va_copy (attempt_args, ap);
      ptrdiff_t written = doprnt (*buf, *bufsize, format, 0, attempt_args);
      va_end (attempt_args);

      /* A result of *BUFSIZE - 1 is what doprnt returns on truncation,
         so only a strictly smaller count is accepted as complete.  */
      if (written < *bufsize - 1)
        return written;

      /* Too small: drop the old buffer (unless it is the caller's
         non-heap one) and allocate a larger one.  *BUF is nulled before
         allocating, presumably so that it is not left dangling should
         xpalloc signal an error instead of returning.  */
      if (*buf != nonheapbuf)
        {
          xfree (*buf);
          *buf = NULL;
        }
      *buf = xpalloc (NULL, bufsize, 1, bufsize_max, 1);
    }
}
552