1 /* Output like sprintf to a buffer of specified size.    -*- coding: utf-8 -*-
2    Also takes args differently: pass one pointer to the end
3    of the format string in addition to the format string itself.
4    Copyright (C) 1985, 2001-2021 Free Software Foundation, Inc.
5 
6 This file is part of GNU Emacs.
7 
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or (at
11 your option) any later version.
12 
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
20 
21 /* If you think about replacing this with some similar standard C function of
22    the printf family (such as vsnprintf), please note that this function
23    supports the following Emacs-specific features:
24 
25    . For %c conversions, it produces a string with the multibyte representation
26      of the (`int') argument, suitable for display in an Emacs buffer.
27 
28    . For %s and %c, when field width is specified (e.g., %25s), it accounts for
29      the display width of each character, according to char-width-table.  That
30      is, it does not assume that each character takes one column on display.
31      Nor does it assume that each character is a single byte.
32 
33    . If the size of the buffer is not enough to produce the formatted string in
34      its entirety, it makes sure that truncation does not chop the last
35      character in the middle of its multibyte sequence, producing an invalid
36      sequence.
37 
   . It accepts a pointer to the end of the format string, so the format string
     need not be null-terminated.  (If a null byte appears before that
     pointer, the format ends at the null byte.)
40 
41    . It signals an error if the length of the formatted string is about to
42      overflow ptrdiff_t or size_t, to avoid producing strings longer than what
43      Emacs can handle.
44 
45    OTOH, this function supports only a small subset of the standard C formatted
46    output facilities.  E.g., %u is not supported, precision is ignored
47    in %s and %c conversions, and %lld does not necessarily work and
48    code should use something like %"pM"d with intmax_t instead.
49    (See below for the detailed documentation of what is supported.)
50    However, this is okay, as this function is supposed to be called
51    from 'error' and similar C functions, and thus does not need to
52    support all the features of 'Fformat_message', which is used by the
53    Lisp 'error' function.  */
54 
/* In the FORMAT argument this function supports ` and ' as directives
   that output left and right quotes as per ‘text-quoting-style’.  It
   also supports the following %-sequences:
58 
59    %s means print a string argument.
60    %S is treated as %s, for loose compatibility with `Fformat_message'.
61    %d means print a `signed int' argument in decimal.
62    %o means print an `unsigned int' argument in octal.
63    %x means print an `unsigned int' argument in hex.
64    %e means print a `double' argument in exponential notation.
65    %f means print a `double' argument in decimal-point notation.
66    %g means print a `double' argument in exponential notation
67       or in decimal-point notation, depending on the value;
68       this is often (though not always) the shorter of the two notations.
69    %c means print a `signed int' argument as a single character.
70    %% means produce a literal % character.
71 
72    A %-sequence other than %% may contain optional flags, width, precision,
73    and length, as follows:
74 
75      %<flags><width><precision><length>character
76 
77    where flags is [+ -0], width is [0-9]+, precision is .[0-9]+, and length
78    is empty or l or the value of the pD or pI or PRIdMAX (sans "d") macros.
79    A % that does not introduce a valid %-sequence causes undefined behavior.
80    Bytes in FORMAT other than % are copied through as-is.
81 
82    The + flag character inserts a + before any positive number, while a space
83    inserts a space before any positive number; these flags only affect %d, %o,
84    %x, %e, %f, and %g sequences.  The - and 0 flags affect the width specifier,
85    as described below.  For signed numerical arguments only, the ` ' (space)
86    flag causes the result to be prefixed with a space character if it does not
87    start with a sign (+ or -).
88 
89    The l (lower-case letter ell) length modifier is a `long' data type
90    modifier: it is supported for %d, %o, and %x conversions of integral
91    arguments, must immediately precede the conversion specifier, and means that
92    the respective argument is to be treated as `long int' or `unsigned long
93    int'.  Similarly, the value of the pD macro means to use ptrdiff_t,
94    the value of the pI macro means to use EMACS_INT or EMACS_UINT, the
95    value of the PRIdMAX etc. macros means to use intmax_t or uintmax_t,
96    and the empty length modifier means `int' or `unsigned int'.
97 
98    The width specifier supplies a lower limit for the length of the printed
99    representation.  The padding, if any, normally goes on the left, but it goes
100    on the right if the - flag is present.  The padding character is normally a
101    space, but (for numerical arguments only) it is 0 if the 0 flag is present.
102    The - flag takes precedence over the 0 flag.
103 
104    For %e, %f, and %g sequences, the number after the "." in the precision
105    specifier says how many decimal places to show; if zero, the decimal point
106    itself is omitted.  For %d, %o, and %x sequences, the precision specifies
107    the minimum number of digits to appear.  Precision specifiers are
108    not supported for other %-sequences.  */
109 
110 #include <config.h>
111 #include <stdio.h>
112 #include <stdlib.h>
113 #include <float.h>
114 #include <unistd.h>
115 #include <limits.h>
116 
117 #include "lisp.h"
118 
/* Since we use the macro CHAR_HEAD_P, we have to include this, but
   we don't have to include other headers because CHAR_HEAD_P does not
   depend on any other macro.  */
122 #include "character.h"
123 
/* Number of bytes reserved for one formatted conversion on top of its
   requested field width or precision.  Enough to handle floating
   point formats with large numbers: DBL_MAX_10_EXP bounds the decimal
   exponent of a double, and 50 more bytes are added as slack
   (presumably for the sign, decimal point, exponent and default
   precision digits -- TODO confirm).  It is also subtracted from the
   largest width or precision that parse_format_integer accepts, and
   it sizes doprnt's on-stack scratch buffer.  */
enum { SIZE_BOUND_EXTRA = DBL_MAX_10_EXP + 50 };
126 
127 /* Parse FMT as an unsigned decimal integer, putting its value into *VALUE.
128    Return the address of the first byte after the integer.
129    If FMT is not an integer, return FMT and store zero into *VALUE.  */
130 static char const *
parse_format_integer(char const * fmt,int * value)131 parse_format_integer (char const *fmt, int *value)
132 {
133   int n = 0;
134   bool overflow = false;
135   for (; '0' <= *fmt && *fmt <= '9'; fmt++)
136     {
137       overflow |= INT_MULTIPLY_WRAPV (n, 10, &n);
138       overflow |= INT_ADD_WRAPV (n, *fmt - '0', &n);
139     }
140   if (overflow || min (PTRDIFF_MAX, SIZE_MAX) - SIZE_BOUND_EXTRA < n)
141     error ("Format width or precision too large");
142   *value = n;
143   return fmt;
144 }
145 
146 /* Like doprnt, except FORMAT_END must be non-null.
147    Although this function is never exercised in current Emacs,
148    it is retained in case some future Emacs version
149    contains doprnt callers that need such formats.
150    Having a separate function helps GCC optimize doprnt better.  */
151 static ptrdiff_t
doprnt_non_null_end(char * buffer,ptrdiff_t bufsize,char const * format,char const * format_end,va_list ap)152 doprnt_non_null_end (char *buffer, ptrdiff_t bufsize, char const *format,
153 		     char const *format_end, va_list ap)
154 {
155   USE_SAFE_ALLOCA;
156   ptrdiff_t fmtlen = format_end - format;
157   char *fmt = SAFE_ALLOCA (fmtlen + 1);
158   memcpy (fmt, format, fmtlen);
159   fmt[fmtlen] = 0;
160   ptrdiff_t nbytes = doprnt (buffer, bufsize, fmt, NULL, ap);
161   SAFE_FREE ();
162   return nbytes;
163 }
164 
165 /* Generate output from a format-spec FORMAT,
166    terminated at either the first NUL or (if FORMAT_END is non-null
167    and there are no NUL bytes between FORMAT and FORMAT_END)
168    terminated at position FORMAT_END.
169    (*FORMAT_END is not part of the format, but must exist and be readable.)
170    Output goes in BUFFER, which has room for BUFSIZE chars.
171    BUFSIZE must be positive.  If the output does not fit, truncate it
172    to fit and return BUFSIZE - 1; if this truncates a multibyte
173    sequence, store '\0' into the sequence's first byte.
174    Returns the number of bytes stored into BUFFER, excluding
175    the terminating null byte.  Output is always null-terminated.
176    String arguments are passed as C strings.
177    Integers are passed as C integers.
178 
179    FIXME: If FORMAT_END is not at a character boundary
180    doprnt_non_null_end will cut the string in the middle of the
181    character and the returned string will have an incomplete character
182    sequence at the end.  We may prefer to cut at a character
183    boundary.  */
184 
185 ptrdiff_t
doprnt(char * buffer,ptrdiff_t bufsize,const char * format,const char * format_end,va_list ap)186 doprnt (char *buffer, ptrdiff_t bufsize, const char *format,
187 	const char *format_end, va_list ap)
188 {
189   if (format_end)
190     return doprnt_non_null_end (buffer, bufsize, format, format_end, ap);
191 
192   const char *fmt = format;	/* Pointer into format string.  */
193   char *bufptr = buffer;	/* Pointer into output buffer.  */
194 
195   /* Use this for sprintf unless we need something really big.  */
196   char tembuf[SIZE_BOUND_EXTRA + 50];
197 
198   /* Size of sprintf_buffer.  */
199   ptrdiff_t size_allocated = sizeof (tembuf);
200 
201   /* Buffer to use for sprintf.  Either tembuf or same as BIG_BUFFER.  */
202   char *sprintf_buffer = tembuf;
203 
204   /* Buffer we have got with malloc.  */
205   char *big_buffer = NULL;
206 
207   Lisp_Object quoting_style = Ftext_quoting_style ();
208 
209   bufsize--;
210 
211   /* Loop until end of format string or buffer full. */
212   while (*fmt && bufsize > 0)
213     {
214       char const *fmt0 = fmt;
215       char fmtchar = *fmt++;
216       if (fmtchar == '%')
217 	{
218 	  ptrdiff_t width;  /* Columns occupied by STRING on display.  */
219 	  enum {
220 	    pDlen = sizeof pD - 1,
221 	    pIlen = sizeof pI - 1,
222 	    pMlen = sizeof PRIdMAX - 2,
223 	    maxmlen = max (max (1, pDlen), max (pIlen, pMlen))
224 	  };
225 	  enum {
226 	    no_modifier, long_modifier, pD_modifier, pI_modifier, pM_modifier
227 	  } length_modifier = no_modifier;
228 	  static char const modifier_len[] = { 0, 1, pDlen, pIlen, pMlen };
229 	  int mlen;
230 	  char charbuf[MAX_MULTIBYTE_LENGTH + 1];	/* Used for %c.  */
231 
232 	  /* Width and precision specified by this %-sequence.  */
233 	  int wid = 0, prec = -1;
234 
235 	  /* FMTSTAR will be a "%*.*X"-like version of this %-sequence.
236 	     Start by putting '%' into FMTSTAR.  */
237 	  char fmtstar[sizeof "%-+ 0*.*d" + maxmlen];
238 	  char *string = fmtstar;
239 	  *string++ = '%';
240 
241 	  /* Copy at most one instance of each flag into FMTSTAR.  */
242 	  bool minusflag = false, plusflag = false, zeroflag = false,
243 	    spaceflag = false;
244 	  for (;; fmt++)
245 	    {
246 	      *string = *fmt;
247 	      switch (*fmt)
248 		{
249 		case '-': string += !minusflag; minusflag = true; continue;
250 		case '+': string += !plusflag; plusflag = true; continue;
251 		case ' ': string += !spaceflag; spaceflag = true; continue;
252 		case '0': string += !zeroflag; zeroflag = true; continue;
253 		}
254 	      break;
255 	    }
256 
257 	  /* Parse width and precision, putting "*.*" into FMTSTAR.  */
258 	  if ('1' <= *fmt && *fmt <= '9')
259 	    fmt = parse_format_integer (fmt, &wid);
260 	  if (*fmt == '.')
261 	    fmt = parse_format_integer (fmt + 1, &prec);
262 	  *string++ = '*';
263 	  *string++ = '.';
264 	  *string++ = '*';
265 
266 	  /* Check for the length modifiers in textual length order, so
267 	     that longer modifiers override shorter ones.  */
268 	  for (mlen = 1; mlen <= maxmlen; mlen++)
269 	    {
270 	      if (mlen == 1 && *fmt == 'l')
271 		length_modifier = long_modifier;
272 	      if (mlen == pDlen && strncmp (fmt, pD, pDlen) == 0)
273 		length_modifier = pD_modifier;
274 	      if (mlen == pIlen && strncmp (fmt, pI, pIlen) == 0)
275 		length_modifier = pI_modifier;
276 	      if (mlen == pMlen && strncmp (fmt, PRIdMAX, pMlen) == 0)
277 		length_modifier = pM_modifier;
278 	    }
279 
280 	  /* Copy optional length modifier and conversion specifier
281 	     character into FMTSTAR, and append a NUL.  */
282 	  mlen = modifier_len[length_modifier];
283 	  string = mempcpy (string, fmt, mlen + 1);
284 	  fmt += mlen;
285 	  *string = 0;
286 
287 	  /* An idea of how much space we might need.
288 	     This might be a field width or a precision; e.g.
289 	     %1.1000f and %1000.1f both might need 1000+ bytes.
290 	     Make it large enough to handle floating point formats
291 	     with large numbers.  */
292 	  ptrdiff_t size_bound = max (wid, prec) + SIZE_BOUND_EXTRA;
293 
294 	  /* Make sure we have that much.  */
295 	  if (size_bound > size_allocated)
296 	    {
297 	      if (big_buffer)
298 		xfree (big_buffer);
299 	      big_buffer = xmalloc (size_bound);
300 	      sprintf_buffer = big_buffer;
301 	      size_allocated = size_bound;
302 	    }
303 	  int minlen = 0;
304 	  ptrdiff_t tem;
305 	  switch (*fmt++)
306 	    {
307 	    default:
308 	      error ("Invalid format operation %s", fmt0);
309 
310 	    case 'd':
311 	      switch (length_modifier)
312 		{
313 		case no_modifier:
314 		  {
315 		    int v = va_arg (ap, int);
316 		    tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
317 		  }
318 		  break;
319 		case long_modifier:
320 		  {
321 		    long v = va_arg (ap, long);
322 		    tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
323 		  }
324 		  break;
325 		case pD_modifier:
326 		signed_pD_modifier:
327 		  {
328 		    ptrdiff_t v = va_arg (ap, ptrdiff_t);
329 		    tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
330 		  }
331 		  break;
332 		case pI_modifier:
333 		  {
334 		    EMACS_INT v = va_arg (ap, EMACS_INT);
335 		    tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
336 		  }
337 		  break;
338 		case pM_modifier:
339 		  {
340 		    intmax_t v = va_arg (ap, intmax_t);
341 		    tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
342 		  }
343 		  break;
344 		default:
345 		  eassume (false);
346 		}
347 	      /* Now copy into final output, truncating as necessary.  */
348 	      string = sprintf_buffer;
349 	      goto doit;
350 
351 	    case 'o':
352 	    case 'x':
353 	      switch (length_modifier)
354 		{
355 		case no_modifier:
356 		  {
357 		    unsigned v = va_arg (ap, unsigned);
358 		    tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
359 		  }
360 		  break;
361 		case long_modifier:
362 		  {
363 		    unsigned long v = va_arg (ap, unsigned long);
364 		    tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
365 		  }
366 		  break;
367 		case pD_modifier:
368 		  goto signed_pD_modifier;
369 		case pI_modifier:
370 		  {
371 		    EMACS_UINT v = va_arg (ap, EMACS_UINT);
372 		    tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
373 		  }
374 		  break;
375 		case pM_modifier:
376 		  {
377 		    uintmax_t v = va_arg (ap, uintmax_t);
378 		    tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
379 		  }
380 		  break;
381 		default:
382 		  eassume (false);
383 		}
384 	      /* Now copy into final output, truncating as necessary.  */
385 	      string = sprintf_buffer;
386 	      goto doit;
387 
388 	    case 'f':
389 	    case 'e':
390 	    case 'g':
391 	      {
392 		double d = va_arg (ap, double);
393 		tem = sprintf (sprintf_buffer, fmtstar, wid, prec, d);
394 		/* Now copy into final output, truncating as necessary.  */
395 		string = sprintf_buffer;
396 		goto doit;
397 	      }
398 
399 	    case 'S':
400 	    case 's':
401 	      minlen = minusflag ? -wid : wid;
402 	      string = va_arg (ap, char *);
403 	      tem = strnlen (string, STRING_BYTES_BOUND + 1);
404 	      if (tem == STRING_BYTES_BOUND + 1)
405 		error ("String for %%s or %%S format is too long");
406 	      width = strwidth (string, tem);
407 	      goto doit1;
408 
409 	      /* Copy string into final output, truncating if no room.  */
410 	    doit:
411 	      eassert (0 <= tem);
412 	      /* Coming here means STRING contains ASCII only.  */
413 	      if (STRING_BYTES_BOUND < tem)
414 		error ("Format width or precision too large");
415 	      width = tem;
416 	    doit1:
417 	      /* We have already calculated:
418 		 TEM -- length of STRING,
419 		 WIDTH -- columns occupied by STRING when displayed, and
420 		 MINLEN -- minimum columns of the output.  */
421 	      if (minlen > 0)
422 		{
423 		  while (minlen > width && bufsize > 0)
424 		    {
425 		      *bufptr++ = ' ';
426 		      bufsize--;
427 		      minlen--;
428 		    }
429 		  minlen = 0;
430 		}
431 	      if (tem > bufsize)
432 		{
433 		  /* Truncate the string at character boundary.  */
434 		  tem = bufsize;
435 		  do
436 		    {
437 		      tem--;
438 		      if (CHAR_HEAD_P (string[tem]))
439 			{
440 			  if (BYTES_BY_CHAR_HEAD (string[tem]) <= bufsize - tem)
441 			    tem = bufsize;
442 			  break;
443 			}
444 		    }
445 		  while (tem != 0);
446 
447 		  memcpy (bufptr, string, tem);
448 		  bufptr[tem] = 0;
449 		  /* Trigger exit from the loop, but make sure we
450 		     return to the caller a value which will indicate
451 		     that the buffer was too small.  */
452 		  bufptr += bufsize;
453 		  bufsize = 0;
454 		  continue;
455 		}
456 	      memcpy (bufptr, string, tem);
457 	      bufptr += tem;
458 	      bufsize -= tem;
459 	      if (minlen < 0)
460 		{
461 		  while (minlen < - width && bufsize > 0)
462 		    {
463 		      *bufptr++ = ' ';
464 		      bufsize--;
465 		      minlen++;
466 		    }
467 		  minlen = 0;
468 		}
469 	      continue;
470 
471 	    case 'c':
472 	      {
473 		int chr = va_arg (ap, int);
474 		tem = CHAR_STRING (chr, (unsigned char *) charbuf);
475 		string = charbuf;
476 		string[tem] = 0;
477 		width = strwidth (string, tem);
478 		minlen = minusflag ? -wid : wid;
479 		goto doit1;
480 	      }
481 
482 	    case '%':
483 	      /* Treat this '%' as normal.  */
484 	      break;
485 	    }
486 	}
487 
488       char const *src;
489       ptrdiff_t srclen;
490       if (EQ (quoting_style, Qcurve) && fmtchar == '`')
491 	src = uLSQM, srclen = sizeof uLSQM - 1;
492       else if (EQ (quoting_style, Qcurve) && fmtchar == '\'')
493 	src = uRSQM, srclen = sizeof uRSQM - 1;
494       else if (! LEADING_CODE_P (fmtchar))
495 	{
496 	  if (EQ (quoting_style, Qstraight) && fmtchar == '`')
497 	    fmtchar = '\'';
498 
499 	  *bufptr++ = fmtchar;
500 	  continue;
501 	}
502       else
503         {
504           int charlen = BYTES_BY_CHAR_HEAD (fmtchar);
505           src = fmt0;
506 
507           /* If the format string ends in the middle of a multibyte
508              character we don't want to skip over the NUL byte.  */
509           for (srclen = 1 ; *(src + srclen) != 0 && srclen < charlen ; srclen++);
510 
511           fmt = src + srclen;
512         }
513 
514       if (bufsize < srclen)
515 	{
516 	  /* Truncate, but return value that will signal to caller
517 	     that the buffer was too small.  */
518 	  do
519 	    *bufptr++ = '\0';
520 	  while (--bufsize != 0);
521 	}
522       else
523 	{
524 	  do
525 	    *bufptr++ = *src++;
526 	  while (--srclen != 0);
527 	}
528     }
529 
530   /* If we had to malloc something, free it.  */
531   xfree (big_buffer);
532 
533   *bufptr = 0;		/* Make sure our string ends with a '\0' */
534   return bufptr - buffer;
535 }
536 
537 /* Format to an unbounded buffer BUF.  This is like sprintf, except it
538    is not limited to returning an 'int' so it doesn't have a silly 2
539    GiB limit on typical 64-bit hosts.  However, it is limited to the
540    Emacs-style formats that doprnt supports, and it requotes ` and '
541    as per ‘text-quoting-style’.
542 
543    Return the number of bytes put into BUF, excluding the terminating
544    '\0'.  */
545 ptrdiff_t
esprintf(char * buf,char const * format,...)546 esprintf (char *buf, char const *format, ...)
547 {
548   ptrdiff_t nbytes;
549   va_list ap;
550   va_start (ap, format);
551   nbytes = doprnt (buf, TYPE_MAXIMUM (ptrdiff_t), format, 0, ap);
552   va_end (ap);
553   return nbytes;
554 }
555 
556 #if defined HAVE_X_WINDOWS && defined USE_X_TOOLKIT
557 
/* Format to buffer *BUF of positive size *BUFSIZE, reallocating *BUF
   and updating *BUFSIZE if the buffer is too small, and otherwise
   behaving like esprintf.  When reallocating, free *BUF unless it is
   equal to NONHEAPBUF, and if BUFSIZE_MAX is nonnegative then signal
   memory exhaustion instead of growing the buffer size past
   BUFSIZE_MAX.  This is a variadic wrapper around evxprintf.  */
ptrdiff_t
exprintf (char **buf, ptrdiff_t *bufsize,
          char *nonheapbuf, ptrdiff_t bufsize_max,
          char const *format, ...)
{
  va_list args;
  va_start (args, format);
  ptrdiff_t len
    = evxprintf (buf, bufsize, nonheapbuf, bufsize_max, format, args);
  va_end (args);
  return len;
}
576 
577 #endif
578 
/* Format to buffer *BUF of size *BUFSIZE according to FORMAT and the
   arguments in AP, growing the buffer as needed; this is the va_list
   counterpart of exprintf.  Each attempt formats from a fresh copy
   of AP.  An attempt succeeds when doprnt reports fewer than
   *BUFSIZE - 1 bytes, and that byte count is returned.  Otherwise
   *BUF is freed unless it equals NONHEAPBUF, and a new buffer is
   obtained from xpalloc, which updates *BUFSIZE and is given
   BUFSIZE_MAX as its size limit.  */
ptrdiff_t
evxprintf (char **buf, ptrdiff_t *bufsize,
           char *nonheapbuf, ptrdiff_t bufsize_max,
           char const *format, va_list ap)
{
  while (true)
    {
      /* doprnt consumes its va_list, so give each try its own copy.  */
      va_list args;
      va_copy (args, ap);
      ptrdiff_t len = doprnt (*buf, *bufsize, format, 0, args);
      va_end (args);

      if (len < *bufsize - 1)
        return len;

      /* The output may have been truncated; get a bigger buffer.
         Clear *BUF before allocating so that it does not keep
         pointing at freed storage while xpalloc runs.  */
      if (*buf != nonheapbuf)
        {
          xfree (*buf);
          *buf = NULL;
        }
      *buf = xpalloc (NULL, bufsize, 1, bufsize_max, 1);
    }
}
602