xref: /freebsd/contrib/file/src/vasprintf.c (revision 06c3fb27)
1 /*
2  * Copyright (c) Ian F. Darwin 1986-1995.
3  * Software written by Ian F. Darwin and others;
4  * maintained 1995-present by Christos Zoulas and others.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice immediately at the beginning of the file, without modification,
11  *    this list of conditions, and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 /*###########################################################################
29   #                                                                           #
30   #                                vasprintf                                  #
31   #                                                                           #
32   #               Copyright (c) 2002-2005 David TAILLANDIER                   #
33   #                                                                           #
34   ###########################################################################*/
35 
36 /*
37 
38 This software is distributed under the "modified BSD licence".
39 
40 This software is also released with GNU license (GPL) in another file (same
41 source-code, only license differ).
42 
43 
44 
45 Redistribution and use in source and binary forms, with or without
46 modification, are permitted provided that the following conditions are met:
47 
48 Redistributions of source code must retain the above copyright notice, this
49 list of conditions and the following disclaimer. Redistributions in binary
50 form must reproduce the above copyright notice, this list of conditions and
51 the following disclaimer in the documentation and/or other materials
52 provided with the distribution. The name of the author may not be used to
53 endorse or promote products derived from this software without specific
54 prior written permission.
55 
56 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
57 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
58 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
59 EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
60 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
61 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
62 OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
63 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
64 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
65 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
66 
67 ====================
68 
69 Hacked from xnprintf version of 26th February 2005 to provide only
70 vasprintf by Reuben Thomas <rrt@sc3d.org>.
71 
72 ====================
73 
74 
The 'printf' function family uses the following format string:
76 
77 %[flag][width][.prec][modifier]type
78 
79 %% is the escape sequence to print a '%'
80 %  followed by an unknown format will print the characters without
81 trying to do any interpretation
82 
83 flag:   none   +     -     #     (blank)
84 width:  n    0n    *
85 prec:   none   .0    .n     .*
86 modifier:    F N L h l ll z t    ('F' and 'N' are ms-dos/16-bit specific)
87 type:  d i o u x X f e g E G c s p n
88 
89 
The function needs to allocate memory to store the full text before
actually writing it, i.e. if you want to fnprintf() 1000 characters, the
function will allocate 1000 bytes.
This behaviour can be modified: you have to customise the code to flush the
internal buffer (writing to screen or file) when it reaches a given size. Then
the buffer can have a shorter length. But why bother? If you really need to
write a HUGE string, don't use printf!
During the process, some other memory is allocated (1024 bytes minimum)
to handle the output of partial sprintf() calls. If you have only 10000 bytes
free in memory, you *may* not be able to nprintf() an 8000-byte-long text.
100 
note: if a buffer overflow occurs, exit() is called. This situation should
never appear ... but if you want to be *really* sure, you have to modify the
code to handle those situations (there is only one place to modify).
A buffer overflow can only occur if your sprintf() does strange things or when
you use strange formats.
106 
107 */
108 #include "file.h"
109 
110 #ifndef	lint
111 FILE_RCSID("@(#)$File: vasprintf.c,v 1.23 2022/09/24 20:30:13 christos Exp $")
112 #endif	/* lint */
113 
114 #include <assert.h>
115 #include <string.h>
116 #include <stdlib.h>
117 #include <stdarg.h>
118 #include <ctype.h>
119 #include <limits.h>
120 #include <stddef.h>
121 
/*
 * Output-buffer growth tuning.
 *
 * ALLOC_SECURITY_MARGIN  slack kept free beyond the estimated size of each
 *                        write (a big value because some platforms have a
 *                        very big 'G' exponent).
 * ALLOC_CHUNK            extra bytes requested every time the buffer grows.
 *                        For chunking to be worthwhile it should be much
 *                        greater than ALLOC_SECURITY_MARGIN, and it must
 *                        never be smaller (enforced below).
 */
#define ALLOC_SECURITY_MARGIN 1024
#define ALLOC_CHUNK 2048
#if ALLOC_SECURITY_MARGIN > ALLOC_CHUNK
#    error  !!! ALLOC_CHUNK < ALLOC_SECURITY_MARGIN !!!
#endif
128 
/*
 *  Shared formatting state.  Every working variable lives in this single
 *  structure, which is handed to nearly all helper functions instead of
 *  pushing the values individually on each call.
 */
typedef struct {
  /* read cursor: current position in the input (format) string */
  const char *src_string;
  /* start of the output buffer */
  char *buffer_base;
  /* write cursor: current position in the output buffer */
  char *dest_string;
  /* allocated length of the output buffer */
  size_t buffer_len;
  /* real current length of the output text */
  size_t real_len;
  /* total length the output text would have if it were not size-limited */
  size_t pseudo_len;
  /* output size limit; on entry to core() it still counts the final NUL */
  size_t maxlen;
  /* current position in the variable argument list */
  va_list vargs;
} xprintf_struct;
143 
144 /*
145  *  Realloc buffer if needed
146  *  Return value:  0 = ok
147  *               EOF = not enough memory
148  */
149 static int realloc_buff(xprintf_struct *s, size_t len)
150 {
151   char * ptr;
152 
153   if (len + ALLOC_SECURITY_MARGIN + s->real_len > s->buffer_len) {
154     len += s->real_len + ALLOC_CHUNK;
155     ptr = (char *)realloc((void *)(s->buffer_base), len);
156     if (ptr == NULL) {
157       s->buffer_base = NULL;
158       return EOF;
159     }
160 
161     s->dest_string = ptr + (size_t)(s->dest_string - s->buffer_base);
162     s->buffer_base = ptr;
163     s->buffer_len = len;
164 
165     (s->buffer_base)[s->buffer_len - 1] = 1; /* overflow marker */
166   }
167 
168   return 0;
169 }
170 
171 /*
172  *  Prints 'usual' characters    up to next '%'
173  *                            or up to end of text
174  */
175 static int usual_char(xprintf_struct * s)
176 {
177   size_t len;
178 
179   len = strcspn(s->src_string, "%");     /* reaches the next '%' or end of input string */
180   /* note: 'len' is never 0 because the presence of '%' */
181   /* or end-of-line is checked in the calling function  */
182 
183   if (realloc_buff(s,len) == EOF)
184     return EOF;
185 
186   memcpy(s->dest_string, s->src_string, len);
187   s->src_string += len;
188   s->dest_string += len;
189   s->real_len += len;
190   s->pseudo_len += len;
191 
192   return 0;
193 }
194 
195 /*
196  *  Return value: 0 = ok
197  *                EOF = error
198  */
199 static int print_it(xprintf_struct *s, size_t approx_len,
200                     const char *format_string, ...)
201 {
202   va_list varg;
203   int vsprintf_len;
204   size_t len;
205 
206   if (realloc_buff(s,approx_len) == EOF)
207     return EOF;
208 
209   va_start(varg, format_string);
210   vsprintf_len = vsprintf(s->dest_string, format_string, varg);
211   va_end(varg);
212 
213   /* Check for overflow */
214   assert((s->buffer_base)[s->buffer_len - 1] == 1);
215 
216   if (vsprintf_len == EOF) /* must be done *after* overflow-check */
217     return EOF;
218 
219   s->pseudo_len += vsprintf_len;
220   len = strlen(s->dest_string);
221   s->real_len += len;
222   s->dest_string += len;
223 
224   return 0;
225 }
226 
227 /*
228  *  Prints a string (%s)
229  *  We need special handling because:
230  *     a: the length of the string is unknown
231  *     b: when .prec is used, we must not access any extra byte of the
232  *        string (of course, if the original sprintf() does... what the
233  *        hell, not my problem)
234  *
235  *  Return value: 0 = ok
236  *                EOF = error
237  */
238 static int type_s(xprintf_struct *s, int width, int prec,
239                   const char *format_string, const char *arg_string)
240 {
241   size_t string_len;
242 
243   if (arg_string == NULL)
244     return print_it(s, (size_t)6, "(null)", 0);
245 
246   /* hand-made strlen() which stops when 'prec' is reached. */
247   /* if 'prec' is -1 then it is never reached. */
248   string_len = 0;
249   while (arg_string[string_len] != 0 && (size_t)prec != string_len)
250     string_len++;
251 
252   if (width != -1 && string_len < (size_t)width)
253     string_len = (size_t)width;
254 
255   return print_it(s, string_len, format_string, arg_string);
256 }
257 
/*
 *  Read a series of digits. Stop when a non-digit is found.
 *
 *  string  in/out: points to the text to parse; on return it points to the
 *          first character that is not a digit.
 *
 *  Return value: the value read, between 0 and 32767.  Numbers bigger than
 *  32767 saturate at 32767; all of their digits are still consumed.  The
 *  accumulation never overflows 'int', however long the digit run is.
 */
static int getint(const char **string)
{
  int value = 0;

  while (isdigit((unsigned char)**string) != 0) {
    const int digit = **string - '0';

    /* Would value * 10 + digit exceed 32767?  Tested without computing the
     * product, so no signed overflow can occur.  Once saturated, the value
     * stays at 32767 for every further digit. */
    if (value > (32767 - digit) / 10)
      value = 32767;
    else
      value = value * 10 + digit;

    (*string)++;
  }

  return value;
}
279 
280 /*
281  *  Read a part of the format string. A part is 'usual characters' (ie "blabla")
282  *  or '%%' escape sequence (to print a single '%') or any combination of
283  *  format specifier (ie "%i" or "%10.2d").
284  *  After the current part is managed, the function returns to caller with
285  *  everything ready to manage the following part.
286  *  The caller must ensure than the string is not empty, i.e. the first byte
287  *  is not zero.
288  *
289  *  Return value:  0 = ok
290  *                 EOF = error
291  */
292 static int dispatch(xprintf_struct *s)
293 {
294   const char *initial_ptr;
295   char format_string[24]; /* max length may be something like  "% +-#032768.32768Ld" */
296   char *format_ptr;
297   int flag_plus, flag_minus, flag_space, flag_sharp, flag_zero;
298   int width, prec, modifier, approx_width;
299   char type;
300   /* most of those variables are here to rewrite the format string */
301 
302 #define SRCTXT  (s->src_string)
303 #define DESTTXT (s->dest_string)
304 
305   /* incoherent format string. Characters after the '%' will be printed with the next call */
306 #define INCOHERENT()         do {SRCTXT=initial_ptr; return 0;} while (0)     /* do/while to avoid */
307 #define INCOHERENT_TEST()    do {if(*SRCTXT==0)   INCOHERENT();} while (0)    /* a null statement  */
308 
309   /* 'normal' text */
310   if (*SRCTXT != '%')
311     return usual_char(s);
312 
313   /* we then have a '%' */
314   SRCTXT++;
315   /* don't check for end-of-string ; this is done later */
316 
317   /* '%%' escape sequence */
318   if (*SRCTXT == '%') {
319     if (realloc_buff(s, (size_t)1) == EOF) /* because we can have "%%%%%%%%..." */
320       return EOF;
321     *DESTTXT = '%';
322     DESTTXT++;
323     SRCTXT++;
324     (s->real_len)++;
325     (s->pseudo_len)++;
326     return 0;
327   }
328 
329   /* '%' managing */
330   initial_ptr = SRCTXT;   /* save current pointer in case of incorrect */
331   /* 'decoding'. Points just after the '%' so the '%' */
332   /* won't be printed in any case, as required. */
333 
334   /* flag */
335   flag_plus = flag_minus = flag_space = flag_sharp = flag_zero = 0;
336 
337   for (;; SRCTXT++) {
338     if (*SRCTXT == ' ')
339       flag_space = 1;
340     else if (*SRCTXT == '+')
341       flag_plus = 1;
342     else if (*SRCTXT == '-')
343       flag_minus = 1;
344     else if (*SRCTXT == '#')
345       flag_sharp = 1;
346     else if (*SRCTXT == '0')
347       flag_zero = 1;
348     else
349       break;
350   }
351 
352   INCOHERENT_TEST();    /* here is the first test for end of string */
353 
354   /* width */
355   if (*SRCTXT == '*') {         /* width given by next argument */
356     SRCTXT++;
357     width = va_arg(s->vargs, int);
358     if ((size_t)width > 0x3fffU) /* 'size_t' to check against negative values too */
359       width = 0x3fff;
360   } else if (isdigit((unsigned char)*SRCTXT)) /* width given as ASCII number */
361     width = getint(&SRCTXT);
362   else
363     width = -1;                 /* no width specified */
364 
365   INCOHERENT_TEST();
366 
367   /* .prec */
368   if (*SRCTXT == '.') {
369     SRCTXT++;
370     if (*SRCTXT == '*') {       /* .prec given by next argument */
371       SRCTXT++;
372       prec = va_arg(s->vargs, int);
373       if ((size_t)prec >= 0x3fffU) /* 'size_t' to check against negative values too */
374         prec = 0x3fff;
375     } else {                    /* .prec given as ASCII number */
376       if (isdigit((unsigned char)*SRCTXT) == 0)
377         INCOHERENT();
378       prec = getint(&SRCTXT);
379     }
380     INCOHERENT_TEST();
381   } else
382     prec = -1;                  /* no .prec specified */
383 
384   /* modifier */
385   switch (*SRCTXT) {
386   case 'L':
387   case 'h':
388   case 'l':
389   case 'z':
390   case 't':
391     modifier = *SRCTXT;
392     SRCTXT++;
393     if (modifier=='l' && *SRCTXT=='l') {
394       SRCTXT++;
395       modifier = 'L';  /* 'll' == 'L'      long long == long double */
396     } /* only for compatibility ; not portable */
397     INCOHERENT_TEST();
398     break;
399   default:
400     modifier = -1;              /* no modifier specified */
401     break;
402   }
403 
404   /* type */
405   type = *SRCTXT;
406   if (strchr("diouxXfegEGcspn",type) == NULL)
407     INCOHERENT();               /* unknown type */
408   SRCTXT++;
409 
410   /* rewrite format-string */
411   format_string[0] = '%';
412   format_ptr = &(format_string[1]);
413 
414   if (flag_plus) {
415     *format_ptr = '+';
416     format_ptr++;
417   }
418   if (flag_minus) {
419     *format_ptr = '-';
420     format_ptr++;
421   }
422   if (flag_space) {
423     *format_ptr = ' ';
424     format_ptr++;
425   }
426   if (flag_sharp) {
427     *format_ptr = '#';
428     format_ptr++;
429   }
430   if (flag_zero) {
431     *format_ptr = '0';
432     format_ptr++;
433   } /* '0' *must* be the last one */
434 
435   if (width != -1) {
436     sprintf(format_ptr, "%i", width);
437     format_ptr += strlen(format_ptr);
438   }
439 
440   if (prec != -1) {
441     *format_ptr = '.';
442     format_ptr++;
443     sprintf(format_ptr, "%i", prec);
444     format_ptr += strlen(format_ptr);
445   }
446 
447   if (modifier != -1) {
448     if (modifier == 'L' && strchr("diouxX",type) != NULL) {
449       *format_ptr = 'l';
450       format_ptr++;
451       *format_ptr = 'l';
452       format_ptr++;
453     } else {
454       *format_ptr = modifier;
455       format_ptr++;
456     }
457   }
458 
459   *format_ptr = type;
460   format_ptr++;
461   *format_ptr = 0;
462 
463   /* vague approximation of minimal length if width or prec are specified */
464   approx_width = width + prec;
465   if (approx_width < 0) /* because width == -1 and/or prec == -1 */
466     approx_width = 0;
467 
468   switch (type) {
469     /* int */
470   case 'd':
471   case 'i':
472   case 'o':
473   case 'u':
474   case 'x':
475   case 'X':
476     switch (modifier) {
477     case -1 :
478       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
479     case 'L':
480       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long long int));
481     case 'l':
482       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long int));
483     case 'h':
484       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
485     case 'z':
486       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, size_t));
487     case 't':
488       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, ptrdiff_t));
489       /* 'int' instead of 'short int' because default promotion is 'int' */
490     default:
491       INCOHERENT();
492     }
493 
494     /* char */
495   case 'c':
496     if (modifier != -1)
497       INCOHERENT();
498     return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
499     /* 'int' instead of 'char' because default promotion is 'int' */
500 
501     /* math */
502   case 'e':
503   case 'f':
504   case 'g':
505   case 'E':
506   case 'G':
507     switch (modifier) {
508     case -1 : /* because of default promotion, no modifier means 'l' */
509     case 'l':
510       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, double));
511     case 'L':
512       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long double));
513     default:
514       INCOHERENT();
515     }
516 
517     /* string */
518   case 's':
519     return type_s(s, width, prec, format_string, va_arg(s->vargs, const char*));
520 
521     /* pointer */
522   case 'p':
523     if (modifier == -1)
524       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, void *));
525     INCOHERENT();
526 
527     /* store */
528   case 'n':
529     if (modifier == -1) {
530       int * p;
531       p = va_arg(s->vargs, int *);
532       if (p != NULL) {
533         *p = s->pseudo_len;
534         return 0;
535       }
536       return EOF;
537     }
538     INCOHERENT();
539 
540   } /* switch */
541 
542   INCOHERENT();                 /* unknown type */
543 
544 #undef INCOHERENT
545 #undef INCOHERENT_TEST
546 #undef SRCTXT
547 #undef DESTTXT
548 }
549 
550 /*
551  *  Return value: number of *virtually* written characters
552  *                EOF = error
553  */
554 static int core(xprintf_struct *s)
555 {
556   size_t save_len;
557   char *dummy_base;
558 
559   /* basic checks */
560   if ((int)(s->maxlen) <= 0) /* 'int' to check against some conversion */
561     return EOF;           /* error for example if value is (int)-10 */
562   s->maxlen--;      /* because initial maxlen counts final 0 */
563   /* note: now 'maxlen' _can_ be zero */
564 
565   if (s->src_string == NULL)
566     s->src_string = "(null)";
567 
568   /* struct init and memory allocation */
569   s->buffer_base = NULL;
570   s->buffer_len = 0;
571   s->real_len = 0;
572   s->pseudo_len = 0;
573   if (realloc_buff(s, (size_t)0) == EOF)
574     return EOF;
575   s->dest_string = s->buffer_base;
576 
577   /* process source string */
578   for (;;) {
579     /* up to end of source string */
580     if (*(s->src_string) == 0) {
581       *(s->dest_string) = '\0';    /* final NUL */
582       break;
583     }
584 
585     if (dispatch(s) == EOF)
586       goto free_EOF;
587 
588     /* up to end of dest string */
589     if (s->real_len >= s->maxlen) {
590       (s->buffer_base)[s->maxlen] = '\0'; /* final NUL */
591       break;
592     }
593   }
594 
595   /* for (v)asnprintf */
596   dummy_base = s->buffer_base + s->real_len;
597   save_len = s->real_len;
598 
599   /* process the remaining of source string to compute 'pseudo_len'. We
600    * overwrite again and again, starting at 'dummy_base' because we don't
601    * need the text, only char count. */
602   while(*(s->src_string) != 0) { /* up to end of source string */
603     s->real_len = 0;
604     s->dest_string = dummy_base;
605     if (dispatch(s) == EOF)
606       goto free_EOF;
607   }
608 
609   s->buffer_base = (char *)realloc((void *)(s->buffer_base), save_len + 1);
610   if (s->buffer_base == NULL)
611     return EOF; /* should rarely happen because we shrink the buffer */
612   return s->pseudo_len;
613 
614  free_EOF:
615   free(s->buffer_base);
616   return EOF;
617 }
618 
619 int vasprintf(char **ptr, const char *format_string, va_list vargs)
620 {
621   xprintf_struct s;
622   int retval;
623 
624   memset(&s, 0, sizeof(s));
625   s.src_string = format_string;
626 #ifdef va_copy
627   va_copy (s.vargs, vargs);
628 #else
629 # ifdef __va_copy
630   __va_copy (s.vargs, vargs);
631 # else
632 #  ifdef WIN32
633   s.vargs = vargs;
634 #  else
635   memcpy (&s.vargs, &vargs, sizeof (s.vargs));
636 #  endif /* WIN32 */
637 # endif /* __va_copy */
638 #endif /* va_copy */
639   s.maxlen = (size_t)INT_MAX;
640 
641   retval = core(&s);
642   va_end(s.vargs);
643   if (retval == EOF) {
644     *ptr = NULL;
645     return EOF;
646   }
647 
648   *ptr = s.buffer_base;
649   return retval;
650 }
651