xref: /netbsd/external/bsd/file/dist/src/vasprintf.c (revision 6550d01e)
1 /*	$NetBSD: vasprintf.c,v 1.1.1.1 2009/05/08 16:35:05 christos Exp $	*/
2 
3 /*
4  * Copyright (c) Ian F. Darwin 1986-1995.
5  * Software written by Ian F. Darwin and others;
6  * maintained 1995-present by Christos Zoulas and others.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice immediately at the beginning of the file, without modification,
13  *    this list of conditions, and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
22  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 /*###########################################################################
31   #                                                                           #
32   #                                vasprintf                                  #
33   #                                                                           #
34   #               Copyright (c) 2002-2005 David TAILLANDIER                   #
35   #                                                                           #
36   ###########################################################################*/
37 
38 /*
39 
40 This software is distributed under the "modified BSD licence".
41 
42 This software is also released with GNU license (GPL) in another file (same
43 source-code, only license differ).
44 
45 
46 
47 Redistribution and use in source and binary forms, with or without
48 modification, are permitted provided that the following conditions are met:
49 
50 Redistributions of source code must retain the above copyright notice, this
51 list of conditions and the following disclaimer. Redistributions in binary
52 form must reproduce the above copyright notice, this list of conditions and
53 the following disclaimer in the documentation and/or other materials
54 provided with the distribution. The name of the author may not be used to
55 endorse or promote products derived from this software without specific
56 prior written permission.
57 
58 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
59 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
60 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
61 EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
62 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
63 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
64 OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
65 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
66 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
67 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68 
69 ====================
70 
71 Hacked from xnprintf version of 26th February 2005 to provide only
72 vasprintf by Reuben Thomas <rrt@sc3d.org>.
73 
74 ====================
75 
76 
77 'printf' function family use the following format string:
78 
79 %[flag][width][.prec][modifier]type
80 
81 %% is the escape sequence to print a '%'
82 %  followed by an unknown format will print the characters without
83 trying to do any interpretation
84 
85 flag:   none   +     -     #     (blank)
86 width:  n    0n    *
87 prec:   none   .0    .n     .*
88 modifier:    F N L h l ll    ('F' and 'N' are ms-dos/16-bit specific)
89 type:  d i o u x X f e g E G c s p n
90 
91 
92 The function needs to allocate memory to store the full text before
93 actually writing it, i.e. if you want to fnprintf() 1000 characters, the
94 function will allocate 1000 bytes.
95 This behaviour can be modified: you have to customise the code to flush the
96 internal buffer (writing to screen or file) when it reaches a given size. Then
97 the buffer can have a shorter length. But why? If you really need to write
98 a HUGE string, don't use printf!
99 During the process, some other memory is allocated (1024 bytes minimum)
100 to handle the output of partial sprintf() calls. If you have only 10000 bytes
101 free in memory, you *may* not be able to nprintf() an 8000-byte-long text.
102 
103 note: if a buffer overflow occurs, exit() is called. This situation should
104 never occur ... but if you want to be *really* sure, you have to modify the
105 code to handle those situations (only one place to modify).
106 A buffer overflow can only occur if your sprintf() does strange things or when
107 you use strange formats.
108 
109 */
110 #include "file.h"
111 
112 #ifndef	lint
113 #if 0
114 FILE_RCSID("@(#)$File: vasprintf.c,v 1.7 2009/02/03 20:27:52 christos Exp $")
115 #else
116 __RCSID("$NetBSD: vasprintf.c,v 1.1.1.1 2009/05/08 16:35:05 christos Exp $");
117 #endif
118 #endif	/* lint */
119 
120 #include <assert.h>
121 #include <string.h>
122 #include <stdlib.h>
123 #include <stdarg.h>
124 #include <ctype.h>
125 #ifdef HAVE_LIMITS_H
126 #include <limits.h>
127 #endif
128 
/*
 * Output buffer sizing policy used by realloc_buff():
 *   ALLOC_SECURITY_MARGIN  slack that must remain free past the estimated
 *                          output length (generous, because some platforms
 *                          print very large 'G' exponents);
 *   ALLOC_CHUNK            extra room added each time the buffer grows.
 * ALLOC_CHUNK must not be smaller than the margin, and only pays off when it
 * is much larger than it.
 */
#define ALLOC_SECURITY_MARGIN 1024
#define ALLOC_CHUNK 2048
#if !(ALLOC_CHUNK >= ALLOC_SECURITY_MARGIN)
#    error  !!! ALLOC_CHUNK < ALLOC_SECURITY_MARGIN !!!
#endif
135 
/*
 *  Formatting state shared by all the helpers below.  Keeping everything in
 *  one structure means a single pointer is passed around instead of a long
 *  argument list.
 */
typedef struct {
  const char *src_string;     /* read cursor in the format string */
  char *buffer_base;          /* start of the heap-allocated output buffer */
  char *dest_string;          /* write cursor inside the output buffer */
  size_t buffer_len;          /* allocated size of the output buffer, in bytes */
  size_t real_len;            /* number of characters actually stored so far */
  size_t pseudo_len;          /* number of characters the full output would
                               * contain if it were not limited by maxlen */
  size_t maxlen;              /* upper bound on the output size; core() first
                               * subtracts one for the terminating 0 */
  va_list vargs;              /* caller's arguments, consumed as conversions
                               * are processed */
  char *sprintf_string;       /* not referenced by the code in this file */
  FILE *fprintf_file;         /* not referenced by the code in this file */
} xprintf_struct;
152 
153 /*
154  *  Realloc buffer if needed
155  *  Return value:  0 = ok
156  *               EOF = not enought memory
157  */
158 static int realloc_buff(xprintf_struct *s, size_t len)
159 {
160   char * ptr;
161 
162   if (len + ALLOC_SECURITY_MARGIN + s->real_len > s->buffer_len) {
163     len += s->real_len + ALLOC_CHUNK;
164     ptr = (char *)realloc((void *)(s->buffer_base), len);
165     if (ptr == NULL) {
166       s->buffer_base = NULL;
167       return EOF;
168     }
169 
170     s->dest_string = ptr + (size_t)(s->dest_string - s->buffer_base);
171     s->buffer_base = ptr;
172     s->buffer_len = len;
173 
174     (s->buffer_base)[s->buffer_len - 1] = 1; /* overflow marker */
175   }
176 
177   return 0;
178 }
179 
180 /*
181  *  Prints 'usual' characters    up to next '%'
182  *                            or up to end of text
183  */
184 static int usual_char(xprintf_struct * s)
185 {
186   size_t len;
187 
188   len = strcspn(s->src_string, "%");     /* reachs the next '%' or end of input string */
189   /* note: 'len' is never 0 because the presence of '%' */
190   /* or end-of-line is checked in the calling function  */
191 
192   if (realloc_buff(s,len) == EOF)
193     return EOF;
194 
195   memcpy(s->dest_string, s->src_string, len);
196   s->src_string += len;
197   s->dest_string += len;
198   s->real_len += len;
199   s->pseudo_len += len;
200 
201   return 0;
202 }
203 
204 /*
205  *  Return value: 0 = ok
206  *                EOF = error
207  */
208 static int print_it(xprintf_struct *s, size_t approx_len,
209                     const char *format_string, ...)
210 {
211   va_list varg;
212   int vsprintf_len;
213   size_t len;
214 
215   if (realloc_buff(s,approx_len) == EOF)
216     return EOF;
217 
218   va_start(varg, format_string);
219   vsprintf_len = vsprintf(s->dest_string, format_string, varg);
220   va_end(varg);
221 
222   /* Check for overflow */
223   assert((s->buffer_base)[s->buffer_len - 1] == 1);
224 
225   if (vsprintf_len == EOF) /* must be done *after* overflow-check */
226     return EOF;
227 
228   s->pseudo_len += vsprintf_len;
229   len = strlen(s->dest_string);
230   s->real_len += len;
231   s->dest_string += len;
232 
233   return 0;
234 }
235 
236 /*
237  *  Prints a string (%s)
238  *  We need special handling because:
239  *     a: the length of the string is unknown
240  *     b: when .prec is used, we must not access any extra byte of the
241  *        string (of course, if the original sprintf() does... what the
242  *        hell, not my problem)
243  *
244  *  Return value: 0 = ok
245  *                EOF = error
246  */
247 static int type_s(xprintf_struct *s, int width, int prec,
248                   const char *format_string, const char *arg_string)
249 {
250   size_t string_len;
251 
252   if (arg_string == NULL)
253     return print_it(s, (size_t)6, "(null)", 0);
254 
255   /* hand-made strlen() whitch stops when 'prec' is reached. */
256   /* if 'prec' is -1 then it is never reached. */
257   string_len = 0;
258   while (arg_string[string_len] != 0 && (size_t)prec != string_len)
259     string_len++;
260 
261   if (width != -1 && string_len < (size_t)width)
262     string_len = (size_t)width;
263 
264   return print_it(s, string_len, format_string, arg_string);
265 }
266 
/*
 *  Read a series of decimal digits and advance *string past all of them.
 *  Stops at the first non-digit character.
 *
 *  Return value: the value read, saturated to the range 0..32767.  A number
 *  too large to represent yields 32767; the computation never overflows,
 *  whatever the number of digits.
 */
static int getint(const char **string)
{
  int value = 0;

  while (isdigit((unsigned char)**string) != 0) {
    int digit = **string - '0';

    /* value * 10 + digit would exceed 32767: saturate, but keep consuming
     * the remaining digits so the caller resumes after the number */
    if (value > (32767 - digit) / 10)
      value = 32767;
    else
      value = value * 10 + digit;

    (*string)++;
  }

  return value;
}
288 
289 /*
290  *  Read a part of the format string. A part is 'usual characters' (ie "blabla")
291  *  or '%%' escape sequence (to print a single '%') or any combination of
292  *  format specifier (ie "%i" or "%10.2d").
293  *  After the current part is managed, the function returns to caller with
294  *  everything ready to manage the following part.
295  *  The caller must ensure than the string is not empty, i.e. the first byte
296  *  is not zero.
297  *
298  *  Return value:  0 = ok
299  *                 EOF = error
300  */
301 static int dispatch(xprintf_struct *s)
302 {
303   const char *initial_ptr;
304   char format_string[24]; /* max length may be something like  "% +-#032768.32768Ld" */
305   char *format_ptr;
306   int flag_plus, flag_minus, flag_space, flag_sharp, flag_zero;
307   int width, prec, modifier, approx_width;
308   char type;
309   /* most of those variables are here to rewrite the format string */
310 
311 #define SRCTXT  (s->src_string)
312 #define DESTTXT (s->dest_string)
313 
314   /* incoherent format string. Characters after the '%' will be printed with the next call */
315 #define INCOHERENT()         do {SRCTXT=initial_ptr; return 0;} while (0)     /* do/while to avoid */
316 #define INCOHERENT_TEST()    do {if(*SRCTXT==0)   INCOHERENT();} while (0)    /* a null statement  */
317 
318   /* 'normal' text */
319   if (*SRCTXT != '%')
320     return usual_char(s);
321 
322   /* we then have a '%' */
323   SRCTXT++;
324   /* don't check for end-of-string ; this is done later */
325 
326   /* '%%' escape sequence */
327   if (*SRCTXT == '%') {
328     if (realloc_buff(s, (size_t)1) == EOF) /* because we can have "%%%%%%%%..." */
329       return EOF;
330     *DESTTXT = '%';
331     DESTTXT++;
332     SRCTXT++;
333     (s->real_len)++;
334     (s->pseudo_len)++;
335     return 0;
336   }
337 
338   /* '%' managing */
339   initial_ptr = SRCTXT;   /* save current pointer in case of incorrect */
340   /* 'decoding'. Points just after the '%' so the '%' */
341   /* won't be printed in any case, as required. */
342 
343   /* flag */
344   flag_plus = flag_minus = flag_space = flag_sharp = flag_zero = 0;
345 
346   for (;; SRCTXT++) {
347     if (*SRCTXT == ' ')
348       flag_space = 1;
349     else if (*SRCTXT == '+')
350       flag_plus = 1;
351     else if (*SRCTXT == '-')
352       flag_minus = 1;
353     else if (*SRCTXT == '#')
354       flag_sharp = 1;
355     else if (*SRCTXT == '0')
356       flag_zero = 1;
357     else
358       break;
359   }
360 
361   INCOHERENT_TEST();    /* here is the first test for end of string */
362 
363   /* width */
364   if (*SRCTXT == '*') {         /* width given by next argument */
365     SRCTXT++;
366     width = va_arg(s->vargs, int);
367     if ((size_t)width > 0x3fffU) /* 'size_t' to check against negative values too */
368       width = 0x3fff;
369   } else if (isdigit((unsigned char)*SRCTXT)) /* width given as ASCII number */
370     width = getint(&SRCTXT);
371   else
372     width = -1;                 /* no width specified */
373 
374   INCOHERENT_TEST();
375 
376   /* .prec */
377   if (*SRCTXT == '.') {
378     SRCTXT++;
379     if (*SRCTXT == '*') {       /* .prec given by next argument */
380       SRCTXT++;
381       prec = va_arg(s->vargs, int);
382       if ((size_t)prec >= 0x3fffU) /* 'size_t' to check against negative values too */
383         prec = 0x3fff;
384     } else {                    /* .prec given as ASCII number */
385       if (isdigit((unsigned char)*SRCTXT) == 0)
386         INCOHERENT();
387       prec = getint(&SRCTXT);
388     }
389     INCOHERENT_TEST();
390   } else
391     prec = -1;                  /* no .prec specified */
392 
393   /* modifier */
394   if (*SRCTXT == 'L' || *SRCTXT == 'h' || *SRCTXT == 'l') {
395     modifier = *SRCTXT;
396     SRCTXT++;
397     if (modifier=='l' && *SRCTXT=='l') {
398       SRCTXT++;
399       modifier = 'L';  /* 'll' == 'L'      long long == long double */
400     } /* only for compatibility ; not portable */
401     INCOHERENT_TEST();
402   } else
403     modifier = -1;              /* no modifier specified */
404 
405   /* type */
406   type = *SRCTXT;
407   if (strchr("diouxXfegEGcspn",type) == NULL)
408     INCOHERENT();               /* unknown type */
409   SRCTXT++;
410 
411   /* rewrite format-string */
412   format_string[0] = '%';
413   format_ptr = &(format_string[1]);
414 
415   if (flag_plus) {
416     *format_ptr = '+';
417     format_ptr++;
418   }
419   if (flag_minus) {
420     *format_ptr = '-';
421     format_ptr++;
422   }
423   if (flag_space) {
424     *format_ptr = ' ';
425     format_ptr++;
426   }
427   if (flag_sharp) {
428     *format_ptr = '#';
429     format_ptr++;
430   }
431   if (flag_zero) {
432     *format_ptr = '0';
433     format_ptr++;
434   } /* '0' *must* be the last one */
435 
436   if (width != -1) {
437     sprintf(format_ptr, "%i", width);
438     format_ptr += strlen(format_ptr);
439   }
440 
441   if (prec != -1) {
442     *format_ptr = '.';
443     format_ptr++;
444     sprintf(format_ptr, "%i", prec);
445     format_ptr += strlen(format_ptr);
446   }
447 
448   if (modifier != -1) {
449     if (modifier == 'L' && strchr("diouxX",type) != NULL) {
450       *format_ptr = 'l';
451       format_ptr++;
452       *format_ptr = 'l';
453       format_ptr++;
454     } else {
455       *format_ptr = modifier;
456       format_ptr++;
457     }
458   }
459 
460   *format_ptr = type;
461   format_ptr++;
462   *format_ptr = 0;
463 
464   /* vague approximation of minimal length if width or prec are specified */
465   approx_width = width + prec;
466   if (approx_width < 0) /* because width == -1 and/or prec == -1 */
467     approx_width = 0;
468 
469   switch (type) {
470     /* int */
471   case 'd':
472   case 'i':
473   case 'o':
474   case 'u':
475   case 'x':
476   case 'X':
477     switch (modifier) {
478     case -1 :
479       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
480     case 'L':
481       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long long int));
482     case 'l':
483       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long int));
484     case 'h':
485       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
486       /* 'int' instead of 'short int' because default promotion is 'int' */
487     default:
488       INCOHERENT();
489     }
490 
491     /* char */
492   case 'c':
493     if (modifier != -1)
494       INCOHERENT();
495     return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
496     /* 'int' instead of 'char' because default promotion is 'int' */
497 
498     /* math */
499   case 'e':
500   case 'f':
501   case 'g':
502   case 'E':
503   case 'G':
504     switch (modifier) {
505     case -1 : /* because of default promotion, no modifier means 'l' */
506     case 'l':
507       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, double));
508     case 'L':
509       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long double));
510     default:
511       INCOHERENT();
512     }
513 
514     /* string */
515   case 's':
516     return type_s(s, width, prec, format_string, va_arg(s->vargs, const char*));
517 
518     /* pointer */
519   case 'p':
520     if (modifier == -1)
521       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, void *));
522     INCOHERENT();
523 
524     /* store */
525   case 'n':
526     if (modifier == -1) {
527       int * p;
528       p = va_arg(s->vargs, int *);
529       if (p != NULL) {
530         *p = s->pseudo_len;
531         return 0;
532       }
533       return EOF;
534     }
535     INCOHERENT();
536 
537   } /* switch */
538 
539   INCOHERENT();                 /* unknown type */
540 
541 #undef INCOHERENT
542 #undef INCOHERENT_TEST
543 #undef SRCTXT
544 #undef DESTTXT
545 }
546 
547 /*
548  *  Return value: number of *virtually* written characters
549  *                EOF = error
550  */
551 static int core(xprintf_struct *s)
552 {
553   size_t len, save_len;
554   char *dummy_base;
555 
556   /* basic checks */
557   if ((int)(s->maxlen) <= 0) /* 'int' to check against some conversion */
558     return EOF;           /* error for example if value is (int)-10 */
559   s->maxlen--;      /* because initial maxlen counts final 0 */
560   /* note: now 'maxlen' _can_ be zero */
561 
562   if (s->src_string == NULL)
563     s->src_string = "(null)";
564 
565   /* struct init and memory allocation */
566   s->buffer_base = NULL;
567   s->buffer_len = 0;
568   s->real_len = 0;
569   s->pseudo_len = 0;
570   if (realloc_buff(s, (size_t)0) == EOF)
571     return EOF;
572   s->dest_string = s->buffer_base;
573 
574   /* process source string */
575   for (;;) {
576     /* up to end of source string */
577     if (*(s->src_string) == 0) {
578       *(s->dest_string) = 0;    /* final 0 */
579       len = s->real_len + 1;
580       break;
581     }
582 
583     if (dispatch(s) == EOF)
584       goto free_EOF;
585 
586     /* up to end of dest string */
587     if (s->real_len >= s->maxlen) {
588       (s->buffer_base)[s->maxlen] = 0; /* final 0 */
589       len = s->maxlen + 1;
590       break;
591     }
592   }
593 
594   /* for (v)asnprintf */
595   dummy_base = s->buffer_base;
596   save_len = 0;                 /* just to avoid a compiler warning */
597 
598   dummy_base = s->buffer_base + s->real_len;
599   save_len = s->real_len;
600 
601   /* process the remaining of source string to compute 'pseudo_len'. We
602    * overwrite again and again, starting at 'dummy_base' because we don't
603    * need the text, only char count. */
604   while(*(s->src_string) != 0) { /* up to end of source string */
605     s->real_len = 0;
606     s->dest_string = dummy_base;
607     if (dispatch(s) == EOF)
608       goto free_EOF;
609   }
610 
611   s->buffer_base = (char *)realloc((void *)(s->buffer_base), save_len + 1);
612   if (s->buffer_base == NULL)
613     return EOF; /* should rarely happen because we shrink the buffer */
614   return s->pseudo_len;
615 
616  free_EOF:
617   if (s->buffer_base != NULL)
618     free(s->buffer_base);
619   return EOF;
620 }
621 
622 int vasprintf(char **ptr, const char *format_string, va_list vargs)
623 {
624   xprintf_struct s;
625   int retval;
626 
627   s.src_string = format_string;
628 #ifdef va_copy
629   va_copy (s.vargs, vargs);
630 #else
631 #ifdef __va_copy
632   __va_copy (s.vargs, vargs);
633 #else
634   memcpy (&s.vargs, vargs, sizeof (va_list));
635 #endif /* __va_copy */
636 #endif /* va_copy */
637   s.maxlen = (size_t)INT_MAX;
638 
639   retval = core(&s);
640   va_end(s.vargs);
641   if (retval == EOF) {
642     *ptr = NULL;
643     return EOF;
644   }
645 
646   *ptr = s.buffer_base;
647   return retval;
648 }
649