1 /* $NetBSD: vasprintf.c,v 1.1.1.10 2022/09/24 20:07:55 christos Exp $ */
2
3 /*
4 * Copyright (c) Ian F. Darwin 1986-1995.
5 * Software written by Ian F. Darwin and others;
6 * maintained 1995-present by Christos Zoulas and others.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice immediately at the beginning of the file, without modification,
13 * this list of conditions, and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30 /*###########################################################################
31 # #
32 # vasprintf #
33 # #
34 # Copyright (c) 2002-2005 David TAILLANDIER #
35 # #
36 ###########################################################################*/
37
38 /*
39
40 This software is distributed under the "modified BSD licence".
41
42 This software is also released with GNU license (GPL) in another file (same
43 source-code, only license differ).
44
45
46
47 Redistribution and use in source and binary forms, with or without
48 modification, are permitted provided that the following conditions are met:
49
50 Redistributions of source code must retain the above copyright notice, this
51 list of conditions and the following disclaimer. Redistributions in binary
52 form must reproduce the above copyright notice, this list of conditions and
53 the following disclaimer in the documentation and/or other materials
54 provided with the distribution. The name of the author may not be used to
55 endorse or promote products derived from this software without specific
56 prior written permission.
57
58 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
59 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
60 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
61 EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
62 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
63 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
64 OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
65 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
66 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
67 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68
69 ====================
70
71 Hacked from xnprintf version of 26th February 2005 to provide only
72 vasprintf by Reuben Thomas <rrt@sc3d.org>.
73
74 ====================
75
76
77 'printf' function family use the following format string:
78
79 %[flag][width][.prec][modifier]type
80
81 %% is the escape sequence to print a '%'
82 % followed by an unknown format will print the characters without
83 trying to do any interpretation
84
85 flag: none + - # (blank)
86 width: n 0n *
87 prec: none .0 .n .*
88 modifier: F N L h l ll z t ('F' and 'N' are ms-dos/16-bit specific)
89 type: d i o u x X f e g E G c s p n
90
91
The function needs to allocate memory to store the full text before
actually writing it, i.e. if you want to fnprintf() 1000 characters, the
function will allocate 1000 bytes.
This behaviour can be modified: you have to customise the code to flush the
internal buffer (writing to screen or file) when it reaches a given size. Then
the buffer can have a shorter length. But why bother? If you really need to
write a HUGE string, don't use printf!
During the process, some other memory is allocated (1024 bytes minimum)
to handle the output of partial sprintf() calls. If you have only 10000 bytes
free in memory, you *may* not be able to nprintf() an 8000-byte-long text.
102
note: if a buffer overflow occurs, exit() is called. This situation should
never appear ... but if you want to be *really* sure, you have to modify the
code to handle those situations (only one place to modify).
A buffer overflow can only occur if your sprintf() does strange things or when
you use strange formats.
108
109 */
110 #include "file.h"
111
112 #ifndef lint
113 #if 0
114 FILE_RCSID("@(#)$File: vasprintf.c,v 1.21 2022/09/16 13:49:39 christos Exp $")
115 #else
116 __RCSID("$NetBSD: vasprintf.c,v 1.1.1.10 2022/09/24 20:07:55 christos Exp $");
117 #endif
118 #endif /* lint */
119
120 #include <assert.h>
121 #include <string.h>
122 #include <stdlib.h>
123 #include <stdarg.h>
124 #include <ctype.h>
125 #include <limits.h>
126 #include <stddef.h>
127
/* Extra bytes requested, on top of what is needed, each time the output
 * buffer is enlarged. */
#define ALLOC_CHUNK 2048

/* Spare bytes that must stay free beyond the estimated size of the next
 * write.  Deliberately large because some platforms have a very big 'G'
 * exponent. */
#define ALLOC_SECURITY_MARGIN 1024

/* A growth step smaller than the margin would defeat the margin.  To be of
 * any use, ALLOC_CHUNK should be much greater than ALLOC_SECURITY_MARGIN. */
#if ALLOC_SECURITY_MARGIN > ALLOC_CHUNK
# error !!! ALLOC_CHUNK < ALLOC_SECURITY_MARGIN !!!
#endif
134
/*
 * Whole formatting state, bundled in one structure so that a single pointer
 * can be handed to nearly every helper instead of many separate arguments.
 */
typedef struct {
  const char *src_string;   /* read cursor in the format string */
  char       *buffer_base;  /* start of the output buffer */
  char       *dest_string;  /* write cursor inside the output buffer */
  size_t      buffer_len;   /* number of bytes allocated for the output buffer */
  size_t      real_len;     /* length of the text really stored so far */
  size_t      pseudo_len;   /* length the text would have without any size limit */
  size_t      maxlen;       /* size limit for the stored text */
  va_list     vargs;        /* current position in the variable arguments */
} xprintf_struct;
149
150 /*
151 * Realloc buffer if needed
152 * Return value: 0 = ok
153 * EOF = not enough memory
154 */
realloc_buff(xprintf_struct * s,size_t len)155 static int realloc_buff(xprintf_struct *s, size_t len)
156 {
157 char * ptr;
158
159 if (len + ALLOC_SECURITY_MARGIN + s->real_len > s->buffer_len) {
160 len += s->real_len + ALLOC_CHUNK;
161 ptr = (char *)realloc((void *)(s->buffer_base), len);
162 if (ptr == NULL) {
163 s->buffer_base = NULL;
164 return EOF;
165 }
166
167 s->dest_string = ptr + (size_t)(s->dest_string - s->buffer_base);
168 s->buffer_base = ptr;
169 s->buffer_len = len;
170
171 (s->buffer_base)[s->buffer_len - 1] = 1; /* overflow marker */
172 }
173
174 return 0;
175 }
176
177 /*
178 * Prints 'usual' characters up to next '%'
179 * or up to end of text
180 */
usual_char(xprintf_struct * s)181 static int usual_char(xprintf_struct * s)
182 {
183 size_t len;
184
185 len = strcspn(s->src_string, "%"); /* reaches the next '%' or end of input string */
186 /* note: 'len' is never 0 because the presence of '%' */
187 /* or end-of-line is checked in the calling function */
188
189 if (realloc_buff(s,len) == EOF)
190 return EOF;
191
192 memcpy(s->dest_string, s->src_string, len);
193 s->src_string += len;
194 s->dest_string += len;
195 s->real_len += len;
196 s->pseudo_len += len;
197
198 return 0;
199 }
200
201 /*
202 * Return value: 0 = ok
203 * EOF = error
204 */
print_it(xprintf_struct * s,size_t approx_len,const char * format_string,...)205 static int print_it(xprintf_struct *s, size_t approx_len,
206 const char *format_string, ...)
207 {
208 va_list varg;
209 int vsprintf_len;
210 size_t len;
211
212 if (realloc_buff(s,approx_len) == EOF)
213 return EOF;
214
215 va_start(varg, format_string);
216 vsprintf_len = vsprintf(s->dest_string, format_string, varg);
217 va_end(varg);
218
219 /* Check for overflow */
220 assert((s->buffer_base)[s->buffer_len - 1] == 1);
221
222 if (vsprintf_len == EOF) /* must be done *after* overflow-check */
223 return EOF;
224
225 s->pseudo_len += vsprintf_len;
226 len = strlen(s->dest_string);
227 s->real_len += len;
228 s->dest_string += len;
229
230 return 0;
231 }
232
233 /*
234 * Prints a string (%s)
235 * We need special handling because:
236 * a: the length of the string is unknown
237 * b: when .prec is used, we must not access any extra byte of the
238 * string (of course, if the original sprintf() does... what the
239 * hell, not my problem)
240 *
241 * Return value: 0 = ok
242 * EOF = error
243 */
type_s(xprintf_struct * s,int width,int prec,const char * format_string,const char * arg_string)244 static int type_s(xprintf_struct *s, int width, int prec,
245 const char *format_string, const char *arg_string)
246 {
247 size_t string_len;
248
249 if (arg_string == NULL)
250 return print_it(s, (size_t)6, "(null)", 0);
251
252 /* hand-made strlen() which stops when 'prec' is reached. */
253 /* if 'prec' is -1 then it is never reached. */
254 string_len = 0;
255 while (arg_string[string_len] != 0 && (size_t)prec != string_len)
256 string_len++;
257
258 if (width != -1 && string_len < (size_t)width)
259 string_len = (size_t)width;
260
261 return print_it(s, string_len, format_string, arg_string);
262 }
263
/*
 * Read a series of decimal digits and advance '*string' past all of them.
 * Stops at the first non-digit.
 * Return value: the value read, between 0 and 32767.  Anything bigger,
 * however many digits it has, yields 32767.
 * The value saturates while it is being read, so an arbitrarily long run of
 * digits can neither overflow the accumulator nor wrap around to a small
 * number.
 */
static int getint(const char **string)
{
  long value = 0; /* 'long' holds 32767 * 10 + 9 on every platform */

  while (isdigit((unsigned char)**string) != 0) {
    if (value <= 32767) /* once above the limit, only skip the digits */
      value = value * 10 + (**string - '0');
    (*string)++;
  }

  if (value > 32767)
    value = 32767;

  return (int)value;
}
285
286 /*
287 * Read a part of the format string. A part is 'usual characters' (ie "blabla")
288 * or '%%' escape sequence (to print a single '%') or any combination of
289 * format specifier (ie "%i" or "%10.2d").
290 * After the current part is managed, the function returns to caller with
291 * everything ready to manage the following part.
292 * The caller must ensure than the string is not empty, i.e. the first byte
293 * is not zero.
294 *
295 * Return value: 0 = ok
296 * EOF = error
297 */
dispatch(xprintf_struct * s)298 static int dispatch(xprintf_struct *s)
299 {
300 const char *initial_ptr;
301 char format_string[24]; /* max length may be something like "% +-#032768.32768Ld" */
302 char *format_ptr;
303 int flag_plus, flag_minus, flag_space, flag_sharp, flag_zero;
304 int width, prec, modifier, approx_width;
305 char type;
306 /* most of those variables are here to rewrite the format string */
307
308 #define SRCTXT (s->src_string)
309 #define DESTTXT (s->dest_string)
310
311 /* incoherent format string. Characters after the '%' will be printed with the next call */
312 #define INCOHERENT() do {SRCTXT=initial_ptr; return 0;} while (0) /* do/while to avoid */
313 #define INCOHERENT_TEST() do {if(*SRCTXT==0) INCOHERENT();} while (0) /* a null statement */
314
315 /* 'normal' text */
316 if (*SRCTXT != '%')
317 return usual_char(s);
318
319 /* we then have a '%' */
320 SRCTXT++;
321 /* don't check for end-of-string ; this is done later */
322
323 /* '%%' escape sequence */
324 if (*SRCTXT == '%') {
325 if (realloc_buff(s, (size_t)1) == EOF) /* because we can have "%%%%%%%%..." */
326 return EOF;
327 *DESTTXT = '%';
328 DESTTXT++;
329 SRCTXT++;
330 (s->real_len)++;
331 (s->pseudo_len)++;
332 return 0;
333 }
334
335 /* '%' managing */
336 initial_ptr = SRCTXT; /* save current pointer in case of incorrect */
337 /* 'decoding'. Points just after the '%' so the '%' */
338 /* won't be printed in any case, as required. */
339
340 /* flag */
341 flag_plus = flag_minus = flag_space = flag_sharp = flag_zero = 0;
342
343 for (;; SRCTXT++) {
344 if (*SRCTXT == ' ')
345 flag_space = 1;
346 else if (*SRCTXT == '+')
347 flag_plus = 1;
348 else if (*SRCTXT == '-')
349 flag_minus = 1;
350 else if (*SRCTXT == '#')
351 flag_sharp = 1;
352 else if (*SRCTXT == '0')
353 flag_zero = 1;
354 else
355 break;
356 }
357
358 INCOHERENT_TEST(); /* here is the first test for end of string */
359
360 /* width */
361 if (*SRCTXT == '*') { /* width given by next argument */
362 SRCTXT++;
363 width = va_arg(s->vargs, int);
364 if ((size_t)width > 0x3fffU) /* 'size_t' to check against negative values too */
365 width = 0x3fff;
366 } else if (isdigit((unsigned char)*SRCTXT)) /* width given as ASCII number */
367 width = getint(&SRCTXT);
368 else
369 width = -1; /* no width specified */
370
371 INCOHERENT_TEST();
372
373 /* .prec */
374 if (*SRCTXT == '.') {
375 SRCTXT++;
376 if (*SRCTXT == '*') { /* .prec given by next argument */
377 SRCTXT++;
378 prec = va_arg(s->vargs, int);
379 if ((size_t)prec >= 0x3fffU) /* 'size_t' to check against negative values too */
380 prec = 0x3fff;
381 } else { /* .prec given as ASCII number */
382 if (isdigit((unsigned char)*SRCTXT) == 0)
383 INCOHERENT();
384 prec = getint(&SRCTXT);
385 }
386 INCOHERENT_TEST();
387 } else
388 prec = -1; /* no .prec specified */
389
390 /* modifier */
391 switch (*SRCTXT) {
392 case 'L':
393 case 'h':
394 case 'l':
395 case 'z':
396 case 't':
397 modifier = *SRCTXT;
398 SRCTXT++;
399 if (modifier=='l' && *SRCTXT=='l') {
400 SRCTXT++;
401 modifier = 'L'; /* 'll' == 'L' long long == long double */
402 } /* only for compatibility ; not portable */
403 INCOHERENT_TEST();
404 break;
405 default:
406 modifier = -1; /* no modifier specified */
407 break;
408 }
409
410 /* type */
411 type = *SRCTXT;
412 if (strchr("diouxXfegEGcspn",type) == NULL)
413 INCOHERENT(); /* unknown type */
414 SRCTXT++;
415
416 /* rewrite format-string */
417 format_string[0] = '%';
418 format_ptr = &(format_string[1]);
419
420 if (flag_plus) {
421 *format_ptr = '+';
422 format_ptr++;
423 }
424 if (flag_minus) {
425 *format_ptr = '-';
426 format_ptr++;
427 }
428 if (flag_space) {
429 *format_ptr = ' ';
430 format_ptr++;
431 }
432 if (flag_sharp) {
433 *format_ptr = '#';
434 format_ptr++;
435 }
436 if (flag_zero) {
437 *format_ptr = '0';
438 format_ptr++;
439 } /* '0' *must* be the last one */
440
441 if (width != -1) {
442 sprintf(format_ptr, "%i", width);
443 format_ptr += strlen(format_ptr);
444 }
445
446 if (prec != -1) {
447 *format_ptr = '.';
448 format_ptr++;
449 sprintf(format_ptr, "%i", prec);
450 format_ptr += strlen(format_ptr);
451 }
452
453 if (modifier != -1) {
454 if (modifier == 'L' && strchr("diouxX",type) != NULL) {
455 *format_ptr = 'l';
456 format_ptr++;
457 *format_ptr = 'l';
458 format_ptr++;
459 } else {
460 *format_ptr = modifier;
461 format_ptr++;
462 }
463 }
464
465 *format_ptr = type;
466 format_ptr++;
467 *format_ptr = 0;
468
469 /* vague approximation of minimal length if width or prec are specified */
470 approx_width = width + prec;
471 if (approx_width < 0) /* because width == -1 and/or prec == -1 */
472 approx_width = 0;
473
474 switch (type) {
475 /* int */
476 case 'd':
477 case 'i':
478 case 'o':
479 case 'u':
480 case 'x':
481 case 'X':
482 switch (modifier) {
483 case -1 :
484 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
485 case 'L':
486 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long long int));
487 case 'l':
488 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long int));
489 case 'h':
490 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
491 case 'z':
492 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, size_t));
493 case 't':
494 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, ptrdiff_t));
495 /* 'int' instead of 'short int' because default promotion is 'int' */
496 default:
497 INCOHERENT();
498 }
499
500 /* char */
501 case 'c':
502 if (modifier != -1)
503 INCOHERENT();
504 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
505 /* 'int' instead of 'char' because default promotion is 'int' */
506
507 /* math */
508 case 'e':
509 case 'f':
510 case 'g':
511 case 'E':
512 case 'G':
513 switch (modifier) {
514 case -1 : /* because of default promotion, no modifier means 'l' */
515 case 'l':
516 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, double));
517 case 'L':
518 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long double));
519 default:
520 INCOHERENT();
521 }
522
523 /* string */
524 case 's':
525 return type_s(s, width, prec, format_string, va_arg(s->vargs, const char*));
526
527 /* pointer */
528 case 'p':
529 if (modifier == -1)
530 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, void *));
531 INCOHERENT();
532
533 /* store */
534 case 'n':
535 if (modifier == -1) {
536 int * p;
537 p = va_arg(s->vargs, int *);
538 if (p != NULL) {
539 *p = s->pseudo_len;
540 return 0;
541 }
542 return EOF;
543 }
544 INCOHERENT();
545
546 } /* switch */
547
548 INCOHERENT(); /* unknown type */
549
550 #undef INCOHERENT
551 #undef INCOHERENT_TEST
552 #undef SRCTXT
553 #undef DESTTXT
554 }
555
556 /*
557 * Return value: number of *virtually* written characters
558 * EOF = error
559 */
core(xprintf_struct * s)560 static int core(xprintf_struct *s)
561 {
562 size_t save_len;
563 char *dummy_base;
564
565 /* basic checks */
566 if ((int)(s->maxlen) <= 0) /* 'int' to check against some conversion */
567 return EOF; /* error for example if value is (int)-10 */
568 s->maxlen--; /* because initial maxlen counts final 0 */
569 /* note: now 'maxlen' _can_ be zero */
570
571 if (s->src_string == NULL)
572 s->src_string = "(null)";
573
574 /* struct init and memory allocation */
575 s->buffer_base = NULL;
576 s->buffer_len = 0;
577 s->real_len = 0;
578 s->pseudo_len = 0;
579 if (realloc_buff(s, (size_t)0) == EOF)
580 return EOF;
581 s->dest_string = s->buffer_base;
582
583 /* process source string */
584 for (;;) {
585 /* up to end of source string */
586 if (*(s->src_string) == 0) {
587 *(s->dest_string) = '\0'; /* final NUL */
588 break;
589 }
590
591 if (dispatch(s) == EOF)
592 goto free_EOF;
593
594 /* up to end of dest string */
595 if (s->real_len >= s->maxlen) {
596 (s->buffer_base)[s->maxlen] = '\0'; /* final NUL */
597 break;
598 }
599 }
600
601 /* for (v)asnprintf */
602 dummy_base = s->buffer_base + s->real_len;
603 save_len = s->real_len;
604
605 /* process the remaining of source string to compute 'pseudo_len'. We
606 * overwrite again and again, starting at 'dummy_base' because we don't
607 * need the text, only char count. */
608 while(*(s->src_string) != 0) { /* up to end of source string */
609 s->real_len = 0;
610 s->dest_string = dummy_base;
611 if (dispatch(s) == EOF)
612 goto free_EOF;
613 }
614
615 s->buffer_base = (char *)realloc((void *)(s->buffer_base), save_len + 1);
616 if (s->buffer_base == NULL)
617 return EOF; /* should rarely happen because we shrink the buffer */
618 return s->pseudo_len;
619
620 free_EOF:
621 free(s->buffer_base);
622 return EOF;
623 }
624
vasprintf(char ** ptr,const char * format_string,va_list vargs)625 int vasprintf(char **ptr, const char *format_string, va_list vargs)
626 {
627 xprintf_struct s;
628 int retval;
629
630 memset(&s, 0, sizeof(s));
631 s.src_string = format_string;
632 #ifdef va_copy
633 va_copy (s.vargs, vargs);
634 #else
635 # ifdef __va_copy
636 __va_copy (s.vargs, vargs);
637 # else
638 # ifdef WIN32
639 s.vargs = vargs;
640 # else
641 memcpy (&s.vargs, &vargs, sizeof (s.vargs));
642 # endif /* WIN32 */
643 # endif /* __va_copy */
644 #endif /* va_copy */
645 s.maxlen = (size_t)INT_MAX;
646
647 retval = core(&s);
648 va_end(s.vargs);
649 if (retval == EOF) {
650 *ptr = NULL;
651 return EOF;
652 }
653
654 *ptr = s.buffer_base;
655 return retval;
656 }
657