1 /* fmt/multi.c - Format multiple items.
2  * Copyright (C) 2005  Bruce Guenter <bruce@untroubled.org>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
17  */
18 #include <errno.h>
19 #include <stdarg.h>
20 #include <string.h>
21 #include "fmt.h"
22 #include "fmt.h"
23 #include "fmt.h"
24 #include "str.h"
25 
26 /** Format multiple items.
27 
28 The \c format string used to describe the multiple items is related to
29 what is used with printf and related functions, but has one critical
30 difference: instead of formatted items being escaped, literal text must
31 be escaped.  This solves the largest security issue with using
32 printf-style format strings, which is the possibility of accidentally
33 treating untrusted text as the format string.
34 
35 The format string is composed of zero or more format items.  Each item
36 is composed of the following parts:
37 
38 <b>Zero or more flag characters:</b>
39 
40 <dl>
41 
42 <dt>\c # <dd>Use an "alternate form" to convert the value.  For octal
43 conversion, the result is prefixed with a \c 0 .  For hexadecimal
44 conversion, the result is prefixed with either \c 0x or \c 0X depending
45 on the conversion type.
46 
47 <dt>\c - <dd>(not implemented) Pad on the right (left justified) instead
48 of on the left (default right justified).
49 
50 <dt>\c 0 <dd>Pad the result with zeros instead of spaces.
51 
52 </dl>
53 
54 <b>Field width.</b>
55 
The optional field width is a decimal digit string specifying the minimum
field width.  If the converted value has fewer characters than the field
width, it will be padded out to the field width.
59 
60 <b>Length modifier:</b>
61 
62 <dl>
63 
64 <dt>\c l <dd>The following integer conversion uses a \c long type.
65 
66 <dt>\c ll <dd>The following integer conversion uses a \c long \c long
67 type.
68 
69 </dl>
70 
71 <b>Conversion specifier.</b>
72 
73 <dl>
74 
75 <dt>\c d <dt>\c i <dd>The \c int argument is converted to a signed
76 decimal string.
77 
<dt>\c o <dt>\c u <dt>\c x <dt>\c X <dd>The \c unsigned \c int argument
is converted to an unsigned octal, unsigned decimal, lowercase unsigned
hexadecimal, or uppercase unsigned hexadecimal string respectively.
81 
82 <dt>\c c <dd>The \c int argument is converted to an unsigned char.
83 
84 <dt>\c s <dd>The \c const \c char* argument is converted.
85 
86 <dt>\c S <dd>The \c const \c str* argument is converted.
87 
88 <dt>\c p <dd>The \c void* argument is converted to a hexadecimal string.
89 
90 <dt>\c m <dd>The result of \c strerror(errno) is formatted.
91 
92 <dt>\c \\ <dd>The next character literal from the format string is
93 converted as with \c c conversion.
94 
<dt>\c {string} <dd>The literal string enclosed by the braces is
converted as with \c s conversion.
97 
98 <dt>\c @ <dd>Formats an arbitrary object using two arguments: The first
99 ::fmt_function argument is used to format the following \c void*
100 argument.  The ::fmt_function is passed the output buffer, the data
101 pointer, width, and pad character, and is expected to return the number
102 of bytes formatted.
103 
104 </dl>
105 */
unsigned fmt_multi(char* buffer, const char* format, ...)
{
  va_list args;
  unsigned total;

  /* Collect the variable arguments and hand all the real work to the
   * va_list based formatter. */
  va_start(args, format);
  total = fmt_multiv(buffer, format, args);
  va_end(args);

  /* The length reported by fmt_multiv is returned unchanged. */
  return total;
}
115 
/* Format an unsigned long long number preceded by an optional literal
 * prefix (the "alternate form" marker).
 *
 * When prefix is not 0, its length is deducted from the field width
 * (never going below zero) and, if buffer is not 0, its characters are
 * copied to the output ahead of the number.  Note that the prefix is
 * emitted before any padding produced by fmt_ullnumw.
 *
 * Returns the prefix length plus whatever fmt_ullnumw reports. */
static unsigned fmt_ullnumwa(char* buffer, unsigned long long u,
                             unsigned width, char pad,
                             unsigned base, const char* digits,
                             const char* prefix)
{
  unsigned plen;

  /* Without a prefix this is a plain pass-through. */
  if (prefix == 0)
    return fmt_ullnumw(buffer, u, width, pad, base, digits);

  plen = strlen(prefix);

  /* The prefix uses up part of the requested field width. */
  if (width > plen)
    width -= plen;
  else
    width = 0;

  /* A zero buffer means "measure only": nothing is written and the
   * pointer is passed on unchanged. */
  if (buffer != 0) {
    memcpy(buffer, prefix, plen);
    buffer += plen;
  }

  return plen + fmt_ullnumw(buffer, u, width, pad, base, digits);
}
131 
/* Format an unsigned long number preceded by an optional literal prefix
 * (the "alternate form" marker).
 *
 * When prefix is not 0, its length is deducted from the field width
 * (never going below zero) and, if buffer is not 0, its characters are
 * copied to the output ahead of the number.  Note that the prefix is
 * emitted before any padding produced by fmt_unumw.
 *
 * Returns the prefix length plus whatever fmt_unumw reports. */
static unsigned fmt_unumwa(char* buffer, unsigned long u,
                           unsigned width, char pad,
                           unsigned base, const char* digits,
                           const char* prefix)
{
  unsigned plen;

  /* Without a prefix this is a plain pass-through. */
  if (prefix == 0)
    return fmt_unumw(buffer, u, width, pad, base, digits);

  plen = strlen(prefix);

  /* The prefix uses up part of the requested field width. */
  if (width > plen)
    width -= plen;
  else
    width = 0;

  /* A zero buffer means "measure only": nothing is written and the
   * pointer is passed on unchanged. */
  if (buffer != 0) {
    memcpy(buffer, prefix, plen);
    buffer += plen;
  }

  return plen + fmt_unumw(buffer, u, width, pad, base, digits);
}
147 
148 /** Format multiple items, using a va_list.
149 
150 This is the core function used to format multiple items.
151 */
fmt_multiv(char * buffer,const char * format,va_list ap)152 unsigned fmt_multiv(char* buffer, const char* format, va_list ap)
153 {
154   unsigned length;
155 
156   for (length = 0; *format != 0; ++format) {
157     int pad = ' ';
158     int islong = 0;
159     int islonglong = 0;
160     int altfmt = 0;
161     int leftadj = 0;
162     unsigned width;
163     unsigned ilength;
164     long long value = 0;
165     const char* altstr;
166     char conv;
167     const str* strp;
168     const void* voidp;
169     fmt_function fn;
170 
171     for (; *format != 0; ++format) {
172       switch (*format) {
173       case '#': altfmt = 1; continue;
174       case '-': leftadj = 1; continue;
175       case '0': pad = '0'; continue;
176       }
177       break;
178     }
179     for (width = 0; *format >= '0' && *format <= '9'; ++format)
180       width = (width * 10) + (*format - '0');
181     while (*format == 'l') {
182       ++format;
183       ++islong;
184     }
185     islonglong = islong >= 2;
186     conv = *format;
187     switch (conv) {
188     case 'i': case 'd': case 'o': case 'u': case 'x': case 'X':
189       value = islonglong
190 	? va_arg(ap, long long)
191 	: islong
192 	? va_arg(ap, long)
193 	: va_arg(ap, int);
194     }
195     switch (conv) {
196     case 'i':
197     case 'd':
198       ilength = islonglong
199 	? fmt_sllnumw(buffer, value, width, pad, 10, fmt_lcase_digits)
200 	: fmt_snumw(buffer, value, width, pad, 10, fmt_lcase_digits);
201       break;
202     case 'o':
203       altstr = altfmt ? "0" : 0;
204       ilength = islonglong
205 	? fmt_ullnumwa(buffer, value, width, pad, 8, fmt_lcase_digits, altstr)
206 	: fmt_unumwa(buffer, value, width, pad, 8, fmt_lcase_digits, altstr);
207       break;
208     case 'u':
209       ilength = islonglong
210 	? fmt_ullnumw(buffer, value, width, pad, 10, fmt_lcase_digits)
211 	: fmt_unumw(buffer, value, width, pad, 10, fmt_lcase_digits);
212       break;
213     case 'x':
214       altstr = altfmt ? "0x" : 0;
215       ilength = islonglong
216 	? fmt_ullnumwa(buffer, value, width, pad, 16, fmt_lcase_digits, altstr)
217 	: fmt_unumwa(buffer, value, width, pad, 16, fmt_lcase_digits, altstr);
218       break;
219     case 'X':
220       altstr = altfmt ? "0X" : 0;
221       ilength = islonglong
222 	? fmt_ullnumwa(buffer, value, width, pad, 16, fmt_ucase_digits, altstr)
223 	: fmt_unumwa(buffer, value, width, pad, 16, fmt_ucase_digits, altstr);
224       break;
225     case 'c':
226       ilength = fmt_char(buffer, va_arg(ap, int), width, pad);
227       break;
228     case 's':
229       ilength = fmt_chars(buffer, va_arg(ap, const char*), width, pad);
230       break;
231     case 'S':
232       strp = va_arg(ap, const str*);
233       ilength = fmt_mem(buffer, strp->s, strp->len, width, pad);
234       break;
235     case 'p':
236       ilength = fmt_unumwa(buffer, (unsigned long)va_arg(ap, void*),
237 			   width, pad, 16, fmt_lcase_digits, "0x");
238       break;
239     case 'm':
240       ilength = fmt_chars(buffer, strerror(errno), width, pad);
241       break;
242     case '@':
243       fn = va_arg(ap, fmt_function);
244       voidp = va_arg(ap, const void*);
245       ilength = fn(buffer, voidp, width, pad);
246       break;
247     case '\\':
248       ilength = fmt_char(buffer, *++format, width, pad);
249       break;
250     case '{':
251       {
252 	const char* start = ++format;
253 	for (; *format != 0 && *format != '}'; ++format)
254 	  ;
255 	ilength = fmt_mem(buffer, start, format-start, width, pad);
256       }
257       break;
258     default:
259       ilength = 0;
260     }
261     if (buffer != 0)
262       buffer += ilength;
263     length += ilength;
264   }
265   return length;
266 }
267 
268 #ifdef SELFTEST_MAIN
/* Self-test helper usable with the '@' conversion: formats the data
 * pointer as the word "true" when it is non-null and "false" otherwise,
 * forwarding width and pad to fmt_chars. */
unsigned fmt_bool(char* buffer, const void* data, unsigned width, char pad)
{
  const char* word = "false";

  if (data != 0)
    word = "true";
  return fmt_chars(buffer, word, width, pad);
}
273 
testit(const char * format,...)274 void testit(const char* format, ...)
275 {
276   char buffer[100];
277   unsigned length;
278   va_list ap;
279 
280   va_start(ap, format);
281   length = fmt_multiv(0, format, ap);
282   va_end(ap);
283   obuf_putu(&outbuf, length);
284   obuf_putc(&outbuf, ':');
285 
286   va_start(ap, format);
287   length = fmt_multiv(buffer, format, ap);
288   va_end(ap);
289   obuf_putu(&outbuf, length);
290   obuf_putc(&outbuf, ':');
291   obuf_write(&outbuf, buffer, length);
292   obuf_endl(&outbuf);
293 }
294 
/* Self-test driver.  Each testit() call below produces one line of
 * output, matched in order against the SELFTEST_EXP section. */
MAIN
{
  /* Strings with no width, width 1, width 5 and zero-padded width 5,
   * separated by brace and backslash literals. */
  testit("s{|}1s{|zzz|}5s\\|05s", "one", "two", "nine", "ten");
  /* Characters with the same set of widths. */
  testit("c\\|1c\\|5c\\|05c", 'a', 'b', 'c', 'd');
  /* Zero-padded brace literal. */
  testit("09{foo}");
  /* Octal, lower/upper hexadecimal and decimal conversions of an int. */
  testit("o\\|x\\|X\\|i\\|d", 95, 95, 95, 95, 95);
  /* The same conversions with the '#' alternate-form flag. */
  testit("#o\\|#x\\|#X\\|#i\\|#d", 95, 95, 95, 95, 95);
  /* Pointer conversion. */
  testit("p", (void*)12345678);
  /* Each integer conversion with no modifier, 'l' and 'll'. */
  testit("o\\|lo\\|llo", 505050505U, 505050505UL, 5050505050505050505ULL);
  testit("u\\|lu\\|llu", 505050505U, 505050505UL, 5050505050505050505ULL);
  testit("x\\|lx\\|llx", 505050505U, 505050505UL, 5050505050505050505ULL);
  testit("X\\|lX\\|llX", 505050505U, 505050505UL, 5050505050505050505ULL);
  testit("i\\|li\\|lli", 505050505U, 505050505UL, 5050505050505050505ULL);
  /* Negated values, expected to print as negative signed decimals. */
  testit("i\\|li\\|lli", -505050505U, -505050505UL, -5050505050505050505ULL);
  /* '@' conversion with a custom formatter, space- and zero-padded. */
  testit("10@\\|010@", fmt_bool, (void*)1, fmt_bool, (void*)0);
}
311 #endif
312 #ifdef SELFTEST_EXP
313 23:23:one|two|zzz| nine|00ten
314 15:15:a|b|    c|0000d
315 9:9:000000foo
316 15:15:137|5f|5F|95|95
317 20:20:0137|0x5f|0X5F|95|95
318 8:8:0xbc614e
319 43:43:3606472611|3606472611|430267771265752674611
320 39:39:505050505|505050505|5050505050505050505
321 34:34:1e1a7589|1e1a7589|4616ff95afab7989
322 34:34:1E1A7589|1E1A7589|4616FF95AFAB7989
323 39:39:505050505|505050505|5050505050505050505
324 42:42:-505050505|-505050505|-5050505050505050505
325 21:21:      true|00000false
326 #endif
327