1 /* Copyright (C) 2001-2012 by George Williams */
2 /*
3 * Redistribution and use in source and binary forms, with or without
4 * modification, are permitted provided that the following conditions are met:
5
6 * Redistributions of source code must retain the above copyright notice, this
7 * list of conditions and the following disclaimer.
8
9 * Redistributions in binary form must reproduce the above copyright notice,
10 * this list of conditions and the following disclaimer in the documentation
11 * and/or other materials provided with the distribution.
12
13 * The name of the author may not be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 #include "ustring.h"
28 #include "utype.h"
29 #include <stddef.h>
30 #include <stdarg.h>
31
32 /* unicode printf. Expect arguments to be given using <num>$ notation */
33 /* But there's no way I'm going to implement all of printf now. I'll do what I */
34 /* think is important and leave the rest for later. Maybe */
35 /* args begin with 1 */
36
/* How the value belonging to an argument slot is fetched from the va_list. */
/*  at_int is 0, so a zero-filled struct args is read as an integer */
enum arg_type {
    at_int,		/* int, or long when is_long is set */
    at_double,		/* double */
    at_ustr,		/* unichar_t * string */
    at_astr,		/* char * string */
    at_iptr		/* int * (target of %n) */
};
38
39 struct args {
40 unsigned int is_alt:1; /* # flag */
41 unsigned int is_zeropad:1; /* 0 flag */
42 unsigned int is_leftadj:1; /* - flag */
43 unsigned int is_blank:1; /* " " flag */
44 unsigned int is_signed:1; /* + flag */
45 unsigned int is_thousand:1; /* ' flag */
46 unsigned int is_short:1; /* h */
47 unsigned int is_long:1; /* l */
48 unsigned int hasformat:1; /* else it's a precision/fieldwidth */
49 char format;
50 int fieldwidth, precision;
51 enum arg_type arg_type;
52 long ival;
53 const unichar_t *uval;
54 double dval;
55 };
56
57 struct state {
58 int argmax;
59 struct args *args;
60 unichar_t *opt, *end;
61 int cnt;
62 };
63
/* Append ch to the output. The character is always counted, but only stored */
/*  while there is room left before state->end. state is evaluated several */
/*  times, so it must not have side effects */
#define addchar(state,ch) \
    ( (state)->cnt++, \
      (state)->opt<(state)->end ? (*(state)->opt++ = (ch)) : 0 )
65
/* Is ch one of the characters which ends a conversion specification? */
/* Returns 1 if so and 0 otherwise. Note that a NUL also yields 1 (it matches */
/*  the end of the list), callers check for the end of string themselves */
static int isspec(int ch) {
    static const char specs[] = "%npSscaAgGfFeEouxXdi";
    int i;

    for ( i=0; specs[i]!='\0'; ++i )
        if ( specs[i]==ch )
            return( 1 );
    return( ch=='\0' );
}
72
/* Output txt (a formatted number or a single character) padded to fieldwidth. */
/*  state	output state, characters are appended with addchar */
/*  arg		index in state->args of the conversion being printed. Its */
/*		 is_zeropad, is_leftadj and format members control the padding */
/*  txt		NUL terminated text to output */
/*  fieldwidth	minimum number of characters to produce, <=0 means no padding */
/* As in printf the '-' flag overrides '0': left adjusted output is always */
/*  padded on the right with blanks (trailing zeros would change the number). */
/* When padding with zeros a leading sign and a "0x"/"0X" prefix are output */
/*  before the zeros ("-0012", "0x001f") rather than after them. This is not */
/*  done for 'c' conversions where the text is a single arbitrary character */
static void padvalue(struct state *state,int arg,unichar_t *txt,int fieldwidth) {
    int len=0, padc=' ';

    if ( fieldwidth>0 ) {
        /* Measure before any prefix is consumed below, so that text+padding */
        /*  still adds up to fieldwidth */
        len = u_strlen(txt);
        if ( !state->args[arg].is_leftadj ) {
            if ( state->args[arg].is_zeropad ) {
                padc = '0';
                if ( state->args[arg].format!='c' ) {
                    if ( *txt=='-' || *txt=='+' || *txt==' ' ) {
                        addchar(state,*txt);
                        ++txt;
                    }
                    /* 'x' is not a digit in any radix so this can only be a prefix */
                    if ( txt[0]=='0' && (txt[1]=='x' || txt[1]=='X') ) {
                        addchar(state,txt[0]);
                        addchar(state,txt[1]);
                        txt += 2;
                    }
                }
            }
            while ( len<fieldwidth ) {
                addchar(state,padc);
                ++len;
            }
        }
    }
    while ( *txt ) {
        addchar(state,*txt);
        ++txt;
    }
    /* Only reached with len<fieldwidth when left adjusting */
    while ( len<fieldwidth ) {
        addchar(state,' ');
        ++len;
    }
}
95
padstr(struct state * state,int arg,const unichar_t * txt,int fieldwidth,int precision)96 static void padstr(struct state *state,int arg,const unichar_t *txt,int fieldwidth, int precision) {
97 int len=0, padc,i;
98
99 if ( fieldwidth>0 ) {
100 len = precision>0?precision:u_strlen(txt);
101 padc = state->args[arg].is_zeropad?'0':' ';
102 if ( !state->args[arg].is_leftadj ) {
103 while ( len<fieldwidth ) {
104 addchar(state,padc);
105 ++len;
106 }
107 }
108 }
109 for ( i=0; *txt && (precision==0 || i<precision); ++i, ++txt )
110 addchar(state,*txt);
111 while ( len<fieldwidth ) {
112 addchar(state,padc);
113 ++len;
114 }
115 }
116
formatarg(struct state * state,int arg)117 static void formatarg(struct state *state,int arg) {
118 static char *hex = "0123456789abcdef", *HEX="0123456789ABCDEF";
119 char *trans;
120 int radix, neg; unsigned long val;
121 unichar_t buf[20], *pt;
122 char cbuf[20];
123 int i, precision, fieldwidth;
124
125 if ( arg<0 || arg>=state->argmax )
126 return;
127 if (( precision = state->args[arg].precision )<0 )
128 precision = state->args[-state->args[arg].precision-1].ival;
129 if (( fieldwidth = state->args[arg].fieldwidth )<0 )
130 fieldwidth = state->args[-state->args[arg].fieldwidth-1].ival;
131 if ( fieldwidth<0 ) {
132 fieldwidth = -fieldwidth;
133 state->args[arg].is_leftadj = true;
134 }
135
136 switch ( state->args[arg].format ) {
137 case 'n':
138 *((int *) (state->args[arg].uval)) = state->cnt;
139 break;
140 case 'c':
141 buf[0] = state->args[arg].ival;
142 buf[1] = '\0';
143 padvalue(state,arg,buf,fieldwidth);
144 break;
145 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
146 trans = state->args[arg].format=='X'?HEX:hex;
147 pt = buf+sizeof(buf)/sizeof(buf[0])-1;
148 *pt-- = '\0';
149 neg = false;
150 radix = state->args[arg].format=='d' || state->args[arg].format=='i' ||
151 state->args[arg].format=='u'?10:
152 state->args[arg].format=='o'?8:16;
153 val = state->args[arg].ival;
154 if ( state->args[arg].ival<0 &&
155 (state->args[arg].format=='d' || state->args[arg].format=='i')) {
156 neg = true;
157 val = -val;
158 }
159 for ( i=0; val!=0 || i<precision; ++i ) {
160 if ( radix==10 && state->args[arg].is_thousand && i!=0 && i%3==0 )
161 *pt-- = ','; /* !!!!! locale !!!!! */
162 *pt-- = trans[val%radix];
163 val /= radix;
164 }
165 if ( state->args[arg].is_alt ) {
166 if ( radix==8 && pt[1]!='0' )
167 *pt-- = '0';
168 else if ( radix==16 && state->args[arg].ival!=0 ) {
169 *pt-- = state->args[arg].format;
170 *pt-- = '0';
171 }
172 }
173 if ( state->args[arg].format=='d' || state->args[arg].format=='i' ) {
174 if ( neg )
175 *pt-- = '-';
176 else if ( state->args[arg].is_signed )
177 *pt-- = '+';
178 else if ( state->args[arg].is_blank )
179 *pt-- = ' ';
180 }
181 padvalue(state,arg,pt+1,fieldwidth);
182 break;
183 case 's':
184 if ( state->args[arg].uval == NULL ) {
185 static unichar_t null[] = { '<','n','u','l','l','>', '\0' };
186 padstr(state,arg,null,fieldwidth,precision);
187 } else if ( state->args[arg].is_short ) {
188 unichar_t *temp = def2u_copy((char *) (state->args[arg].uval));
189 padstr(state,arg,temp,fieldwidth,precision);
190 free(temp);
191 } else
192 padstr(state,arg,state->args[arg].uval,fieldwidth,precision);
193 break;
194 case 'e': case 'E': case 'f': case 'F': case 'g': case 'G': case 'a': case 'A':
195 /* This doesn't really do a good job!!!! */
196 switch ( state->args[arg].format ) {
197 case 'e': case 'E':
198 sprintf(cbuf,"%e",state->args[arg].dval);
199 break;
200 case 'f': case 'F':
201 sprintf(cbuf,"%f",state->args[arg].dval);
202 break;
203 case 'g': case 'G':
204 sprintf(cbuf,"%g",state->args[arg].dval);
205 break;
206 case 'a': case 'A':
207 sprintf(cbuf,"%a",state->args[arg].dval);
208 break;
209 }
210 uc_strcpy(buf,cbuf);
211 padvalue(state,arg,buf,fieldwidth);
212 break;
213 /* a 'p' conversion is converted into the equivalent 'x' conversion earlier */
214 }
215 }
216
/* Format into a unichar_t buffer, a subset of vsnprintf. */
/*  str		output buffer (may be NULL if len is 0) */
/*  len		size of str in characters, including room for the final NUL */
/*  format	NUL terminated format. Understands the flags # 0 - + ' and blank, */
/*		 a field width and precision (either may be '*'), the h and l */
/*		 modifiers and the conversions d i o u x X c s e E f F g G a A p n. */
/*		 Arguments may be selected with <num>$ (numbered from 1) */
/*  ap		the arguments */
/* Returns the number of characters the complete output needs, not counting */
/*  the NUL, even when it did not all fit. If len>0 the buffer is always NUL */
/*  terminated, output which does not fit is cut off. */
/* The work is done in three passes over the format: count the argument slots, */
/*  parse each specification into its slot, then (after all arguments have */
/*  been read in order) produce the output. In all three passes every */
/*  specification and every '*' uses up one sequential argument number, even */
/*  when it names its argument explicitly */
int u_vsnprintf(unichar_t *str, int len, const unichar_t *format, va_list ap ) {
    struct state state;
    struct args args[20], temp;
    const unichar_t *pt, *start;
    int argmax = 0, arg, ac, val, hadarg;

    memset(&state,'\0',sizeof(state));
    memset(args,'\0',sizeof(args));

    /* Pass 1: how many argument slots are needed? */
    ac = 0;
    for ( pt=format; *pt; ) {
        if ( *pt!='%' )
            ++pt;
        else if ( pt[1]=='%' )
            pt += 2;
        else {
            for ( ++pt, arg=0; isdigit(*pt); ++pt )
                arg = 10*arg + tovalue(*pt);
            ++ac;
            if ( *pt=='$' ) {
                if ( arg>argmax ) argmax = arg;
            } else {
                if ( ac>argmax ) argmax = ac;
            }
            while ( *pt && !isspec(*pt)) {
                if ( *pt!='*' ) {
                    ++pt;
                    continue;
                }
                ++ac;
                /* Step over the '*' once only, else the conversion character */
                /*  (or the end of the string) would be skipped */
                for ( ++pt, arg=0; isdigit(*pt); ++pt )
                    arg = 10*arg + tovalue(*pt);
                if ( *pt=='$' ) {
                    if ( arg>argmax ) argmax = arg;
                    ++pt;
                } else {
                    if ( ac>argmax ) argmax = ac;
                }
            }
            /* Use up the conversion character as the later passes do, so that */
            /*  a '%' in that position does not begin a new specification */
            if ( *pt ) ++pt;
        }
    }
    state.argmax = argmax;
    if ( argmax>(int) (sizeof(args)/sizeof(args[0])) )
        state.args = (struct args *) xcalloc(argmax,sizeof(struct args));
    else
        state.args = args;
    if ( str==NULL || len<0 )
        len = 0;
    state.opt = str; state.end = len>0 ? str+len : str;

    /* Pass 2: parse every specification into the slot of its argument */
    ac = 1;
    for ( pt=format; *pt; ) {
        if ( *pt!='%' )
            ++pt;
        else if ( pt[1]=='%' )
            pt+=2;
        else {
            ++pt;
            memset(&temp,'\0',sizeof(temp));
            hadarg = false;
            arg = 0;
            if ( isdigit(*pt)) {
                start = pt;
                for ( ; isdigit(*pt); ++pt )
                    arg = 10*arg + tovalue(*pt);
                if ( *pt=='$' ) {
                    hadarg = true;
                    ++pt;
                } else {
                    /* Not an argument number. Go back and parse the digits as */
                    /*  flags and field width so a leading '0' flag is seen */
                    pt = start;
                }
            }
            while ( 1 ) {
                if ( *pt=='#' ) temp.is_alt=true;
                else if ( *pt=='0' ) temp.is_zeropad=true;
                else if ( *pt=='-' ) temp.is_leftadj=true;
                else if ( *pt==' ' ) temp.is_blank=true;
                else if ( *pt=='+' ) temp.is_signed=true;
                else if ( *pt=='\'' ) temp.is_thousand=true;
                else
                    break;
                ++pt;
            }
            if ( *pt=='*' ) {
                temp.fieldwidth = -ac++;
                for ( ++pt, val=0; isdigit(*pt); ++pt )
                    val = 10*val + tovalue(*pt);
                if ( *pt=='$' ) {
                    temp.fieldwidth = -val;
                    ++pt;	/* else the '$' is taken for the conversion */
                }
            } else {
                while ( isdigit(*pt)) {
                    temp.fieldwidth = 10*temp.fieldwidth + tovalue(*pt);
                    ++pt;
                }
            }
            temp.precision = 0x800000;		/* Marks "no precision given" */
            if ( *pt=='.' ) {
                ++pt;
                if ( *pt=='*' ) {
                    temp.precision = -ac++;
                    for ( ++pt, val=0; isdigit(*pt); ++pt )
                        val = 10*val + tovalue(*pt);
                    if ( *pt=='$' ) {
                        temp.precision = -val;
                        ++pt;
                    }
                } else {
                    /* A '.' without digits is a precision of 0 */
                    temp.precision = 0;
                    while ( isdigit(*pt)) {
                        temp.precision = 10*temp.precision + tovalue(*pt);
                        ++pt;
                    }
                }
            }
            if ( *pt=='h' ) { temp.is_short=true; ++pt; }
            else if ( *pt=='l' ) { temp.is_long=true; ++pt; }
            /* Slots supplying a '*' value are integers. Drop references */
            /*  which do not name a slot */
            if ( temp.fieldwidth<0 ) {
                if ( -temp.fieldwidth<=argmax )
                    state.args[-temp.fieldwidth-1].arg_type = at_int;
                else
                    temp.fieldwidth = 0;
            }
            if ( temp.precision<0 ) {
                if ( -temp.precision<=argmax )
                    state.args[-temp.precision-1].arg_type = at_int;
                else
                    temp.precision = 0x800000;
            }
            if ( *pt=='\0' )
                break;		/* The format ends inside a specification */
            temp.format = *pt++;
            temp.hasformat = true;
            if ( temp.format=='d' || temp.format=='i' || temp.format=='o' ||
                    temp.format=='u' || temp.format=='x' || temp.format=='X' ||
                    temp.format=='c' ) {
                temp.arg_type = at_int;
                if ( temp.precision == (int) 0x800000 ) temp.precision = 1;
            } else if ( temp.format=='e' || temp.format=='E' || temp.format=='f' ||
                    temp.format=='F' || temp.format=='g' || temp.format=='G' ) {
                temp.arg_type = at_double;
                if ( temp.precision == (int) 0x800000 ) temp.precision = 6;
            } else if ( temp.format=='a' || temp.format=='A' ) {
                /* aA hex conversion of double */
                temp.arg_type = at_double;
                if ( temp.precision == (int) 0x800000 ) temp.precision = 2*sizeof(double)-2;
            } else if ( temp.format=='s' && temp.is_short )
                temp.arg_type = at_astr;
            else if ( temp.format=='s' )
                temp.arg_type = at_ustr;
            else if ( temp.format=='p' ) {
                /* Pointers are printed as alternate form hex integers */
                temp.arg_type = at_int;
                temp.format = 'x';
                temp.is_alt = true;
                if ( sizeof(int) < sizeof( void * ) )
                    temp.is_long = true;
            } else if ( temp.format=='n' )
                temp.arg_type = at_iptr;
            if ( !hadarg ) arg = ac;
            ++ac;
            if ( arg>=1 && arg<=argmax )	/* "%0$d" names no slot */
                state.args[arg-1] = temp;
        }
    }

    /* Now read the args in order */
    for ( arg=0; arg<argmax; ++arg ) {
        switch ( state.args[arg].arg_type ) {
          case at_int:
            if ( state.args[arg].is_long )
                state.args[arg].ival = va_arg(ap,long);
            else
                state.args[arg].ival = va_arg(ap,int);
            break;
          case at_double:
            state.args[arg].dval = va_arg(ap,double);
            break;
          case at_ustr:
            state.args[arg].uval = va_arg(ap,unichar_t *);
            break;
          case at_astr:
            state.args[arg].uval = (unichar_t *) va_arg(ap,char *);
            break;
          case at_iptr:
            state.args[arg].uval = (unichar_t *) va_arg(ap,int *);
            break;
          default:
            /* Shouldn't get here, if we do, skip one arg */
            (void) va_arg(ap,int);
            break;
        }
    }

    /* Pass 3: produce the output */
    ac = 1;
    for ( pt=format; *pt; ) {
        if ( *pt!='%' ) {
            addchar(&state,*pt);
            ++pt;
        } else if ( pt[1]=='%' ) {
            addchar(&state,'%');
            pt+=2;
        } else {
            for ( ++pt, arg=0; isdigit(*pt); ++pt )
                arg = 10*arg + tovalue(*pt);
            if ( *pt!='$' ) {
                /* Sequential argument. Up to two slots before the value may */
                /*  hold a '*' field width and precision, skip those */
                arg = ac;
                if ( arg<=argmax && !state.args[arg-1].hasformat ) ++arg;
                if ( arg<=argmax && !state.args[arg-1].hasformat ) ++arg;
            }
            if ( arg>=1 && arg<=argmax ) {
                if ( state.args[arg-1].fieldwidth<0 ) ++ac;
                if ( state.args[arg-1].precision<0 ) ++ac;
            }
            ++ac;
            while ( *pt && !isspec(*pt)) ++pt;
            formatarg(&state,arg-1);	/* ignores an arg which is out of range */
            if ( *pt ) ++pt;		/* never step past the end of the format */
        }
    }
    /* Terminate the result even when it had to be cut off */
    if ( len>0 ) {
        if ( state.opt<state.end )
            *state.opt = '\0';
        else
            state.end[-1] = '\0';
    }
    if ( state.args!=args ) free(state.args);
    return( state.cnt );	/* the trailing nul is not counted */
}
416
/* Varargs front end of u_vsnprintf: format into str, which holds len */
/*  characters. Returns whatever u_vsnprintf returns */
int u_snprintf(unichar_t *str, int len, const unichar_t *format, ... ) {
    int produced;
    va_list arglist;

    va_start(arglist,format);
    produced = u_vsnprintf(str,len,format,arglist);
    va_end(arglist);

    return( produced );
}
426
/* Varargs front end of u_vsnprintf without a size argument. The size of str */
/*  is not known here, 0x10000 characters is passed as the limit, so the */
/*  caller must make sure that the result fits. Returns whatever u_vsnprintf */
/*  returns */
int u_sprintf(unichar_t *str, const unichar_t *format, ... ) {
    int produced;
    va_list arglist;

    va_start(arglist,format);
    produced = u_vsnprintf(str,0x10000,format,arglist);
    va_end(arglist);

    return( produced );
}
436