1 /* Copyright (C) 2001-2012 by George Williams */
2 /*
3  * Redistribution and use in source and binary forms, with or without
4  * modification, are permitted provided that the following conditions are met:
5 
6  * Redistributions of source code must retain the above copyright notice, this
7  * list of conditions and the following disclaimer.
8 
9  * Redistributions in binary form must reproduce the above copyright notice,
10  * this list of conditions and the following disclaimer in the documentation
11  * and/or other materials provided with the distribution.
12 
13  * The name of the author may not be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15 
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19  * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 #include "ustring.h"
28 #include "utype.h"
29 #include <stddef.h>
30 #include <stdarg.h>
31 
/* Unicode printf. Arguments may be selected explicitly with <num>$ notation */
/*  (numbered from 1) or are taken in the order the conversions appear. */
/* This is not a complete printf: only the parts thought to be important are */
/*  implemented and the rest is left for later. Maybe. */
36 
/* How an argument slot must be fetched from the va_list: */
/*  at_int    an int, or a long when the slot's is_long bit is set */
/*  at_double a double */
/*  at_ustr   a unichar_t * string */
/*  at_astr   a char * string (stored through the unichar_t * field) */
/*  at_iptr   an int * destination for the %n conversion */
enum arg_type { at_int, at_double, at_ustr, at_astr, at_iptr };
38 
/* One slot per printf argument. A slot either describes a conversion */
/*  (hasformat set) or merely holds the int consumed by a '*' field width */
/*  or precision belonging to another slot. */
struct args {
    unsigned int is_alt:1;			/* # flag */
    unsigned int is_zeropad:1;		/* 0 flag */
    unsigned int is_leftadj:1;		/* - flag */
    unsigned int is_blank:1;		/* " " flag */
    unsigned int is_signed:1;		/* + flag */
    unsigned int is_thousand:1;		/* ' flag */
    unsigned int is_short:1;		/* h */
    unsigned int is_long:1; 		/* l */
    unsigned int hasformat:1;		/* else it's a precision/fieldwidth */
    char format;			/* conversion character, e.g. 'd', 's', 'x' */
    /* A negative fieldwidth/precision value -n means "take the value from */
    /*  argument slot n (1 based)", i.e. args[n-1].ival. The parser stores */
    /*  0x800000 in precision to mean that no precision was given. */
    int fieldwidth, precision;
    enum arg_type arg_type;		/* selects which va_arg type fills the slot */
    long ival;				/* value of an at_int argument */
    const unichar_t *uval;		/* at_ustr value; also carries the char * and int * arguments via casts */
    double dval;			/* value of an at_double argument */
};
56 
/* Working state shared by the formatting helpers. */
struct state {
    int argmax;			/* number of argument slots in args */
    struct args *args;		/* the argument slots, indexed from 0 */
    unichar_t *opt, *end;	/* next output position, and one past the end of the output buffer */
    int cnt;			/* characters produced so far, including any that did not fit */
};
63 
/* Emit one character: always bump the running count, but store the */
/*  character only while the output pointer is still inside the buffer. */
/*  Note that the state argument is evaluated several times. */
#define addchar(state,ch) (++((state)->cnt),(state)->opt<(state)->end?*((state)->opt)++ = (ch): 0)
65 
/* Return 1 when ch is one of the characters that ends a conversion */
/*  specification, 0 otherwise. A ch of 0 compares equal to the table's */
/*  terminator and therefore also yields 1. */
static int isspec(int ch) {
    static const char specs[] = "%npSscaAgGfFeEouxXdi";
    int idx;

    for ( idx=0; specs[idx]!='\0'; ++idx ) {
        if ( specs[idx]==ch )
            return 1;
    }
    /* Ran off the end of the table: only the NUL character "matches" */
    return ch=='\0';
}
72 
/* Emit the already converted value txt for argument slot arg, padded to */
/*  fieldwidth characters (a fieldwidth of 0 or less means no padding). */
/*   - When the slot is left adjusted the fill goes on the right and is */
/*     always blank: a trailing '0' fill would change the value shown, and */
/*     in printf the '-' flag overrides the '0' flag. */
/*   - When zero padding a number, a leading sign (or blank) and a "0x"/"0X" */
/*     prefix are emitted before the zeros, so -5 in a width of 5 becomes */
/*     "-0005" rather than "000-5". */
static void padvalue(struct state *state,int arg,unichar_t *txt,int fieldwidth) {
    int len=0;
    int leftadj = state->args[arg].is_leftadj;
    int zeropad = state->args[arg].is_zeropad && !leftadj;

    if ( fieldwidth>0 ) {
        len = u_strlen(txt);
        if ( !leftadj ) {
            /* A %c value is a single literal character, never a sign */
            if ( zeropad && state->args[arg].format!='c' ) {
                if ( *txt=='-' || *txt=='+' || *txt==' ' ) {
                    addchar(state,*txt);
                    ++txt;
                }
                if ( txt[0]=='0' && (txt[1]=='x' || txt[1]=='X') ) {
                    addchar(state,txt[0]);
                    addchar(state,txt[1]);
                    txt += 2;
                }
            }
            /* len still counts any sign/prefix emitted above */
            while ( len<fieldwidth ) {
                addchar(state,zeropad?'0':' ');
                ++len;
            }
        }
    }
    while ( *txt ) {
        addchar(state,*txt);
        ++txt;
    }
    /* Only reached with len<fieldwidth when the value is left adjusted */
    while ( len<fieldwidth ) {
        addchar(state,' ');
        ++len;
    }
}
95 
padstr(struct state * state,int arg,const unichar_t * txt,int fieldwidth,int precision)96 static void padstr(struct state *state,int arg,const unichar_t *txt,int fieldwidth, int precision) {
97     int len=0, padc,i;
98 
99     if ( fieldwidth>0 ) {
100 	len = precision>0?precision:u_strlen(txt);
101 	padc = state->args[arg].is_zeropad?'0':' ';
102 	if ( !state->args[arg].is_leftadj ) {
103 	    while ( len<fieldwidth ) {
104 		addchar(state,padc);
105 		++len;
106 	    }
107 	}
108     }
109     for ( i=0; *txt && (precision==0 || i<precision); ++i, ++txt )
110 	addchar(state,*txt);
111     while ( len<fieldwidth ) {
112 	addchar(state,padc);
113 	++len;
114     }
115 }
116 
formatarg(struct state * state,int arg)117 static void formatarg(struct state *state,int arg) {
118     static char *hex = "0123456789abcdef", *HEX="0123456789ABCDEF";
119     char *trans;
120     int radix, neg; unsigned long val;
121     unichar_t buf[20], *pt;
122     char cbuf[20];
123     int i, precision, fieldwidth;
124 
125     if ( arg<0 || arg>=state->argmax )
126 return;
127     if (( precision = state->args[arg].precision )<0 )
128 	precision = state->args[-state->args[arg].precision-1].ival;
129     if (( fieldwidth = state->args[arg].fieldwidth )<0 )
130 	fieldwidth = state->args[-state->args[arg].fieldwidth-1].ival;
131     if ( fieldwidth<0 ) {
132 	fieldwidth = -fieldwidth;
133 	state->args[arg].is_leftadj = true;
134     }
135 
136     switch ( state->args[arg].format ) {
137       case 'n':
138 	*((int *) (state->args[arg].uval)) = state->cnt;
139       break;
140       case 'c':
141         buf[0] = state->args[arg].ival;
142 	buf[1] = '\0';
143 	padvalue(state,arg,buf,fieldwidth);
144       break;
145       case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
146 	trans = state->args[arg].format=='X'?HEX:hex;
147 	pt = buf+sizeof(buf)/sizeof(buf[0])-1;
148 	*pt-- = '\0';
149 	neg = false;
150 	radix = state->args[arg].format=='d' || state->args[arg].format=='i' ||
151 		    state->args[arg].format=='u'?10:
152 		state->args[arg].format=='o'?8:16;
153 	val = state->args[arg].ival;
154 	if ( state->args[arg].ival<0 &&
155 		(state->args[arg].format=='d' || state->args[arg].format=='i')) {
156 	    neg = true;
157 	    val = -val;
158 	}
159 	for ( i=0; val!=0 || i<precision; ++i ) {
160 	    if ( radix==10 && state->args[arg].is_thousand && i!=0 && i%3==0 )
161 		*pt-- = ',';			/* !!!!! locale !!!!! */
162 	    *pt-- = trans[val%radix];
163 	    val /= radix;
164 	}
165 	if ( state->args[arg].is_alt ) {
166 	    if ( radix==8 && pt[1]!='0' )
167 		*pt-- = '0';
168 	    else if ( radix==16 && state->args[arg].ival!=0 ) {
169 		*pt-- = state->args[arg].format;
170 		*pt-- = '0';
171 	    }
172 	}
173 	if ( state->args[arg].format=='d' || state->args[arg].format=='i' ) {
174 	    if ( neg )
175 		*pt-- = '-';
176 	    else if ( state->args[arg].is_signed )
177 		*pt-- = '+';
178 	    else if ( state->args[arg].is_blank )
179 		*pt-- = ' ';
180 	}
181 	padvalue(state,arg,pt+1,fieldwidth);
182       break;
183       case 's':
184 	if ( state->args[arg].uval == NULL ) {
185 	    static unichar_t null[] = { '<','n','u','l','l','>', '\0' };
186 	    padstr(state,arg,null,fieldwidth,precision);
187 	} else if ( state->args[arg].is_short ) {
188 	    unichar_t *temp = def2u_copy((char *) (state->args[arg].uval));
189 	    padstr(state,arg,temp,fieldwidth,precision);
190 	    free(temp);
191 	} else
192 	    padstr(state,arg,state->args[arg].uval,fieldwidth,precision);
193       break;
194       case 'e': case 'E': case 'f': case 'F': case 'g': case 'G': case 'a': case 'A':
195 	/* This doesn't really do a good job!!!! */
196 	switch ( state->args[arg].format ) {
197 	  case 'e': case 'E':
198 	    sprintf(cbuf,"%e",state->args[arg].dval);
199 	  break;
200 	  case 'f': case 'F':
201 	    sprintf(cbuf,"%f",state->args[arg].dval);
202 	  break;
203 	  case 'g': case 'G':
204 	    sprintf(cbuf,"%g",state->args[arg].dval);
205 	  break;
206 	  case 'a': case 'A':
207 	    sprintf(cbuf,"%a",state->args[arg].dval);
208 	  break;
209         }
210 	uc_strcpy(buf,cbuf);
211 	padvalue(state,arg,buf,fieldwidth);
212       break;
213       /* a 'p' conversion is converted into the equivalent 'x' conversion earlier */
214     }
215 }
216 
/* printf-style formatting of unichar_t strings. */
/*  str, len: output buffer and its size in characters. At most len */
/*   characters are stored; when len>0 the result is always NUL terminated, */
/*   even if it had to be truncated. */
/*  format: format string. Conversions may name their argument with <num>$ */
/*   (numbered from 1) or take arguments in order; '*' and '*<num>$' are */
/*   accepted for field width and precision; the only length modifiers */
/*   recognised are 'h' and 'l'. */
/*  ap: the arguments. */
/* Returns the number of characters the complete result needs, not */
/*  counting the terminating NUL (this may be larger than what was stored). */
/* Works in passes over the format: count the argument slots, parse each */
/*  conversion into its slot, fetch every slot from ap in order, and */
/*  finally generate the output. */
int u_vsnprintf(unichar_t *str, int len, const unichar_t *format, va_list ap ) {
    struct state state;
    struct args args[20], temp;
    const unichar_t *pt, *scan;
    int argmax = 0, arg, ac, val, hadarg, idx, skipped;

    memset(&state,'\0',sizeof(state));
    memset(args,'\0',sizeof(args));

    /* Pass 1: find the highest argument number used */
    ac = 0;
    for ( pt=format; *pt;  ) {
        if ( *pt!='%' )
            ++pt;
        else if ( pt[1]=='%' )
            pt += 2;
        else {
            for ( ++pt, arg=0; isdigit(*pt); ++pt )
                arg = 10*arg + tovalue(*pt);
            ++ac;
            if ( *pt=='$' ) {
                if ( arg>argmax ) argmax = arg;
            } else {
                if ( ac>argmax ) argmax = ac;
            }
            while ( *pt && !isspec(*pt)) {
                if ( *pt!='*' ) {
                    ++pt;
                    continue;
                }
                /* A '*' consumes an argument of its own */
                ++ac;
                for ( ++pt, arg=0; isdigit(*pt); ++pt )
                    arg = 10*arg + tovalue(*pt);
                if ( *pt=='$' ) {
                    if ( arg>argmax ) argmax = arg;
                } else {
                    if ( ac>argmax ) argmax = ac;
                }
                /* pt now rests on the first character not yet examined; */
                /*  it must not be advanced again here or the terminating */
                /*  NUL could be skipped */
            }
            if ( *pt ) ++pt;		/* step over the conversion character */
        }
    }
    state.argmax = argmax;
    if ( argmax>(int) (sizeof(args)/sizeof(args[0])) )
        state.args = (struct args *) xcalloc(argmax,sizeof(struct args));
    else
        state.args = args;
    state.opt = str; state.end = str+len;

    /* Pass 2: parse every conversion into its argument slot */
    ac = 1;
    for ( pt=format; *pt; ) {
        if ( *pt!='%' )
            ++pt;
        else if ( pt[1]=='%' )
            pt+=2;
        else {
            ++pt;
            memset(&temp,'\0',sizeof(temp));
            hadarg = 0;
            arg = 0;
            if ( isdigit(*pt)) {
                /* Leading digits are an argument number only when followed */
                /*  by '$'. Otherwise leave them alone so that a leading '0' */
                /*  is seen as the zero-pad flag and the rest as the width */
                for ( scan=pt, val=0; isdigit(*scan); ++scan )
                    val = 10*val + tovalue(*scan);
                if ( *scan=='$' ) {
                    hadarg = 1;
                    arg = val;
                    pt = scan+1;
                }
            }
            for (;;) {
                if ( *pt=='#' ) temp.is_alt = 1;
                else if ( *pt=='0' ) temp.is_zeropad = 1;
                else if ( *pt=='-' ) temp.is_leftadj = 1;
                else if ( *pt==' ' ) temp.is_blank = 1;
                else if ( *pt=='+' ) temp.is_signed = 1;
                else if ( *pt=='\'' ) temp.is_thousand = 1;
                else
                    break;
                ++pt;
            }
            if ( *pt=='*' ) {
                /* Negative value: index (1 based) of the slot holding the width */
                temp.fieldwidth = -ac++;
                for ( ++pt, val=0; isdigit(*pt); ++pt )
                    val = 10*val + tovalue(*pt);
                if ( *pt=='$' ) {
                    temp.fieldwidth = -val;
                    ++pt;
                }
            } else {
                while ( isdigit(*pt)) {
                    temp.fieldwidth = 10*temp.fieldwidth + tovalue(*pt);
                    ++pt;
                }
            }
            temp.precision = 0x800000;		/* marks "no precision given" */
            if ( *pt=='.' ) {
                ++pt;
                if ( *pt=='*' ) {
                    temp.precision = -ac++;
                    for ( ++pt, val=0; isdigit(*pt); ++pt )
                        val = 10*val + tovalue(*pt);
                    if ( *pt=='$' ) {
                        temp.precision = -val;
                        ++pt;
                    }
                } else if ( isdigit(*pt)) {
                    temp.precision = 0;
                    while ( isdigit(*pt)) {
                        temp.precision = 10*temp.precision + tovalue(*pt);
                        ++pt;
                    }
                }
            }
            if ( *pt=='h' ) { temp.is_short = 1; ++pt; }
            else if ( *pt=='l' ) { temp.is_long = 1; ++pt; }
            /* Slots that supply a '*' width or precision hold an int */
            if ( temp.fieldwidth<0 ) {
                idx = -(temp.fieldwidth+1);
                if ( idx<argmax )
                    state.args[idx].arg_type = at_int;
            }
            if ( temp.precision<0 ) {
                idx = -(temp.precision+1);
                if ( idx<argmax )
                    state.args[idx].arg_type = at_int;
            }
            temp.format = *pt;
            if ( *pt ) ++pt;		/* never step past the terminating NUL */
            temp.hasformat = 1;
            if ( temp.format=='d' || temp.format=='i' || temp.format=='o' ||
                    temp.format=='u' || temp.format=='x' || temp.format=='X' ||
                    temp.format=='c' ) {
                temp.arg_type = at_int;
                if ( temp.precision == (int) 0x800000 ) temp.precision = 1;
            } else if ( temp.format=='e' || temp.format=='E' || temp.format=='f' ||
                    temp.format=='F' || temp.format=='g' || temp.format=='G' ) {
                temp.arg_type = at_double;
                if ( temp.precision == (int) 0x800000 ) temp.precision = 6;
            } else if ( temp.format=='a' || temp.format=='A' ) {
                /* aA hex conversion of double */
                temp.arg_type = at_double;
                if ( temp.precision == (int) 0x800000 ) temp.precision = 2*sizeof(double)-2;
            } else if ( temp.format=='s' && temp.is_short )
                temp.arg_type = at_astr;
            else if ( temp.format=='s' )
                temp.arg_type = at_ustr;
            else if ( temp.format=='p' ) {
                /* Pointers are printed as alternate-form hex integers */
                temp.arg_type = at_int;
                temp.format = 'x';
                temp.is_alt = 1;
                if ( sizeof(int) < sizeof( void * ) )
                    temp.is_long = 1;
                /* Like the other integer conversions: without this the */
                /*  marker value would be used as a minimum digit count */
                if ( temp.precision == (int) 0x800000 ) temp.precision = 1;
            } else if ( temp.format=='n' )
                temp.arg_type = at_iptr;
            if ( !hadarg ) arg = ac;
            ++ac;
            /* Ignore argument numbers outside the slots that were counted */
            /*  (for instance "%0$d") */
            if ( arg>=1 && arg<=argmax )
                state.args[arg-1] = temp;
        }
    }

    /* Now read the args in order */
    for ( arg=0; arg<argmax; ++arg ) {
        switch ( state.args[arg].arg_type ) {
          case at_int:
            if ( state.args[arg].is_long )
                state.args[arg].ival = va_arg(ap,long);
            else
                state.args[arg].ival = va_arg(ap,int);
          break;
          case at_double:
            state.args[arg].dval = va_arg(ap,double);
          break;
          case at_ustr:
            state.args[arg].uval = va_arg(ap,unichar_t *);
          break;
          case at_astr:
            state.args[arg].uval = (unichar_t *) va_arg(ap,char *);
          break;
          case at_iptr:
            state.args[arg].uval = (unichar_t *) va_arg(ap,int *);
          break;
          default:
            /* Shouldn't get here, if we do, skip one arg */
            (void) va_arg(ap,int);
          break;
        }
    }

    /* Final pass: copy literal text and expand each conversion */
    ac = 1;
    for ( pt=format; *pt; ) {
        if ( *pt!='%' ) {
            addchar(&state,*pt);
            ++pt;
        } else if ( pt[1]=='%' ) {
            addchar(&state,'%');
            pt+=2;
        } else {
            for ( ++pt, arg=0; isdigit(*pt); ++pt )
                arg = 10*arg + tovalue(*pt);
            if ( *pt!='$' ) {
                /* Sequential argument: step over up to two slots that */
                /*  only hold this conversion's '*' width/precision */
                arg = ac;
                for ( skipped=0; skipped<2 && arg<=argmax &&
                        !state.args[arg-1].hasformat; ++skipped )
                    ++arg;
            }
            if ( arg>=1 && arg<=argmax ) {
                if ( state.args[arg-1].fieldwidth<0 ) ++ac;
                if ( state.args[arg-1].precision<0 ) ++ac;
            }
            ++ac;
            while ( *pt && !isspec(*pt)) ++pt;
            formatarg(&state,arg-1);	/* ignores out of range slots */
            if ( *pt ) ++pt;
        }
    }
    addchar(&state,'\0');
    /* cnt counts every character including the NUL; if it exceeds len the */
    /*  NUL was dropped, so terminate the truncated text explicitly */
    if ( len>0 && state.cnt>len )
        str[len-1] = '\0';
    if ( state.args!=args ) free(state.args);
    return( state.cnt-1 );		/* don't include trailing nul */
}
416 
/* Variadic front end to u_vsnprintf: format into str, storing at most len */
/*  characters, and return whatever u_vsnprintf returns. */
int u_snprintf(unichar_t *str, int len, const unichar_t *format, ... ) {
    va_list varargs;
    int written;

    va_start(varargs,format);
    written = u_vsnprintf(str,len,format,varargs);
    va_end(varargs);

    return written;
}
426 
/* Variadic front end to u_vsnprintf for callers that do not pass a buffer */
/*  size: the output limit handed to u_vsnprintf is fixed at 0x10000 */
/*  characters. Returns whatever u_vsnprintf returns. */
int u_sprintf(unichar_t *str, const unichar_t *format, ... ) {
    va_list varargs;
    int written;

    va_start(varargs,format);
    written = u_vsnprintf(str,0x10000,format,varargs);
    va_end(varargs);

    return written;
}
436