1 /*
2 ** String formatting.
3 ** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
4 */
5 
6 #include <stdio.h>
7 
8 #define lj_strfmt_c
9 #define LUA_CORE
10 
11 #include "lj_obj.h"
12 #include "lj_buf.h"
13 #include "lj_str.h"
14 #include "lj_state.h"
15 #include "lj_char.h"
16 #include "lj_strfmt.h"
17 
18 /* -- Format parser ------------------------------------------------------- */
19 
/*
** Conversion lookup table, indexed by (conversion character - 'A') and
** covering 'A' through 'x'. A zero entry marks a character that is not a
** supported conversion.
*/
static const uint8_t strfmt_map[('x'-'A')+1] = {
  /* 'A'..'M' */
  STRFMT_A,0,0,0,STRFMT_E,STRFMT_F,STRFMT_G,0,0,0,0,0,0,
  /* 'N'..'Z' */
  0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0,
  /* The six characters between 'Z' and 'a'. */
  0,0,0,0,0,0,
  /* 'a'..'m' */
  STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0,
  /* 'n'..'x' */
  0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X
};
27 
/*
** Parse the next element of a format string.
**
** Scans from fs->p towards fs->e and returns one of:
** - STRFMT_LIT: a run of literal text; fs->str/fs->len describe it.
** - A conversion: the looked-up conversion type combined with the parsed
**   flag, width and precision bits (plus STRFMT_F_UPPER for upper-case
**   conversion characters).
** - STRFMT_ERR: invalid conversion; fs->str/fs->len cover the offending
**   text and fs->p is moved to fs->e.
** - STRFMT_EOF: nothing left to parse.
** fs->p is advanced past the consumed input in all cases.
*/
SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs)
{
  const uint8_t *p = fs->p, *e = fs->e;
  fs->str = (const char *)p;
  for (; p < e; p++) {
    if (*p == '%') {  /* Escape char? */
      if (p[1] == '%') {  /* '%%'? */
	/* The literal ends after the first '%'; the second one is skipped. */
	fs->p = ++p+1;
	goto retlit;
      } else {
	SFormat sf = 0;
	uint32_t c;
	/* Return any literal text preceding the '%' first. */
	if (p != (const uint8_t *)fs->str)
	  break;
	/* The range check only admits characters from ' ' to '0'. */
	for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) {
	  /* Parse flags. */
	  if (*p == '-') sf |= STRFMT_F_LEFT;
	  else if (*p == '+') sf |= STRFMT_F_PLUS;
	  else if (*p == '0') sf |= STRFMT_F_ZERO;
	  else if (*p == ' ') sf |= STRFMT_F_SPACE;
	  else if (*p == '#') sf |= STRFMT_F_ALT;
	  else break;
	}
	/* At most two decimal digits are consumed for the width. */
	if ((uint32_t)*p - '0' < 10) {  /* Parse width. */
	  uint32_t width = (uint32_t)*p++ - '0';
	  if ((uint32_t)*p - '0' < 10)
	    width = (uint32_t)*p++ - '0' + width*10;
	  sf |= (width << STRFMT_SH_WIDTH);
	}
	/* At most two decimal digits are consumed for the precision. */
	if (*p == '.') {  /* Parse precision. */
	  uint32_t prec = 0;
	  p++;
	  if ((uint32_t)*p - '0' < 10) {
	    prec = (uint32_t)*p++ - '0';
	    if ((uint32_t)*p - '0' < 10)
	      prec = (uint32_t)*p++ - '0' + prec*10;
	  }
	  /* Stored biased by one, so a zero field means "no precision given". */
	  sf |= ((prec+1) << STRFMT_SH_PREC);
	}
	/* Parse conversion. */
	c = (uint32_t)*p - 'A';
	if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) {
	  uint32_t sx = strfmt_map[c];
	  if (sx) {
	    fs->p = p+1;
	    /* Bit 0x20 of the table index is clear only for 'A'..'Z'. */
	    return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER));
	  }
	}
	/* Return error location. */
	if (*p >= 32) p++;  /* Include the bad character unless it is < 32. */
	fs->len = (MSize)(p - (const uint8_t *)fs->str);
	fs->p = fs->e;  /* Stop parsing: the next call finds nothing left. */
	return STRFMT_ERR;
      }
    }
  }
  fs->p = p;
retlit:
  fs->len = (MSize)(p - (const uint8_t *)fs->str);
  return fs->len ? STRFMT_LIT : STRFMT_EOF;
}
89 
90 /* -- Raw conversions ----------------------------------------------------- */
91 
/*
** Divide x by sc using a multiplication with a rounded-up fixed-point
** reciprocal (scaled by 2^sh) and a shift. The quotient is written to *p++
** as a decimal digit and the remainder is left in x.
*/
#define WINT_R(x, sh, sc) \
  { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); }

/*
** Write integer to buffer.
**
** Writes the decimal representation of k (with a leading '-' for negative
** values) to p and returns a pointer just past the last character written.
** No terminator is added. The buffer needs room for up to 11 characters
** (sign plus ten digits).
*/
char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
{
  uint32_t u = (uint32_t)k;
  /* Negate in unsigned arithmetic: -k overflows (UB) for INT32_MIN. */
  if (k < 0) { u = ~u+1u; *p++ = '-'; }
  /* The number is split into groups of four digits. Jump to the label that
  ** matches the number of significant digits to skip leading zeros.
  */
  if (u < 10000) {
    if (u < 10) goto dig1;
    if (u < 100) goto dig2;
    if (u < 1000) goto dig3;
  } else {
    uint32_t v = u / 10000; u -= v * 10000;
    if (v < 10000) {
      if (v < 10) goto dig5;
      if (v < 100) goto dig6;
      if (v < 1000) goto dig7;
    } else {
      /* More than eight digits: w holds the top one or two digits. */
      uint32_t w = v / 10000; v -= w * 10000;
      if (w >= 10) WINT_R(w, 10, 10)
      *p++ = (char)('0'+w);
    }
    WINT_R(v, 23, 1000)
    dig7: WINT_R(v, 12, 100)
    dig6: WINT_R(v, 10, 10)
    dig5: *p++ = (char)('0'+v);
  }
  WINT_R(u, 23, 1000)
  dig3: WINT_R(u, 12, 100)
  dig2: WINT_R(u, 10, 10)
  dig1: *p++ = (char)('0'+u);
  return p;
}
#undef WINT_R
127 
128 /* Write pointer to buffer. */
lj_strfmt_wptr(char * p,const void * v)129 char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v)
130 {
131   ptrdiff_t x = (ptrdiff_t)v;
132   MSize i, n = STRFMT_MAXBUF_PTR;
133   if (x == 0) {
134     *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L';
135     return p;
136   }
137 #if LJ_64
138   /* Shorten output for 64 bit pointers. */
139   n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0);
140 #endif
141   p[0] = '0';
142   p[1] = 'x';
143   for (i = n-1; i >= 2; i--, x >>= 4)
144     p[i] = "0123456789abcdef"[(x & 15)];
145   return p+n;
146 }
147 
/*
** Write ULEB128 to buffer.
**
** Emits v in groups of seven bits, least significant group first. Every
** byte except the last one has its top bit set. Returns a pointer just past
** the last byte written.
*/
char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v)
{
  while (v >= 0x80) {
    *p++ = (char)(0x80 | (v & 0x7f));
    v >>= 7;
  }
  *p++ = (char)v;
  return p;
}
156 
157 /* Return string or write number to tmp buffer and return pointer to start. */
lj_strfmt_wstrnum(lua_State * L,cTValue * o,MSize * lenp)158 const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp)
159 {
160   SBuf *sb;
161   if (tvisstr(o)) {
162     *lenp = strV(o)->len;
163     return strVdata(o);
164   } else if (tvisint(o)) {
165     sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o));
166   } else if (tvisnum(o)) {
167     sb = lj_strfmt_putfnum(lj_buf_tmp_(L), STRFMT_G14, o->n);
168   } else {
169     return NULL;
170   }
171   *lenp = sbuflen(sb);
172   return sbufB(sb);
173 }
174 
175 /* -- Unformatted conversions to buffer ----------------------------------- */
176 
177 /* Add integer to buffer. */
lj_strfmt_putint(SBuf * sb,int32_t k)178 SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k)
179 {
180   setsbufP(sb, lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k));
181   return sb;
182 }
183 
#if LJ_HASJIT
/* Append the number held in a tagged value to the buffer (STRFMT_G14). */
SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o)
{
  lua_Number num = o->n;
  return lj_strfmt_putfnum(sb, STRFMT_G14, num);
}
#endif
191 
lj_strfmt_putptr(SBuf * sb,const void * v)192 SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v)
193 {
194   setsbufP(sb, lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v));
195   return sb;
196 }
197 
/*
** Add quoted string to buffer.
**
** Appends str to sb enclosed in double quotes. '"', '\\' and '\n' are
** written with a preceding backslash. Other control characters are written
** as a backslash followed by their decimal code. The code is written with
** three digits if it is >= 100 or if the next source character is a digit;
** otherwise only the significant digits are written.
** Returns sb.
*/
SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str)
{
  const char *s = strdata(str);
  MSize len = str->len;
  lj_buf_putb(sb, '"');
  while (len--) {
    uint32_t c = (uint32_t)(uint8_t)*s++;
    /* Worst case per character: backslash plus three decimal digits. */
    char *p = lj_buf_more(sb, 4);
    if (c == '"' || c == '\\' || c == '\n') {
      *p++ = '\\';
    } else if (lj_char_iscntrl(c)) {  /* This can only be 0-31 or 127. */
      uint32_t d;
      *p++ = '\\';
      /* *s is the character following c at this point. */
      if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
	/* Hundreds digit: '1' for codes >= 100, otherwise a padding '0'. */
	*p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
	goto tens;
      } else if (c >= 10) {
      tens:
	/* d = c / 10, computed with a multiply and a shift. */
	d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d);
      }
      c += '0';  /* The remaining ones digit is stored below. */
    }
    *p++ = (char)c;
    setsbufP(sb, p);
  }
  lj_buf_putb(sb, '"');
  return sb;
}
227 
228 /* -- Formatted conversions to buffer ------------------------------------- */
229 
230 /* Add formatted char to buffer. */
lj_strfmt_putfchar(SBuf * sb,SFormat sf,int32_t c)231 SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c)
232 {
233   MSize width = STRFMT_WIDTH(sf);
234   char *p = lj_buf_more(sb, width > 1 ? width : 1);
235   if ((sf & STRFMT_F_LEFT)) *p++ = (char)c;
236   while (width-- > 1) *p++ = ' ';
237   if (!(sf & STRFMT_F_LEFT)) *p++ = (char)c;
238   setsbufP(sb, p);
239   return sb;
240 }
241 
242 /* Add formatted string to buffer. */
lj_strfmt_putfstr(SBuf * sb,SFormat sf,GCstr * str)243 SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str)
244 {
245   MSize len = str->len <= STRFMT_PREC(sf) ? str->len : STRFMT_PREC(sf);
246   MSize width = STRFMT_WIDTH(sf);
247   char *p = lj_buf_more(sb, width > len ? width : len);
248   if ((sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
249   while (width-- > len) *p++ = ' ';
250   if (!(sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
251   setsbufP(sb, p);
252   return sb;
253 }
254 
255 /* Add formatted signed/unsigned integer to buffer. */
lj_strfmt_putfxint(SBuf * sb,SFormat sf,uint64_t k)256 SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
257 {
258   char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *p;
259 #ifdef LUA_USE_ASSERT
260   char *ps;
261 #endif
262   MSize prefix = 0, len, prec, pprec, width, need;
263 
264   /* Figure out signed prefixes. */
265   if (STRFMT_TYPE(sf) == STRFMT_INT) {
266     if ((int64_t)k < 0) {
267       k = (uint64_t)-(int64_t)k;
268       prefix = 256 + '-';
269     } else if ((sf & STRFMT_F_PLUS)) {
270       prefix = 256 + '+';
271     } else if ((sf & STRFMT_F_SPACE)) {
272       prefix = 256 + ' ';
273     }
274   }
275 
276   /* Convert number and store to fixed-size buffer in reverse order. */
277   prec = STRFMT_PREC(sf);
278   if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO;
279   if (k == 0) {  /* Special-case zero argument. */
280     if (prec != 0 ||
281 	(sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT))
282       *--q = '0';
283   } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) {  /* Decimal. */
284     uint32_t k2;
285     while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; }
286     k2 = (uint32_t)k;
287     do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2);
288   } else if ((sf & STRFMT_T_HEX)) {  /* Hex. */
289     const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" :
290 						 "0123456789abcdef";
291     do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k);
292     if ((sf & STRFMT_F_ALT)) prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x');
293   } else {  /* Octal. */
294     do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k);
295     if ((sf & STRFMT_F_ALT)) *--q = '0';
296   }
297 
298   /* Calculate sizes. */
299   len = (MSize)(buf + sizeof(buf) - q);
300   if ((int32_t)len >= (int32_t)prec) prec = len;
301   width = STRFMT_WIDTH(sf);
302   pprec = prec + (prefix >> 8);
303   need = width > pprec ? width : pprec;
304   p = lj_buf_more(sb, need);
305 #ifdef LUA_USE_ASSERT
306   ps = p;
307 #endif
308 
309   /* Format number with leading/trailing whitespace and zeros. */
310   if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0)
311     while (width-- > pprec) *p++ = ' ';
312   if (prefix) {
313     if ((char)prefix >= 'X') *p++ = '0';
314     *p++ = (char)prefix;
315   }
316   if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO)
317     while (width-- > pprec) *p++ = '0';
318   while (prec-- > len) *p++ = '0';
319   while (q < buf + sizeof(buf)) *p++ = *q++;  /* Add number itself. */
320   if ((sf & STRFMT_F_LEFT))
321     while (width-- > pprec) *p++ = ' ';
322 
323   lua_assert(need == (MSize)(p - ps));
324   setsbufP(sb, p);
325   return sb;
326 }
327 
328 /* Add number formatted as signed integer to buffer. */
lj_strfmt_putfnum_int(SBuf * sb,SFormat sf,lua_Number n)329 SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
330 {
331   int64_t k = (int64_t)n;
332   if (checki32(k) && sf == STRFMT_INT)
333     return lj_strfmt_putint(sb, (int32_t)k);  /* Shortcut for plain %d. */
334   else
335     return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
336 }
337 
338 /* Add number formatted as unsigned integer to buffer. */
lj_strfmt_putfnum_uint(SBuf * sb,SFormat sf,lua_Number n)339 SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n)
340 {
341   int64_t k;
342   if (n >= 9223372036854775808.0)
343     k = (int64_t)(n - 18446744073709551616.0);
344   else
345     k = (int64_t)n;
346   return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
347 }
348 
349 /* -- Conversions to strings ---------------------------------------------- */
350 
351 /* Convert integer to string. */
lj_strfmt_int(lua_State * L,int32_t k)352 GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k)
353 {
354   char buf[STRFMT_MAXBUF_INT];
355   MSize len = (MSize)(lj_strfmt_wint(buf, k) - buf);
356   return lj_str_new(L, buf, len);
357 }
358 
359 /* Convert integer or number to string. */
lj_strfmt_number(lua_State * L,cTValue * o)360 GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o)
361 {
362   return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o);
363 }
364 
#if LJ_HASJIT
/* Create a one-character string object from a char value. */
GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c)
{
  char ch = (char)c;
  return lj_str_new(L, &ch, 1);
}
#endif
374 
375 /* Raw conversion of object to string. */
lj_strfmt_obj(lua_State * L,cTValue * o)376 GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o)
377 {
378   if (tvisstr(o)) {
379     return strV(o);
380   } else if (tvisnumber(o)) {
381     return lj_strfmt_number(L, o);
382   } else if (tvisnil(o)) {
383     return lj_str_newlit(L, "nil");
384   } else if (tvisfalse(o)) {
385     return lj_str_newlit(L, "false");
386   } else if (tvistrue(o)) {
387     return lj_str_newlit(L, "true");
388   } else {
389     char buf[8+2+2+16], *p = buf;
390     p = lj_buf_wmem(p, lj_typename(o), (MSize)strlen(lj_typename(o)));
391     *p++ = ':'; *p++ = ' ';
392     if (tvisfunc(o) && isffunc(funcV(o))) {
393       p = lj_buf_wmem(p, "builtin#", 8);
394       p = lj_strfmt_wint(p, funcV(o)->c.ffid);
395     } else {
396       p = lj_strfmt_wptr(p, lj_obj_ptr(o));
397     }
398     return lj_str_new(L, buf, (size_t)(p - buf));
399   }
400 }
401 
402 /* -- Internal string formatting ------------------------------------------ */
403 
404 /*
405 ** These functions are only used for lua_pushfstring(), lua_pushvfstring()
406 ** and for internal string formatting (e.g. error messages). Caveat: unlike
407 ** string.format(), only a limited subset of formats and flags are supported!
408 **
409 ** LuaJIT has support for a couple more formats than Lua 5.1/5.2:
410 ** - %d %u %o %x with full formatting, 32 bit integers only.
411 ** - %f and other FP formats are really %.14g.
412 ** - %s %c %p without formatting.
413 */
414 
415 /* Push formatted message as a string object to Lua stack. va_list variant. */
lj_strfmt_pushvf(lua_State * L,const char * fmt,va_list argp)416 const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp)
417 {
418   SBuf *sb = lj_buf_tmp_(L);
419   FormatState fs;
420   SFormat sf;
421   GCstr *str;
422   lj_strfmt_init(&fs, fmt, (MSize)strlen(fmt));
423   while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
424     switch (STRFMT_TYPE(sf)) {
425     case STRFMT_LIT:
426       lj_buf_putmem(sb, fs.str, fs.len);
427       break;
428     case STRFMT_INT:
429       lj_strfmt_putfxint(sb, sf, va_arg(argp, int32_t));
430       break;
431     case STRFMT_UINT:
432       lj_strfmt_putfxint(sb, sf, va_arg(argp, uint32_t));
433       break;
434     case STRFMT_NUM:
435       lj_strfmt_putfnum(sb, STRFMT_G14, va_arg(argp, lua_Number));
436       break;
437     case STRFMT_STR: {
438       const char *s = va_arg(argp, char *);
439       if (s == NULL) s = "(null)";
440       lj_buf_putmem(sb, s, (MSize)strlen(s));
441       break;
442       }
443     case STRFMT_CHAR:
444       lj_buf_putb(sb, va_arg(argp, int));
445       break;
446     case STRFMT_PTR:
447       lj_strfmt_putptr(sb, va_arg(argp, void *));
448       break;
449     case STRFMT_ERR:
450     default:
451       lj_buf_putb(sb, '?');
452       lua_assert(0);
453       break;
454     }
455   }
456   str = lj_buf_str(L, sb);
457   setstrV(L, L->top, str);
458   incr_top(L);
459   return strdata(str);
460 }
461 
462 /* Push formatted message as a string object to Lua stack. Vararg variant. */
lj_strfmt_pushf(lua_State * L,const char * fmt,...)463 const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
464 {
465   const char *msg;
466   va_list argp;
467   va_start(argp, fmt);
468   msg = lj_strfmt_pushvf(L, fmt, argp);
469   va_end(argp);
470   return msg;
471 }
472 
473