1 /*
2 ** Bytecode reader.
3 ** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4 */
5 
6 #define lj_bcread_c
7 #define LUA_CORE
8 
9 #include "lj_obj.h"
10 #include "lj_gc.h"
11 #include "lj_err.h"
12 #include "lj_buf.h"
13 #include "lj_str.h"
14 #include "lj_tab.h"
15 #include "lj_bc.h"
16 #if LJ_HASFFI
17 #include "lj_ctype.h"
18 #include "lj_cdata.h"
19 #include "lualib.h"
20 #endif
21 #include "lj_lex.h"
22 #include "lj_bcdump.h"
23 #include "lj_state.h"
24 #include "lj_strfmt.h"
25 
/*
** Reuse some lexer fields for our own purposes:
**   ls->level    holds the flags word of the bytecode dump header.
**   ls->lastline holds the saved stack top (as a stack offset).
** All macro parameters are parenthesized, so arbitrary expressions can be
** passed safely. bcread_flags() still expands to an lvalue.
*/
#define bcread_flags(ls)	((ls)->level)
/* True if the byte order of the dump differs from the host byte order. */
#define bcread_swap(ls) \
  ((bcread_flags(ls) & BCDUMP_F_BE) != LJ_BE*BCDUMP_F_BE)
/* Stack top as recorded by bcread_savetop(). */
#define bcread_oldtop(L, ls)	restorestack((L), (ls)->lastline)
/* Remember a stack top for later comparison with bcread_oldtop(). */
#define bcread_savetop(L, ls, top) \
  ((ls)->lastline = (BCLine)savestack((L), (top)))
33 
34 /* -- Input buffer handling ----------------------------------------------- */
35 
36 /* Throw reader error. */
bcread_error(LexState * ls,ErrMsg em)37 static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
38 {
39   lua_State *L = ls->L;
40   const char *name = ls->chunkarg;
41   if (*name == BCDUMP_HEAD1) name = "(binary)";
42   else if (*name == '@' || *name == '=') name++;
43   lj_strfmt_pushf(L, "%s: %s", name, err2msg(em));
44   lj_err_throw(L, LUA_ERRSYNTAX);
45 }
46 
47 /* Refill buffer. */
bcread_fill(LexState * ls,MSize len,int need)48 static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
49 {
50   lj_assertLS(len != 0, "empty refill");
51   if (len > LJ_MAX_BUF || ls->c < 0)
52     bcread_error(ls, LJ_ERR_BCBAD);
53   do {
54     const char *buf;
55     size_t sz;
56     char *p = ls->sb.b;
57     MSize n = (MSize)(ls->pe - ls->p);
58     if (n) {  /* Copy remainder to buffer. */
59       if (sbuflen(&ls->sb)) {  /* Move down in buffer. */
60 	lj_assertLS(ls->pe == ls->sb.w, "bad buffer pointer");
61 	if (ls->p != p) memmove(p, ls->p, n);
62       } else {  /* Copy from buffer provided by reader. */
63 	p = lj_buf_need(&ls->sb, len);
64 	memcpy(p, ls->p, n);
65       }
66       ls->p = p;
67       ls->pe = p + n;
68     }
69     ls->sb.w = p + n;
70     buf = ls->rfunc(ls->L, ls->rdata, &sz);  /* Get more data from reader. */
71     if (buf == NULL || sz == 0) {  /* EOF? */
72       if (need) bcread_error(ls, LJ_ERR_BCBAD);
73       ls->c = -1;  /* Only bad if we get called again. */
74       break;
75     }
76     if (sz >= LJ_MAX_BUF - n) lj_err_mem(ls->L);
77     if (n) {  /* Append to buffer. */
78       n += (MSize)sz;
79       p = lj_buf_need(&ls->sb, n < len ? len : n);
80       memcpy(ls->sb.w, buf, sz);
81       ls->sb.w = p + n;
82       ls->p = p;
83       ls->pe = p + n;
84     } else {  /* Return buffer provided by reader. */
85       ls->p = buf;
86       ls->pe = buf + sz;
87     }
88   } while ((MSize)(ls->pe - ls->p) < len);
89 }
90 
91 /* Need a certain number of bytes. */
bcread_need(LexState * ls,MSize len)92 static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
93 {
94   if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
95     bcread_fill(ls, len, 1);
96 }
97 
98 /* Want to read up to a certain number of bytes, but may need less. */
bcread_want(LexState * ls,MSize len)99 static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
100 {
101   if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
102     bcread_fill(ls, len, 0);
103 }
104 
105 /* Return memory block from buffer. */
bcread_mem(LexState * ls,MSize len)106 static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len)
107 {
108   uint8_t *p = (uint8_t *)ls->p;
109   ls->p += len;
110   lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
111   return p;
112 }
113 
114 /* Copy memory block from buffer. */
bcread_block(LexState * ls,void * q,MSize len)115 static void bcread_block(LexState *ls, void *q, MSize len)
116 {
117   memcpy(q, bcread_mem(ls, len), len);
118 }
119 
120 /* Read byte from buffer. */
bcread_byte(LexState * ls)121 static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
122 {
123   lj_assertLS(ls->p < ls->pe, "buffer read overflow");
124   return (uint32_t)(uint8_t)*ls->p++;
125 }
126 
127 /* Read ULEB128 value from buffer. */
bcread_uleb128(LexState * ls)128 static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls)
129 {
130   uint32_t v = lj_buf_ruleb128(&ls->p);
131   lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
132   return v;
133 }
134 
135 /* Read top 32 bits of 33 bit ULEB128 value from buffer. */
bcread_uleb128_33(LexState * ls)136 static uint32_t bcread_uleb128_33(LexState *ls)
137 {
138   const uint8_t *p = (const uint8_t *)ls->p;
139   uint32_t v = (*p++ >> 1);
140   if (LJ_UNLIKELY(v >= 0x40)) {
141     int sh = -1;
142     v &= 0x3f;
143     do {
144      v |= ((*p & 0x7f) << (sh += 7));
145    } while (*p++ >= 0x80);
146   }
147   ls->p = (char *)p;
148   lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
149   return v;
150 }
151 
152 /* -- Bytecode reader ----------------------------------------------------- */
153 
154 /* Read debug info of a prototype. */
bcread_dbg(LexState * ls,GCproto * pt,MSize sizedbg)155 static void bcread_dbg(LexState *ls, GCproto *pt, MSize sizedbg)
156 {
157   void *lineinfo = (void *)proto_lineinfo(pt);
158   bcread_block(ls, lineinfo, sizedbg);
159   /* Swap lineinfo if the endianess differs. */
160   if (bcread_swap(ls) && pt->numline >= 256) {
161     MSize i, n = pt->sizebc-1;
162     if (pt->numline < 65536) {
163       uint16_t *p = (uint16_t *)lineinfo;
164       for (i = 0; i < n; i++) p[i] = (uint16_t)((p[i] >> 8)|(p[i] << 8));
165     } else {
166       uint32_t *p = (uint32_t *)lineinfo;
167       for (i = 0; i < n; i++) p[i] = lj_bswap(p[i]);
168     }
169   }
170 }
171 
172 /* Find pointer to varinfo. */
bcread_varinfo(GCproto * pt)173 static const void *bcread_varinfo(GCproto *pt)
174 {
175   const uint8_t *p = proto_uvinfo(pt);
176   MSize n = pt->sizeuv;
177   if (n) while (*p++ || --n) ;
178   return p;
179 }
180 
181 /* Read a single constant key/value of a template table. */
bcread_ktabk(LexState * ls,TValue * o)182 static void bcread_ktabk(LexState *ls, TValue *o)
183 {
184   MSize tp = bcread_uleb128(ls);
185   if (tp >= BCDUMP_KTAB_STR) {
186     MSize len = tp - BCDUMP_KTAB_STR;
187     const char *p = (const char *)bcread_mem(ls, len);
188     setstrV(ls->L, o, lj_str_new(ls->L, p, len));
189   } else if (tp == BCDUMP_KTAB_INT) {
190     setintV(o, (int32_t)bcread_uleb128(ls));
191   } else if (tp == BCDUMP_KTAB_NUM) {
192     o->u32.lo = bcread_uleb128(ls);
193     o->u32.hi = bcread_uleb128(ls);
194   } else {
195     lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp);
196     setpriV(o, ~tp);
197   }
198 }
199 
200 /* Read a template table. */
bcread_ktab(LexState * ls)201 static GCtab *bcread_ktab(LexState *ls)
202 {
203   MSize narray = bcread_uleb128(ls);
204   MSize nhash = bcread_uleb128(ls);
205   GCtab *t = lj_tab_new(ls->L, narray, hsize2hbits(nhash));
206   if (narray) {  /* Read array entries. */
207     MSize i;
208     TValue *o = tvref(t->array);
209     for (i = 0; i < narray; i++, o++)
210       bcread_ktabk(ls, o);
211   }
212   if (nhash) {  /* Read hash entries. */
213     MSize i;
214     for (i = 0; i < nhash; i++) {
215       TValue key;
216       bcread_ktabk(ls, &key);
217       lj_assertLS(!tvisnil(&key), "nil key");
218       bcread_ktabk(ls, lj_tab_set(ls->L, t, &key));
219     }
220   }
221   return t;
222 }
223 
224 /* Read GC constants of a prototype. */
bcread_kgc(LexState * ls,GCproto * pt,MSize sizekgc)225 static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc)
226 {
227   MSize i;
228   GCRef *kr = mref(pt->k, GCRef) - (ptrdiff_t)sizekgc;
229   for (i = 0; i < sizekgc; i++, kr++) {
230     MSize tp = bcread_uleb128(ls);
231     if (tp >= BCDUMP_KGC_STR) {
232       MSize len = tp - BCDUMP_KGC_STR;
233       const char *p = (const char *)bcread_mem(ls, len);
234       setgcref(*kr, obj2gco(lj_str_new(ls->L, p, len)));
235     } else if (tp == BCDUMP_KGC_TAB) {
236       setgcref(*kr, obj2gco(bcread_ktab(ls)));
237 #if LJ_HASFFI
238     } else if (tp != BCDUMP_KGC_CHILD) {
239       CTypeID id = tp == BCDUMP_KGC_COMPLEX ? CTID_COMPLEX_DOUBLE :
240 		   tp == BCDUMP_KGC_I64 ? CTID_INT64 : CTID_UINT64;
241       CTSize sz = tp == BCDUMP_KGC_COMPLEX ? 16 : 8;
242       GCcdata *cd = lj_cdata_new_(ls->L, id, sz);
243       TValue *p = (TValue *)cdataptr(cd);
244       setgcref(*kr, obj2gco(cd));
245       p[0].u32.lo = bcread_uleb128(ls);
246       p[0].u32.hi = bcread_uleb128(ls);
247       if (tp == BCDUMP_KGC_COMPLEX) {
248 	p[1].u32.lo = bcread_uleb128(ls);
249 	p[1].u32.hi = bcread_uleb128(ls);
250       }
251 #endif
252     } else {
253       lua_State *L = ls->L;
254       lj_assertLS(tp == BCDUMP_KGC_CHILD, "bad constant type %d", tp);
255       if (L->top <= bcread_oldtop(L, ls))  /* Stack underflow? */
256 	bcread_error(ls, LJ_ERR_BCBAD);
257       L->top--;
258       setgcref(*kr, obj2gco(protoV(L->top)));
259     }
260   }
261 }
262 
263 /* Read number constants of a prototype. */
bcread_knum(LexState * ls,GCproto * pt,MSize sizekn)264 static void bcread_knum(LexState *ls, GCproto *pt, MSize sizekn)
265 {
266   MSize i;
267   TValue *o = mref(pt->k, TValue);
268   for (i = 0; i < sizekn; i++, o++) {
269     int isnum = (ls->p[0] & 1);
270     uint32_t lo = bcread_uleb128_33(ls);
271     if (isnum) {
272       o->u32.lo = lo;
273       o->u32.hi = bcread_uleb128(ls);
274     } else {
275       setintV(o, lo);
276     }
277   }
278 }
279 
280 /* Read bytecode instructions. */
bcread_bytecode(LexState * ls,GCproto * pt,MSize sizebc)281 static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc)
282 {
283   BCIns *bc = proto_bc(pt);
284   bc[0] = BCINS_AD((pt->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF,
285 		   pt->framesize, 0);
286   bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns));
287   /* Swap bytecode instructions if the endianess differs. */
288   if (bcread_swap(ls)) {
289     MSize i;
290     for (i = 1; i < sizebc; i++) bc[i] = lj_bswap(bc[i]);
291   }
292 }
293 
294 /* Read upvalue refs. */
bcread_uv(LexState * ls,GCproto * pt,MSize sizeuv)295 static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv)
296 {
297   if (sizeuv) {
298     uint16_t *uv = proto_uv(pt);
299     bcread_block(ls, uv, sizeuv*2);
300     /* Swap upvalue refs if the endianess differs. */
301     if (bcread_swap(ls)) {
302       MSize i;
303       for (i = 0; i < sizeuv; i++)
304 	uv[i] = (uint16_t)((uv[i] >> 8)|(uv[i] << 8));
305     }
306   }
307 }
308 
309 /* Read a prototype. */
lj_bcread_proto(LexState * ls)310 GCproto *lj_bcread_proto(LexState *ls)
311 {
312   GCproto *pt;
313   MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
314   MSize ofsk, ofsuv, ofsdbg;
315   MSize sizedbg = 0;
316   BCLine firstline = 0, numline = 0;
317 
318   /* Read prototype header. */
319   flags = bcread_byte(ls);
320   numparams = bcread_byte(ls);
321   framesize = bcread_byte(ls);
322   sizeuv = bcread_byte(ls);
323   sizekgc = bcread_uleb128(ls);
324   sizekn = bcread_uleb128(ls);
325   sizebc = bcread_uleb128(ls) + 1;
326   if (!(bcread_flags(ls) & BCDUMP_F_STRIP)) {
327     sizedbg = bcread_uleb128(ls);
328     if (sizedbg) {
329       firstline = bcread_uleb128(ls);
330       numline = bcread_uleb128(ls);
331     }
332   }
333 
334   /* Calculate total size of prototype including all colocated arrays. */
335   sizept = (MSize)sizeof(GCproto) +
336 	   sizebc*(MSize)sizeof(BCIns) +
337 	   sizekgc*(MSize)sizeof(GCRef);
338   sizept = (sizept + (MSize)sizeof(TValue)-1) & ~((MSize)sizeof(TValue)-1);
339   ofsk = sizept; sizept += sizekn*(MSize)sizeof(TValue);
340   ofsuv = sizept; sizept += ((sizeuv+1)&~1)*2;
341   ofsdbg = sizept; sizept += sizedbg;
342 
343   /* Allocate prototype object and initialize its fields. */
344   pt = (GCproto *)lj_mem_newgco(ls->L, (MSize)sizept);
345   pt->gct = ~LJ_TPROTO;
346   pt->numparams = (uint8_t)numparams;
347   pt->framesize = (uint8_t)framesize;
348   pt->sizebc = sizebc;
349   setmref(pt->k, (char *)pt + ofsk);
350   setmref(pt->uv, (char *)pt + ofsuv);
351   pt->sizekgc = 0;  /* Set to zero until fully initialized. */
352   pt->sizekn = sizekn;
353   pt->sizept = sizept;
354   pt->sizeuv = (uint8_t)sizeuv;
355   pt->flags = (uint8_t)flags;
356   pt->trace = 0;
357   setgcref(pt->chunkname, obj2gco(ls->chunkname));
358 
359   /* Close potentially uninitialized gap between bc and kgc. */
360   *(uint32_t *)((char *)pt + ofsk - sizeof(GCRef)*(sizekgc+1)) = 0;
361 
362   /* Read bytecode instructions and upvalue refs. */
363   bcread_bytecode(ls, pt, sizebc);
364   bcread_uv(ls, pt, sizeuv);
365 
366   /* Read constants. */
367   bcread_kgc(ls, pt, sizekgc);
368   pt->sizekgc = sizekgc;
369   bcread_knum(ls, pt, sizekn);
370 
371   /* Read and initialize debug info. */
372   pt->firstline = firstline;
373   pt->numline = numline;
374   if (sizedbg) {
375     MSize sizeli = (sizebc-1) << (numline < 256 ? 0 : numline < 65536 ? 1 : 2);
376     setmref(pt->lineinfo, (char *)pt + ofsdbg);
377     setmref(pt->uvinfo, (char *)pt + ofsdbg + sizeli);
378     bcread_dbg(ls, pt, sizedbg);
379     setmref(pt->varinfo, bcread_varinfo(pt));
380   } else {
381     setmref(pt->lineinfo, NULL);
382     setmref(pt->uvinfo, NULL);
383     setmref(pt->varinfo, NULL);
384   }
385   return pt;
386 }
387 
388 /* Read and check header of bytecode dump. */
bcread_header(LexState * ls)389 static int bcread_header(LexState *ls)
390 {
391   uint32_t flags;
392   bcread_want(ls, 3+5+5);
393   if (bcread_byte(ls) != BCDUMP_HEAD2 ||
394       bcread_byte(ls) != BCDUMP_HEAD3 ||
395       bcread_byte(ls) != BCDUMP_VERSION) return 0;
396   bcread_flags(ls) = flags = bcread_uleb128(ls);
397   if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
398   if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0;
399   if ((flags & BCDUMP_F_FFI)) {
400 #if LJ_HASFFI
401     lua_State *L = ls->L;
402     ctype_loadffi(L);
403 #else
404     return 0;
405 #endif
406   }
407   if ((flags & BCDUMP_F_STRIP)) {
408     ls->chunkname = lj_str_newz(ls->L, ls->chunkarg);
409   } else {
410     MSize len = bcread_uleb128(ls);
411     bcread_need(ls, len);
412     ls->chunkname = lj_str_new(ls->L, (const char *)bcread_mem(ls, len), len);
413   }
414   return 1;  /* Ok. */
415 }
416 
417 /* Read a bytecode dump. */
lj_bcread(LexState * ls)418 GCproto *lj_bcread(LexState *ls)
419 {
420   lua_State *L = ls->L;
421   lj_assertLS(ls->c == BCDUMP_HEAD1, "bad bytecode header");
422   bcread_savetop(L, ls, L->top);
423   lj_buf_reset(&ls->sb);
424   /* Check for a valid bytecode dump header. */
425   if (!bcread_header(ls))
426     bcread_error(ls, LJ_ERR_BCFMT);
427   for (;;) {  /* Process all prototypes in the bytecode dump. */
428     GCproto *pt;
429     MSize len;
430     const char *startp;
431     /* Read length. */
432     if (ls->p < ls->pe && ls->p[0] == 0) {  /* Shortcut EOF. */
433       ls->p++;
434       break;
435     }
436     bcread_want(ls, 5);
437     len = bcread_uleb128(ls);
438     if (!len) break;  /* EOF */
439     bcread_need(ls, len);
440     startp = ls->p;
441     pt = lj_bcread_proto(ls);
442     if (ls->p != startp + len)
443       bcread_error(ls, LJ_ERR_BCBAD);
444     setprotoV(L, L->top, pt);
445     incr_top(L);
446   }
447   if ((ls->pe != ls->p && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls))
448     bcread_error(ls, LJ_ERR_BCBAD);
449   /* Pop off last prototype. */
450   L->top--;
451   return protoV(L->top);
452 }
453 
454