/*
** Snapshot handling.
** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_snap_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_gc.h"
#include "lj_tab.h"
#include "lj_state.h"
#include "lj_frame.h"
#include "lj_bc.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#include "lj_snap.h"
#include "lj_target.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#include "lj_cdata.h"
#endif

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)		(&J->cur.ir[(ref)])

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Emit raw IR without passing through optimizations. */
#define emitir_raw(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))

/* -- Snapshot buffer allocation ------------------------------------------ */

/* Grow snapshot buffer. */
void lj_snap_grow_buf_(jit_State *J, MSize need)
{
  MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
  if (need > maxsnap)
    lj_trace_err(J, LJ_TRERR_SNAPOV);
  lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
  J->cur.snap = J->snapbuf;
}

/* Grow snapshot map buffer. */
void lj_snap_grow_map_(jit_State *J, MSize need)
{
  if (need < 2*J->sizesnapmap)
    need = 2*J->sizesnapmap;
  else if (need < 64)
    need = 64;
  J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
		    J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
  J->cur.snapmap = J->snapmapbuf;
  J->sizesnapmap = need;
}

/* -- Snapshot generation ------------------------------------------------- */

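/* A snapshot is a SnapShot header plus a slice of the snapshot map.
** The slice holds one packed SnapEntry (slot number, flags, IR reference)
** per live stack slot, followed by the bytecode PC of the snapshotted
** instruction and one or two frame-link entries per frame above the stack
** bottom. Roughly, for a snapshot taken one Lua call deep with two live
** slots (illustrative values only):
**
**   map = { SNAP(1, 0, ref1), SNAP(3, SNAP_FRAME, ref2), PC, caller PC }
**
** The slot entries are written by snapshot_slots() and the PC plus frame
** links by snapshot_framelinks() below.
*/
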
/* Add all modified slots to the snapshot. */
static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
{
  IRRef retf = J->chain[IR_RETF];  /* Limits SLOAD restore elimination. */
  BCReg s;
  MSize n = 0;
  for (s = 0; s < nslots; s++) {
    TRef tr = J->slot[s];
    IRRef ref = tref_ref(tr);
    if (ref) {
      SnapEntry sn = SNAP_TR(s, tr);
      IRIns *ir = IR(ref);
      if (!(sn & (SNAP_CONT|SNAP_FRAME)) &&
	  ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
	/* No need to snapshot unmodified non-inherited slots. */
	if (!(ir->op2 & IRSLOAD_INHERIT))
	  continue;
	/* No need to restore readonly slots and unmodified non-parent slots. */
	if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
	    (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
	  sn |= SNAP_NORESTORE;
      }
      if (LJ_SOFTFP && irt_isnum(ir->t))
	sn |= SNAP_SOFTFPNUM;
      map[n++] = sn;
    }
  }
  return n;
}

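/* Frame links record how to rebuild the frame structure on a trace exit:
** a PC for Lua frames, a frame type/size word plus continuation PC for
** continuation frames, and just the frame type/size word for other non-C
** frames. The return value is the highest slot, relative to the bottom of
** the snapshotted stack, that any of these frames may need; it is stored
** as snap->topslot and later used by lj_snap_restore() to grow the stack
** before the slots are restored.
*/
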
/* Add frame links at the end of the snapshot. */
static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
{
  cTValue *frame = J->L->base - 1;
  cTValue *lim = J->L->base - J->baseslot;
  cTValue *ftop = frame + funcproto(frame_func(frame))->framesize;
  MSize f = 0;
  map[f++] = SNAP_MKPC(J->pc);  /* The current PC is always the first entry. */
  while (frame > lim) {  /* Backwards traversal of all frames above base. */
    if (frame_islua(frame)) {
      map[f++] = SNAP_MKPC(frame_pc(frame));
      frame = frame_prevl(frame);
      if (frame + funcproto(frame_func(frame))->framesize > ftop)
	ftop = frame + funcproto(frame_func(frame))->framesize;
    } else if (frame_iscont(frame)) {
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
      map[f++] = SNAP_MKPC(frame_contpc(frame));
      frame = frame_prevd(frame);
    } else {
      lua_assert(!frame_isc(frame));
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
      frame = frame_prevd(frame);
    }
  }
  lua_assert(f == (MSize)(1 + J->framedepth));
  return (BCReg)(ftop - lim);
}

/* Take a snapshot of the current stack. */
static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
{
  BCReg nslots = J->baseslot + J->maxslot;
  MSize nent;
  SnapEntry *p;
  /* Conservative estimate. */
  lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1);
  p = &J->cur.snapmap[nsnapmap];
  nent = snapshot_slots(J, p, nslots);
  snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent);
  snap->mapofs = (uint16_t)nsnapmap;
  snap->ref = (IRRef1)J->cur.nins;
  snap->nent = (uint8_t)nent;
  snap->nslots = (uint8_t)nslots;
  snap->count = 0;
  J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth);
}

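/* Merging avoids redundant snapshots: if no instruction was emitted since
** the last snapshot, or a merge was requested and no guard was emitted in
** between, the previous snapshot's map slice is simply reused. Snapshot #0
** is exempt so that its PC entry is preserved; a NOP is emitted and a new
** snapshot is added instead of merging into it.
*/
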
/* Add or merge a snapshot. */
void lj_snap_add(jit_State *J)
{
  MSize nsnap = J->cur.nsnap;
  MSize nsnapmap = J->cur.nsnapmap;
  /* Merge if no ins. in between or if requested and no guard in between. */
  if (J->mergesnap ? !irt_isguard(J->guardemit) :
      (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) {
    if (nsnap == 1) {  /* But preserve snap #0 PC. */
      emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
      goto nomerge;
    }
    nsnapmap = J->cur.snap[--nsnap].mapofs;
  } else {
  nomerge:
    lj_snap_grow_buf(J, nsnap+1);
    J->cur.nsnap = (uint16_t)(nsnap+1);
  }
  J->mergesnap = 0;
  J->guardemit.irt = 0;
  snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
}

/* -- Snapshot modification ----------------------------------------------- */

#define SNAP_USEDEF_SLOTS	(LJ_MAX_JSLOTS+LJ_STACK_EXTRA)

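/* snap_usedef() fills udf[] with one byte per slot: each byte starts out
** as 1, USE_SLOT clears bit 0 when the slot's current value is read, and
** DEF_SLOT (a multiply by 3) leaves an already-used slot at 0 but keeps
** any other slot nonzero. A slot thus ends up with udf[s] == 0 only if its
** value is read before being redefined, i.e. the value live at the
** snapshot is still needed; nonzero values mark dead slots, which
** lj_snap_purge() and lj_snap_shrink() may drop.
*/
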
/* Find unused slots with reaching-definitions bytecode data-flow analysis. */
static BCReg snap_usedef(jit_State *J, uint8_t *udf,
			 const BCIns *pc, BCReg maxslot)
{
  BCReg s;
  GCobj *o;

  if (maxslot == 0) return 0;
#ifdef LUAJIT_USE_VALGRIND
  /* Avoid errors for harmless reads beyond maxslot. */
  memset(udf, 1, SNAP_USEDEF_SLOTS);
#else
  memset(udf, 1, maxslot);
#endif

  /* Treat open upvalues as used. */
  o = gcref(J->L->openupval);
  while (o) {
    if (uvval(gco2uv(o)) < J->L->base) break;
    udf[uvval(gco2uv(o)) - J->L->base] = 0;
    o = gcref(o->gch.nextgc);
  }

#define USE_SLOT(s)		udf[(s)] &= ~1
#define DEF_SLOT(s)		udf[(s)] *= 3

  /* Scan through following bytecode and check for uses/defs. */
  lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
  for (;;) {
    BCIns ins = *pc++;
    BCOp op = bc_op(ins);
    switch (bcmode_b(op)) {
    case BCMvar: USE_SLOT(bc_b(ins)); break;
    default: break;
    }
    switch (bcmode_c(op)) {
    case BCMvar: USE_SLOT(bc_c(ins)); break;
    case BCMrbase:
      lua_assert(op == BC_CAT);
      for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
      for (; s < maxslot; s++) DEF_SLOT(s);
      break;
    case BCMjump:
    handle_jump: {
      BCReg minslot = bc_a(ins);
      if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
      else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
      else if (op == BC_UCLO) { pc += bc_j(ins); break; }
      for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
      return minslot < maxslot ? minslot : maxslot;
      }
    case BCMlit:
      if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
	goto handle_jump;
      } else if (bc_isret(op)) {
	BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
	for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
	for (; s < top; s++) USE_SLOT(s);
	for (; s < maxslot; s++) DEF_SLOT(s);
	return 0;
      }
      break;
    case BCMfunc: return maxslot;  /* NYI: will abort, anyway. */
    default: break;
    }
    switch (bcmode_a(op)) {
    case BCMvar: USE_SLOT(bc_a(ins)); break;
    case BCMdst:
       if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
       break;
    case BCMbase:
      if (op >= BC_CALLM && op <= BC_VARG) {
	BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
		    maxslot : (bc_a(ins) + bc_c(ins));
	s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
	for (; s < top; s++) USE_SLOT(s);
	for (; s < maxslot; s++) DEF_SLOT(s);
	if (op == BC_CALLT || op == BC_CALLMT) {
	  for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
	  return 0;
	}
      } else if (op == BC_KNIL) {
	for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
      } else if (op == BC_TSETM) {
	for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
      }
      break;
    default: break;
    }
    lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
  }

#undef USE_SLOT
#undef DEF_SLOT

  return 0;  /* unreachable */
}

/* Purge dead slots before the next snapshot. */
void lj_snap_purge(jit_State *J)
{
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg maxslot = J->maxslot;
  BCReg s = snap_usedef(J, udf, J->pc, maxslot);
  for (; s < maxslot; s++)
    if (udf[s] != 0)
      J->base[s] = 0;  /* Purge dead slots. */
}

/* Shrink last snapshot. */
void lj_snap_shrink(jit_State *J)
{
  SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, m, nlim, nent = snap->nent;
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg maxslot = J->maxslot;
  BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
  BCReg baseslot = J->baseslot;
  maxslot += baseslot;
  minslot += baseslot;
  snap->nslots = (uint8_t)maxslot;
  for (n = m = 0; n < nent; n++) {  /* Remove unused slots from snapshot. */
    BCReg s = snap_slot(map[n]);
    if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
      map[m++] = map[n];  /* Only copy used slots. */
  }
  snap->nent = (uint8_t)m;
  nlim = J->cur.nsnapmap - snap->mapofs - 1;
  while (n <= nlim) map[m++] = map[n++];  /* Move PC + frame links down. */
  J->cur.nsnapmap = (uint16_t)(snap->mapofs + m);  /* Free up space in map. */
}

/* -- Snapshot access ----------------------------------------------------- */

/* Initialize a Bloom Filter with all renamed refs.
** There are very few renames (often none), so the filter has
** very few bits set. This makes it suitable for negative filtering.
*/
static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
{
  BloomFilter rfilt = 0;
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op2 <= lim)
      bloomset(rfilt, ir->op1);
  return rfilt;
}

/* Process matching renames to find the original RegSP. */
static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
{
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op1 == ref && ir->op2 <= lim)
      rs = ir->prev;
  return rs;
}

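/* Presumably for side-trace assembly: lj_snap_regspmap() walks the leading
** parent-referencing instructions of a side trace (SLOAD with
** IRSLOAD_PARENT, PVAL and SOFTFP HIOP) and copies each referenced parent
** value's register/spill slot, after applying any renames valid for this
** snapshot, into the instruction's prev field.
*/
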
/* Copy RegSP from parent snapshot to the parent links of the IR. */
IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
{
  SnapShot *snap = &T->snap[snapno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  MSize n = 0;
  IRRef ref = 0;
  for ( ; ; ir++) {
    uint32_t rs;
    if (ir->o == IR_SLOAD) {
      if (!(ir->op2 & IRSLOAD_PARENT)) break;
      for ( ; ; n++) {
	lua_assert(n < snap->nent);
	if (snap_slot(map[n]) == ir->op1) {
	  ref = snap_ref(map[n++]);
	  break;
	}
      }
    } else if (LJ_SOFTFP && ir->o == IR_HIOP) {
      ref++;
    } else if (ir->o == IR_PVAL) {
      ref = ir->op1 + REF_BIAS;
    } else {
      break;
    }
    rs = T->ir[ref].prev;
    if (bloomtest(rfilt, ref))
      rs = snap_renameref(T, snapno, ref, rs);
    ir->prev = (uint16_t)rs;
    lua_assert(regsp_used(rs));
  }
  return ir;
}

/* -- Snapshot replay ----------------------------------------------------- */

/* Replay constant from parent trace. */
static TRef snap_replay_const(jit_State *J, IRIns *ir)
{
  /* Only have to deal with constants that can occur in stack slots. */
  switch ((IROp)ir->o) {
  case IR_KPRI: return TREF_PRI(irt_type(ir->t));
  case IR_KINT: return lj_ir_kint(J, ir->i);
  case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
  case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir));
  case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir));
  case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir));  /* Continuation. */
  default: lua_assert(0); return TREF_NIL; break;
  }
}

/* De-duplicate parent reference. */
static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
{
  MSize j;
  for (j = 0; j < nmax; j++)
    if (snap_ref(map[j]) == ref)
      return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
  return 0;
}

/* Emit parent reference with de-duplication. */
static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
		      BloomFilter seen, IRRef ref)
{
  IRIns *ir = &T->ir[ref];
  TRef tr;
  if (irref_isk(ref))
    tr = snap_replay_const(J, ir);
  else if (!regsp_used(ir->prev))
    tr = 0;
  else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
    tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
  return tr;
}

/* Check whether a sunk store corresponds to an allocation. Slow path. */
static int snap_sunk_store2(jit_State *J, IRIns *ira, IRIns *irs)
{
  if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
      irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
    IRIns *irk = IR(irs->op1);
    if (irk->o == IR_AREF || irk->o == IR_HREFK)
      irk = IR(irk->op1);
    return (IR(irk->op1) == ira);
  }
  return 0;
}

/* Check whether a sunk store corresponds to an allocation. Fast path. */
static LJ_AINLINE int snap_sunk_store(jit_State *J, IRIns *ira, IRIns *irs)
{
  if (irs->s != 255)
    return (ira + irs->s == irs);  /* Fast check. */
  return snap_sunk_store2(J, ira, irs);
}

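/* Replaying a parent snapshot proceeds in up to three passes over its
** entries. The first pass turns each inherited slot into a constant or an
** IRSLOAD_INHERIT|IRSLOAD_PARENT SLOAD. If some entries refer to values
** without a register or spill slot (sunk allocations, unallocated CONVs),
** a second pass emits the PVALs their operands need, and a third pass
** re-emits the sunk allocations together with their sunk stores, so the
** side trace starts from equivalent state.
*/
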
/* Replay snapshot state to setup side trace. */
void lj_snap_replay(jit_State *J, GCtrace *T)
{
  SnapShot *snap = &T->snap[J->exitno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  BloomFilter seen = 0;
  int pass23 = 0;
  J->framedepth = 0;
  /* Emit IR for slots inherited from parent snapshot. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    IRRef ref = snap_ref(sn);
    IRIns *ir = &T->ir[ref];
    TRef tr;
    /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
    if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
      goto setslot;
    bloomset(seen, ref);
    if (irref_isk(ref)) {
      tr = snap_replay_const(J, ir);
    } else if (!regsp_used(ir->prev)) {
      pass23 = 1;
      lua_assert(s != 0);
      tr = s;
    } else {
      IRType t = irt_type(ir->t);
      uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
      if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
      tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
    }
  setslot:
    J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */
    J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s);
    if ((sn & SNAP_FRAME))
      J->baseslot = s+1;
  }
  if (pass23) {
    IRIns *irlast = &T->ir[snap->ref];
    pass23 = 0;
    /* Emit dependent PVALs. */
    for (n = 0; n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
	if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
	pass23 = 1;
	lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
		   ir->o == IR_CNEW || ir->o == IR_CNEWI);
	if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
	if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
	if (LJ_HASFFI && ir->o == IR_CNEWI) {
	  if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
	    snap_pref(J, T, map, nent, seen, (ir+1)->op2);
	} else {
	  IRIns *irs;
	  for (irs = ir+1; irs < irlast; irs++)
	    if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
	      if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
		snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
	      else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
		       irs+1 < irlast && (irs+1)->o == IR_HIOP)
		snap_pref(J, T, map, nent, seen, (irs+1)->op2);
	    }
	}
      } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
	lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
	J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
      }
    }
    /* Replay sunk instructions. */
    for (n = 0; pass23 && n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
	TRef op1, op2;
	if (J->slot[snap_slot(sn)] != snap_slot(sn)) {  /* De-dup allocs. */
	  J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
	  continue;
	}
	op1 = ir->op1;
	if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
	op2 = ir->op2;
	if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
	if (LJ_HASFFI && ir->o == IR_CNEWI) {
	  if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
	    lj_needsplit(J);  /* Emit joining HIOP. */
	    op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
			     snap_pref(J, T, map, nent, seen, (ir+1)->op2));
	  }
	  J->slot[snap_slot(sn)] = emitir(ir->ot, op1, op2);
	} else {
	  IRIns *irs;
	  TRef tr = emitir(ir->ot, op1, op2);
	  J->slot[snap_slot(sn)] = tr;
	  for (irs = ir+1; irs < irlast; irs++)
	    if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
	      IRIns *irr = &T->ir[irs->op1];
	      TRef val, key = irr->op2, tmp = tr;
	      if (irr->o != IR_FREF) {
		IRIns *irk = &T->ir[key];
		if (irr->o == IR_HREFK)
		  key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
				    irk->op2);
		else
		  key = snap_replay_const(J, irk);
		if (irr->o == IR_HREFK || irr->o == IR_AREF) {
		  IRIns *irf = &T->ir[irr->op1];
		  tmp = emitir(irf->ot, tmp, irf->op2);
		}
	      }
	      tmp = emitir(irr->ot, tmp, key);
	      val = snap_pref(J, T, map, nent, seen, irs->op2);
	      if (val == 0) {
		IRIns *irc = &T->ir[irs->op2];
		lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
		val = snap_pref(J, T, map, nent, seen, irc->op1);
		val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
	      } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
			 irs+1 < irlast && (irs+1)->o == IR_HIOP) {
		IRType t = IRT_I64;
		if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
		  t = IRT_NUM;
		lj_needsplit(J);
		if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
		  uint64_t k = (uint32_t)T->ir[irs->op2].i +
			       ((uint64_t)T->ir[(irs+1)->op2].i << 32);
		  val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM,
				  lj_ir_k64_find(J, k));
		} else {
		  val = emitir_raw(IRT(IR_HIOP, t), val,
			  snap_pref(J, T, map, nent, seen, (irs+1)->op2));
		}
		tmp = emitir(IRT(irs->o, t), tmp, val);
		continue;
	      }
	      tmp = emitir(irs->ot, tmp, val);
	    } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
	      emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
	    }
	}
      }
    }
  }
  J->base = J->slot + J->baseslot;
  J->maxslot = snap->nslots - J->baseslot;
  lj_snap_add(J);
  if (pass23)  /* Need explicit GC step _after_ initial snapshot. */
    emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
}

/* -- Snapshot restore ---------------------------------------------------- */

static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
			SnapNo snapno, BloomFilter rfilt,
			IRIns *ir, TValue *o);

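/* A slot's value is recovered from the exit state in one of three ways:
** constants are materialized directly from the parent IR, spilled values
** are read from the exit's spill area, and everything else is read from
** the register recorded in its (possibly renamed) RegSP. An unallocated
** CONV num.int is recomputed from its integer operand.
*/
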
/* Restore a value from the trace exit state. */
static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
			    SnapNo snapno, BloomFilter rfilt,
			    IRRef ref, TValue *o)
{
  IRIns *ir = &T->ir[ref];
  IRType1 t = ir->t;
  RegSP rs = ir->prev;
  if (irref_isk(ref)) {  /* Restore constant slot. */
    lj_ir_kvalue(J->L, o, ir);
    return;
  }
  if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
    rs = snap_renameref(T, snapno, ref, rs);
  if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
    int32_t *sps = &ex->spill[regsp_spill(rs)];
    if (irt_isinteger(t)) {
      setintV(o, *sps);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      o->u64 = *(uint64_t *)sps;
#endif
    } else if (LJ_64 && irt_islightud(t)) {
      /* 64 bit lightuserdata which may escape already has the tag bits. */
      o->u64 = *(uint64_t *)sps;
    } else {
      lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
      setgcrefi(o->gcr, *sps);
      setitype(o, irt_toitype(t));
    }
  } else {  /* Restore from register. */
    Reg r = regsp_reg(rs);
    if (ra_noreg(r)) {
      lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
      snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
      if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
      return;
    } else if (irt_isinteger(t)) {
      setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#endif
    } else if (LJ_64 && irt_islightud(t)) {
      /* 64 bit lightuserdata which may escape already has the tag bits. */
      o->u64 = ex->gpr[r-RID_MIN_GPR];
    } else {
      if (!irt_ispri(t))
	setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
      setitype(o, irt_toitype(t));
    }
  }
}

#if LJ_HASFFI
/* Restore raw data from the trace exit state. */
static void snap_restoredata(GCtrace *T, ExitState *ex,
			     SnapNo snapno, BloomFilter rfilt,
			     IRRef ref, void *dst, CTSize sz)
{
  IRIns *ir = &T->ir[ref];
  RegSP rs = ir->prev;
  int32_t *src;
  uint64_t tmp;
  if (irref_isk(ref)) {
    if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
      src = mref(ir->ptr, int32_t);
    } else if (sz == 8) {
      tmp = (uint64_t)(uint32_t)ir->i;
      src = (int32_t *)&tmp;
    } else {
      src = &ir->i;
    }
  } else {
    if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
      rs = snap_renameref(T, snapno, ref, rs);
    if (ra_hasspill(regsp_spill(rs))) {
      src = &ex->spill[regsp_spill(rs)];
      if (sz == 8 && !irt_is64(ir->t)) {
	tmp = (uint64_t)(uint32_t)*src;
	src = (int32_t *)&tmp;
      }
    } else {
      Reg r = regsp_reg(rs);
      if (ra_noreg(r)) {
	/* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
	lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
	snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);
	*(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
	return;
      }
      src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
#if !LJ_SOFTFP
      if (r >= RID_MAX_GPR) {
	src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
#if LJ_TARGET_PPC
	if (sz == 4) {  /* PPC FPRs are always doubles. */
	  *(float *)dst = (float)*(double *)src;
	  return;
	}
#else
	if (LJ_BE && sz == 4) src++;
#endif
      }
#endif
    }
  }
  lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
  if (sz == 4) *(int32_t *)dst = *src;
  else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
  else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
  else *(int16_t *)dst = (int16_t)*src;
}
#endif

/* Unsink allocation from the trace exit state. Unsink sunk stores. */
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
			SnapNo snapno, BloomFilter rfilt,
			IRIns *ir, TValue *o)
{
  lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
	     ir->o == IR_CNEW || ir->o == IR_CNEWI);
#if LJ_HASFFI
  if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
    CTState *cts = ctype_cts(J->L);
    CTypeID id = (CTypeID)T->ir[ir->op1].i;
    CTSize sz = lj_ctype_size(cts, id);
    GCcdata *cd = lj_cdata_new(cts, id, sz);
    setcdataV(J->L, o, cd);
    if (ir->o == IR_CNEWI) {
      uint8_t *p = (uint8_t *)cdataptr(cd);
      lua_assert(sz == 4 || sz == 8);
      if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
	snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4);
	if (LJ_BE) p += 4;
	sz = 4;
      }
      snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz);
    } else {
      IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
      for (irs = ir+1; irs < irlast; irs++)
	if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
	  IRIns *iro = &T->ir[T->ir[irs->op1].op2];
	  uint8_t *p = (uint8_t *)cd;
	  CTSize szs;
	  lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD);
	  lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64);
	  if (irt_is64(irs->t)) szs = 8;
	  else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
	  else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
	  else szs = 4;
	  if (LJ_64 && iro->o == IR_KINT64)
	    p += (int64_t)ir_k64(iro)->u64;
	  else
	    p += iro->i;
	  lua_assert(p >= (uint8_t *)cdataptr(cd) &&
		     p + szs <= (uint8_t *)cdataptr(cd) + sz);
	  if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
	    lua_assert(szs == 4);
	    snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4);
	    if (LJ_BE) p += 4;
	  }
	  snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs);
	}
    }
  } else
#endif
  {
    IRIns *irs, *irlast;
    GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
				  lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
    settabV(J->L, o, t);
    irlast = &T->ir[T->snap[snapno].ref];
    for (irs = ir+1; irs < irlast; irs++)
      if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
	IRIns *irk = &T->ir[irs->op1];
	TValue tmp, *val;
	lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
		   irs->o == IR_FSTORE);
	if (irk->o == IR_FREF) {
	  lua_assert(irk->op2 == IRFL_TAB_META);
	  snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
	  /* NOBARRIER: The table is new (marked white). */
	  setgcref(t->metatable, obj2gco(tabV(&tmp)));
	} else {
	  irk = &T->ir[irk->op2];
	  if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
	  lj_ir_kvalue(J->L, &tmp, irk);
	  val = lj_tab_set(J->L, t, &tmp);
	  /* NOBARRIER: The table is new (marked white). */
	  snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
	  if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
	    snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
	    val->u32.hi = tmp.u32.lo;
	  }
	}
      }
  }
}

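/* Restoring the interpreter state walks all snapshot entries: each slot is
** refilled via snap_restoreval() or, for sunk allocations, re-created via
** snap_unsink(). Slots flagged SNAP_CONT or SNAP_FRAME get their tag word
** overwritten with a frame link popped from the end of the snapshot map
** (slot #0 keeps the link it had on entry) and L->base is advanced past
** each such frame. Finally L->top is recomputed from the snapshotted PC.
*/
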
/* Restore interpreter state from exit state with the help of a snapshot. */
const BCIns *lj_snap_restore(jit_State *J, void *exptr)
{
  ExitState *ex = (ExitState *)exptr;
  SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
  GCtrace *T = traceref(J, J->parent);
  SnapShot *snap = &T->snap[snapno];
  MSize n, nent = snap->nent;
  SnapEntry *map = &T->snapmap[snap->mapofs];
  SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1];
  int32_t ftsz0;
  TValue *frame;
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  const BCIns *pc = snap_pc(map[nent]);
  lua_State *L = J->L;

  /* Set interpreter PC to the next PC to get correct error messages. */
  setcframe_pc(cframe_raw(L->cframe), pc+1);

  /* Make sure the stack is big enough for the slots from the snapshot. */
  if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) {
    L->top = curr_topL(L);
    lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize);
  }

  /* Fill stack slots with data from the registers and spill slots. */
  frame = L->base-1;
  ftsz0 = frame_ftsz(frame);  /* Preserve link to previous frame in slot #0. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    if (!(sn & SNAP_NORESTORE)) {
      TValue *o = &frame[snap_slot(sn)];
      IRRef ref = snap_ref(sn);
      IRIns *ir = &T->ir[ref];
      if (ir->r == RID_SUNK) {
	MSize j;
	for (j = 0; j < n; j++)
	  if (snap_ref(map[j]) == ref) {  /* De-duplicate sunk allocations. */
	    copyTV(L, o, &frame[snap_slot(map[j])]);
	    goto dupslot;
	  }
	snap_unsink(J, T, ex, snapno, rfilt, ir, o);
      dupslot:
	continue;
      }
      snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
	TValue tmp;
	snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
	o->u32.hi = tmp.u32.lo;
      } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
	/* Overwrite tag with frame link. */
	o->fr.tp.ftsz = snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0;
	L->base = o+1;
      }
    }
  }
  lua_assert(map + nent == flinks);

  /* Compute current stack top. */
  switch (bc_op(*pc)) {
  default:
    if (bc_op(*pc) < BC_FUNCF) {
      L->top = curr_topL(L);
      break;
    }
    /* fallthrough */
  case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
    L->top = frame + snap->nslots;
    break;
  }
  return pc;
}

#undef IR
#undef emitir_raw
#undef emitir

#endif