1 /*
2 ** Snapshot handling.
3 ** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
4 */
5
6 #define lj_snap_c
7 #define LUA_CORE
8
9 #include "lj_obj.h"
10
11 #if LJ_HASJIT
12
13 #include "lj_gc.h"
14 #include "lj_tab.h"
15 #include "lj_state.h"
16 #include "lj_frame.h"
17 #include "lj_bc.h"
18 #include "lj_ir.h"
19 #include "lj_jit.h"
20 #include "lj_iropt.h"
21 #include "lj_trace.h"
22 #include "lj_snap.h"
23 #include "lj_target.h"
24 #if LJ_HASFFI
25 #include "lj_ctype.h"
26 #include "lj_cdata.h"
27 #endif
28
/* Set up an IR instruction and hand it to the FOLD optimization chain. */
#define emitir(irt, lhs, rhs) \
  (lj_ir_set(J, (irt), (lhs), (rhs)), lj_opt_fold(J))

/* Set up an IR instruction and emit it directly, bypassing optimizations. */
#define emitir_raw(irt, lhs, rhs) \
  (lj_ir_set(J, (irt), (lhs), (rhs)), lj_ir_emit(J))
34
35 /* -- Snapshot buffer allocation ------------------------------------------ */
36
37 /* Grow snapshot buffer. */
lj_snap_grow_buf_(jit_State * J,MSize need)38 void lj_snap_grow_buf_(jit_State *J, MSize need)
39 {
40 MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
41 if (need > maxsnap)
42 lj_trace_err(J, LJ_TRERR_SNAPOV);
43 lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
44 J->cur.snap = J->snapbuf;
45 }
46
47 /* Grow snapshot map buffer. */
lj_snap_grow_map_(jit_State * J,MSize need)48 void lj_snap_grow_map_(jit_State *J, MSize need)
49 {
50 if (need < 2*J->sizesnapmap)
51 need = 2*J->sizesnapmap;
52 else if (need < 64)
53 need = 64;
54 J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
55 J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
56 J->cur.snapmap = J->snapmapbuf;
57 J->sizesnapmap = need;
58 }
59
60 /* -- Snapshot generation ------------------------------------------------- */
61
62 /* Add all modified slots to the snapshot. */
snapshot_slots(jit_State * J,SnapEntry * map,BCReg nslots)63 static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
64 {
65 IRRef retf = J->chain[IR_RETF]; /* Limits SLOAD restore elimination. */
66 BCReg s;
67 MSize n = 0;
68 for (s = 0; s < nslots; s++) {
69 TRef tr = J->slot[s];
70 IRRef ref = tref_ref(tr);
71 #if LJ_FR2
72 if (s == 1) { /* Ignore slot 1 in LJ_FR2 mode, except if tailcalled. */
73 if ((tr & TREF_FRAME))
74 map[n++] = SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL);
75 continue;
76 }
77 if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) {
78 cTValue *base = J->L->base - J->baseslot;
79 tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64);
80 ref = tref_ref(tr);
81 }
82 #endif
83 if (ref) {
84 SnapEntry sn = SNAP_TR(s, tr);
85 IRIns *ir = &J->cur.ir[ref];
86 if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) &&
87 ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
88 /* No need to snapshot unmodified non-inherited slots. */
89 if (!(ir->op2 & IRSLOAD_INHERIT))
90 continue;
91 /* No need to restore readonly slots and unmodified non-parent slots. */
92 if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
93 (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
94 sn |= SNAP_NORESTORE;
95 }
96 if (LJ_SOFTFP && irt_isnum(ir->t))
97 sn |= SNAP_SOFTFPNUM;
98 map[n++] = sn;
99 }
100 }
101 return n;
102 }
103
104 /* Add frame links at the end of the snapshot. */
snapshot_framelinks(jit_State * J,SnapEntry * map,uint8_t * topslot)105 static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot)
106 {
107 cTValue *frame = J->L->base - 1;
108 cTValue *lim = J->L->base - J->baseslot + LJ_FR2;
109 GCfunc *fn = frame_func(frame);
110 cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
111 #if LJ_FR2
112 uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2);
113 lua_assert(2 <= J->baseslot && J->baseslot <= 257);
114 memcpy(map, &pcbase, sizeof(uint64_t));
115 #else
116 MSize f = 0;
117 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
118 #endif
119 while (frame > lim) { /* Backwards traversal of all frames above base. */
120 if (frame_islua(frame)) {
121 #if !LJ_FR2
122 map[f++] = SNAP_MKPC(frame_pc(frame));
123 #endif
124 frame = frame_prevl(frame);
125 } else if (frame_iscont(frame)) {
126 #if !LJ_FR2
127 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
128 map[f++] = SNAP_MKPC(frame_contpc(frame));
129 #endif
130 frame = frame_prevd(frame);
131 } else {
132 lua_assert(!frame_isc(frame));
133 #if !LJ_FR2
134 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
135 #endif
136 frame = frame_prevd(frame);
137 continue;
138 }
139 if (frame + funcproto(frame_func(frame))->framesize > ftop)
140 ftop = frame + funcproto(frame_func(frame))->framesize;
141 }
142 *topslot = (uint8_t)(ftop - lim);
143 #if LJ_FR2
144 lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t));
145 return 2;
146 #else
147 lua_assert(f == (MSize)(1 + J->framedepth));
148 return f;
149 #endif
150 }
151
152 /* Take a snapshot of the current stack. */
snapshot_stack(jit_State * J,SnapShot * snap,MSize nsnapmap)153 static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
154 {
155 BCReg nslots = J->baseslot + J->maxslot;
156 MSize nent;
157 SnapEntry *p;
158 /* Conservative estimate. */
159 lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
160 p = &J->cur.snapmap[nsnapmap];
161 nent = snapshot_slots(J, p, nslots);
162 snap->nent = (uint8_t)nent;
163 nent += snapshot_framelinks(J, p + nent, &snap->topslot);
164 snap->mapofs = (uint16_t)nsnapmap;
165 snap->ref = (IRRef1)J->cur.nins;
166 snap->nslots = (uint8_t)nslots;
167 snap->count = 0;
168 J->cur.nsnapmap = (uint16_t)(nsnapmap + nent);
169 }
170
171 /* Add or merge a snapshot. */
lj_snap_add(jit_State * J)172 void lj_snap_add(jit_State *J)
173 {
174 MSize nsnap = J->cur.nsnap;
175 MSize nsnapmap = J->cur.nsnapmap;
176 /* Merge if no ins. inbetween or if requested and no guard inbetween. */
177 if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) ||
178 (J->mergesnap && !irt_isguard(J->guardemit))) {
179 if (nsnap == 1) { /* But preserve snap #0 PC. */
180 emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
181 goto nomerge;
182 }
183 nsnapmap = J->cur.snap[--nsnap].mapofs;
184 } else {
185 nomerge:
186 lj_snap_grow_buf(J, nsnap+1);
187 J->cur.nsnap = (uint16_t)(nsnap+1);
188 }
189 J->mergesnap = 0;
190 J->guardemit.irt = 0;
191 snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
192 }
193
194 /* -- Snapshot modification ----------------------------------------------- */
195
196 #define SNAP_USEDEF_SLOTS (LJ_MAX_JSLOTS+LJ_STACK_EXTRA)
197
198 /* Find unused slots with reaching-definitions bytecode data-flow analysis. */
snap_usedef(jit_State * J,uint8_t * udf,const BCIns * pc,BCReg maxslot)199 static BCReg snap_usedef(jit_State *J, uint8_t *udf,
200 const BCIns *pc, BCReg maxslot)
201 {
202 BCReg s;
203 GCobj *o;
204
205 if (maxslot == 0) return 0;
206 #ifdef LUAJIT_USE_VALGRIND
207 /* Avoid errors for harmless reads beyond maxslot. */
208 memset(udf, 1, SNAP_USEDEF_SLOTS);
209 #else
210 memset(udf, 1, maxslot);
211 #endif
212
213 /* Treat open upvalues as used. */
214 o = gcref(J->L->openupval);
215 while (o) {
216 if (uvval(gco2uv(o)) < J->L->base) break;
217 udf[uvval(gco2uv(o)) - J->L->base] = 0;
218 o = gcref(o->gch.nextgc);
219 }
220
221 #define USE_SLOT(s) udf[(s)] &= ~1
222 #define DEF_SLOT(s) udf[(s)] *= 3
223
224 /* Scan through following bytecode and check for uses/defs. */
225 lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
226 for (;;) {
227 BCIns ins = *pc++;
228 BCOp op = bc_op(ins);
229 switch (bcmode_b(op)) {
230 case BCMvar: USE_SLOT(bc_b(ins)); break;
231 default: break;
232 }
233 switch (bcmode_c(op)) {
234 case BCMvar: USE_SLOT(bc_c(ins)); break;
235 case BCMrbase:
236 lua_assert(op == BC_CAT);
237 for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
238 for (; s < maxslot; s++) DEF_SLOT(s);
239 break;
240 case BCMjump:
241 handle_jump: {
242 BCReg minslot = bc_a(ins);
243 if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
244 else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
245 else if (op == BC_UCLO) { pc += bc_j(ins); break; }
246 for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
247 return minslot < maxslot ? minslot : maxslot;
248 }
249 case BCMlit:
250 if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
251 goto handle_jump;
252 } else if (bc_isret(op)) {
253 BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
254 for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
255 for (; s < top; s++) USE_SLOT(s);
256 for (; s < maxslot; s++) DEF_SLOT(s);
257 return 0;
258 }
259 break;
260 case BCMfunc: return maxslot; /* NYI: will abort, anyway. */
261 default: break;
262 }
263 switch (bcmode_a(op)) {
264 case BCMvar: USE_SLOT(bc_a(ins)); break;
265 case BCMdst:
266 if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
267 break;
268 case BCMbase:
269 if (op >= BC_CALLM && op <= BC_VARG) {
270 BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
271 maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
272 if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
273 s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
274 for (; s < top; s++) USE_SLOT(s);
275 for (; s < maxslot; s++) DEF_SLOT(s);
276 if (op == BC_CALLT || op == BC_CALLMT) {
277 for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
278 return 0;
279 }
280 } else if (op == BC_KNIL) {
281 for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
282 } else if (op == BC_TSETM) {
283 for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
284 }
285 break;
286 default: break;
287 }
288 lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
289 }
290
291 #undef USE_SLOT
292 #undef DEF_SLOT
293
294 return 0; /* unreachable */
295 }
296
297 /* Purge dead slots before the next snapshot. */
lj_snap_purge(jit_State * J)298 void lj_snap_purge(jit_State *J)
299 {
300 uint8_t udf[SNAP_USEDEF_SLOTS];
301 BCReg maxslot = J->maxslot;
302 BCReg s = snap_usedef(J, udf, J->pc, maxslot);
303 for (; s < maxslot; s++)
304 if (udf[s] != 0)
305 J->base[s] = 0; /* Purge dead slots. */
306 }
307
308 /* Shrink last snapshot. */
lj_snap_shrink(jit_State * J)309 void lj_snap_shrink(jit_State *J)
310 {
311 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
312 SnapEntry *map = &J->cur.snapmap[snap->mapofs];
313 MSize n, m, nlim, nent = snap->nent;
314 uint8_t udf[SNAP_USEDEF_SLOTS];
315 BCReg maxslot = J->maxslot;
316 BCReg baseslot = J->baseslot;
317 BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
318 maxslot += baseslot;
319 minslot += baseslot;
320 snap->nslots = (uint8_t)maxslot;
321 for (n = m = 0; n < nent; n++) { /* Remove unused slots from snapshot. */
322 BCReg s = snap_slot(map[n]);
323 if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
324 map[m++] = map[n]; /* Only copy used slots. */
325 }
326 snap->nent = (uint8_t)m;
327 nlim = J->cur.nsnapmap - snap->mapofs - 1;
328 while (n <= nlim) map[m++] = map[n++]; /* Move PC + frame links down. */
329 J->cur.nsnapmap = (uint16_t)(snap->mapofs + m); /* Free up space in map. */
330 }
331
332 /* -- Snapshot access ----------------------------------------------------- */
333
334 /* Initialize a Bloom Filter with all renamed refs.
335 ** There are very few renames (often none), so the filter has
336 ** very few bits set. This makes it suitable for negative filtering.
337 */
snap_renamefilter(GCtrace * T,SnapNo lim)338 static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
339 {
340 BloomFilter rfilt = 0;
341 IRIns *ir;
342 for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
343 if (ir->op2 <= lim)
344 bloomset(rfilt, ir->op1);
345 return rfilt;
346 }
347
348 /* Process matching renames to find the original RegSP. */
snap_renameref(GCtrace * T,SnapNo lim,IRRef ref,RegSP rs)349 static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
350 {
351 IRIns *ir;
352 for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
353 if (ir->op1 == ref && ir->op2 <= lim)
354 rs = ir->prev;
355 return rs;
356 }
357
358 /* Copy RegSP from parent snapshot to the parent links of the IR. */
lj_snap_regspmap(GCtrace * T,SnapNo snapno,IRIns * ir)359 IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
360 {
361 SnapShot *snap = &T->snap[snapno];
362 SnapEntry *map = &T->snapmap[snap->mapofs];
363 BloomFilter rfilt = snap_renamefilter(T, snapno);
364 MSize n = 0;
365 IRRef ref = 0;
366 for ( ; ; ir++) {
367 uint32_t rs;
368 if (ir->o == IR_SLOAD) {
369 if (!(ir->op2 & IRSLOAD_PARENT)) break;
370 for ( ; ; n++) {
371 lua_assert(n < snap->nent);
372 if (snap_slot(map[n]) == ir->op1) {
373 ref = snap_ref(map[n++]);
374 break;
375 }
376 }
377 } else if (LJ_SOFTFP && ir->o == IR_HIOP) {
378 ref++;
379 } else if (ir->o == IR_PVAL) {
380 ref = ir->op1 + REF_BIAS;
381 } else {
382 break;
383 }
384 rs = T->ir[ref].prev;
385 if (bloomtest(rfilt, ref))
386 rs = snap_renameref(T, snapno, ref, rs);
387 ir->prev = (uint16_t)rs;
388 lua_assert(regsp_used(rs));
389 }
390 return ir;
391 }
392
393 /* -- Snapshot replay ----------------------------------------------------- */
394
395 /* Replay constant from parent trace. */
snap_replay_const(jit_State * J,IRIns * ir)396 static TRef snap_replay_const(jit_State *J, IRIns *ir)
397 {
398 /* Only have to deal with constants that can occur in stack slots. */
399 switch ((IROp)ir->o) {
400 case IR_KPRI: return TREF_PRI(irt_type(ir->t));
401 case IR_KINT: return lj_ir_kint(J, ir->i);
402 case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
403 case IR_KNUM: case IR_KINT64:
404 return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64);
405 case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */
406 default: lua_assert(0); return TREF_NIL; break;
407 }
408 }
409
410 /* De-duplicate parent reference. */
snap_dedup(jit_State * J,SnapEntry * map,MSize nmax,IRRef ref)411 static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
412 {
413 MSize j;
414 for (j = 0; j < nmax; j++)
415 if (snap_ref(map[j]) == ref)
416 return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
417 return 0;
418 }
419
420 /* Emit parent reference with de-duplication. */
snap_pref(jit_State * J,GCtrace * T,SnapEntry * map,MSize nmax,BloomFilter seen,IRRef ref)421 static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
422 BloomFilter seen, IRRef ref)
423 {
424 IRIns *ir = &T->ir[ref];
425 TRef tr;
426 if (irref_isk(ref))
427 tr = snap_replay_const(J, ir);
428 else if (!regsp_used(ir->prev))
429 tr = 0;
430 else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
431 tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
432 return tr;
433 }
434
435 /* Check whether a sunk store corresponds to an allocation. Slow path. */
snap_sunk_store2(GCtrace * T,IRIns * ira,IRIns * irs)436 static int snap_sunk_store2(GCtrace *T, IRIns *ira, IRIns *irs)
437 {
438 if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
439 irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
440 IRIns *irk = &T->ir[irs->op1];
441 if (irk->o == IR_AREF || irk->o == IR_HREFK)
442 irk = &T->ir[irk->op1];
443 return (&T->ir[irk->op1] == ira);
444 }
445 return 0;
446 }
447
448 /* Check whether a sunk store corresponds to an allocation. Fast path. */
snap_sunk_store(GCtrace * T,IRIns * ira,IRIns * irs)449 static LJ_AINLINE int snap_sunk_store(GCtrace *T, IRIns *ira, IRIns *irs)
450 {
451 if (irs->s != 255)
452 return (ira + irs->s == irs); /* Fast check. */
453 return snap_sunk_store2(T, ira, irs);
454 }
455
456 /* Replay snapshot state to setup side trace. */
lj_snap_replay(jit_State * J,GCtrace * T)457 void lj_snap_replay(jit_State *J, GCtrace *T)
458 {
459 SnapShot *snap = &T->snap[J->exitno];
460 SnapEntry *map = &T->snapmap[snap->mapofs];
461 MSize n, nent = snap->nent;
462 BloomFilter seen = 0;
463 int pass23 = 0;
464 J->framedepth = 0;
465 /* Emit IR for slots inherited from parent snapshot. */
466 for (n = 0; n < nent; n++) {
467 SnapEntry sn = map[n];
468 BCReg s = snap_slot(sn);
469 IRRef ref = snap_ref(sn);
470 IRIns *ir = &T->ir[ref];
471 TRef tr;
472 /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
473 if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
474 goto setslot;
475 bloomset(seen, ref);
476 if (irref_isk(ref)) {
477 /* See special treatment of LJ_FR2 slot 1 in snapshot_slots() above. */
478 if (LJ_FR2 && (sn == SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL)))
479 tr = 0;
480 else
481 tr = snap_replay_const(J, ir);
482 } else if (!regsp_used(ir->prev)) {
483 pass23 = 1;
484 lua_assert(s != 0);
485 tr = s;
486 } else {
487 IRType t = irt_type(ir->t);
488 uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
489 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
490 if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
491 tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
492 }
493 setslot:
494 J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */
495 J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2));
496 if ((sn & SNAP_FRAME))
497 J->baseslot = s+1;
498 }
499 if (pass23) {
500 IRIns *irlast = &T->ir[snap->ref];
501 pass23 = 0;
502 /* Emit dependent PVALs. */
503 for (n = 0; n < nent; n++) {
504 SnapEntry sn = map[n];
505 IRRef refp = snap_ref(sn);
506 IRIns *ir = &T->ir[refp];
507 if (regsp_reg(ir->r) == RID_SUNK) {
508 if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
509 pass23 = 1;
510 lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
511 ir->o == IR_CNEW || ir->o == IR_CNEWI);
512 if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
513 if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
514 if (LJ_HASFFI && ir->o == IR_CNEWI) {
515 if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
516 snap_pref(J, T, map, nent, seen, (ir+1)->op2);
517 } else {
518 IRIns *irs;
519 for (irs = ir+1; irs < irlast; irs++)
520 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
521 if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
522 snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
523 else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
524 irs+1 < irlast && (irs+1)->o == IR_HIOP)
525 snap_pref(J, T, map, nent, seen, (irs+1)->op2);
526 }
527 }
528 } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
529 lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
530 J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
531 }
532 }
533 /* Replay sunk instructions. */
534 for (n = 0; pass23 && n < nent; n++) {
535 SnapEntry sn = map[n];
536 IRRef refp = snap_ref(sn);
537 IRIns *ir = &T->ir[refp];
538 if (regsp_reg(ir->r) == RID_SUNK) {
539 TRef op1, op2;
540 if (J->slot[snap_slot(sn)] != snap_slot(sn)) { /* De-dup allocs. */
541 J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
542 continue;
543 }
544 op1 = ir->op1;
545 if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
546 op2 = ir->op2;
547 if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
548 if (LJ_HASFFI && ir->o == IR_CNEWI) {
549 if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
550 lj_needsplit(J); /* Emit joining HIOP. */
551 op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
552 snap_pref(J, T, map, nent, seen, (ir+1)->op2));
553 }
554 J->slot[snap_slot(sn)] = emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2);
555 } else {
556 IRIns *irs;
557 TRef tr = emitir(ir->ot, op1, op2);
558 J->slot[snap_slot(sn)] = tr;
559 for (irs = ir+1; irs < irlast; irs++)
560 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
561 IRIns *irr = &T->ir[irs->op1];
562 TRef val, key = irr->op2, tmp = tr;
563 if (irr->o != IR_FREF) {
564 IRIns *irk = &T->ir[key];
565 if (irr->o == IR_HREFK)
566 key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
567 irk->op2);
568 else
569 key = snap_replay_const(J, irk);
570 if (irr->o == IR_HREFK || irr->o == IR_AREF) {
571 IRIns *irf = &T->ir[irr->op1];
572 tmp = emitir(irf->ot, tmp, irf->op2);
573 }
574 }
575 tmp = emitir(irr->ot, tmp, key);
576 val = snap_pref(J, T, map, nent, seen, irs->op2);
577 if (val == 0) {
578 IRIns *irc = &T->ir[irs->op2];
579 lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
580 val = snap_pref(J, T, map, nent, seen, irc->op1);
581 val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
582 } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
583 irs+1 < irlast && (irs+1)->o == IR_HIOP) {
584 IRType t = IRT_I64;
585 if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
586 t = IRT_NUM;
587 lj_needsplit(J);
588 if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
589 uint64_t k = (uint32_t)T->ir[irs->op2].i +
590 ((uint64_t)T->ir[(irs+1)->op2].i << 32);
591 val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k);
592 } else {
593 val = emitir_raw(IRT(IR_HIOP, t), val,
594 snap_pref(J, T, map, nent, seen, (irs+1)->op2));
595 }
596 tmp = emitir(IRT(irs->o, t), tmp, val);
597 continue;
598 }
599 tmp = emitir(irs->ot, tmp, val);
600 } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
601 emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
602 }
603 }
604 }
605 }
606 }
607 J->base = J->slot + J->baseslot;
608 J->maxslot = snap->nslots - J->baseslot;
609 lj_snap_add(J);
610 if (pass23) /* Need explicit GC step _after_ initial snapshot. */
611 emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
612 }
613
614 /* -- Snapshot restore ---------------------------------------------------- */
615
616 static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
617 SnapNo snapno, BloomFilter rfilt,
618 IRIns *ir, TValue *o);
619
620 /* Restore a value from the trace exit state. */
snap_restoreval(jit_State * J,GCtrace * T,ExitState * ex,SnapNo snapno,BloomFilter rfilt,IRRef ref,TValue * o)621 static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
622 SnapNo snapno, BloomFilter rfilt,
623 IRRef ref, TValue *o)
624 {
625 IRIns *ir = &T->ir[ref];
626 IRType1 t = ir->t;
627 RegSP rs = ir->prev;
628 if (irref_isk(ref)) { /* Restore constant slot. */
629 lj_ir_kvalue(J->L, o, ir);
630 return;
631 }
632 if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
633 rs = snap_renameref(T, snapno, ref, rs);
634 if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
635 int32_t *sps = &ex->spill[regsp_spill(rs)];
636 if (irt_isinteger(t)) {
637 setintV(o, *sps);
638 #if !LJ_SOFTFP
639 } else if (irt_isnum(t)) {
640 o->u64 = *(uint64_t *)sps;
641 #endif
642 #if LJ_64 && !LJ_GC64
643 } else if (irt_islightud(t)) {
644 /* 64 bit lightuserdata which may escape already has the tag bits. */
645 o->u64 = *(uint64_t *)sps;
646 #endif
647 } else {
648 lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
649 setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
650 }
651 } else { /* Restore from register. */
652 Reg r = regsp_reg(rs);
653 if (ra_noreg(r)) {
654 lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
655 snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
656 if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
657 return;
658 } else if (irt_isinteger(t)) {
659 setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
660 #if !LJ_SOFTFP
661 } else if (irt_isnum(t)) {
662 setnumV(o, ex->fpr[r-RID_MIN_FPR]);
663 #endif
664 #if LJ_64 && !LJ_GC64
665 } else if (irt_is64(t)) {
666 /* 64 bit values that already have the tag bits. */
667 o->u64 = ex->gpr[r-RID_MIN_GPR];
668 #endif
669 } else if (irt_ispri(t)) {
670 setpriV(o, irt_toitype(t));
671 } else {
672 setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
673 }
674 }
675 }
676
#if LJ_HASFFI
/*
** Restore raw data from the trace exit state.
** Copies `sz` bytes (1, 2, 4 or 8) of the value of IR reference `ref` of
** trace `T` to `dst`. The source is the constant itself, a spill slot or a
** saved register in `ex`.
*/
static void snap_restoredata(GCtrace *T, ExitState *ex,
                             SnapNo snapno, BloomFilter rfilt,
                             IRRef ref, void *dst, CTSize sz)
{
  IRIns *ir = &T->ir[ref];
  RegSP rs = ir->prev;
  int32_t *src;
  uint64_t widened;
  if (irref_isk(ref)) {
    if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
      src = (int32_t *)&ir[1];  /* The 64 bit payload follows the instruction. */
    } else if (sz == 8) {
      widened = (uint64_t)(uint32_t)ir->i;  /* Zero-extend 32 bit constant. */
      src = (int32_t *)&widened;
    } else {
      src = &ir->i;
    }
  } else {
    if (LJ_UNLIKELY(bloomtest(rfilt, ref))) {
      rs = snap_renameref(T, snapno, ref, rs);
    }
    if (ra_hasspill(regsp_spill(rs))) {
      src = &ex->spill[regsp_spill(rs)];
      if (sz == 8 && !irt_is64(ir->t)) {
        widened = (uint64_t)(uint32_t)*src;  /* Zero-extend 32 bit spill. */
        src = (int32_t *)&widened;
      }
    } else {
      Reg reg = regsp_reg(rs);
      if (ra_noreg(reg)) {
        /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
        lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
        snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);
        *(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
        return;
      }
      src = (int32_t *)&ex->gpr[reg-RID_MIN_GPR];
#if !LJ_SOFTFP
      if (reg >= RID_MAX_GPR) {
        src = (int32_t *)&ex->fpr[reg-RID_MIN_FPR];
#if LJ_TARGET_PPC
        if (sz == 4) {  /* PPC FPRs are always doubles. */
          *(float *)dst = (float)*(double *)src;
          return;
        }
#else
        if (LJ_BE && sz == 4) src++;
#endif
      } else
#endif
      if (LJ_64 && LJ_BE && sz == 4) src++;
    }
  }
  lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
  switch (sz) {
  case 4:
    *(int32_t *)dst = *src;
    break;
  case 8:
    *(int64_t *)dst = *(int64_t *)src;
    break;
  case 1:
    *(int8_t *)dst = (int8_t)*src;
    break;
  default:
    *(int16_t *)dst = (int16_t)*src;
    break;
  }
}
#endif
738
739 /* Unsink allocation from the trace exit state. Unsink sunk stores. */
snap_unsink(jit_State * J,GCtrace * T,ExitState * ex,SnapNo snapno,BloomFilter rfilt,IRIns * ir,TValue * o)740 static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
741 SnapNo snapno, BloomFilter rfilt,
742 IRIns *ir, TValue *o)
743 {
744 lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
745 ir->o == IR_CNEW || ir->o == IR_CNEWI);
746 #if LJ_HASFFI
747 if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
748 CTState *cts = ctype_cts(J->L);
749 CTypeID id = (CTypeID)T->ir[ir->op1].i;
750 CTSize sz;
751 CTInfo info = lj_ctype_info(cts, id, &sz);
752 GCcdata *cd = lj_cdata_newx(cts, id, sz, info);
753 setcdataV(J->L, o, cd);
754 if (ir->o == IR_CNEWI) {
755 uint8_t *p = (uint8_t *)cdataptr(cd);
756 lua_assert(sz == 4 || sz == 8);
757 if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
758 snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4);
759 if (LJ_BE) p += 4;
760 sz = 4;
761 }
762 snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz);
763 } else {
764 IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
765 for (irs = ir+1; irs < irlast; irs++)
766 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
767 IRIns *iro = &T->ir[T->ir[irs->op1].op2];
768 uint8_t *p = (uint8_t *)cd;
769 CTSize szs;
770 lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD);
771 lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64);
772 if (irt_is64(irs->t)) szs = 8;
773 else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
774 else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
775 else szs = 4;
776 if (LJ_64 && iro->o == IR_KINT64)
777 p += (int64_t)ir_k64(iro)->u64;
778 else
779 p += iro->i;
780 lua_assert(p >= (uint8_t *)cdataptr(cd) &&
781 p + szs <= (uint8_t *)cdataptr(cd) + sz);
782 if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
783 lua_assert(szs == 4);
784 snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4);
785 if (LJ_BE) p += 4;
786 }
787 snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs);
788 }
789 }
790 } else
791 #endif
792 {
793 IRIns *irs, *irlast;
794 GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
795 lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
796 settabV(J->L, o, t);
797 irlast = &T->ir[T->snap[snapno].ref];
798 for (irs = ir+1; irs < irlast; irs++)
799 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
800 IRIns *irk = &T->ir[irs->op1];
801 TValue tmp, *val;
802 lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
803 irs->o == IR_FSTORE);
804 if (irk->o == IR_FREF) {
805 lua_assert(irk->op2 == IRFL_TAB_META);
806 snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
807 /* NOBARRIER: The table is new (marked white). */
808 setgcref(t->metatable, obj2gco(tabV(&tmp)));
809 } else {
810 irk = &T->ir[irk->op2];
811 if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
812 lj_ir_kvalue(J->L, &tmp, irk);
813 val = lj_tab_set(J->L, t, &tmp);
814 /* NOBARRIER: The table is new (marked white). */
815 snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
816 if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
817 snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
818 val->u32.hi = tmp.u32.lo;
819 }
820 }
821 }
822 }
823 }
824
825 /* Restore interpreter state from exit state with the help of a snapshot. */
lj_snap_restore(jit_State * J,void * exptr)826 const BCIns *lj_snap_restore(jit_State *J, void *exptr)
827 {
828 ExitState *ex = (ExitState *)exptr;
829 SnapNo snapno = J->exitno; /* For now, snapno == exitno. */
830 GCtrace *T = traceref(J, J->parent);
831 SnapShot *snap = &T->snap[snapno];
832 MSize n, nent = snap->nent;
833 SnapEntry *map = &T->snapmap[snap->mapofs];
834 #if !LJ_FR2 || defined(LUA_USE_ASSERT)
835 SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2];
836 #endif
837 #if !LJ_FR2
838 ptrdiff_t ftsz0;
839 #endif
840 TValue *frame;
841 BloomFilter rfilt = snap_renamefilter(T, snapno);
842 const BCIns *pc = snap_pc(&map[nent]);
843 lua_State *L = J->L;
844
845 /* Set interpreter PC to the next PC to get correct error messages. */
846 setcframe_pc(cframe_raw(L->cframe), pc+1);
847
848 /* Make sure the stack is big enough for the slots from the snapshot. */
849 if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) {
850 L->top = curr_topL(L);
851 lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize);
852 }
853
854 /* Fill stack slots with data from the registers and spill slots. */
855 frame = L->base-1-LJ_FR2;
856 #if !LJ_FR2
857 ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */
858 #endif
859 for (n = 0; n < nent; n++) {
860 SnapEntry sn = map[n];
861 if (!(sn & SNAP_NORESTORE)) {
862 TValue *o = &frame[snap_slot(sn)];
863 IRRef ref = snap_ref(sn);
864 IRIns *ir = &T->ir[ref];
865 if (ir->r == RID_SUNK) {
866 MSize j;
867 for (j = 0; j < n; j++)
868 if (snap_ref(map[j]) == ref) { /* De-duplicate sunk allocations. */
869 copyTV(L, o, &frame[snap_slot(map[j])]);
870 goto dupslot;
871 }
872 snap_unsink(J, T, ex, snapno, rfilt, ir, o);
873 dupslot:
874 continue;
875 }
876 snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
877 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
878 TValue tmp;
879 snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
880 o->u32.hi = tmp.u32.lo;
881 #if !LJ_FR2
882 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
883 /* Overwrite tag with frame link. */
884 setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
885 L->base = o+1;
886 #endif
887 }
888 }
889 }
890 #if LJ_FR2
891 L->base += (map[nent+LJ_BE] & 0xff);
892 #endif
893 lua_assert(map + nent == flinks);
894
895 /* Compute current stack top. */
896 switch (bc_op(*pc)) {
897 default:
898 if (bc_op(*pc) < BC_FUNCF) {
899 L->top = curr_topL(L);
900 break;
901 }
902 /* fallthrough */
903 case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
904 L->top = frame + snap->nslots;
905 break;
906 }
907 return pc;
908 }
909
910 #undef emitir_raw
911 #undef emitir
912
913 #endif
914