/*
** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions.
** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_split_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI))

#include "lj_err.h"
#include "lj_buf.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_ircall.h"
#include "lj_iropt.h"
#include "lj_dispatch.h"
#include "lj_vm.h"

/* SPLIT pass:
**
** This pass splits up 64 bit IR instructions into multiple 32 bit IR
** instructions. It's only active for soft-float targets or for 32 bit CPUs
** which lack native 64 bit integer operations (the FFI is currently the
** only emitter for 64 bit integer instructions).
**
** Splitting the IR in a separate pass keeps each 32 bit IR assembler
** backend simple. Only a small amount of extra functionality needs to be
** implemented. This is much easier than adding support for allocating
** register pairs to each backend (believe me, I tried). A few simple, but
** important optimizations can be performed by the SPLIT pass, which would
** be tedious to do in the backend.
**
** The basic idea is to replace each 64 bit IR instruction with its 32 bit
** equivalent plus an extra HIOP instruction. The split IR is not passed
** through FOLD or any other optimizations, so each HIOP is guaranteed to
** immediately follow its counterpart. The actual functionality of HIOP is
** inferred from the previous instruction.
**
** The operands of HIOP hold the hiword input references. The output of HIOP
** is the hiword output reference, which is also used to hold the hiword
** register or spill slot information. The register allocator treats this
** instruction independently of any other instruction, which improves code
** quality compared to using fixed register pairs.
**
** It's easier to split up some instructions into two regular 32 bit
** instructions. E.g. XLOAD is split up into two XLOADs with two different
** addresses. Obviously 64 bit constants need to be split up into two 32 bit
** constants, too. Some hiword instructions can be entirely omitted, e.g.
** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls
** are split up into two 32 bit arguments each.
**
** On soft-float targets, floating-point instructions are directly converted
** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX).
** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump).
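**
** E.g. a number ADD 'a+b' on a little-endian soft-float target is
** schematically turned into the following sequence (references and the
** printed call name are illustrative, not verbatim -jdump output):
**
**   0100 nil CARG   a.lo  a.hi
**   0101 nil CARG   0100  b.lo
**   0102 nil CARG   0101  b.hi
**   0103 int CALLN  0102  softfp_add
**   0104 sfp HIOP   0103  0103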
**
** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
** two int64_t fields:
**
** 0100    p32 ADD    base  +8
** 0101    i64 XLOAD  0100
** 0102    i64 ADD    0101  +1
** 0103    p32 ADD    base  +16
** 0104    i64 XSTORE 0103  0102
**
**         mov rax, [esi+0x8]
**         add rax, +0x01
**         mov [esi+0x10], rax
**
** Here's the transformed IR and the x86 machine code after the SPLIT pass:
**
** 0100    p32 ADD    base  +8
** 0101    int XLOAD  0100
** 0102    p32 ADD    base  +12
** 0103    int XLOAD  0102
** 0104    int ADD    0101  +1
** 0105    int HIOP   0103  +0
** 0106    p32 ADD    base  +16
** 0107    int XSTORE 0106  0104
** 0108    int HIOP   0106  0105
**
**         mov eax, [esi+0x8]
**         mov ecx, [esi+0xc]
**         add eax, +0x01
**         adc ecx, +0x00
**         mov [esi+0x10], eax
**         mov [esi+0x14], ecx
**
** You may notice the reassociated hiword address computation, which is
** later fused into the mov operands by the assembler.
*/

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)		(&J->cur.ir[(ref)])

/* Directly emit the transformed IR without updating chains etc. */
static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2)
{
  IRRef nref = lj_ir_nextins(J);
  IRIns *ir = IR(nref);
  ir->ot = ot;
  ir->op1 = op1;
  ir->op2 = op2;
  return nref;
}

#if LJ_SOFTFP
/* Emit a (checked) number to integer conversion. */
static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check)
{
  IRRef tmp, res;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo);
#endif
  res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i);
  if (check) {
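    /* Check for an exact conversion: convert the integer result back
    ** with softfp_i2d and guard that both words match the original
    ** number.
    */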
    tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d);
    split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
    split_emit(J, IRTGI(IR_EQ), tmp, lo);
    split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi);
  }
  return res;
}

/* Emit a CALLN with one split 64 bit argument. */
static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			  IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
#endif

/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			   IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}

/* Emit a CALLN with two split 64 bit arguments. */
static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			   IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J,
    IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
    tmp, tmp);
}

/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
{
  IRRef nref = oir[ref].prev;
  IRIns *ir = IR(nref);
  int32_t ofs = 4;
  if (ir->o == IR_KPTR)
    return lj_ir_kptr(J, (char *)ir_kptr(ir) + ofs);
  if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) {
    /* Reassociate address. */
    ofs += IR(ir->op2)->i;
    nref = ir->op1;
    if (ofs == 0) return nref;
  }
  return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs));
}

#if LJ_HASFFI
static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
			    IRIns *oir, IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize constant shifts. */
    int32_t k = (IR(kref)->i & 63);
    IRRef lo = nir->op1, hi = hisubst[ir->op1];
    if (op == IR_BROL || op == IR_BROR) {
      if (op == IR_BROR) k = (-k & 63);
      if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }
      if (k == 0) {
      passthrough:
	J->cur.nins--;
	ir->prev = lo;
	return hi;
      } else {
	TRef k1, k2;
	IRRef t1, t2, t3, t4;
	J->cur.nins--;
	k1 = lj_ir_kint(J, k);
	k2 = lj_ir_kint(J, (-k & 31));
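	/* 64 bit rotate left by k (0 < k < 32):
	** lo' = (lo << k) | (hi >> (32-k))
	** hi' = (hi << k) | (lo >> (32-k))
	*/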
	t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
	t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
	t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
	t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
	ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
	return split_emit(J, IRTI(IR_BOR), t2, t3);
      }
    } else if (k == 0) {
      goto passthrough;
    } else if (k < 32) {
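      /* Shift by 0 < k < 32: one output word combines both input words.
      ** E.g. BSHL: lo' = lo << k (the copied op), and
      ** hi' = (hi << k) | (lo >> (32-k)).
      */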
      if (op == IR_BSHL) {
	IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
	IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
	return split_emit(J, IRTI(IR_BOR), t1, t2);
      } else {
	IRRef t1 = ir->prev, t2;
	lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
	nir->o = IR_BSHR;
	t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
	ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
	return split_emit(J, IRTI(op), hi, kref);
      }
    } else {
      if (op == IR_BSHL) {
	if (k == 32)
	  J->cur.nins--;
	else
	  lo = ir->prev;
	ir->prev = lj_ir_kint(J, 0);
	return lo;
      } else {
	lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
	if (k == 32) {
	  J->cur.nins--;
	  ir->prev = hi;
	} else {
	  nir->op1 = hi;
	}
	if (op == IR_BSHR)
	  return lj_ir_kint(J, 0);
	else
	  return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));
      }
    }
  }
  return split_call_li(J, hisubst, oir, ir,
		       op - IR_BSHL + IRCALL_lj_carith_shl64);
}

static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
			 IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef hi, kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize bit operations with lo constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
	J->cur.nins--;
	ir->prev = nir->op1;
      } else if (op == IR_BXOR) {
	nir->o = IR_BNOT;
	nir->op2 = 0;
      } else {
	J->cur.nins--;
	ir->prev = kref;
      }
    }
  }
  hi = hisubst[ir->op1];
  kref = hisubst[ir->op2];
  if (irref_isk(kref)) {  /* Optimize bit operations with hi constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
	return hi;
      } else if (op == IR_BXOR) {
	return split_emit(J, IRTI(IR_BNOT), hi, 0);
      } else {
	return kref;
      }
    }
  }
  return split_emit(J, IRTI(op), hi, kref);
}
#endif

/* Substitute references of a snapshot. */
static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
{
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    IRIns *ir = &oir[snap_ref(sn)];
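    /* IR constants are retained by the pass, so a constant SOFTFP number
    ** ref can be kept as-is; all other refs are replaced by their loword.
    */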
    if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn))))
      map[n] = ((sn & 0xffff0000) | ir->prev);
  }
}

/* Transform the old IR to the new IR. */
static void split_ir(jit_State *J)
{
  IRRef nins = J->cur.nins, nk = J->cur.nk;
  MSize irlen = nins - nk;
  MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
  IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
  IRRef1 *hisubst;
  IRRef ref, snref;
  SnapShot *snap;

  /* Copy old IR to buffer. */
  memcpy(oir, IR(nk), irlen*sizeof(IRIns));
  /* Bias hiword substitution table and old IR. Loword kept in field prev. */
  hisubst = (IRRef1 *)&oir[irlen] - nk;
  oir -= nk;
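  /* Now both oir[] and hisubst[] can be indexed directly with original
  ** IRRefs, which start at nk.
  */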

  /* Remove all IR instructions, but retain IR constants. */
  J->cur.nins = REF_FIRST;
  J->loopref = 0;

  /* Process constants and fixed references. */
  for (ref = nk; ref <= REF_BASE; ref++) {
    IRIns *ir = &oir[ref];
    if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
      /* Split up 64 bit constant. */
      TValue tv = *ir_k64(ir);
      ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
      hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
    } else {
      ir->prev = ref;  /* Identity substitution for loword. */
      hisubst[ref] = 0;
    }
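    /* 64 bit constants occupy two IR slots, so skip the second slot. */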
    if (irt_is64(ir->t) && ir->o != IR_KNULL)
      ref++;
  }

  /* Process old IR instructions. */
  snap = J->cur.snap;
  snref = snap->ref;
  for (ref = REF_FIRST; ref < nins; ref++) {
    IRIns *ir = &oir[ref];
    IRRef nref = lj_ir_nextins(J);
    IRIns *nir = IR(nref);
    IRRef hi = 0;

    if (ref >= snref) {
      snap->ref = nref;
      split_subst_snap(J, snap++, oir);
      snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0;
    }

    /* Copy-substitute old instruction to new instruction. */
    nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
    nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
    ir->prev = nref;  /* Loword substitution. */
    nir->o = ir->o;
    nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
    hisubst[ref] = 0;

    /* Split 64 bit instructions. */
#if LJ_SOFTFP
    if (irt_isnum(ir->t)) {
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
      switch (ir->o) {
      case IR_ADD:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
	break;
      case IR_SUB:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
	break;
      case IR_MUL:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
	break;
      case IR_DIV:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
	break;
      case IR_POW:
	hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
	break;
      case IR_FPMATH:
	hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
	break;
      case IR_LDEXP:
	hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
	break;
      case IR_NEG: case IR_ABS:
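	/* Only the sign bit of the hiword matters: NEG flips it with
	** BXOR 0x80000000, ABS clears it with BAND 0x7fffffff. The
	** loword passes through unchanged.
	*/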
	nir->o = IR_CONV;  /* Pass through loword. */
	nir->op2 = (IRT_INT << 5) | IRT_INT;
	hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
	       hisubst[ir->op1],
	       lj_ir_kint(J, (int32_t)(0x7fffffffu + (ir->o == IR_NEG))));
	break;
      case IR_SLOAD:
	if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from int to number. */
	  nir->op2 &= ~IRSLOAD_CONVERT;
	  ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref,
				       IRCALL_softfp_i2d);
	  hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	  break;
	}
	/* fallthrough */
      case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
      case IR_STRTO:
	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	break;
      case IR_FLOAD:
	lj_assertJ(ir->op1 == REF_NIL, "expected FLOAD from GG_State");
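	/* The FLOAD reads a GG_State field, which is assumed to be
	** invariant, so the hiword can be read at compile time and
	** emitted as a constant.
	*/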
	hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4));
	nir->op2 += LJ_BE*4;
	break;
      case IR_XLOAD: {
	IRIns inslo = *nir;  /* Save/undo the emit of the lo XLOAD. */
	J->cur.nins--;
	hi = split_ptr(J, oir, ir->op1);  /* Insert the hiref ADD. */
#if LJ_BE
	hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2);
	inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD);
#endif
	nref = lj_ir_nextins(J);
	nir = IR(nref);
	*nir = inslo;  /* Re-emit lo XLOAD. */
#if LJ_LE
	hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
	ir->prev = nref;
#else
	ir->prev = hi; hi = nref;
#endif
	break;
	}
      case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
	break;
      case IR_CONV: {  /* Conversion to number. Others handled below. */
	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
	UNUSED(st);
#if LJ_32 && LJ_HASFFI
	if (st == IRT_I64 || st == IRT_U64) {
	  hi = split_call_l(J, hisubst, oir, ir,
		 st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d);
	  break;
	}
#endif
	lj_assertJ(st == IRT_INT ||
		   (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)),
		   "bad source type for CONV");
	nir->o = IR_CALLN;
#if LJ_32 && LJ_HASFFI
	nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
		   st == IRT_FLOAT ? IRCALL_softfp_f2d :
		   IRCALL_softfp_ui2d;
#else
	nir->op2 = IRCALL_softfp_i2d;
#endif
	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	break;
	}
      case IR_CALLN:
      case IR_CALLL:
      case IR_CALLS:
      case IR_CALLXS:
	goto split_call;
      case IR_PHI:
	if (nir->op1 == nir->op2)
	  J->cur.nins--;  /* Drop useless PHIs. */
	if (hisubst[ir->op1] != hisubst[ir->op2])
	  split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
		     hisubst[ir->op1], hisubst[ir->op2]);
	break;
      case IR_HIOP:
	J->cur.nins--;  /* Drop joining HIOP. */
	ir->prev = nir->op1;
	hi = nir->op2;
	break;
      default:
	lj_assertJ(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX,
		   "bad IR op %d", ir->o);
	hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
			hisubst[ir->op1], hisubst[ir->op2]);
	break;
      }
    } else
#endif
#if LJ_32 && LJ_HASFFI
    if (irt_isint64(ir->t)) {
      IRRef hiref = hisubst[ir->op1];
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      switch (ir->o) {
      case IR_ADD:
      case IR_SUB:
	/* Use plain op for hiword if loword cannot produce a carry/borrow. */
	if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
	  ir->prev = nir->op1;  /* Pass through loword. */
	  nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
	  hi = nref;
	  break;
	}
	/* fallthrough */
      case IR_NEG:
	hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      case IR_MUL:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
	break;
      case IR_DIV:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
					      IRCALL_lj_carith_divu64);
	break;
      case IR_MOD:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
					      IRCALL_lj_carith_modu64);
	break;
      case IR_POW:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
					      IRCALL_lj_carith_powu64);
	break;
      case IR_BNOT:
	hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
	break;
      case IR_BSWAP:
	ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
	hi = nref;
	break;
      case IR_BAND: case IR_BOR: case IR_BXOR:
	hi = split_bitop(J, hisubst, nir, ir);
	break;
      case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
	hi = split_bitshift(J, hisubst, oir, nir, ir);
	break;
      case IR_FLOAD:
	lj_assertJ(ir->op2 == IRFL_CDATA_INT64, "only INT64 supported");
	hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
#if LJ_BE
	ir->prev = hi; hi = nref;
#endif
	break;
      case IR_XLOAD:
	hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2);
#if LJ_BE
	ir->prev = hi; hi = nref;
#endif
	break;
      case IR_XSTORE:
	split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
	break;
      case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
	if (st == IRT_NUM) {  /* NUM to 64 bit int conv. */
	  hi = split_call_l(J, hisubst, oir, ir,
		 irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
	} else if (st == IRT_FLOAT) {  /* FLOAT to 64 bit int conv. */
	  nir->o = IR_CALLN;
	  nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
	  hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
	}
#else
	if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
	  hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
	}
#endif
	else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
	  /* Drop cast, since assembler doesn't care. But fwd both parts. */
	  hi = hiref;
	  goto fwdlo;
	} else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
	  IRRef k31 = lj_ir_kint(J, 31);
	  nir = IR(nref);  /* May have been reallocated. */
	  ir->prev = nir->op1;  /* Pass through loword. */
	  nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
	  nir->op2 = k31;
	  hi = nref;
	} else {  /* Zero-extend to 64 bit. */
	  hi = lj_ir_kint(J, 0);
	  goto fwdlo;
	}
	break;
	}
      case IR_CALLXS:
	goto split_call;
      case IR_PHI: {
	IRRef hiref2;
	if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
	    nir->op1 == nir->op2)
	  J->cur.nins--;  /* Drop useless PHIs. */
	hiref2 = hisubst[ir->op2];
	if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
	  split_emit(J, IRTI(IR_PHI), hiref, hiref2);
	break;
	}
      case IR_HIOP:
	J->cur.nins--;  /* Drop joining HIOP. */
	ir->prev = nir->op1;
	hi = nir->op2;
	break;
      default:
	lj_assertJ(ir->o <= IR_NE, "bad IR op %d", ir->o);  /* Comparisons. */
	split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      }
    } else
#endif
#if LJ_SOFTFP
    if (ir->o == IR_SLOAD) {
      if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from number to int. */
	nir->op2 &= ~IRSLOAD_CONVERT;
	if (!(nir->op2 & IRSLOAD_TYPECHECK))
	  nir->t.irt = IRT_INT;  /* Drop guard. */
	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t));
      }
    } else if (ir->o == IR_TOBIT) {
      IRRef tmp, op1 = ir->op1;
      J->cur.nins--;
#if LJ_LE
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
      ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
    } else if (ir->o == IR_TOSTR || ir->o == IR_TMPREF) {
      if (hisubst[ir->op1]) {
	if (irref_isk(ir->op1))
	  nir->op1 = ir->op1;
	else
	  split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref);
      }
    } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) {
      if (irref_isk(ir->op2) && hisubst[ir->op2])
	nir->op2 = ir->op2;
    } else
#endif
    if (ir->o == IR_CONV) {  /* See above, too. */
      IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
      if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
#if LJ_SOFTFP
	if (irt_isfloat(ir->t)) {
	  split_call_l(J, hisubst, oir, ir,
		       st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f);
	  J->cur.nins--;  /* Drop unused HIOP. */
	}
#else
	if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
	  ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
				hisubst[ir->op1], nref);
	}
#endif
	else {  /* Truncate to lower 32 bits. */
	fwdlo:
	  ir->prev = nir->op1;  /* Forward loword. */
	  /* Replace with NOP to avoid messing up the snapshot logic. */
	  nir->ot = IRT(IR_NOP, IRT_NIL);
	  nir->op1 = nir->op2 = 0;
	}
      }
#endif
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
      else if (irt_isfloat(ir->t)) {
	if (st == IRT_NUM) {
	  split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
	  J->cur.nins--;  /* Drop unused HIOP. */
	} else {
	  nir->o = IR_CALLN;
	  nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
	}
      } else if (st == IRT_FLOAT) {
	nir->o = IR_CALLN;
	nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
      } else
#endif
#if LJ_SOFTFP
      if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
	if (irt_isguard(ir->t)) {
	  lj_assertJ(st == IRT_NUM && irt_isint(ir->t), "bad CONV types");
	  J->cur.nins--;
	  ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
	} else {
	  split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
	    st == IRT_NUM ?
	      (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
	      (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
#else
	    IRCALL_softfp_d2i
#endif
	  );
	  J->cur.nins--;  /* Drop unused HIOP. */
	}
      }
#endif
    } else if (ir->o == IR_CALLXS) {
      IRRef hiref;
    split_call:
      hiref = hisubst[ir->op1];
      if (hiref) {
	IROpT ot = nir->ot;
	IRRef op2 = nir->op2;
	nir->ot = IRT(IR_CARG, IRT_NIL);
#if LJ_LE
	nir->op2 = hiref;
#else
	nir->op2 = nir->op1; nir->op1 = hiref;
#endif
	ir->prev = nref = split_emit(J, ot, nref, op2);
      }
      if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
	hi = split_emit(J,
	  IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
	  nref, nref);
    } else if (ir->o == IR_CARG) {
      IRRef hiref = hisubst[ir->op1];
      if (hiref) {
	IRRef op2 = nir->op2;
#if LJ_LE
	nir->op2 = hiref;
#else
	nir->op2 = nir->op1; nir->op1 = hiref;
#endif
	ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
	nir = IR(nref);
      }
      hiref = hisubst[ir->op2];
      if (hiref) {
#if !LJ_TARGET_X86
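	/* Most non-x86 ABIs pass 64 bit arguments in aligned (even/odd)
	** register pairs or stack slots, so pad the argument list with a
	** dummy CARG when necessary.
	*/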
	int carg = 0;
	IRIns *cir;
	for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1))
	  carg++;
	if ((carg & 1) == 0) {  /* Align 64 bit arguments. */
	  IRRef op2 = nir->op2;
	  nir->op2 = REF_NIL;
	  nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
	  nir = IR(nref);
	}
#endif
#if LJ_BE
	{ IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; }
#endif
	ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref);
      }
    } else if (ir->o == IR_CNEWI) {
      if (hisubst[ir->op2])
	split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
    } else if (ir->o == IR_LOOP) {
      J->loopref = nref;  /* Needed by assembler. */
    }
    hisubst[ref] = hi;  /* Store hiword substitution. */
  }
  if (snref == nins) {  /* Substitution for last snapshot. */
    snap->ref = J->cur.nins;
    split_subst_snap(J, snap, oir);
  }

  /* Add PHI marks. */
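  /* Copy-substitution cleared all IRT_ISPHI flags, so re-derive them
  ** from the operands of the trailing PHI instructions.
  */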
  for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
    IRIns *ir = IR(ref);
    if (ir->o != IR_PHI) break;
    if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
    if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
  }
}

/* Protected callback for split pass. */
static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud)
{
  jit_State *J = (jit_State *)ud;
  split_ir(J);
  UNUSED(L); UNUSED(dummy);
  return NULL;
}

#if defined(LUA_USE_ASSERT) || LJ_SOFTFP
/* Slow, but sure way to check whether a SPLIT pass is needed. */
static int split_needsplit(jit_State *J)
{
  IRIns *ir, *irend;
  IRRef ref;
  for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
    if (LJ_SOFTFP ? irt_is64orfp(ir->t) : irt_isint64(ir->t))
      return 1;
  if (LJ_SOFTFP) {
    for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev)
      if ((IR(ref)->op2 & IRSLOAD_CONVERT))
	return 1;
    if (J->chain[IR_TOBIT])
      return 1;
  }
  for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) {
    IRType st = (IR(ref)->op2 & IRCONV_SRCMASK);
    if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) ||
	st == IRT_I64 || st == IRT_U64)
      return 1;
  }
  return 0;  /* Nope. */
}
#endif

/* SPLIT pass. */
void lj_opt_split(jit_State *J)
{
#if LJ_SOFTFP
  if (!J->needsplit)
    J->needsplit = split_needsplit(J);
#else
  lj_assertJ(J->needsplit >= split_needsplit(J), "bad SPLIT state");
#endif
  if (J->needsplit) {
    int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
    if (errcode) {
      /* Completely reset the trace to avoid inconsistent dump on abort. */
      J->cur.nins = J->cur.nk = REF_BASE;
      J->cur.nsnap = 0;
      lj_err_throw(J->L, errcode);  /* Propagate errors. */
    }
  }
}

#undef IR

#endif