/*
** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
** ABCelim: Array Bounds Check Elimination.
** CSE: Common-Subexpression Elimination.
** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_fold_c
#define LUA_CORE

#include <math.h>

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_buf.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_ircall.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#include "lj_carith.h"
#endif
#include "lj_vm.h"
#include "lj_strscan.h"
#include "lj_strfmt.h"

/* Here's a short description of how the FOLD engine processes instructions:
**
** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
** The instruction and its operands are used to select matching fold rules.
** These are applied iteratively until a fixed point is reached.
**
** The 8 bit opcode of the instruction itself plus the opcodes of the
** two instructions referenced by its operands form a 24 bit key
** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
**
** This key is used for partial matching against the fold rules. The
** left/right operand fields of the key are successively masked with
** the 'any' wildcard, from most specific to least specific:
**
**   ins left right
**   ins any  right
**   ins left any
**   ins any  any
**
** The masked key is used to look up a matching fold rule in a semi-perfect
** hash table. If a matching rule is found, the related fold function is run.
** Multiple rules can share the same fold function. A fold rule may return
** one of several special values:
**
** - NEXTFOLD means no folding was applied, because an additional test
**   inside the fold function failed. Matching continues against less
**   specific fold rules. Finally the instruction is passed on to CSE.
**
** - RETRYFOLD means the instruction was modified in-place. Folding is
**   retried as if this instruction had just been received.
**
** All other return values are terminal actions -- no further folding is
** applied:
**
** - INTFOLD(i) returns a reference to the integer constant i.
**
** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
**   without emitting an instruction.
**
** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
**   it without passing through any further optimizations.
**
** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
**   no result (e.g. guarded assertions): FAILFOLD means the guard would
**   always fail, i.e. the current trace is pointless. DROPFOLD means
**   the guard is always true and has been eliminated. CONDFOLD is a
**   shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
**
** - Any other return value is interpreted as an IRRef or TRef. This
**   can be a reference to an existing or a newly created instruction.
**   Only the least-significant 16 bits (IRRef1) are used to form a TRef
**   which is finally returned to the caller.
**
** The FOLD engine receives instructions both from the trace recorder and
** substituted instructions from LOOP unrolling. This means all types
** of instructions may end up here, even though the recorder bypasses
** FOLD in some cases. Thus all loads, stores and allocations must have
** an any/any rule to avoid being passed on to CSE.
**
** Carefully read the following requirements before adding or modifying
** any fold rules:
**
** Requirement #1: All fold rules must preserve their destination type.
**
** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
**
** Requirement #2: Fold rules should not create *new* instructions which
** reference operands *across* PHIs.
**
** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
** left operand is a PHI. Then fleft->op1 would point across the PHI
** frontier to an invariant instruction. Adding a PHI for this instruction
** would be counterproductive. The solution is to add a barrier which
** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
** The only exception is for recurrences with high latencies like
** repeated int->num->int conversions.
**
** One could relax this condition a bit if the referenced instruction is
** a PHI, too. But this often leads to worse code due to excessive
** register shuffling.
**
** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
** Even returning fleft->op1 would be ok, because a new PHI will be added,
** if needed. But again, this leads to excessive register shuffling and
** should be avoided.
**
** Requirement #3: The set of all fold rules must be monotonic to guarantee
** termination.
**
** The goal is optimization, so one primarily wants to add strength-reducing
** rules. This means eliminating an instruction or replacing an instruction
** with one or more simpler instructions. Don't add fold rules which point
** in the other direction.
**
** Some rules (like commutativity) do not directly reduce the strength of
** an instruction, but enable other fold rules (e.g. by moving constants
** to the right operand). These rules must be made unidirectional to avoid
** cycles.
**
** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
*/
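
/* A minimal sketch of the key matching with a hypothetical packing and
** lookup helper (the engine's actual bit layout and hashing differ):
**
**   uint32_t key = ((uint32_t)insop << 16) | (leftop << 8) | rightop;
**   lookup(key);                                  -- ins left right
**   lookup((key & 0xff00ff) | (ANY << 8));        -- ins any  right
**   lookup((key & 0xffff00) | ANY);               -- ins left any
**   lookup((key & 0xff0000) | (ANY << 8) | ANY);  -- ins any  any
**
** The first hit runs its fold function; if none hits, the instruction
** is passed on to CSE.
*/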

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)		(&J->cur.ir[(ref)])
#define fins		(&J->fold.ins)
#define fleft		(J->fold.left)
#define fright		(J->fold.right)
#define knumleft	(ir_knum(fleft)->n)
#define knumright	(ir_knum(fright)->n)

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Fold function type. Fastcall on x86 significantly reduces their size. */
typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);

/* Macros for the fold specs, so buildvm can recognize them. */
#define LJFOLD(x)
#define LJFOLDX(x)
#define LJFOLDF(name)	static TRef LJ_FASTCALL fold_##name(jit_State *J)
/* Note: They must be at the start of a line or buildvm ignores them! */

/* Barrier to prevent using operands across PHIs. */
#define PHIBARRIER(ir)	if (irt_isphi((ir)->t)) return NEXTFOLD

/* Barrier to prevent folding across a GC step.
** GC steps can only happen at the head of a trace and at LOOP.
** And the GC is only driven forward if there's at least one allocation.
*/
#define gcstep_barrier(J, ref) \
  ((ref) < J->chain[IR_LOOP] && \
   (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
    J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
    J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
    J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA]))

/* -- Constant folding for FP numbers ------------------------------------- */

LJFOLD(ADD KNUM KNUM)
LJFOLD(SUB KNUM KNUM)
LJFOLD(MUL KNUM KNUM)
LJFOLD(DIV KNUM KNUM)
LJFOLD(LDEXP KNUM KNUM)
LJFOLD(MIN KNUM KNUM)
LJFOLD(MAX KNUM KNUM)
LJFOLDF(kfold_numarith)
{
  lua_Number a = knumleft;
  lua_Number b = knumright;
  lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(NEG KNUM FLOAD)
LJFOLD(ABS KNUM FLOAD)
LJFOLDF(kfold_numabsneg)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(LDEXP KNUM KINT)
LJFOLDF(kfold_ldexp)
{
#if LJ_TARGET_X86ORX64
  UNUSED(J);
  return NEXTFOLD;
#else
  return lj_ir_knum(J, ldexp(knumleft, fright->i));
#endif
}

LJFOLD(FPMATH KNUM any)
LJFOLDF(kfold_fpmath)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldfpm(a, fins->op2);
  return lj_ir_knum(J, y);
}

LJFOLD(CALLN KNUM any)
LJFOLDF(kfold_fpcall1)
{
  const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
  if (CCI_TYPE(ci) == IRT_NUM) {
    double y = ((double (*)(double))ci->func)(knumleft);
    return lj_ir_knum(J, y);
  }
  return NEXTFOLD;
}

LJFOLD(CALLN CARG IRCALL_atan2)
LJFOLDF(kfold_fpcall2)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
    double a = ir_knum(IR(fleft->op1))->n;
    double b = ir_knum(IR(fleft->op2))->n;
    double y = ((double (*)(double, double))ci->func)(a, b);
    return lj_ir_knum(J, y);
  }
  return NEXTFOLD;
}

LJFOLD(POW KNUM KINT)
LJFOLD(POW KNUM KNUM)
LJFOLDF(kfold_numpow)
{
  lua_Number a = knumleft;
  lua_Number b = fright->o == IR_KINT ? (lua_Number)fright->i : knumright;
  lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
  return lj_ir_knum(J, y);
}

/* Must not use kfold_kref for numbers: the same KNUM ref could hold NaN,
** and NaN == NaN is false even for identical references.
*/
LJFOLD(EQ KNUM KNUM)
LJFOLD(NE KNUM KNUM)
LJFOLD(LT KNUM KNUM)
LJFOLD(GE KNUM KNUM)
LJFOLD(LE KNUM KNUM)
LJFOLD(GT KNUM KNUM)
LJFOLD(ULT KNUM KNUM)
LJFOLD(UGE KNUM KNUM)
LJFOLD(ULE KNUM KNUM)
LJFOLD(UGT KNUM KNUM)
LJFOLDF(kfold_numcomp)
{
  return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
}

/* -- Constant folding for 32 bit integers -------------------------------- */

static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
{
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_MUL: k1 *= k2; break;
  case IR_MOD: k1 = lj_vm_modi(k1, k2); break;
  case IR_NEG: k1 = -k1; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 31); break;
  case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
  case IR_BSAR: k1 >>= (k2 & 31); break;
  case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
  case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
  case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
  case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
  default: lj_assertX(0, "bad IR op %d", op); break;
  }
  return k1;
}

LJFOLD(ADD KINT KINT)
LJFOLD(SUB KINT KINT)
LJFOLD(MUL KINT KINT)
LJFOLD(MOD KINT KINT)
LJFOLD(NEG KINT KINT)
LJFOLD(BAND KINT KINT)
LJFOLD(BOR KINT KINT)
LJFOLD(BXOR KINT KINT)
LJFOLD(BSHL KINT KINT)
LJFOLD(BSHR KINT KINT)
LJFOLD(BSAR KINT KINT)
LJFOLD(BROL KINT KINT)
LJFOLD(BROR KINT KINT)
LJFOLD(MIN KINT KINT)
LJFOLD(MAX KINT KINT)
LJFOLDF(kfold_intarith)
{
  return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
}

LJFOLD(ADDOV KINT KINT)
LJFOLD(SUBOV KINT KINT)
LJFOLD(MULOV KINT KINT)
LJFOLDF(kfold_intovarith)
{
  lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i,
				 fins->o - IR_ADDOV);
  int32_t k = lj_num2int(n);
  if (n != (lua_Number)k)
    return FAILFOLD;
  return INTFOLD(k);
}

LJFOLD(BNOT KINT)
LJFOLDF(kfold_bnot)
{
  return INTFOLD(~fleft->i);
}

LJFOLD(BSWAP KINT)
LJFOLDF(kfold_bswap)
{
  return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
}

LJFOLD(LT KINT KINT)
LJFOLD(GE KINT KINT)
LJFOLD(LE KINT KINT)
LJFOLD(GT KINT KINT)
LJFOLD(ULT KINT KINT)
LJFOLD(UGE KINT KINT)
LJFOLD(ULE KINT KINT)
LJFOLD(UGT KINT KINT)
LJFOLD(ABC KINT KINT)
LJFOLDF(kfold_intcomp)
{
  int32_t a = fleft->i, b = fright->i;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
  case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
  case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
  case IR_ABC:
  case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
  default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
  }
}
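
/* Note that ABC shares the IR_UGT case above: the bounds check is an
** unsigned compare, so a negative index wraps to a huge uint32_t and
** correctly fails the check.
*/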

LJFOLD(UGE any KINT)
LJFOLDF(kfold_intcomp0)
{
  if (fright->i == 0)
    return DROPFOLD;
  return NEXTFOLD;
}

/* -- Constant folding for 64 bit integers -------------------------------- */

static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
				 IROp op)
{
  UNUSED(J);
#if LJ_HASFFI
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_MUL: k1 *= k2; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 63); break;
  case IR_BSHR: k1 >>= (k2 & 63); break;
  case IR_BSAR: k1 = (uint64_t)((int64_t)k1 >> (k2 & 63)); break;
  case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break;
  case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break;
  default: lj_assertJ(0, "bad IR op %d", op); break;
  }
#else
  UNUSED(k2); UNUSED(op);
  lj_assertJ(0, "FFI IR op without FFI");
#endif
  return k1;
}

LJFOLD(ADD KINT64 KINT64)
LJFOLD(SUB KINT64 KINT64)
LJFOLD(MUL KINT64 KINT64)
LJFOLD(BAND KINT64 KINT64)
LJFOLD(BOR KINT64 KINT64)
LJFOLD(BXOR KINT64 KINT64)
LJFOLDF(kfold_int64arith)
{
  return INT64FOLD(kfold_int64arith(J, ir_k64(fleft)->u64,
				    ir_k64(fright)->u64, (IROp)fins->o));
}

LJFOLD(DIV KINT64 KINT64)
LJFOLD(MOD KINT64 KINT64)
LJFOLD(POW KINT64 KINT64)
LJFOLDF(kfold_int64arith2)
{
#if LJ_HASFFI
  uint64_t k1 = ir_k64(fleft)->u64, k2 = ir_k64(fright)->u64;
  if (irt_isi64(fins->t)) {
    k1 = fins->o == IR_DIV ? lj_carith_divi64((int64_t)k1, (int64_t)k2) :
	 fins->o == IR_MOD ? lj_carith_modi64((int64_t)k1, (int64_t)k2) :
			     lj_carith_powi64((int64_t)k1, (int64_t)k2);
  } else {
    k1 = fins->o == IR_DIV ? lj_carith_divu64(k1, k2) :
	 fins->o == IR_MOD ? lj_carith_modu64(k1, k2) :
			     lj_carith_powu64(k1, k2);
  }
  return INT64FOLD(k1);
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(BSHL KINT64 KINT)
LJFOLD(BSHR KINT64 KINT)
LJFOLD(BSAR KINT64 KINT)
LJFOLD(BROL KINT64 KINT)
LJFOLD(BROR KINT64 KINT)
LJFOLDF(kfold_int64shift)
{
#if LJ_HASFFI
  uint64_t k = ir_k64(fleft)->u64;
  int32_t sh = (fright->i & 63);
  return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(BNOT KINT64)
LJFOLDF(kfold_bnot64)
{
#if LJ_HASFFI
  return INT64FOLD(~ir_k64(fleft)->u64);
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(BSWAP KINT64)
LJFOLDF(kfold_bswap64)
{
#if LJ_HASFFI
  return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(LT KINT64 KINT64)
LJFOLD(GE KINT64 KINT64)
LJFOLD(LE KINT64 KINT64)
LJFOLD(GT KINT64 KINT64)
LJFOLD(ULT KINT64 KINT64)
LJFOLD(UGE KINT64 KINT64)
LJFOLD(ULE KINT64 KINT64)
LJFOLD(UGT KINT64 KINT64)
LJFOLDF(kfold_int64comp)
{
#if LJ_HASFFI
  uint64_t a = ir_k64(fleft)->u64, b = ir_k64(fright)->u64;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD((int64_t)a < (int64_t)b);
  case IR_GE: return CONDFOLD((int64_t)a >= (int64_t)b);
  case IR_LE: return CONDFOLD((int64_t)a <= (int64_t)b);
  case IR_GT: return CONDFOLD((int64_t)a > (int64_t)b);
  case IR_ULT: return CONDFOLD(a < b);
  case IR_UGE: return CONDFOLD(a >= b);
  case IR_ULE: return CONDFOLD(a <= b);
  case IR_UGT: return CONDFOLD(a > b);
  default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
  }
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(UGE any KINT64)
LJFOLDF(kfold_int64comp0)
{
#if LJ_HASFFI
  if (ir_k64(fright)->u64 == 0)
    return DROPFOLD;
  return NEXTFOLD;
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

/* -- Constant folding for strings ---------------------------------------- */

LJFOLD(SNEW KKPTR KINT)
LJFOLDF(kfold_snew_kptr)
{
  GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
  return lj_ir_kstr(J, s);
}

LJFOLD(SNEW any KINT)
LJFOLD(XSNEW any KINT)
LJFOLDF(kfold_snew_empty)
{
  if (fright->i == 0)
    return lj_ir_kstr(J, &J2G(J)->strempty);
  return NEXTFOLD;
}

LJFOLD(STRREF KGC KINT)
LJFOLDF(kfold_strref)
{
  GCstr *str = ir_kstr(fleft);
  lj_assertJ((MSize)fright->i <= str->len, "bad string ref");
  return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
}

LJFOLD(STRREF SNEW any)
LJFOLDF(kfold_strref_snew)
{
  PHIBARRIER(fleft);
  if (irref_isk(fins->op2) && fright->i == 0) {
    return fleft->op1;  /* strref(snew(ptr, len), 0) ==> ptr */
  } else {
    /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
    IRIns *ir = IR(fleft->op1);
    if (ir->o == IR_STRREF) {
      IRRef1 str = ir->op1;  /* IRIns * is not valid across emitir. */
      PHIBARRIER(ir);
      fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
      fins->op1 = str;
      fins->ot = IRT(IR_STRREF, IRT_PGC);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
LJFOLDF(kfold_strcmp)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    GCstr *a = ir_kstr(IR(fleft->op1));
    GCstr *b = ir_kstr(IR(fleft->op2));
    return INTFOLD(lj_str_cmp(a, b));
  }
  return NEXTFOLD;
}

/* -- Constant folding and forwarding for buffers ------------------------- */

/*
** Buffer ops perform stores, but their effect is limited to the buffer
** itself. Also, buffer ops are chained: a use of an op implies a use of
** all other ops up the chain. Conversely, if an op is unused, all ops
** up the chain can go unused. This largely eliminates the need to treat
** them as stores.
**
** Alas, treating them as normal (IRM_N) ops doesn't work, because they
** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
** or if FOLD is disabled.
**
** The compromise is to declare them as loads, emit them like stores and
** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
** fragments left over from CSE are eliminated by DCE.
**
** The string buffer methods emit a USE instead of a BUFSTR to keep the
** chain alive.
*/
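
/* A sketch (assuming the usual recording of string concatenation) of
** the chain that 'local s = "x"..a.."y"' might produce:
**
**   0001 BUFHDR [tmpbuf] RESET
**   0002 BUFPUT 0001 "x"
**   0003 BUFPUT 0002 a
**   0004 BUFPUT 0003 "y"
**   0005 BUFSTR 0004 0001
**
** Each op's op1 links to the previous op and BUFSTR's op2 points back
** at the BUFHDR, which is what the chain walks below rely on.
*/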

LJFOLD(BUFHDR any any)
LJFOLDF(bufhdr_merge)
{
  return fins->op2 == IRBUFHDR_WRITE ? CSEFOLD : EMITFOLD;
}

LJFOLD(BUFPUT any BUFSTR)
LJFOLDF(bufput_bufstr)
{
  if ((J->flags & JIT_F_OPT_FWD)) {
    IRRef hdr = fright->op2;
    /* New buffer, no other buffer op in between and same buffer? */
    if (fleft->o == IR_BUFHDR && fleft->op2 == IRBUFHDR_RESET &&
	fleft->prev == hdr &&
	fleft->op1 == IR(hdr)->op1) {
      IRRef ref = fins->op1;
      IR(ref)->op2 = IRBUFHDR_APPEND;  /* Modify BUFHDR. */
      IR(ref)->op1 = fright->op1;
      return ref;
    }
    /* Replay puts to global temporary buffer. */
    if (IR(hdr)->op2 == IRBUFHDR_RESET) {
      IRIns *ir = IR(fright->op1);
      /* For now only handle single string.reverse .lower .upper .rep. */
      if (ir->o == IR_CALLL &&
	  ir->op2 >= IRCALL_lj_buf_putstr_reverse &&
	  ir->op2 <= IRCALL_lj_buf_putstr_rep) {
	IRIns *carg1 = IR(ir->op1);
	if (ir->op2 == IRCALL_lj_buf_putstr_rep) {
	  IRIns *carg2 = IR(carg1->op1);
	  if (carg2->op1 == hdr) {
	    return lj_ir_call(J, ir->op2, fins->op1, carg2->op2, carg1->op2);
	  }
	} else if (carg1->op1 == hdr) {
	  return lj_ir_call(J, ir->op2, fins->op1, carg1->op2);
	}
      }
    }
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(BUFPUT any any)
LJFOLDF(bufput_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) {
    GCstr *s2 = ir_kstr(fright);
    if (s2->len == 0) {  /* Empty string? */
      return LEFTFOLD;
    } else {
      if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) &&
	  !irt_isphi(fleft->t)) {  /* Join two constant string puts in a row. */
	GCstr *s1 = ir_kstr(IR(fleft->op2));
	IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2));
	/* lj_ir_kstr() may realloc the IR and invalidate any IRIns *. */
	IR(fins->op1)->op2 = kref;  /* Modify previous BUFPUT. */
	return fins->op1;
      }
    }
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(BUFSTR any any)
LJFOLDF(bufstr_kfold_cse)
{
  lj_assertJ(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
	     fleft->o == IR_CALLL,
	     "bad buffer constructor IR op %d", fleft->o);
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    if (fleft->o == IR_BUFHDR) {  /* No put operations? */
      if (fleft->op2 == IRBUFHDR_RESET)  /* Empty buffer? */
	return lj_ir_kstr(J, &J2G(J)->strempty);
      fins->op1 = fleft->op1;
      fins->op2 = fleft->prev;  /* Relies on checks in bufput_bufstr. */
      return CSEFOLD;
    } else if (fleft->o == IR_BUFPUT) {
      IRIns *irb = IR(fleft->op1);
      if (irb->o == IR_BUFHDR && irb->op2 == IRBUFHDR_RESET)
	return fleft->op2;  /* Shortcut for a single put operation. */
    }
  }
  /* Try to CSE the whole chain. */
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[IR_BUFSTR];
    while (ref) {
      IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
      while (ira->o == irb->o && ira->op2 == irb->op2) {
	lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
		   ira->o == IR_CALLL || ira->o == IR_CARG,
		   "bad buffer constructor IR op %d", ira->o);
	if (ira->o == IR_BUFHDR && ira->op2 == IRBUFHDR_RESET)
	  return ref;  /* CSE succeeded. */
	if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
	  break;
	ira = IR(ira->op1);
	irb = IR(irb->op1);
      }
      ref = irs->prev;
    }
  }
  return EMITFOLD;  /* No CSE possible. */
}

LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted)
LJFOLDF(bufput_kfold_op)
{
  if (irref_isk(fleft->op2)) {
    const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
    SBuf *sb = lj_buf_tmp_(J->L);
    sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb,
						       ir_kstr(IR(fleft->op2)));
    fins->o = IR_BUFPUT;
    fins->op1 = fleft->op1;
    fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
    return RETRYFOLD;
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep)
LJFOLDF(bufput_kfold_rep)
{
  if (irref_isk(fleft->op2)) {
    IRIns *irc = IR(fleft->op1);
    if (irref_isk(irc->op2)) {
      SBuf *sb = lj_buf_tmp_(J->L);
      sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i);
      fins->o = IR_BUFPUT;
      fins->op1 = irc->op1;
      fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
      return RETRYFOLD;
    }
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
LJFOLDF(bufput_kfold_fmt)
{
  IRIns *irc = IR(fleft->op1);
  lj_assertJ(irref_isk(irc->op2), "SFormat must be const");
  if (irref_isk(fleft->op2)) {
    SFormat sf = (SFormat)IR(irc->op2)->i;
    IRIns *ira = IR(fleft->op2);
    SBuf *sb = lj_buf_tmp_(J->L);
    switch (fins->op2) {
    case IRCALL_lj_strfmt_putfxint:
      sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64);
      break;
    case IRCALL_lj_strfmt_putfstr:
      sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira));
      break;
    case IRCALL_lj_strfmt_putfchar:
      sb = lj_strfmt_putfchar(sb, sf, ira->i);
      break;
    case IRCALL_lj_strfmt_putfnum_int:
    case IRCALL_lj_strfmt_putfnum_uint:
    case IRCALL_lj_strfmt_putfnum:
    default: {
      const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
      sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf,
							 ir_knum(ira)->n);
      break;
      }
    }
    fins->o = IR_BUFPUT;
    fins->op1 = irc->op1;
    fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
    return RETRYFOLD;
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

/* -- Constant folding of pointer arithmetic ------------------------------ */

LJFOLD(ADD KGC KINT)
LJFOLD(ADD KGC KINT64)
LJFOLDF(kfold_add_kgc)
{
  GCobj *o = ir_kgc(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
#if LJ_HASFFI
  if (irt_iscdata(fleft->t)) {
    CType *ct = ctype_raw(ctype_ctsG(J2G(J)), gco2cd(o)->ctypeid);
    if (ctype_isnum(ct->info) || ctype_isenum(ct->info) ||
	ctype_isptr(ct->info) || ctype_isfunc(ct->info) ||
	ctype_iscomplex(ct->info) || ctype_isvector(ct->info))
      return lj_ir_kkptr(J, (char *)o + ofs);
  }
#endif
  return lj_ir_kptr(J, (char *)o + ofs);
}

LJFOLD(ADD KPTR KINT)
LJFOLD(ADD KPTR KINT64)
LJFOLD(ADD KKPTR KINT)
LJFOLD(ADD KKPTR KINT64)
LJFOLDF(kfold_add_kptr)
{
  void *p = ir_kptr(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
  return lj_ir_kptr_(J, fleft->o, (char *)p + ofs);
}

LJFOLD(ADD any KGC)
LJFOLD(ADD any KPTR)
LJFOLD(ADD any KKPTR)
LJFOLDF(kfold_add_kright)
{
  if (fleft->o == IR_KINT || fleft->o == IR_KINT64) {
    IRRef1 tmp = fins->op1; fins->op1 = fins->op2; fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- Constant folding of conversions ------------------------------------- */

LJFOLD(TOBIT KNUM KNUM)
LJFOLDF(kfold_tobit)
{
  return INTFOLD(lj_num2bit(knumleft));
}

LJFOLD(CONV KINT IRCONV_NUM_INT)
LJFOLDF(kfold_conv_kint_num)
{
  return lj_ir_knum(J, (lua_Number)fleft->i);
}

LJFOLD(CONV KINT IRCONV_NUM_U32)
LJFOLDF(kfold_conv_kintu32_num)
{
  return lj_ir_knum(J, (lua_Number)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT IRCONV_INT_I8)
LJFOLD(CONV KINT IRCONV_INT_U8)
LJFOLD(CONV KINT IRCONV_INT_I16)
LJFOLD(CONV KINT IRCONV_INT_U16)
LJFOLDF(kfold_conv_kint_ext)
{
  int32_t k = fleft->i;
  if ((fins->op2 & IRCONV_SRCMASK) == IRT_I8) k = (int8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_U8) k = (uint8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_I16) k = (int16_t)k;
  else k = (uint16_t)k;
  return INTFOLD(k);
}

LJFOLD(CONV KINT IRCONV_I64_INT)
LJFOLD(CONV KINT IRCONV_U64_INT)
LJFOLD(CONV KINT IRCONV_I64_U32)
LJFOLD(CONV KINT IRCONV_U64_U32)
LJFOLDF(kfold_conv_kint_i64)
{
  if ((fins->op2 & IRCONV_SEXT))
    return INT64FOLD((uint64_t)(int64_t)fleft->i);
  else
    return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT64 IRCONV_NUM_I64)
LJFOLDF(kfold_conv_kint64_num_i64)
{
  return lj_ir_knum(J, (lua_Number)(int64_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_NUM_U64)
LJFOLDF(kfold_conv_kint64_num_u64)
{
  return lj_ir_knum(J, (lua_Number)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_INT_I64)
LJFOLD(CONV KINT64 IRCONV_U32_I64)
LJFOLDF(kfold_conv_kint64_int_i64)
{
  return INTFOLD((int32_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KNUM IRCONV_INT_NUM)
LJFOLDF(kfold_conv_knum_int_num)
{
  lua_Number n = knumleft;
  int32_t k = lj_num2int(n);
  if (irt_isguard(fins->t) && n != (lua_Number)k) {
    /* We're about to create a guard which always fails, like CONV +1.5.
    ** Some pathological loops cause this during LICM, e.g.:
    **   local x,k,t = 0,1.5,{1,[1.5]=2}
    **   for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
    **   assert(x == 300)
    */
    return FAILFOLD;
  }
  return INTFOLD(k);
}

LJFOLD(CONV KNUM IRCONV_U32_NUM)
LJFOLDF(kfold_conv_knum_u32_num)
{
#ifdef _MSC_VER
  {  /* Workaround for MSVC bug. */
    volatile uint32_t u = (uint32_t)knumleft;
    return INTFOLD((int32_t)u);
  }
#else
  return INTFOLD((int32_t)(uint32_t)knumleft);
#endif
}

LJFOLD(CONV KNUM IRCONV_I64_NUM)
LJFOLDF(kfold_conv_knum_i64_num)
{
  return INT64FOLD((uint64_t)(int64_t)knumleft);
}

LJFOLD(CONV KNUM IRCONV_U64_NUM)
LJFOLDF(kfold_conv_knum_u64_num)
{
  return INT64FOLD(lj_num2u64(knumleft));
}

LJFOLD(TOSTR KNUM any)
LJFOLDF(kfold_tostr_knum)
{
  return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft)));
}

LJFOLD(TOSTR KINT any)
LJFOLDF(kfold_tostr_kint)
{
  return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ?
		       lj_strfmt_int(J->L, fleft->i) :
		       lj_strfmt_char(J->L, fleft->i));
}

LJFOLD(STRTO KGC)
LJFOLDF(kfold_strto)
{
  TValue n;
  if (lj_strscan_num(ir_kstr(fleft), &n))
    return lj_ir_knum(J, numV(&n));
  return FAILFOLD;
}

/* -- Constant folding of equality checks --------------------------------- */

/* Don't constant-fold away FLOAD checks against KNULL. */
LJFOLD(EQ FLOAD KNULL)
LJFOLD(NE FLOAD KNULL)
LJFOLDX(lj_opt_cse)

/* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
LJFOLD(EQ any KNULL)
LJFOLD(NE any KNULL)
LJFOLD(EQ KNULL any)
LJFOLD(NE KNULL any)
LJFOLD(EQ KINT KINT)  /* Constants are unique, so same refs <==> same value. */
LJFOLD(NE KINT KINT)
LJFOLD(EQ KINT64 KINT64)
LJFOLD(NE KINT64 KINT64)
LJFOLD(EQ KGC KGC)
LJFOLD(NE KGC KGC)
LJFOLDF(kfold_kref)
{
  return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
}

/* -- Algebraic shortcuts ------------------------------------------------- */

LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
LJFOLD(FPMATH FPMATH IRFPM_CEIL)
LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
LJFOLDF(shortcut_round)
{
  IRFPMathOp op = (IRFPMathOp)fleft->op2;
  if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
    return LEFTFOLD;  /* round(round_left(x)) = round_left(x) */
  return NEXTFOLD;
}

LJFOLD(ABS ABS FLOAD)
LJFOLDF(shortcut_left)
{
  return LEFTFOLD;  /* f(g(x)) ==> g(x) */
}

LJFOLD(ABS NEG FLOAD)
LJFOLDF(shortcut_dropleft)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* abs(neg(x)) ==> abs(x) */
  return RETRYFOLD;
}

/* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
LJFOLD(NEG NEG any)
LJFOLD(BNOT BNOT)
LJFOLD(BSWAP BSWAP)
LJFOLDF(shortcut_leftleft)
{
  PHIBARRIER(fleft);  /* See above. Fold would be ok, but not beneficial. */
  return fleft->op1;  /* f(g(x)) ==> x */
}

/* -- FP algebraic simplifications ---------------------------------------- */

/* FP arithmetic is tricky -- there's not much to simplify.
** Please note the following common pitfalls before sending "improvements":
**   x+0 ==> x  is INVALID for x=-0
**   0-x ==> -x is INVALID for x=+0
**   x*0 ==> 0  is INVALID for x=-0, x=+-Inf or x=NaN
*/
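
/* Concretely, under IEEE 754 arithmetic:
**   -0 + 0   ==> +0  (not -0), so x+0 is no identity for x = -0.
**   0 - (+0) ==> +0, but -(+0) ==> -0.
**   -0 * 0   ==> -0 and NaN * 0 ==> NaN, so x*0 ==> 0 is unsound.
*/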

LJFOLD(ADD NEG any)
LJFOLDF(simplify_numadd_negx)
{
  PHIBARRIER(fleft);
  fins->o = IR_SUB;  /* (-a) + b ==> b - a */
  fins->op1 = fins->op2;
  fins->op2 = fleft->op1;
  return RETRYFOLD;
}

LJFOLD(ADD any NEG)
LJFOLDF(simplify_numadd_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_SUB;  /* a + (-b) ==> a - b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(SUB any KNUM)
LJFOLDF(simplify_numsub_k)
{
  if (ir_knum(fright)->u64 == 0)  /* x - (+0) ==> x */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB NEG KNUM)
LJFOLDF(simplify_numsub_negk)
{
  PHIBARRIER(fleft);
  fins->op2 = fleft->op1;  /* (-x) - k ==> (-k) - x */
  fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(SUB any NEG)
LJFOLDF(simplify_numsub_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_ADD;  /* a - (-b) ==> a + b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(MUL any KNUM)
LJFOLD(DIV any KNUM)
LJFOLDF(simplify_nummuldiv_k)
{
  lua_Number n = knumright;
  if (n == 1.0) {  /* x o 1 ==> x */
    return LEFTFOLD;
  } else if (n == -1.0) {  /* x o -1 ==> -x */
    IRRef op1 = fins->op1;
    fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG);  /* Modifies fins. */
    fins->op1 = op1;
    fins->o = IR_NEG;
    return RETRYFOLD;
  } else if (fins->o == IR_MUL && n == 2.0) {  /* x * 2 ==> x + x */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  } else if (fins->o == IR_DIV) {  /* x / 2^k ==> x * 2^-k */
    uint64_t u = ir_knum(fright)->u64;
    uint32_t ex = ((uint32_t)(u >> 52) & 0x7ff);
    if ((u & U64x(000fffff,ffffffff)) == 0 && ex - 1 < 0x7fd) {
      u = (u & ((uint64_t)1 << 63)) | ((uint64_t)(0x7fe - ex) << 52);
      fins->o = IR_MUL;  /* Multiply by exact reciprocal. */
      fins->op2 = lj_ir_knum_u64(J, u);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}
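
/* Worked example for the exact-reciprocal rewrite: for x / 8.0 the
** constant has u = 0x4020000000000000 (mantissa zero, ex = 0x402), so
** the rewritten exponent is 0x7fe - 0x402 = 0x3fc, i.e. the constant
** 0x3fc0000000000000 = 0.125, and x / 8.0 becomes the exact x * 0.125.
*/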

LJFOLD(MUL NEG KNUM)
LJFOLD(DIV NEG KNUM)
LJFOLDF(simplify_nummuldiv_negk)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* (-a) o k ==> a o (-k) */
  fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(MUL NEG NEG)
LJFOLD(DIV NEG NEG)
LJFOLDF(simplify_nummuldiv_negneg)
{
  PHIBARRIER(fleft);
  PHIBARRIER(fright);
  fins->op1 = fleft->op1;  /* (-a) o (-b) ==> a o b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(POW any KINT)
LJFOLDF(simplify_numpow_xkint)
{
  int32_t k = fright->i;
  TRef ref = fins->op1;
  if (k == 0)  /* x ^ 0 ==> 1 */
    return lj_ir_knum_one(J);  /* Result must be a number, not an int. */
  if (k == 1)  /* x ^ 1 ==> x */
    return LEFTFOLD;
  if ((uint32_t)(k+65536) > 2*65536u)  /* Limit code explosion. */
    return NEXTFOLD;
  if (k < 0) {  /* x ^ (-k) ==> (1/x) ^ k. */
    ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
    k = -k;
  }
  /* Unroll x^k for 1 <= k <= 65536. */
  for (; (k & 1) == 0; k >>= 1)  /* Handle leading zeros. */
    ref = emitir(IRTN(IR_MUL), ref, ref);
  if ((k >>= 1) != 0) {  /* Handle trailing bits. */
    TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
    for (; k != 1; k >>= 1) {
      if (k & 1)
	ref = emitir(IRTN(IR_MUL), ref, tmp);
      tmp = emitir(IRTN(IR_MUL), tmp, tmp);
    }
    ref = emitir(IRTN(IR_MUL), ref, tmp);
  }
  return ref;
}
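
/* Worked example for the unrolling above: x ^ 12 squares twice in the
** leading-zero loop (ref = x^4, k = 3), then k >>= 1 leaves k = 1 with
** tmp = x^8, and the final multiply gives x^4 * x^8 = x^12 -- four
** IR_MULs in total.
*/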

LJFOLD(POW any KNUM)
LJFOLDF(simplify_numpow_xknum)
{
  if (knumright == 0.5)  /* x ^ 0.5 ==> sqrt(x) */
    return emitir(IRTN(IR_FPMATH), fins->op1, IRFPM_SQRT);
  return NEXTFOLD;
}

LJFOLD(POW KNUM any)
LJFOLDF(simplify_numpow_kx)
{
  lua_Number n = knumleft;
  if (n == 2.0 && irt_isint(fright->t)) {  /* 2.0 ^ i ==> ldexp(1.0, i) */
#if LJ_TARGET_X86ORX64
    /* Different IR_LDEXP calling convention on x86/x64 requires conversion. */
    fins->o = IR_CONV;
    fins->op1 = fins->op2;
    fins->op2 = IRCONV_NUM_INT;
    fins->op2 = (IRRef1)lj_opt_fold(J);
#endif
    fins->op1 = (IRRef1)lj_ir_knum_one(J);
    fins->o = IR_LDEXP;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- Simplify conversions ------------------------------------------------ */

LJFOLD(CONV CONV IRCONV_NUM_INT)  /* _NUM */
LJFOLDF(shortcut_conv_num_int)
{
  PHIBARRIER(fleft);
  /* Only safe with a guarded conversion to int. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t))
    return fleft->op1;  /* f(g(x)) ==> x */
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_NUM)  /* _INT */
LJFOLD(CONV CONV IRCONV_U32_NUM)  /* _U32 */
LJFOLDF(simplify_conv_int_num)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) ==
      ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH))
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_I64_NUM)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U64_NUM)  /* _INT or _U32 */
LJFOLDF(simplify_conv_i64_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    /* Reduce to a sign-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
    return RETRYFOLD;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
#if LJ_TARGET_X64
    return fleft->op1;
#else
    /* Reduce to a zero-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_I64<<5)|IRT_U32;
    return RETRYFOLD;
#endif
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_INT_U64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_U64)  /* _INT or _U32 */
LJFOLDF(simplify_conv_int_i64)
{
  int src;
  PHIBARRIER(fleft);
  src = (fleft->op2 & IRCONV_SRCMASK);
  if (src == IRT_INT || src == IRT_U32) {
    if (src == ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH)) {
      return fleft->op1;
    } else {
      fins->op2 = ((fins->op2 & IRCONV_DSTMASK) | src);
      fins->op1 = fleft->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_FLOAT_NUM)  /* _FLOAT */
LJFOLDF(simplify_conv_flt_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_FLOAT)
    return fleft->op1;
  return NEXTFOLD;
}

/* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(TOBIT CONV KNUM)
LJFOLDF(simplify_tobit_conv)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
    return fleft->op1;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
    lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
    fins->o = IR_CONV;
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_INT<<5)|IRT_U32;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(FPMATH CONV IRFPM_FLOOR)
LJFOLD(FPMATH CONV IRFPM_CEIL)
LJFOLD(FPMATH CONV IRFPM_TRUNC)
LJFOLDF(simplify_floor_conv)
{
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
      (fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
    return LEFTFOLD;
  return NEXTFOLD;
}

/* Strength reduction of widening. */
LJFOLD(CONV any IRCONV_I64_INT)
LJFOLD(CONV any IRCONV_U64_INT)
LJFOLDF(simplify_conv_sext)
{
  IRRef ref = fins->op1;
  int64_t ofs = 0;
  if (!(fins->op2 & IRCONV_SEXT))
    return NEXTFOLD;
  PHIBARRIER(fleft);
  if (fleft->o == IR_XLOAD && (irt_isu8(fleft->t) || irt_isu16(fleft->t)))
    goto ok_reduce;
  if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
    ofs = (int64_t)IR(fleft->op2)->i;
    ref = fleft->op1;
  }
  /* Use scalar evolution analysis results to strength-reduce sign-extension. */
  if (ref == J->scev.idx) {
    IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
    lj_assertJ(irt_isint(J->scev.t), "only int SCEV supported");
    if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) {
    ok_reduce:
#if LJ_TARGET_X64
      /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
      return LEFTFOLD;
#else
      /* Reduce to a (cheaper) zero-extension. */
      fins->op2 &= ~IRCONV_SEXT;
      return RETRYFOLD;
#endif
    }
  }
  return NEXTFOLD;
}
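
/* E.g. in a recorded 'for i=1,n do ... end' loop, SCEV knows the int
** induction variable starts at a constant 1, so i (and i+ofs for
** ofs >= -1) never goes negative; sign- and zero-extension then agree,
** and on x64 the widening vanishes entirely.
*/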

/* Strength reduction of narrowing. */
LJFOLD(CONV ADD IRCONV_INT_I64)
LJFOLD(CONV SUB IRCONV_INT_I64)
LJFOLD(CONV MUL IRCONV_INT_I64)
LJFOLD(CONV ADD IRCONV_INT_U64)
LJFOLD(CONV SUB IRCONV_INT_U64)
LJFOLD(CONV MUL IRCONV_INT_U64)
LJFOLD(CONV ADD IRCONV_U32_I64)
LJFOLD(CONV SUB IRCONV_U32_I64)
LJFOLD(CONV MUL IRCONV_U32_I64)
LJFOLD(CONV ADD IRCONV_U32_U64)
LJFOLD(CONV SUB IRCONV_U32_U64)
LJFOLD(CONV MUL IRCONV_U32_U64)
LJFOLDF(simplify_conv_narrow)
{
#if LJ_64
  UNUSED(J);
  return NEXTFOLD;
#else
  IROp op = (IROp)fleft->o;
  IRType t = irt_type(fins->t);
  IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
  PHIBARRIER(fleft);
  op1 = emitir(IRT(IR_CONV, t), op1, mode);
  op2 = emitir(IRT(IR_CONV, t), op2, mode);
  fins->ot = IRT(op, t);
  fins->op1 = op1;
  fins->op2 = op2;
  return RETRYFOLD;
#endif
}

/* Special CSE rule for CONV. */
LJFOLD(CONV any any)
LJFOLDF(cse_conv)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
    uint8_t guard = irt_isguard(fins->t);
    IRRef ref = J->chain[IR_CONV];
    while (ref > op1) {
      IRIns *ir = IR(ref);
      /* Commoning with stronger checks is ok. */
      if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
	  irt_isguard(ir->t) >= guard)
	return ref;
      ref = ir->prev;
    }
  }
  return EMITFOLD;  /* No fallthrough to regular CSE. */
}

/* FP conversion narrowing. */
LJFOLD(TOBIT ADD KNUM)
LJFOLD(TOBIT SUB KNUM)
LJFOLD(CONV ADD IRCONV_INT_NUM)
LJFOLD(CONV SUB IRCONV_INT_NUM)
LJFOLD(CONV ADD IRCONV_I64_NUM)
LJFOLD(CONV SUB IRCONV_I64_NUM)
LJFOLDF(narrow_convert)
{
  PHIBARRIER(fleft);
  /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
  if (J->chain[IR_LOOP])
    return NEXTFOLD;
  lj_assertJ(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT,
	     "unexpected CONV TOBIT");
  return lj_opt_narrow_convert(J);
}

/* -- Integer algebraic simplifications ----------------------------------- */

LJFOLD(ADD any KINT)
LJFOLD(ADDOV any KINT)
LJFOLD(SUBOV any KINT)
LJFOLDF(simplify_intadd_k)
{
  if (fright->i == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(MULOV any KINT)
LJFOLDF(simplify_intmul_k)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return RIGHTFOLD;
  if (fright->i == 1)  /* i * 1 ==> i */
    return LEFTFOLD;
  if (fright->i == 2) {  /* i * 2 ==> i + i */
    fins->o = IR_ADDOV;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any KINT)
LJFOLDF(simplify_intsub_k)
{
  if (fright->i == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i);  /* Overflow for -2^31 ok. */
  return RETRYFOLD;
}

LJFOLD(SUB KINT any)
LJFOLD(SUB KINT64 any)
LJFOLDF(simplify_intsub_kleft)
{
  if (fleft->o == IR_KINT ? (fleft->i == 0) : (ir_kint64(fleft)->u64 == 0)) {
    fins->o = IR_NEG;  /* 0 - i ==> -i */
    fins->op1 = fins->op2;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(ADD any KINT64)
LJFOLDF(simplify_intadd_k64)
{
  if (ir_kint64(fright)->u64 == 0)  /* i + 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB any KINT64)
LJFOLDF(simplify_intsub_k64)
{
  uint64_t k = ir_kint64(fright)->u64;
  if (k == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint64(J, (uint64_t)-(int64_t)k);
  return RETRYFOLD;
}

static TRef simplify_intmul_k(jit_State *J, int32_t k)
{
  /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
  ** But this is mainly intended for simple address arithmetic.
  ** Also it's easier for the backend to optimize the original multiplies.
  */
  if (k == 0) {  /* i * 0 ==> 0 */
    return RIGHTFOLD;
  } else if (k == 1) {  /* i * 1 ==> i */
    return LEFTFOLD;
  } else if ((k & (k-1)) == 0) {  /* i * 2^k ==> i << k */
    fins->o = IR_BSHL;
    fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k));
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
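
/* E.g. i * 8 becomes i << 3, since lj_fls(8) == 3, while i * 6 is kept
** as a multiply -- the backend can usually do better with the original
** (e.g. via lea on x86/x64).
*/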
1467 
1468 LJFOLD(MUL any KINT)
LJFOLDF(simplify_intmul_k32)1469 LJFOLDF(simplify_intmul_k32)
1470 {
1471   if (fright->i >= 0)
1472     return simplify_intmul_k(J, fright->i);
1473   return NEXTFOLD;
1474 }
1475 
1476 LJFOLD(MUL any KINT64)
LJFOLDF(simplify_intmul_k64)1477 LJFOLDF(simplify_intmul_k64)
1478 {
1479 #if LJ_HASFFI
1480   if (ir_kint64(fright)->u64 < 0x80000000u)
1481     return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
1482   return NEXTFOLD;
1483 #else
1484   UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
1485 #endif
1486 }
1487 
1488 LJFOLD(MOD any KINT)
LJFOLDF(simplify_intmod_k)1489 LJFOLDF(simplify_intmod_k)
1490 {
1491   int32_t k = fright->i;
1492   lj_assertJ(k != 0, "integer mod 0");
1493   if (k > 0 && (k & (k-1)) == 0) {  /* i % (2^k) ==> i & (2^k-1) */
1494     fins->o = IR_BAND;
1495     fins->op2 = lj_ir_kint(J, k-1);
1496     return RETRYFOLD;
1497   }
1498   return NEXTFOLD;
1499 }
1500 
1501 LJFOLD(MOD KINT any)
LJFOLDF(simplify_intmod_kleft)1502 LJFOLDF(simplify_intmod_kleft)
1503 {
1504   if (fleft->i == 0)
1505     return INTFOLD(0);
1506   return NEXTFOLD;
1507 }
1508 
1509 LJFOLD(SUB any any)
LJFOLD(SUBOV any any)1510 LJFOLD(SUBOV any any)
1511 LJFOLDF(simplify_intsub)
1512 {
1513   if (fins->op1 == fins->op2 && !irt_isnum(fins->t))  /* i - i ==> 0 */
1514     return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
1515   return NEXTFOLD;
1516 }
1517 
1518 LJFOLD(SUB ADD any)
LJFOLDF(simplify_intsubadd_leftcancel)1519 LJFOLDF(simplify_intsubadd_leftcancel)
1520 {
1521   if (!irt_isnum(fins->t)) {
1522     PHIBARRIER(fleft);
1523     if (fins->op2 == fleft->op1)  /* (i + j) - i ==> j */
1524       return fleft->op2;
1525     if (fins->op2 == fleft->op2)  /* (i + j) - j ==> i */
1526       return fleft->op1;
1527   }
1528   return NEXTFOLD;
1529 }
1530 
1531 LJFOLD(SUB SUB any)
LJFOLDF(simplify_intsubsub_leftcancel)1532 LJFOLDF(simplify_intsubsub_leftcancel)
1533 {
1534   if (!irt_isnum(fins->t)) {
1535     PHIBARRIER(fleft);
1536     if (fins->op2 == fleft->op1) {  /* (i - j) - i ==> 0 - j */
1537       fins->op1 = (IRRef1)lj_ir_kint(J, 0);
1538       fins->op2 = fleft->op2;
1539       return RETRYFOLD;
1540     }
1541   }
1542   return NEXTFOLD;
1543 }
1544 
1545 LJFOLD(SUB any SUB)
LJFOLDF(simplify_intsubsub_rightcancel)1546 LJFOLDF(simplify_intsubsub_rightcancel)
1547 {
1548   if (!irt_isnum(fins->t)) {
1549     PHIBARRIER(fright);
1550     if (fins->op1 == fright->op1)  /* i - (i - j) ==> j */
1551       return fright->op2;
1552   }
1553   return NEXTFOLD;
1554 }
1555 
1556 LJFOLD(SUB any ADD)
LJFOLDF(simplify_intsubadd_rightcancel)1557 LJFOLDF(simplify_intsubadd_rightcancel)
1558 {
1559   if (!irt_isnum(fins->t)) {
1560     PHIBARRIER(fright);
1561     if (fins->op1 == fright->op1) {  /* i - (i + j) ==> 0 - j */
1562       fins->op2 = fright->op2;
1563       fins->op1 = (IRRef1)lj_ir_kint(J, 0);
1564       return RETRYFOLD;
1565     }
1566     if (fins->op1 == fright->op2) {  /* i - (j + i) ==> 0 - j */
1567       fins->op2 = fright->op1;
1568       fins->op1 = (IRRef1)lj_ir_kint(J, 0);
1569       return RETRYFOLD;
1570     }
1571   }
1572   return NEXTFOLD;
1573 }
1574 
1575 LJFOLD(SUB ADD ADD)
LJFOLDF(simplify_intsubaddadd_cancel)1576 LJFOLDF(simplify_intsubaddadd_cancel)
1577 {
1578   if (!irt_isnum(fins->t)) {
1579     PHIBARRIER(fleft);
1580     PHIBARRIER(fright);
1581     if (fleft->op1 == fright->op1) {  /* (i + j1) - (i + j2) ==> j1 - j2 */
1582       fins->op1 = fleft->op2;
1583       fins->op2 = fright->op2;
1584       return RETRYFOLD;
1585     }
1586     if (fleft->op1 == fright->op2) {  /* (i + j1) - (j2 + i) ==> j1 - j2 */
1587       fins->op1 = fleft->op2;
1588       fins->op2 = fright->op1;
1589       return RETRYFOLD;
1590     }
1591     if (fleft->op2 == fright->op1) {  /* (j1 + i) - (i + j2) ==> j1 - j2 */
1592       fins->op1 = fleft->op1;
1593       fins->op2 = fright->op2;
1594       return RETRYFOLD;
1595     }
1596     if (fleft->op2 == fright->op2) {  /* (j1 + i) - (j2 + i) ==> j1 - j2 */
1597       fins->op1 = fleft->op1;
1598       fins->op2 = fright->op1;
1599       return RETRYFOLD;
1600     }
1601   }
1602   return NEXTFOLD;
1603 }
1604 
1605 LJFOLD(BAND any KINT)
LJFOLD(BAND any KINT64)1606 LJFOLD(BAND any KINT64)
1607 LJFOLDF(simplify_band_k)
1608 {
1609   int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
1610 				     (int64_t)ir_k64(fright)->u64;
1611   if (k == 0)  /* i & 0 ==> 0 */
1612     return RIGHTFOLD;
1613   if (k == -1)  /* i & -1 ==> i */
1614     return LEFTFOLD;
1615   return NEXTFOLD;
1616 }
1617 
1618 LJFOLD(BOR any KINT)
LJFOLD(BOR any KINT64)1619 LJFOLD(BOR any KINT64)
1620 LJFOLDF(simplify_bor_k)
1621 {
1622   int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
1623 				     (int64_t)ir_k64(fright)->u64;
1624   if (k == 0)  /* i | 0 ==> i */
1625     return LEFTFOLD;
1626   if (k == -1)  /* i | -1 ==> -1 */
1627     return RIGHTFOLD;
1628   return NEXTFOLD;
1629 }
1630 
1631 LJFOLD(BXOR any KINT)
LJFOLD(BXOR any KINT64)1632 LJFOLD(BXOR any KINT64)
1633 LJFOLDF(simplify_bxor_k)
1634 {
1635   int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
1636 				     (int64_t)ir_k64(fright)->u64;
1637   if (k == 0)  /* i xor 0 ==> i */
1638     return LEFTFOLD;
1639   if (k == -1) {  /* i xor -1 ==> ~i */
1640     fins->o = IR_BNOT;
1641     fins->op2 = 0;
1642     return RETRYFOLD;
1643   }
1644   return NEXTFOLD;
1645 }

LJFOLD(BSHL any KINT)
LJFOLD(BSHR any KINT)
LJFOLD(BSAR any KINT)
LJFOLD(BROL any KINT)
LJFOLD(BROR any KINT)
LJFOLDF(simplify_shift_ik)
{
  int32_t mask = irt_is64(fins->t) ? 63 : 31;
  int32_t k = (fright->i & mask);
  if (k == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  if (k == 1 && fins->o == IR_BSHL) {  /* i << 1 ==> i + i */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  if (k != fright->i) {  /* i o k ==> i o (k & mask) */
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
#ifndef LJ_TARGET_UNIFYROT
  if (fins->o == IR_BROR) {  /* bror(i, k) ==> brol(i, (-k)&mask) */
    fins->o = IR_BROL;
    fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask);
    return RETRYFOLD;
  }
#endif
  return NEXTFOLD;
}

LJFOLD(BSHL any BAND)
LJFOLD(BSHR any BAND)
LJFOLD(BSAR any BAND)
LJFOLD(BROL any BAND)
LJFOLD(BROR any BAND)
LJFOLDF(simplify_shift_andk)
{
  IRIns *irk = IR(fright->op2);
  PHIBARRIER(fright);
  if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
      irk->o == IR_KINT) {  /* i o (j & mask) ==> i o j */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = irk->i & mask;
    if (k == mask) {
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}
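/* Note: LJ_TARGET_MASKSHIFT/LJ_TARGET_MASKROT mark targets whose shift and
** rotate instructions implicitly mask the count themselves (e.g. x86 shifts
** only use the lowest 5 or 6 bits), which makes the explicit BAND redundant.
*/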

LJFOLD(BSHL KINT any)
LJFOLD(BSHR KINT any)
LJFOLD(BSHL KINT64 any)
LJFOLD(BSHR KINT64 any)
LJFOLDF(simplify_shift1_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
				    (int64_t)ir_k64(fleft)->u64;
  if (k == 0)  /* 0 o i ==> 0 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSAR KINT any)
LJFOLD(BROL KINT any)
LJFOLD(BROR KINT any)
LJFOLD(BSAR KINT64 any)
LJFOLD(BROL KINT64 any)
LJFOLD(BROR KINT64 any)
LJFOLDF(simplify_shift2_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
				    (int64_t)ir_k64(fleft)->u64;
  if (k == 0 || k == -1)  /* 0 o i ==> 0; -1 o i ==> -1 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSHL BAND KINT)
LJFOLD(BSHR BAND KINT)
LJFOLD(BROL BAND KINT)
LJFOLD(BROR BAND KINT)
LJFOLDF(simplify_shiftk_andk)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);
  if (irk->o == IR_KINT) {  /* (i & k1) o k2 ==> (i o k2) & (k1 o k2) */
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
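    /* Fold the inner shift (i o k2) first: lj_opt_fold() consumes the
    ** modified fins, and the returned ref becomes op1 of the outer BAND. */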
    fins->op1 = fleft->op1;
    fins->op1 = (IRRef1)lj_opt_fold(J);
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    fins->ot = IRTI(IR_BAND);
    return RETRYFOLD;
  } else if (irk->o == IR_KINT64) {
    uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, fright->i,
				  (IROp)fins->o);
    IROpT ot = fleft->ot;
    fins->op1 = fleft->op1;
    fins->op1 = (IRRef1)lj_opt_fold(J);
    fins->op2 = (IRRef1)lj_ir_kint64(J, k);
    fins->ot = ot;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BAND BSHL KINT)
LJFOLD(BAND BSHR KINT)
LJFOLDF(simplify_andk_shiftk)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT &&
      kfold_intop(-1, irk->i, (IROp)fleft->o) == fright->i)
    return LEFTFOLD;  /* (i o k1) & k2 ==> i, if (-1 o k1) == k2 */
  return NEXTFOLD;
}
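/* Example: (i << 8) & 0xffffff00 ==> i << 8, since (-1 << 8) == 0xffffff00:
** the mask keeps every bit the shift can possibly set.
*/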

LJFOLD(BAND BOR KINT)
LJFOLD(BOR BAND KINT)
LJFOLDF(simplify_andor_k)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);
  if (irk->o == IR_KINT) {
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
    /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
    if (k == (fins->o == IR_BAND ? 0 : -1)) {
      fins->op1 = fleft->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BAND BOR KINT64)
LJFOLD(BOR BAND KINT64)
LJFOLDF(simplify_andor_k64)
{
#if LJ_HASFFI
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);
  if (irk->o == IR_KINT64) {
    uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
				  (IROp)fins->o);
    /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
    /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
    if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) {
      fins->op1 = fleft->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

/* -- Reassociation ------------------------------------------------------- */

LJFOLD(ADD ADD KINT)
LJFOLD(MUL MUL KINT)
LJFOLD(BAND BAND KINT)
LJFOLD(BOR BOR KINT)
LJFOLD(BXOR BXOR KINT)
LJFOLDF(reassoc_intarith_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT) {
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    if (k == irk->i)  /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
}
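/* Examples: (i + 1) + 2 ==> i + 3; (i & 0xff) & 0xfff ==> i & 0xff,
** via the LEFTFOLD case, since 0xff & 0xfff == 0xff.
*/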

LJFOLD(ADD ADD KINT64)
LJFOLD(MUL MUL KINT64)
LJFOLD(BAND BAND KINT64)
LJFOLD(BOR BOR KINT64)
LJFOLD(BXOR BXOR KINT64)
LJFOLDF(reassoc_intarith_k64)
{
#if LJ_HASFFI
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT64) {
    uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
				  (IROp)fins->o);
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint64(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(BAND BAND any)
LJFOLD(BOR BOR any)
LJFOLDF(reassoc_dup)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return LEFTFOLD;  /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
  return NEXTFOLD;
}

LJFOLD(MIN MIN any)
LJFOLD(MAX MAX any)
LJFOLDF(reassoc_dup_minmax)
{
  if (fins->op2 == fleft->op2)
    return LEFTFOLD;  /* (a o b) o b ==> a o b */
  return NEXTFOLD;
}
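/* Caveat: only the (a o b) o b form is safe to drop here. Reducing
** (a o b) o a as in reassoc_dup would not be conservative for FP min/max,
** e.g. when b is a NaN.
*/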

LJFOLD(BXOR BXOR any)
LJFOLDF(reassoc_bxor)
{
  PHIBARRIER(fleft);
  if (fins->op2 == fleft->op1)  /* (a xor b) xor a ==> b */
    return fleft->op2;
  if (fins->op2 == fleft->op2)  /* (a xor b) xor b ==> a */
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(BSHL BSHL KINT)
LJFOLD(BSHR BSHR KINT)
LJFOLD(BSAR BSAR KINT)
LJFOLD(BROL BROL KINT)
LJFOLD(BROR BROR KINT)
LJFOLDF(reassoc_shift)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);  /* The (shift any KINT) rule covers k2 == 0 and more. */
  if (irk->o == IR_KINT) {  /* (i o k1) o k2 ==> i o (k1 + k2) */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = (irk->i & mask) + (fright->i & mask);
    if (k > mask) {  /* Combined shift too wide? */
      if (fins->o == IR_BSHL || fins->o == IR_BSHR)
	return mask == 31 ? INTFOLD(0) : INT64FOLD(0);
      else if (fins->o == IR_BSAR)
	k = mask;
      else
	k &= mask;
    }
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
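/* Examples: (i >> 16) >> 16 folds to 0 for 32 bit operands, while
** (i sar 16) sar 16 is clamped to i sar 31 to preserve the sign fill.
*/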

LJFOLD(MIN MIN KINT)
LJFOLD(MAX MAX KINT)
LJFOLDF(reassoc_minmax_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT) {
    int32_t a = irk->i;
    int32_t y = kfold_intop(a, fright->i, fins->o);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  }
  return NEXTFOLD;
}
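/* Example: MIN(MIN(x, 10), 20) ==> MIN(x, 10), since min(10, 20) == 10. */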

/* -- Array bounds check elimination -------------------------------------- */

/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
*/
LJFOLD(ABC any ADD)
LJFOLDF(abc_fwd)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    if (irref_isk(fright->op2)) {
      IRIns *add2 = IR(fright->op1);
      if (add2->o == IR_ADD && irref_isk(add2->op2) &&
	  IR(fright->op2)->i == -IR(add2->op2)->i) {
	IRRef ref = J->chain[IR_ABC];
	IRRef lim = add2->op1;
	if (fins->op1 > lim) lim = fins->op1;
	while (ref > lim) {
	  IRIns *ir = IR(ref);
	  if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
	    return DROPFOLD;
	  ref = ir->prev;
	}
      }
    }
  }
  return NEXTFOLD;
}

/* Eliminate ABC for constants.
** ABC(asize, k1), ABC(asize, k2) ==> ABC(asize, max(k1, k2))
** The second ABC is always dropped; if k2 is higher, the first ABC is
** patched to check against k2 instead.
*/
LJFOLD(ABC any KINT)
LJFOLDF(abc_k)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    IRRef ref = J->chain[IR_ABC];
    IRRef asize = fins->op1;
    while (ref > asize) {
      IRIns *ir = IR(ref);
      if (ir->op1 == asize && irref_isk(ir->op2)) {
	int32_t k = IR(ir->op2)->i;
	if (fright->i > k)
	  ir->op2 = fins->op2;
	return DROPFOLD;
      }
      ref = ir->prev;
    }
    return EMITFOLD;  /* Already performed CSE. */
  }
  return NEXTFOLD;
}

/* Eliminate invariant ABC inside loop. */
LJFOLD(ABC any any)
LJFOLDF(abc_invar)
{
  /* Invariant ABC marked as PTR. Drop if op1 is invariant, too. */
  if (!irt_isint(fins->t) && fins->op1 < J->chain[IR_LOOP] &&
      !irt_isphi(IR(fins->op1)->t))
    return DROPFOLD;
  return NEXTFOLD;
}

/* -- Commutativity ------------------------------------------------------- */

/* The refs of commutative ops are canonicalized. Lower refs go to the right.
** Rationale behind this:
** - It (also) moves constants to the right.
** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
** - It helps CSE to find more matches.
** - The assembler generates better code with constants at the right.
*/

LJFOLD(ADD any any)
LJFOLD(MUL any any)
LJFOLD(ADDOV any any)
LJFOLD(MULOV any any)
LJFOLDF(comm_swap)
{
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(EQ any any)
LJFOLD(NE any any)
LJFOLDF(comm_equal)
{
  /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD(fins->o == IR_EQ);
  return fold_comm_swap(J);
}

LJFOLD(LT any any)
LJFOLD(GE any any)
LJFOLD(LE any any)
LJFOLD(GT any any)
LJFOLD(ULT any any)
LJFOLD(UGE any any)
LJFOLD(ULE any any)
LJFOLD(UGT any any)
LJFOLDF(comm_comp)
{
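  /* Bit 0 of (o ^ (o >> 1)) is set exactly for the reflexively true
  ** comparisons GE, LE, UGE and ULE. This assumes the opcode layout
  ** LT, GE, LE, GT (+4 for the unsigned variants), with IR_LT 4-aligned.
  */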
  /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD((fins->o ^ (fins->o >> 1)) & 1);
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    fins->o ^= 3; /* GT <-> LT, GE <-> LE, does not affect U */
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BAND any any)
LJFOLD(BOR any any)
LJFOLDF(comm_dup)
{
  if (fins->op1 == fins->op2)  /* x o x ==> x */
    return LEFTFOLD;
  return fold_comm_swap(J);
}

LJFOLD(MIN any any)
LJFOLD(MAX any any)
LJFOLDF(comm_dup_minmax)
{
  if (fins->op1 == fins->op2)  /* x o x ==> x */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BXOR any any)
LJFOLDF(comm_bxor)
{
  if (fins->op1 == fins->op2)  /* i xor i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return fold_comm_swap(J);
}

/* -- Simplification of compound expressions ------------------------------ */

static TRef kfold_xload(jit_State *J, IRIns *ir, const void *p)
{
  int32_t k;
  switch (irt_type(ir->t)) {
  case IRT_NUM: return lj_ir_knum_u64(J, *(uint64_t *)p);
  case IRT_I8: k = (int32_t)*(int8_t *)p; break;
  case IRT_U8: k = (int32_t)*(uint8_t *)p; break;
  case IRT_I16: k = (int32_t)(int16_t)lj_getu16(p); break;
  case IRT_U16: k = (int32_t)(uint16_t)lj_getu16(p); break;
  case IRT_INT: case IRT_U32: k = (int32_t)lj_getu32(p); break;
  case IRT_I64: case IRT_U64: return lj_ir_kint64(J, *(uint64_t *)p);
  default: return 0;
  }
  return lj_ir_kint(J, k);
}

/* Turn: string.sub(str, a, b) == kstr
** into: string.byte(str, a) == string.byte(kstr, 1) etc.
** Note: this creates unaligned XLOADs on x86/x64.
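** Example (on unaligned-capable targets): s == "abc" becomes a guard on
** the length plus a single 4 byte XLOAD of the string data, masked to
** 24 bits, compared against the constant bytes of "abc".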
*/
LJFOLD(EQ SNEW KGC)
LJFOLD(NE SNEW KGC)
LJFOLDF(merge_eqne_snew_kgc)
{
  GCstr *kstr = ir_kstr(fright);
  int32_t len = (int32_t)kstr->len;
  lj_assertJ(irt_isstr(fins->t), "bad equality IR type");

#if LJ_TARGET_UNALIGNED
#define FOLD_SNEW_MAX_LEN	4  /* Handle string lengths 0, 1, 2, 3, 4. */
#define FOLD_SNEW_TYPE8		IRT_I8	/* Creates shorter immediates. */
#else
#define FOLD_SNEW_MAX_LEN	1  /* Handle string lengths 0 or 1. */
#define FOLD_SNEW_TYPE8		IRT_U8  /* Prefer unsigned loads. */
#endif

  PHIBARRIER(fleft);
  if (len <= FOLD_SNEW_MAX_LEN) {
    IROp op = (IROp)fins->o;
    IRRef strref = fleft->op1;
    if (IR(strref)->o != IR_STRREF)
      return NEXTFOLD;
    if (op == IR_EQ) {
      emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len));
      /* Caveat: fins/fleft/fright is no longer valid after emitir. */
    } else {
      /* NE is not expanded since this would need an OR of two conds. */
      if (!irref_isk(fleft->op2))  /* Only handle the constant length case. */
	return NEXTFOLD;
      if (IR(fleft->op2)->i != len)
	return DROPFOLD;
    }
    if (len > 0) {
      /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
      uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, FOLD_SNEW_TYPE8) :
			       len == 2 ? IRT(IR_XLOAD, IRT_U16) :
			       IRTI(IR_XLOAD));
      TRef tmp = emitir(ot, strref,
			IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
      TRef val = kfold_xload(J, IR(tref_ref(tmp)), strdata(kstr));
      if (len == 3)
	tmp = emitir(IRTI(IR_BAND), tmp,
		     lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
      fins->op1 = (IRRef1)tmp;
      fins->op2 = (IRRef1)val;
      fins->ot = (IROpT)IRTGI(op);
      return RETRYFOLD;
    } else {
      return DROPFOLD;
    }
  }
  return NEXTFOLD;
}

/* -- Loads --------------------------------------------------------------- */

/* Loads cannot be folded or passed on to CSE in general.
** Alias analysis is needed to check for forwarding opportunities.
**
** Caveat: *all* loads must be listed here or they end up at CSE!
*/

LJFOLD(ALOAD any)
LJFOLDX(lj_opt_fwd_aload)

/* From HREF fwd (see below). Must eliminate, not supported by fwd/backend. */
LJFOLD(HLOAD KKPTR)
LJFOLDF(kfold_hload_kkptr)
{
  UNUSED(J);
  lj_assertJ(ir_kptr(fleft) == niltvg(J2G(J)), "expected niltv");
  return TREF_NIL;
}

LJFOLD(HLOAD any)
LJFOLDX(lj_opt_fwd_hload)

LJFOLD(ULOAD any)
LJFOLDX(lj_opt_fwd_uload)

LJFOLD(ALEN any any)
LJFOLDX(lj_opt_fwd_alen)

/* Upvalue refs are really loads, but there are no corresponding stores.
** So CSE is ok for them, except for UREFO across a GC step (see below).
** If the referenced function is const, its upvalue addresses are const, too.
** This can be used to improve CSE by looking for the same address,
** even if the upvalues originate from a different function.
*/
LJFOLD(UREFO KGC any)
LJFOLD(UREFC KGC any)
LJFOLDF(cse_uref)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[fins->o];
    GCfunc *fn = ir_kfunc(fleft);
    GCupval *uv = gco2uv(gcref(fn->l.uvptr[(fins->op2 >> 8)]));
    while (ref > 0) {
      IRIns *ir = IR(ref);
      if (irref_isk(ir->op1)) {
	GCfunc *fn2 = ir_kfunc(IR(ir->op1));
	if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
	  if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
	    break;
	  return ref;
	}
      }
      ref = ir->prev;
    }
  }
  return EMITFOLD;
}

LJFOLD(HREFK any any)
LJFOLDX(lj_opt_fwd_hrefk)

LJFOLD(HREF TNEW any)
LJFOLDF(fwd_href_tnew)
{
  if (lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}

LJFOLD(HREF TDUP KPRI)
LJFOLD(HREF TDUP KGC)
LJFOLD(HREF TDUP KNUM)
LJFOLDF(fwd_href_tdup)
{
  TValue keyv;
  lj_ir_kvalue(J->L, &keyv, fright);
  if (lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv) == niltvg(J2G(J)) &&
      lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}

/* We can safely FOLD/CSE array/hash refs and field loads, since there
** are no corresponding stores. But we need to check for any NEWREF with
** an aliased table, as it may invalidate all of the pointers and fields.
** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
** FLOADs. And NEWREF itself is treated like a store (see below).
** LREF is constant (per trace) since coroutine switches are not inlined.
*/
LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tnew_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD(fleft->op1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tnew_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((1 << fleft->op2)-1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tdup_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tdup_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
  return NEXTFOLD;
}

LJFOLD(HREF any any)
LJFOLD(FLOAD any IRFL_TAB_ARRAY)
LJFOLD(FLOAD any IRFL_TAB_NODE)
LJFOLD(FLOAD any IRFL_TAB_ASIZE)
LJFOLD(FLOAD any IRFL_TAB_HMASK)
LJFOLDF(fload_tab_ah)
{
  TRef tr = lj_opt_cse(J);
  return lj_opt_fwd_tptr(J, tref_ref(tr)) ? tr : EMITFOLD;
}

/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
LJFOLD(FLOAD KGC IRFL_STR_LEN)
LJFOLDF(fload_str_len_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kstr(fleft)->len);
  return NEXTFOLD;
}

LJFOLD(FLOAD SNEW IRFL_STR_LEN)
LJFOLDF(fload_str_len_snew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    PHIBARRIER(fleft);
    return fleft->op2;
  }
  return NEXTFOLD;
}

LJFOLD(FLOAD TOSTR IRFL_STR_LEN)
LJFOLDF(fload_str_len_tostr)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
    return INTFOLD(1);
  return NEXTFOLD;
}

LJFOLD(FLOAD any IRFL_SBUF_W)
LJFOLD(FLOAD any IRFL_SBUF_E)
LJFOLD(FLOAD any IRFL_SBUF_B)
LJFOLD(FLOAD any IRFL_SBUF_L)
LJFOLD(FLOAD any IRFL_SBUF_REF)
LJFOLD(FLOAD any IRFL_SBUF_R)
LJFOLDF(fload_sbuf)
{
  TRef tr = lj_opt_fwd_fload(J);
  return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD;
}

/* The fast function ID of function objects is immutable. */
LJFOLD(FLOAD KGC IRFL_FUNC_FFID)
LJFOLDF(fload_func_ffid_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid);
  return NEXTFOLD;
}

/* The C type ID of cdata objects is immutable. */
LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kcdata(fleft)->ctypeid);
  return NEXTFOLD;
}

/* Get the contents of immutable cdata objects. */
LJFOLD(FLOAD KGC IRFL_CDATA_PTR)
LJFOLD(FLOAD KGC IRFL_CDATA_INT)
LJFOLD(FLOAD KGC IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_int64_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    void *p = cdataptr(ir_kcdata(fleft));
    if (irt_is64(fins->t))
      return INT64FOLD(*(uint64_t *)p);
    else
      return INTFOLD(*(int32_t *)p);
  }
  return NEXTFOLD;
}

LJFOLD(FLOAD CNEW IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD CNEWI IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op1;  /* No PHI barrier needed. CNEW/CNEWI op1 is const. */
  return NEXTFOLD;
}

/* Pointer, int and int64 cdata objects are immutable. */
LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_ptr_int64_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op2;  /* Fold even across PHI to avoid allocations. */
  return NEXTFOLD;
}

LJFOLD(FLOAD any IRFL_STR_LEN)
LJFOLD(FLOAD any IRFL_FUNC_ENV)
LJFOLD(FLOAD any IRFL_THREAD_ENV)
LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD any IRFL_CDATA_PTR)
LJFOLD(FLOAD any IRFL_CDATA_INT)
LJFOLD(FLOAD any IRFL_CDATA_INT64)
LJFOLD(VLOAD any any)  /* Vararg loads have no corresponding stores. */
LJFOLDX(lj_opt_cse)

/* All other field loads need alias analysis. */
LJFOLD(FLOAD any any)
LJFOLDX(lj_opt_fwd_fload)

/* This is for LOOP only. Recording handles SLOADs internally. */
LJFOLD(SLOAD any any)
LJFOLDF(fwd_sload)
{
  if ((fins->op2 & IRSLOAD_FRAME)) {
    TRef tr = lj_opt_cse(J);
    return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
  } else {
    lj_assertJ(J->slot[fins->op1] != 0, "uninitialized slot accessed");
    return J->slot[fins->op1];
  }
}

/* Only fold for KKPTR. The pointer _and_ the contents must be const. */
LJFOLD(XLOAD KKPTR any)
LJFOLDF(xload_kptr)
{
  TRef tr = kfold_xload(J, fins, ir_kptr(fleft));
  return tr ? tr : NEXTFOLD;
}

LJFOLD(XLOAD any any)
LJFOLDX(lj_opt_fwd_xload)

/* -- Frame handling ------------------------------------------------------ */

/* Prevent CSE of a REF_BASE operand across IR_RETF. */
LJFOLD(SUB any BASE)
LJFOLD(SUB BASE any)
LJFOLD(EQ any BASE)
LJFOLDF(fold_base)
{
  return lj_opt_cselim(J, J->chain[IR_RETF]);
}

/* -- Write barriers ------------------------------------------------------ */

/* Write barriers are amenable to CSE, but not across any incremental
** GC steps.
**
** The same logic applies to open upvalue references, because a stack
** may be resized during a GC step (not the current stack, but maybe that
** of a coroutine).
*/
LJFOLD(TBAR any)
LJFOLD(OBAR any any)
LJFOLD(UREFO any any)
LJFOLDF(barrier_tab)
{
  TRef tr = lj_opt_cse(J);
  if (gcstep_barrier(J, tref_ref(tr)))  /* CSE across GC step? */
    return EMITFOLD;  /* Raw emit. Assumes fins is left intact by CSE. */
  return tr;
}

LJFOLD(TBAR TNEW)
LJFOLD(TBAR TDUP)
LJFOLDF(barrier_tnew_tdup)
{
  /* New tables are always white and never need a barrier. */
  if (fins->op1 < J->chain[IR_LOOP])  /* Except across a GC step. */
    return NEXTFOLD;
  return DROPFOLD;
}

/* -- Profiling ----------------------------------------------------------- */

LJFOLD(PROF any any)
LJFOLDF(prof)
{
  IRRef ref = J->chain[IR_PROF];
  if (ref+1 == J->cur.nins)  /* Drop neighbouring IR_PROF. */
    return ref;
  return EMITFOLD;
}

/* -- Stores and allocations ---------------------------------------------- */

/* Stores and allocations cannot be folded or passed on to CSE in general.
** But some stores can be eliminated with dead-store elimination (DSE).
**
** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
*/

LJFOLD(ASTORE any any)
LJFOLD(HSTORE any any)
LJFOLDX(lj_opt_dse_ahstore)

LJFOLD(USTORE any any)
LJFOLDX(lj_opt_dse_ustore)

LJFOLD(FSTORE any any)
LJFOLDX(lj_opt_dse_fstore)

LJFOLD(XSTORE any any)
LJFOLDX(lj_opt_dse_xstore)

LJFOLD(NEWREF any any)  /* Treated like a store. */
LJFOLD(TMPREF any any)
LJFOLD(CALLA any any)
LJFOLD(CALLL any any)  /* Safeguard fallback. */
LJFOLD(CALLS any any)
LJFOLD(CALLXS any any)
LJFOLD(XBAR)
LJFOLD(RETF any any)  /* Modifies BASE. */
LJFOLD(TNEW any any)
LJFOLD(TDUP any)
LJFOLD(CNEW any any)
LJFOLD(XSNEW any any)
LJFOLDX(lj_ir_emit)

/* ------------------------------------------------------------------------ */

/* Every entry in the generated hash table is a 32 bit pattern:
**
** xxxxxxxx iiiiiii lllllll rrrrrrrrrr
**
**   xxxxxxxx = 8 bit index into the fold function table
**    iiiiiii = 7 bit folded instruction opcode
**    lllllll = 7 bit left instruction opcode
** rrrrrrrrrr = 10 bit field: 8 bit right instruction opcode or literal
*/

#include "lj_folddef.h"

/* ------------------------------------------------------------------------ */

/* Fold IR instruction. */
TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
{
  uint32_t key, any;
  IRRef ref;

  if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
    lj_assertJ(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
		JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT,
	       "bad JIT_F_OPT_DEFAULT");
    /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
    if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
      return lj_opt_cse(J);

    /* No FOLD, forwarding or CSE? Emit raw IR for loads, except for SLOAD. */
    if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
		    (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
	irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
      return lj_ir_emit(J);

    /* No FOLD or DSE? Emit raw IR for stores. */
    if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_DSE)) !=
		    (JIT_F_OPT_FOLD|JIT_F_OPT_DSE) &&
	irm_kind(lj_ir_mode[fins->o]) == IRM_S)
      return lj_ir_emit(J);
  }

  /* Fold engine start/retry point. */
retry:
  /* Construct key from opcode and operand opcodes (unless literal/none). */
  key = ((uint32_t)fins->o << 17);
  if (fins->op1 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op1)->o << 10;
    *fleft = *IR(fins->op1);
    if (fins->op1 < REF_TRUE)
      fleft[1] = IR(fins->op1)[1];
  }
  if (fins->op2 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op2)->o;
    *fright = *IR(fins->op2);
    if (fins->op2 < REF_TRUE)
      fright[1] = IR(fins->op2)[1];
  } else {
    key += (fins->op2 & 0x3ffu);  /* Literal mask. Must include IRCONV_*MASK. */
  }

  /* Check for a match in order from most specific to least specific. */
  any = 0;
  for (;;) {
    uint32_t k = key | (any & 0x1ffff);
    uint32_t h = fold_hashkey(k);
    uint32_t fh = fold_hash[h];  /* Lookup key in semi-perfect hash table. */
    if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
      ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
      if (ref != NEXTFOLD)
	break;
    }
    if (any == 0xfffff)  /* Exhausted folding. Pass on to CSE. */
      return lj_opt_cse(J);
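    /* Widen the wildcard: exact -> 'ins any right' -> 'ins left any'
    ** -> 'ins any any', as described at the top of this file. */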
    any = (any | (any >> 10)) ^ 0xffc00;
  }

  /* Return value processing, ordered by frequency. */
  if (LJ_LIKELY(ref >= MAX_FOLD))
    return TREF(ref, irt_t(IR(ref)->t));
  if (ref == RETRYFOLD)
    goto retry;
  if (ref == KINTFOLD)
    return lj_ir_kint(J, fins->i);
  if (ref == FAILFOLD)
    lj_trace_err(J, LJ_TRERR_GFAIL);
  lj_assertJ(ref == DROPFOLD, "bad fold result");
  return REF_DROP;
}

/* -- Common-Subexpression Elimination ------------------------------------ */

/* CSE an IR instruction. This is very fast due to the skip-list chains. */
TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
{
  /* Avoid narrow to wide store-to-load forwarding stall */
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  IROp op = fins->o;
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    /* Limited search for same operands in per-opcode chain. */
    IRRef ref = J->chain[op];
    IRRef lim = fins->op1;
    if (fins->op2 > lim) lim = fins->op2;  /* Relies on lit < REF_BIAS. */
    while (ref > lim) {
      if (IR(ref)->op12 == op12)
	return TREF(ref, irt_t(IR(ref)->t));  /* Common subexpression found. */
      ref = IR(ref)->prev;
    }
  }
  /* Otherwise emit IR (inlined for speed). */
  {
    IRRef ref = lj_ir_nextins(J);
    IRIns *ir = IR(ref);
    ir->prev = J->chain[op];
    ir->op12 = op12;
    J->chain[op] = (IRRef1)ref;
    ir->o = fins->o;
    J->guardemit.irt |= fins->t.irt;
    return TREF(ref, irt_t((ir->t = fins->t)));
  }
}

/* CSE with explicit search limit. */
TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
{
  IRRef ref = J->chain[fins->o];
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  while (ref > lim) {
    if (IR(ref)->op12 == op12)
      return ref;
    ref = IR(ref)->prev;
  }
  return lj_ir_emit(J);
}

/* ------------------------------------------------------------------------ */

#undef IR
#undef fins
#undef fleft
#undef fright
#undef knumleft
#undef knumright
#undef emitir

#endif