/*
** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
** ABCelim: Array Bounds Check Elimination.
** CSE: Common-Subexpression Elimination.
** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_fold_c
#define LUA_CORE

#include <math.h>

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_buf.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_ircall.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#include "lj_carith.h"
#endif
#include "lj_vm.h"
#include "lj_strscan.h"
#include "lj_strfmt.h"

/* Here's a short description of how the FOLD engine processes instructions:
**
** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
** The instruction and its operands are used to select matching fold rules.
** These are applied iteratively until a fixed point is reached.
**
** The 8 bit opcode of the instruction itself plus the opcodes of the
** two instructions referenced by its operands form a 24 bit key
** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
**
** This key is used for partial matching against the fold rules. The
** left/right operand fields of the key are successively masked with
** the 'any' wildcard, from most specific to least specific:
**
**   ins left right
**   ins any right
**   ins left any
**   ins any any
**
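** For example, an ADD with two KINT constant operands is tried as
** 'ADD KINT KINT' first, then 'ADD any KINT', 'ADD KINT any' and
** finally 'ADD any any'.
**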
** The masked key is used to look up a matching fold rule in a semi-perfect
** hash table. If a matching rule is found, the related fold function is run.
** Multiple rules can share the same fold function. A fold rule may return
** one of several special values:
**
** - NEXTFOLD means no folding was applied, because an additional test
**   inside the fold function failed. Matching continues against less
**   specific fold rules. Finally the instruction is passed on to CSE.
**
** - RETRYFOLD means the instruction was modified in-place. Folding is
**   retried as if this instruction had just been received.
**
** All other return values are terminal actions -- no further folding is
** applied:
**
** - INTFOLD(i) returns a reference to the integer constant i.
**
** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
**   without emitting an instruction.
**
** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
**   it without passing through any further optimizations.
**
** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
**   no result (e.g. guarded assertions): FAILFOLD means the guard would
**   always fail, i.e. the current trace is pointless. DROPFOLD means
**   the guard is always true and has been eliminated. CONDFOLD is a
**   shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
**
** - Any other return value is interpreted as an IRRef or TRef. This
**   can be a reference to an existing or a newly created instruction.
**   Only the least-significant 16 bits (IRRef1) are used to form a TRef
**   which is finally returned to the caller.
**
** The FOLD engine receives instructions both from the trace recorder and
** substituted instructions from LOOP unrolling. This means all types
** of instructions may end up here, even though the recorder bypasses
** FOLD in some cases. Thus all loads, stores and allocations must have
** an any/any rule to avoid being passed on to CSE.
**
** Carefully read the following requirements before adding or modifying
** any fold rules:
**
** Requirement #1: All fold rules must preserve their destination type.
**
** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
**
** Requirement #2: Fold rules should not create *new* instructions which
** reference operands *across* PHIs.
**
** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
** left operand is a PHI. Then fleft->op1 would point across the PHI
** frontier to an invariant instruction. Adding a PHI for this instruction
** would be counterproductive. The solution is to add a barrier which
** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
** The only exception is for recurrences with high latencies like
** repeated int->num->int conversions.
**
** One could relax this condition a bit if the referenced instruction is
** a PHI, too. But this often leads to worse code due to excessive
** register shuffling.
**
** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
** Even returning fleft->op1 would be ok, because a new PHI will be added,
** if needed. But again, this leads to excessive register shuffling and
** should be avoided.
**
** Requirement #3: The set of all fold rules must be monotonic to guarantee
** termination.
**
** The goal is optimization, so one primarily wants to add strength-reducing
** rules. This means eliminating an instruction or replacing an instruction
** with one or more simpler instructions. Don't add fold rules which point
** in the other direction.
**
** Some rules (like commutativity) do not directly reduce the strength of
** an instruction, but enable other fold rules (e.g. by moving constants
** to the right operand). These rules must be made unidirectional to avoid
** cycles.
**
** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
*/
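
/* To make the conventions above concrete, a minimal fold rule would pair an
** LJFOLD spec with an LJFOLDF handler, as sketched below. The rule and its
** name are hypothetical and only for illustration (it is kept inside this
** comment, so buildvm does not pick it up); it would fold i & i ==> i by
** returning the left operand and pass everything else on to less specific
** rules:
**
**   LJFOLD(BAND any any)
**   LJFOLDF(example_band_dup)
**   {
**     if (fins->op1 == fins->op2)
**       return LEFTFOLD;
**     return NEXTFOLD;
**   }
*/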

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
#define fins (&J->fold.ins)
#define fleft (J->fold.left)
#define fright (J->fold.right)
#define knumleft (ir_knum(fleft)->n)
#define knumright (ir_knum(fright)->n)

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Fold function type. Fastcall on x86 significantly reduces their size. */
typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);

/* Macros for the fold specs, so buildvm can recognize them. */
#define LJFOLD(x)
#define LJFOLDX(x)
#define LJFOLDF(name) static TRef LJ_FASTCALL fold_##name(jit_State *J)
/* Note: They must be at the start of a line or buildvm ignores them! */

/* Barrier to prevent using operands across PHIs. */
#define PHIBARRIER(ir) if (irt_isphi((ir)->t)) return NEXTFOLD

/* Barrier to prevent folding across a GC step.
** GC steps can only happen at the head of a trace and at LOOP.
** And the GC is only driven forward if there's at least one allocation.
*/
#define gcstep_barrier(J, ref) \
  ((ref) < J->chain[IR_LOOP] && \
   (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
    J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
    J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
    J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA]))

/* -- Constant folding for FP numbers ------------------------------------- */

LJFOLD(ADD KNUM KNUM)
LJFOLD(SUB KNUM KNUM)
LJFOLD(MUL KNUM KNUM)
LJFOLD(DIV KNUM KNUM)
LJFOLD(LDEXP KNUM KNUM)
LJFOLD(MIN KNUM KNUM)
LJFOLD(MAX KNUM KNUM)
LJFOLDF(kfold_numarith)
{
  lua_Number a = knumleft;
  lua_Number b = knumright;
  lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(NEG KNUM FLOAD)
LJFOLD(ABS KNUM FLOAD)
LJFOLDF(kfold_numabsneg)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(LDEXP KNUM KINT)
LJFOLDF(kfold_ldexp)
{
#if LJ_TARGET_X86ORX64
  UNUSED(J);
  return NEXTFOLD;
#else
  return lj_ir_knum(J, ldexp(knumleft, fright->i));
#endif
}

LJFOLD(FPMATH KNUM any)
LJFOLDF(kfold_fpmath)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldfpm(a, fins->op2);
  return lj_ir_knum(J, y);
}

LJFOLD(CALLN KNUM any)
LJFOLDF(kfold_fpcall1)
{
  const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
  if (CCI_TYPE(ci) == IRT_NUM) {
    double y = ((double (*)(double))ci->func)(knumleft);
    return lj_ir_knum(J, y);
  }
  return NEXTFOLD;
}

LJFOLD(CALLN CARG IRCALL_atan2)
LJFOLDF(kfold_fpcall2)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
    double a = ir_knum(IR(fleft->op1))->n;
    double b = ir_knum(IR(fleft->op2))->n;
    double y = ((double (*)(double, double))ci->func)(a, b);
    return lj_ir_knum(J, y);
  }
  return NEXTFOLD;
}

LJFOLD(POW KNUM KINT)
LJFOLD(POW KNUM KNUM)
LJFOLDF(kfold_numpow)
{
  lua_Number a = knumleft;
  lua_Number b = fright->o == IR_KINT ? (lua_Number)fright->i : knumright;
  lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
  return lj_ir_knum(J, y);
}

/* Must not use kfold_kref for numbers (could be NaN). */
LJFOLD(EQ KNUM KNUM)
LJFOLD(NE KNUM KNUM)
LJFOLD(LT KNUM KNUM)
LJFOLD(GE KNUM KNUM)
LJFOLD(LE KNUM KNUM)
LJFOLD(GT KNUM KNUM)
LJFOLD(ULT KNUM KNUM)
LJFOLD(UGE KNUM KNUM)
LJFOLD(ULE KNUM KNUM)
LJFOLD(UGT KNUM KNUM)
LJFOLDF(kfold_numcomp)
{
  return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
}

/* -- Constant folding for 32 bit integers -------------------------------- */

static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
{
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_MUL: k1 *= k2; break;
  case IR_MOD: k1 = lj_vm_modi(k1, k2); break;
  case IR_NEG: k1 = -k1; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 31); break;
  case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
  case IR_BSAR: k1 >>= (k2 & 31); break;
  case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
  case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
  case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
  case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
  default: lj_assertX(0, "bad IR op %d", op); break;
  }
  return k1;
}

LJFOLD(ADD KINT KINT)
LJFOLD(SUB KINT KINT)
LJFOLD(MUL KINT KINT)
LJFOLD(MOD KINT KINT)
LJFOLD(NEG KINT KINT)
LJFOLD(BAND KINT KINT)
LJFOLD(BOR KINT KINT)
LJFOLD(BXOR KINT KINT)
LJFOLD(BSHL KINT KINT)
LJFOLD(BSHR KINT KINT)
LJFOLD(BSAR KINT KINT)
LJFOLD(BROL KINT KINT)
LJFOLD(BROR KINT KINT)
LJFOLD(MIN KINT KINT)
LJFOLD(MAX KINT KINT)
LJFOLDF(kfold_intarith)
{
  return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
}

LJFOLD(ADDOV KINT KINT)
LJFOLD(SUBOV KINT KINT)
LJFOLD(MULOV KINT KINT)
LJFOLDF(kfold_intovarith)
{
  lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i,
                                 fins->o - IR_ADDOV);
  int32_t k = lj_num2int(n);
  if (n != (lua_Number)k)
    return FAILFOLD;
  return INTFOLD(k);
}

LJFOLD(BNOT KINT)
LJFOLDF(kfold_bnot)
{
  return INTFOLD(~fleft->i);
}

LJFOLD(BSWAP KINT)
LJFOLDF(kfold_bswap)
{
  return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
}

LJFOLD(LT KINT KINT)
LJFOLD(GE KINT KINT)
LJFOLD(LE KINT KINT)
LJFOLD(GT KINT KINT)
LJFOLD(ULT KINT KINT)
LJFOLD(UGE KINT KINT)
LJFOLD(ULE KINT KINT)
LJFOLD(UGT KINT KINT)
LJFOLD(ABC KINT KINT)
LJFOLDF(kfold_intcomp)
{
  int32_t a = fleft->i, b = fright->i;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
  case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
  case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
  case IR_ABC:
  case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
  default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
  }
}

LJFOLD(UGE any KINT)
LJFOLDF(kfold_intcomp0)
{
  if (fright->i == 0)
    return DROPFOLD;
  return NEXTFOLD;
}

/* -- Constant folding for 64 bit integers -------------------------------- */

static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
                                 IROp op)
{
  UNUSED(J);
#if LJ_HASFFI
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_MUL: k1 *= k2; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 63); break;
  case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break;
  case IR_BSAR: k1 >>= (k2 & 63); break;
  case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break;
  case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break;
  default: lj_assertJ(0, "bad IR op %d", op); break;
  }
#else
  UNUSED(k2); UNUSED(op);
  lj_assertJ(0, "FFI IR op without FFI");
#endif
  return k1;
}

LJFOLD(ADD KINT64 KINT64)
LJFOLD(SUB KINT64 KINT64)
LJFOLD(MUL KINT64 KINT64)
LJFOLD(BAND KINT64 KINT64)
LJFOLD(BOR KINT64 KINT64)
LJFOLD(BXOR KINT64 KINT64)
LJFOLDF(kfold_int64arith)
{
  return INT64FOLD(kfold_int64arith(J, ir_k64(fleft)->u64,
                                    ir_k64(fright)->u64, (IROp)fins->o));
}

LJFOLD(DIV KINT64 KINT64)
LJFOLD(MOD KINT64 KINT64)
LJFOLD(POW KINT64 KINT64)
LJFOLDF(kfold_int64arith2)
{
#if LJ_HASFFI
  uint64_t k1 = ir_k64(fleft)->u64, k2 = ir_k64(fright)->u64;
  if (irt_isi64(fins->t)) {
    k1 = fins->o == IR_DIV ? lj_carith_divi64((int64_t)k1, (int64_t)k2) :
         fins->o == IR_MOD ? lj_carith_modi64((int64_t)k1, (int64_t)k2) :
                             lj_carith_powi64((int64_t)k1, (int64_t)k2);
  } else {
    k1 = fins->o == IR_DIV ? lj_carith_divu64(k1, k2) :
         fins->o == IR_MOD ? lj_carith_modu64(k1, k2) :
                             lj_carith_powu64(k1, k2);
  }
  return INT64FOLD(k1);
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(BSHL KINT64 KINT)
LJFOLD(BSHR KINT64 KINT)
LJFOLD(BSAR KINT64 KINT)
LJFOLD(BROL KINT64 KINT)
LJFOLD(BROR KINT64 KINT)
LJFOLDF(kfold_int64shift)
{
#if LJ_HASFFI
  uint64_t k = ir_k64(fleft)->u64;
  int32_t sh = (fright->i & 63);
  return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(BNOT KINT64)
LJFOLDF(kfold_bnot64)
{
#if LJ_HASFFI
  return INT64FOLD(~ir_k64(fleft)->u64);
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(BSWAP KINT64)
LJFOLDF(kfold_bswap64)
{
#if LJ_HASFFI
  return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(LT KINT64 KINT64)
LJFOLD(GE KINT64 KINT64)
LJFOLD(LE KINT64 KINT64)
LJFOLD(GT KINT64 KINT64)
LJFOLD(ULT KINT64 KINT64)
LJFOLD(UGE KINT64 KINT64)
LJFOLD(ULE KINT64 KINT64)
LJFOLD(UGT KINT64 KINT64)
LJFOLDF(kfold_int64comp)
{
#if LJ_HASFFI
  uint64_t a = ir_k64(fleft)->u64, b = ir_k64(fright)->u64;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD((int64_t)a < (int64_t)b);
  case IR_GE: return CONDFOLD((int64_t)a >= (int64_t)b);
  case IR_LE: return CONDFOLD((int64_t)a <= (int64_t)b);
  case IR_GT: return CONDFOLD((int64_t)a > (int64_t)b);
  case IR_ULT: return CONDFOLD(a < b);
  case IR_UGE: return CONDFOLD(a >= b);
  case IR_ULE: return CONDFOLD(a <= b);
  case IR_UGT: return CONDFOLD(a > b);
  default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
  }
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(UGE any KINT64)
LJFOLDF(kfold_int64comp0)
{
#if LJ_HASFFI
  if (ir_k64(fright)->u64 == 0)
    return DROPFOLD;
  return NEXTFOLD;
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

/* -- Constant folding for strings ---------------------------------------- */

LJFOLD(SNEW KKPTR KINT)
LJFOLDF(kfold_snew_kptr)
{
  GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
  return lj_ir_kstr(J, s);
}

LJFOLD(SNEW any KINT)
LJFOLD(XSNEW any KINT)
LJFOLDF(kfold_snew_empty)
{
  if (fright->i == 0)
    return lj_ir_kstr(J, &J2G(J)->strempty);
  return NEXTFOLD;
}

LJFOLD(STRREF KGC KINT)
LJFOLDF(kfold_strref)
{
  GCstr *str = ir_kstr(fleft);
  lj_assertJ((MSize)fright->i <= str->len, "bad string ref");
  return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
}

LJFOLD(STRREF SNEW any)
LJFOLDF(kfold_strref_snew)
{
  PHIBARRIER(fleft);
  if (irref_isk(fins->op2) && fright->i == 0) {
    return fleft->op1;  /* strref(snew(ptr, len), 0) ==> ptr */
  } else {
    /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
    IRIns *ir = IR(fleft->op1);
    if (ir->o == IR_STRREF) {
      IRRef1 str = ir->op1;  /* IRIns * is not valid across emitir. */
      PHIBARRIER(ir);
      fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2);  /* Clobbers fins! */
      fins->op1 = str;
      fins->ot = IRT(IR_STRREF, IRT_PGC);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
LJFOLDF(kfold_strcmp)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    GCstr *a = ir_kstr(IR(fleft->op1));
    GCstr *b = ir_kstr(IR(fleft->op2));
    return INTFOLD(lj_str_cmp(a, b));
  }
  return NEXTFOLD;
}

/* -- Constant folding and forwarding for buffers ------------------------- */

/*
** Buffer ops perform stores, but their effect is limited to the buffer
** itself. Also, buffer ops are chained: a use of an op implies a use of
** all other ops up the chain. Conversely, if an op is unused, all ops
** up the chain can go unused. This largely eliminates the need to treat
** them as stores.
**
** Alas, treating them as normal (IRM_N) ops doesn't work, because they
** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
** or if FOLD is disabled.
**
** The compromise is to declare them as loads, emit them like stores and
** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
** fragments left over from CSE are eliminated by DCE.
**
** The string buffer methods emit a USE instead of a BUFSTR to keep the
** chain alive.
*/
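
/* As an illustration (the exact IR depends on the program being recorded),
** recording a concatenation like 'local s = "x" .. a .. "y"' yields a chain
** of the form
**
**   BUFHDR -> BUFPUT -> BUFPUT -> BUFPUT -> BUFSTR
**
** where each op references the previous one via op1 and the final BUFSTR
** references the BUFHDR via op2, which is what the rules below rely on.
*/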

LJFOLD(BUFHDR any any)
LJFOLDF(bufhdr_merge)
{
  return fins->op2 == IRBUFHDR_WRITE ? CSEFOLD : EMITFOLD;
}

LJFOLD(BUFPUT any BUFSTR)
LJFOLDF(bufput_bufstr)
{
  if ((J->flags & JIT_F_OPT_FWD)) {
    IRRef hdr = fright->op2;
    /* New buffer, no other buffer op in between and same buffer? */
    if (fleft->o == IR_BUFHDR && fleft->op2 == IRBUFHDR_RESET &&
        fleft->prev == hdr &&
        fleft->op1 == IR(hdr)->op1) {
      IRRef ref = fins->op1;
      IR(ref)->op2 = IRBUFHDR_APPEND;  /* Modify BUFHDR. */
      IR(ref)->op1 = fright->op1;
      return ref;
    }
    /* Replay puts to global temporary buffer. */
    if (IR(hdr)->op2 == IRBUFHDR_RESET) {
      IRIns *ir = IR(fright->op1);
      /* For now only handle single string.reverse .lower .upper .rep. */
      if (ir->o == IR_CALLL &&
          ir->op2 >= IRCALL_lj_buf_putstr_reverse &&
          ir->op2 <= IRCALL_lj_buf_putstr_rep) {
        IRIns *carg1 = IR(ir->op1);
        if (ir->op2 == IRCALL_lj_buf_putstr_rep) {
          IRIns *carg2 = IR(carg1->op1);
          if (carg2->op1 == hdr) {
            return lj_ir_call(J, ir->op2, fins->op1, carg2->op2, carg1->op2);
          }
        } else if (carg1->op1 == hdr) {
          return lj_ir_call(J, ir->op2, fins->op1, carg1->op2);
        }
      }
    }
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(BUFPUT any any)
LJFOLDF(bufput_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) {
    GCstr *s2 = ir_kstr(fright);
    if (s2->len == 0) {  /* Empty string? */
      return LEFTFOLD;
    } else {
      if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) &&
          !irt_isphi(fleft->t)) {  /* Join two constant string puts in a row. */
        GCstr *s1 = ir_kstr(IR(fleft->op2));
        IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2));
        /* lj_ir_kstr() may realloc the IR and invalidate any IRIns *. */
        IR(fins->op1)->op2 = kref;  /* Modify previous BUFPUT. */
        return fins->op1;
      }
    }
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(BUFSTR any any)
LJFOLDF(bufstr_kfold_cse)
{
  lj_assertJ(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
             fleft->o == IR_CALLL,
             "bad buffer constructor IR op %d", fleft->o);
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    if (fleft->o == IR_BUFHDR) {  /* No put operations? */
      if (fleft->op2 == IRBUFHDR_RESET)  /* Empty buffer? */
        return lj_ir_kstr(J, &J2G(J)->strempty);
      fins->op1 = fleft->op1;
      fins->op2 = fleft->prev;  /* Relies on checks in bufput_append. */
      return CSEFOLD;
    } else if (fleft->o == IR_BUFPUT) {
      IRIns *irb = IR(fleft->op1);
      if (irb->o == IR_BUFHDR && irb->op2 == IRBUFHDR_RESET)
        return fleft->op2;  /* Shortcut for a single put operation. */
    }
  }
  /* Try to CSE the whole chain. */
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[IR_BUFSTR];
    while (ref) {
      IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
      while (ira->o == irb->o && ira->op2 == irb->op2) {
        lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
                   ira->o == IR_CALLL || ira->o == IR_CARG,
                   "bad buffer constructor IR op %d", ira->o);
        if (ira->o == IR_BUFHDR && ira->op2 == IRBUFHDR_RESET)
          return ref;  /* CSE succeeded. */
        if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
          break;
        ira = IR(ira->op1);
        irb = IR(irb->op1);
      }
      ref = irs->prev;
    }
  }
  return EMITFOLD;  /* No CSE possible. */
}

LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted)
LJFOLDF(bufput_kfold_op)
{
  if (irref_isk(fleft->op2)) {
    const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
    SBuf *sb = lj_buf_tmp_(J->L);
    sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb,
                                                 ir_kstr(IR(fleft->op2)));
    fins->o = IR_BUFPUT;
    fins->op1 = fleft->op1;
    fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
    return RETRYFOLD;
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep)
LJFOLDF(bufput_kfold_rep)
{
  if (irref_isk(fleft->op2)) {
    IRIns *irc = IR(fleft->op1);
    if (irref_isk(irc->op2)) {
      SBuf *sb = lj_buf_tmp_(J->L);
      sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i);
      fins->o = IR_BUFPUT;
      fins->op1 = irc->op1;
      fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
      return RETRYFOLD;
    }
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
LJFOLDF(bufput_kfold_fmt)
{
  IRIns *irc = IR(fleft->op1);
  lj_assertJ(irref_isk(irc->op2), "SFormat must be const");
  if (irref_isk(fleft->op2)) {
    SFormat sf = (SFormat)IR(irc->op2)->i;
    IRIns *ira = IR(fleft->op2);
    SBuf *sb = lj_buf_tmp_(J->L);
    switch (fins->op2) {
    case IRCALL_lj_strfmt_putfxint:
      sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64);
      break;
    case IRCALL_lj_strfmt_putfstr:
      sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira));
      break;
    case IRCALL_lj_strfmt_putfchar:
      sb = lj_strfmt_putfchar(sb, sf, ira->i);
      break;
    case IRCALL_lj_strfmt_putfnum_int:
    case IRCALL_lj_strfmt_putfnum_uint:
    case IRCALL_lj_strfmt_putfnum:
    default: {
      const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
      sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf,
                                                       ir_knum(ira)->n);
      break;
    }
    }
    fins->o = IR_BUFPUT;
    fins->op1 = irc->op1;
    fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
    return RETRYFOLD;
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

/* -- Constant folding of pointer arithmetic ------------------------------ */

LJFOLD(ADD KGC KINT)
LJFOLD(ADD KGC KINT64)
LJFOLDF(kfold_add_kgc)
{
  GCobj *o = ir_kgc(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
#if LJ_HASFFI
  if (irt_iscdata(fleft->t)) {
    CType *ct = ctype_raw(ctype_ctsG(J2G(J)), gco2cd(o)->ctypeid);
    if (ctype_isnum(ct->info) || ctype_isenum(ct->info) ||
        ctype_isptr(ct->info) || ctype_isfunc(ct->info) ||
        ctype_iscomplex(ct->info) || ctype_isvector(ct->info))
      return lj_ir_kkptr(J, (char *)o + ofs);
  }
#endif
  return lj_ir_kptr(J, (char *)o + ofs);
}

LJFOLD(ADD KPTR KINT)
LJFOLD(ADD KPTR KINT64)
LJFOLD(ADD KKPTR KINT)
LJFOLD(ADD KKPTR KINT64)
LJFOLDF(kfold_add_kptr)
{
  void *p = ir_kptr(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
  return lj_ir_kptr_(J, fleft->o, (char *)p + ofs);
}

LJFOLD(ADD any KGC)
LJFOLD(ADD any KPTR)
LJFOLD(ADD any KKPTR)
LJFOLDF(kfold_add_kright)
{
  if (fleft->o == IR_KINT || fleft->o == IR_KINT64) {
    IRRef1 tmp = fins->op1; fins->op1 = fins->op2; fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- Constant folding of conversions ------------------------------------- */

LJFOLD(TOBIT KNUM KNUM)
LJFOLDF(kfold_tobit)
{
  return INTFOLD(lj_num2bit(knumleft));
}

LJFOLD(CONV KINT IRCONV_NUM_INT)
LJFOLDF(kfold_conv_kint_num)
{
  return lj_ir_knum(J, (lua_Number)fleft->i);
}

LJFOLD(CONV KINT IRCONV_NUM_U32)
LJFOLDF(kfold_conv_kintu32_num)
{
  return lj_ir_knum(J, (lua_Number)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT IRCONV_INT_I8)
LJFOLD(CONV KINT IRCONV_INT_U8)
LJFOLD(CONV KINT IRCONV_INT_I16)
LJFOLD(CONV KINT IRCONV_INT_U16)
LJFOLDF(kfold_conv_kint_ext)
{
  int32_t k = fleft->i;
  if ((fins->op2 & IRCONV_SRCMASK) == IRT_I8) k = (int8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_U8) k = (uint8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_I16) k = (int16_t)k;
  else k = (uint16_t)k;
  return INTFOLD(k);
}

LJFOLD(CONV KINT IRCONV_I64_INT)
LJFOLD(CONV KINT IRCONV_U64_INT)
LJFOLD(CONV KINT IRCONV_I64_U32)
LJFOLD(CONV KINT IRCONV_U64_U32)
LJFOLDF(kfold_conv_kint_i64)
{
  if ((fins->op2 & IRCONV_SEXT))
    return INT64FOLD((uint64_t)(int64_t)fleft->i);
  else
    return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT64 IRCONV_NUM_I64)
LJFOLDF(kfold_conv_kint64_num_i64)
{
  return lj_ir_knum(J, (lua_Number)(int64_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_NUM_U64)
LJFOLDF(kfold_conv_kint64_num_u64)
{
  return lj_ir_knum(J, (lua_Number)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_INT_I64)
LJFOLD(CONV KINT64 IRCONV_U32_I64)
LJFOLDF(kfold_conv_kint64_int_i64)
{
  return INTFOLD((int32_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KNUM IRCONV_INT_NUM)
LJFOLDF(kfold_conv_knum_int_num)
{
  lua_Number n = knumleft;
  int32_t k = lj_num2int(n);
  if (irt_isguard(fins->t) && n != (lua_Number)k) {
    /* We're about to create a guard which always fails, like CONV +1.5.
    ** Some pathological loops cause this during LICM, e.g.:
    **   local x,k,t = 0,1.5,{1,[1.5]=2}
    **   for i=1,200 do x = x + t[k]; k = k == 1 and 1.5 or 1 end
    **   assert(x == 300)
    */
    return FAILFOLD;
  }
  return INTFOLD(k);
}

LJFOLD(CONV KNUM IRCONV_U32_NUM)
LJFOLDF(kfold_conv_knum_u32_num)
{
#ifdef _MSC_VER
  {  /* Workaround for MSVC bug. */
    volatile uint32_t u = (uint32_t)knumleft;
    return INTFOLD((int32_t)u);
  }
#else
  return INTFOLD((int32_t)(uint32_t)knumleft);
#endif
}

LJFOLD(CONV KNUM IRCONV_I64_NUM)
LJFOLDF(kfold_conv_knum_i64_num)
{
  return INT64FOLD((uint64_t)(int64_t)knumleft);
}

LJFOLD(CONV KNUM IRCONV_U64_NUM)
LJFOLDF(kfold_conv_knum_u64_num)
{
  return INT64FOLD(lj_num2u64(knumleft));
}

LJFOLD(TOSTR KNUM any)
LJFOLDF(kfold_tostr_knum)
{
  return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft)));
}

LJFOLD(TOSTR KINT any)
LJFOLDF(kfold_tostr_kint)
{
  return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ?
                       lj_strfmt_int(J->L, fleft->i) :
                       lj_strfmt_char(J->L, fleft->i));
}

LJFOLD(STRTO KGC)
LJFOLDF(kfold_strto)
{
  TValue n;
  if (lj_strscan_num(ir_kstr(fleft), &n))
    return lj_ir_knum(J, numV(&n));
  return FAILFOLD;
}

/* -- Constant folding of equality checks --------------------------------- */

/* Don't constant-fold away FLOAD checks against KNULL. */
LJFOLD(EQ FLOAD KNULL)
LJFOLD(NE FLOAD KNULL)
LJFOLDX(lj_opt_cse)

/* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
LJFOLD(EQ any KNULL)
LJFOLD(NE any KNULL)
LJFOLD(EQ KNULL any)
LJFOLD(NE KNULL any)
LJFOLD(EQ KINT KINT)  /* Constants are unique, so same refs <==> same value. */
LJFOLD(NE KINT KINT)
LJFOLD(EQ KINT64 KINT64)
LJFOLD(NE KINT64 KINT64)
LJFOLD(EQ KGC KGC)
LJFOLD(NE KGC KGC)
LJFOLDF(kfold_kref)
{
  return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
}

/* -- Algebraic shortcuts ------------------------------------------------- */

LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
LJFOLD(FPMATH FPMATH IRFPM_CEIL)
LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
LJFOLDF(shortcut_round)
{
  IRFPMathOp op = (IRFPMathOp)fleft->op2;
  if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
    return LEFTFOLD;  /* round(round_left(x)) = round_left(x) */
  return NEXTFOLD;
}

LJFOLD(ABS ABS FLOAD)
LJFOLDF(shortcut_left)
{
  return LEFTFOLD;  /* f(g(x)) ==> g(x) */
}

LJFOLD(ABS NEG FLOAD)
LJFOLDF(shortcut_dropleft)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* abs(neg(x)) ==> abs(x) */
  return RETRYFOLD;
}

/* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
LJFOLD(NEG NEG any)
LJFOLD(BNOT BNOT)
LJFOLD(BSWAP BSWAP)
LJFOLDF(shortcut_leftleft)
{
  PHIBARRIER(fleft);  /* See above. Fold would be ok, but not beneficial. */
  return fleft->op1;  /* f(g(x)) ==> x */
}

/* -- FP algebraic simplifications ---------------------------------------- */

/* FP arithmetic is tricky -- there's not much to simplify.
** Please note the following common pitfalls before sending "improvements":
**   x+0 ==> x  is INVALID for x=-0
**   0-x ==> -x is INVALID for x=+0
**   x*0 ==> 0  is INVALID for x=-0, x=+-Inf or x=NaN
*/
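
/* A concrete case for the first pitfall: IEEE 754 arithmetic gives
** (-0)+0 == +0, so folding x+0 ==> x would wrongly yield -0 instead of +0
** for x=-0.
*/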

LJFOLD(ADD NEG any)
LJFOLDF(simplify_numadd_negx)
{
  PHIBARRIER(fleft);
  fins->o = IR_SUB;  /* (-a) + b ==> b - a */
  fins->op1 = fins->op2;
  fins->op2 = fleft->op1;
  return RETRYFOLD;
}

LJFOLD(ADD any NEG)
LJFOLDF(simplify_numadd_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_SUB;  /* a + (-b) ==> a - b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(SUB any KNUM)
LJFOLDF(simplify_numsub_k)
{
  if (ir_knum(fright)->u64 == 0)  /* x - (+0) ==> x */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB NEG KNUM)
LJFOLDF(simplify_numsub_negk)
{
  PHIBARRIER(fleft);
  fins->op2 = fleft->op1;  /* (-x) - k ==> (-k) - x */
  fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(SUB any NEG)
LJFOLDF(simplify_numsub_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_ADD;  /* a - (-b) ==> a + b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(MUL any KNUM)
LJFOLD(DIV any KNUM)
LJFOLDF(simplify_nummuldiv_k)
{
  lua_Number n = knumright;
  if (n == 1.0) {  /* x o 1 ==> x */
    return LEFTFOLD;
  } else if (n == -1.0) {  /* x o -1 ==> -x */
    IRRef op1 = fins->op1;
    fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG);  /* Modifies fins. */
    fins->op1 = op1;
    fins->o = IR_NEG;
    return RETRYFOLD;
  } else if (fins->o == IR_MUL && n == 2.0) {  /* x * 2 ==> x + x */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  } else if (fins->o == IR_DIV) {  /* x / 2^k ==> x * 2^-k */
    uint64_t u = ir_knum(fright)->u64;
    uint32_t ex = ((uint32_t)(u >> 52) & 0x7ff);
    if ((u & U64x(000fffff,ffffffff)) == 0 && ex - 1 < 0x7fd) {
      u = (u & ((uint64_t)1 << 63)) | ((uint64_t)(0x7fe - ex) << 52);
      fins->o = IR_MUL;  /* Multiply by exact reciprocal. */
      fins->op2 = lj_ir_knum_u64(J, u);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}
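
/* A worked instance of the 2^k case above: for x/4.0 the biased exponent of
** 4.0 is ex = 0x401 with a zero mantissa, so the constant is rebuilt with
** exponent 0x7fe - 0x401 = 0x3fd, which encodes exactly 0.25. The fold thus
** turns x/4.0 into x*0.25 without any rounding error.
*/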

LJFOLD(MUL NEG KNUM)
LJFOLD(DIV NEG KNUM)
LJFOLDF(simplify_nummuldiv_negk)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* (-a) o k ==> a o (-k) */
  fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(MUL NEG NEG)
LJFOLD(DIV NEG NEG)
LJFOLDF(simplify_nummuldiv_negneg)
{
  PHIBARRIER(fleft);
  PHIBARRIER(fright);
  fins->op1 = fleft->op1;  /* (-a) o (-b) ==> a o b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(POW any KINT)
LJFOLDF(simplify_numpow_xkint)
{
  int32_t k = fright->i;
  TRef ref = fins->op1;
  if (k == 0)  /* x ^ 0 ==> 1 */
    return lj_ir_knum_one(J);  /* Result must be a number, not an int. */
  if (k == 1)  /* x ^ 1 ==> x */
    return LEFTFOLD;
  if ((uint32_t)(k+65536) > 2*65536u)  /* Limit code explosion. */
    return NEXTFOLD;
  if (k < 0) {  /* x ^ (-k) ==> (1/x) ^ k. */
    ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
    k = -k;
  }
  /* Unroll x^k for 1 <= k <= 65536. */
  for (; (k & 1) == 0; k >>= 1)  /* Handle leading zeros. */
    ref = emitir(IRTN(IR_MUL), ref, ref);
  if ((k >>= 1) != 0) {  /* Handle trailing bits. */
    TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
    for (; k != 1; k >>= 1) {
      if (k & 1)
        ref = emitir(IRTN(IR_MUL), ref, tmp);
      tmp = emitir(IRTN(IR_MUL), tmp, tmp);
    }
    ref = emitir(IRTN(IR_MUL), ref, tmp);
  }
  return ref;
}
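
/* To see the square-and-multiply unrolling at work: for k = 5 the code above
** emits tmp = x*x, then tmp = tmp*tmp, then ref = x*tmp, i.e. x^5 is
** computed with three multiplies.
*/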

LJFOLD(POW any KNUM)
LJFOLDF(simplify_numpow_xknum)
{
  if (knumright == 0.5)  /* x ^ 0.5 ==> sqrt(x) */
    return emitir(IRTN(IR_FPMATH), fins->op1, IRFPM_SQRT);
  return NEXTFOLD;
}

LJFOLD(POW KNUM any)
LJFOLDF(simplify_numpow_kx)
{
  lua_Number n = knumleft;
  if (n == 2.0 && irt_isint(fright->t)) {  /* 2.0 ^ i ==> ldexp(1.0, i) */
#if LJ_TARGET_X86ORX64
    /* Different IR_LDEXP calling convention on x86/x64 requires conversion. */
    fins->o = IR_CONV;
    fins->op1 = fins->op2;
    fins->op2 = IRCONV_NUM_INT;
    fins->op2 = (IRRef1)lj_opt_fold(J);
#endif
    fins->op1 = (IRRef1)lj_ir_knum_one(J);
    fins->o = IR_LDEXP;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- Simplify conversions ------------------------------------------------ */

LJFOLD(CONV CONV IRCONV_NUM_INT)  /* _NUM */
LJFOLDF(shortcut_conv_num_int)
{
  PHIBARRIER(fleft);
  /* Only safe with a guarded conversion to int. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t))
    return fleft->op1;  /* f(g(x)) ==> x */
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_NUM)  /* _INT */
LJFOLD(CONV CONV IRCONV_U32_NUM)  /* _U32 */
LJFOLDF(simplify_conv_int_num)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) ==
      ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH))
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_I64_NUM)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U64_NUM)  /* _INT or _U32 */
LJFOLDF(simplify_conv_i64_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    /* Reduce to a sign-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
    return RETRYFOLD;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
#if LJ_TARGET_X64
    return fleft->op1;
#else
    /* Reduce to a zero-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_I64<<5)|IRT_U32;
    return RETRYFOLD;
#endif
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_INT_U64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_U64)  /* _INT or _U32 */
LJFOLDF(simplify_conv_int_i64)
{
  int src;
  PHIBARRIER(fleft);
  src = (fleft->op2 & IRCONV_SRCMASK);
  if (src == IRT_INT || src == IRT_U32) {
    if (src == ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH)) {
      return fleft->op1;
    } else {
      fins->op2 = ((fins->op2 & IRCONV_DSTMASK) | src);
      fins->op1 = fleft->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_FLOAT_NUM)  /* _FLOAT */
LJFOLDF(simplify_conv_flt_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_FLOAT)
    return fleft->op1;
  return NEXTFOLD;
}

/* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(TOBIT CONV KNUM)
LJFOLDF(simplify_tobit_conv)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
    return fleft->op1;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
    lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
    fins->o = IR_CONV;
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_INT<<5)|IRT_U32;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(FPMATH CONV IRFPM_FLOOR)
LJFOLD(FPMATH CONV IRFPM_CEIL)
LJFOLD(FPMATH CONV IRFPM_TRUNC)
LJFOLDF(simplify_floor_conv)
{
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
      (fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
    return LEFTFOLD;
  return NEXTFOLD;
}

/* Strength reduction of widening. */
LJFOLD(CONV any IRCONV_I64_INT)
LJFOLD(CONV any IRCONV_U64_INT)
LJFOLDF(simplify_conv_sext)
{
  IRRef ref = fins->op1;
  int64_t ofs = 0;
  if (!(fins->op2 & IRCONV_SEXT))
    return NEXTFOLD;
  PHIBARRIER(fleft);
  if (fleft->o == IR_XLOAD && (irt_isu8(fleft->t) || irt_isu16(fleft->t)))
    goto ok_reduce;
  if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
    ofs = (int64_t)IR(fleft->op2)->i;
    ref = fleft->op1;
  }
  /* Use scalar evolution analysis results to strength-reduce sign-extension. */
  if (ref == J->scev.idx) {
    IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
    lj_assertJ(irt_isint(J->scev.t), "only int SCEV supported");
    if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) {
    ok_reduce:
#if LJ_TARGET_X64
      /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
      return LEFTFOLD;
#else
      /* Reduce to a (cheaper) zero-extension. */
      fins->op2 &= ~IRCONV_SEXT;
      return RETRYFOLD;
#endif
    }
  }
  return NEXTFOLD;
}

/* Strength reduction of narrowing. */
LJFOLD(CONV ADD IRCONV_INT_I64)
LJFOLD(CONV SUB IRCONV_INT_I64)
LJFOLD(CONV MUL IRCONV_INT_I64)
LJFOLD(CONV ADD IRCONV_INT_U64)
LJFOLD(CONV SUB IRCONV_INT_U64)
LJFOLD(CONV MUL IRCONV_INT_U64)
LJFOLD(CONV ADD IRCONV_U32_I64)
LJFOLD(CONV SUB IRCONV_U32_I64)
LJFOLD(CONV MUL IRCONV_U32_I64)
LJFOLD(CONV ADD IRCONV_U32_U64)
LJFOLD(CONV SUB IRCONV_U32_U64)
LJFOLD(CONV MUL IRCONV_U32_U64)
LJFOLDF(simplify_conv_narrow)
{
#if LJ_64
  UNUSED(J);
  return NEXTFOLD;
#else
  IROp op = (IROp)fleft->o;
  IRType t = irt_type(fins->t);
  IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
  PHIBARRIER(fleft);
  op1 = emitir(IRT(IR_CONV, t), op1, mode);
  op2 = emitir(IRT(IR_CONV, t), op2, mode);
  fins->ot = IRT(op, t);
  fins->op1 = op1;
  fins->op2 = op2;
  return RETRYFOLD;
#endif
}

/* Special CSE rule for CONV. */
LJFOLD(CONV any any)
LJFOLDF(cse_conv)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
    uint8_t guard = irt_isguard(fins->t);
    IRRef ref = J->chain[IR_CONV];
    while (ref > op1) {
      IRIns *ir = IR(ref);
      /* Commoning with stronger checks is ok. */
      if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
          irt_isguard(ir->t) >= guard)
        return ref;
      ref = ir->prev;
    }
  }
  return EMITFOLD;  /* No fallthrough to regular CSE. */
}

/* FP conversion narrowing. */
LJFOLD(TOBIT ADD KNUM)
LJFOLD(TOBIT SUB KNUM)
LJFOLD(CONV ADD IRCONV_INT_NUM)
LJFOLD(CONV SUB IRCONV_INT_NUM)
LJFOLD(CONV ADD IRCONV_I64_NUM)
LJFOLD(CONV SUB IRCONV_I64_NUM)
LJFOLDF(narrow_convert)
{
  PHIBARRIER(fleft);
  /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
  if (J->chain[IR_LOOP])
    return NEXTFOLD;
  lj_assertJ(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT,
             "unexpected CONV TOBIT");
  return lj_opt_narrow_convert(J);
}

/* -- Integer algebraic simplifications ----------------------------------- */

LJFOLD(ADD any KINT)
LJFOLD(ADDOV any KINT)
LJFOLD(SUBOV any KINT)
LJFOLDF(simplify_intadd_k)
{
  if (fright->i == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(MULOV any KINT)
LJFOLDF(simplify_intmul_k)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return RIGHTFOLD;
  if (fright->i == 1)  /* i * 1 ==> i */
    return LEFTFOLD;
  if (fright->i == 2) {  /* i * 2 ==> i + i */
    fins->o = IR_ADDOV;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any KINT)
LJFOLDF(simplify_intsub_k)
{
  if (fright->i == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i);  /* Overflow for -2^31 ok. */
  return RETRYFOLD;
}

LJFOLD(SUB KINT any)
LJFOLD(SUB KINT64 any)
LJFOLDF(simplify_intsub_kleft)
{
  if (fleft->o == IR_KINT ? (fleft->i == 0) : (ir_kint64(fleft)->u64 == 0)) {
    fins->o = IR_NEG;  /* 0 - i ==> -i */
    fins->op1 = fins->op2;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(ADD any KINT64)
LJFOLDF(simplify_intadd_k64)
{
  if (ir_kint64(fright)->u64 == 0)  /* i + 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB any KINT64)
LJFOLDF(simplify_intsub_k64)
{
  uint64_t k = ir_kint64(fright)->u64;
  if (k == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint64(J, (uint64_t)-(int64_t)k);
  return RETRYFOLD;
}

static TRef simplify_intmul_k(jit_State *J, int32_t k)
{
  /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
  ** But this is mainly intended for simple address arithmetic.
  ** Also it's easier for the backend to optimize the original multiplies.
  */
  if (k == 0) {  /* i * 0 ==> 0 */
    return RIGHTFOLD;
  } else if (k == 1) {  /* i * 1 ==> i */
    return LEFTFOLD;
  } else if ((k & (k-1)) == 0) {  /* i * 2^k ==> i << k */
    fins->o = IR_BSHL;
    fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k));
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
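
/* E.g. for k = 8 the power-of-two test (k & (k-1)) == 0 holds and
** lj_fls(8) == 3, so i*8 is rewritten to i<<3.
*/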

LJFOLD(MUL any KINT)
LJFOLDF(simplify_intmul_k32)
{
  if (fright->i >= 0)
    return simplify_intmul_k(J, fright->i);
  return NEXTFOLD;
}

LJFOLD(MUL any KINT64)
LJFOLDF(simplify_intmul_k64)
{
#if LJ_HASFFI
  if (ir_kint64(fright)->u64 < 0x80000000u)
    return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
  return NEXTFOLD;
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(MOD any KINT)
LJFOLDF(simplify_intmod_k)
{
  int32_t k = fright->i;
  lj_assertJ(k != 0, "integer mod 0");
  if (k > 0 && (k & (k-1)) == 0) {  /* i % (2^k) ==> i & (2^k-1) */
    fins->o = IR_BAND;
    fins->op2 = lj_ir_kint(J, k-1);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MOD KINT any)
LJFOLDF(simplify_intmod_kleft)
{
  if (fleft->i == 0)
    return INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB any any)
LJFOLD(SUBOV any any)
LJFOLDF(simplify_intsub)
{
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))  /* i - i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB ADD any)
LJFOLDF(simplify_intsubadd_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1)  /* (i + j) - i ==> j */
      return fleft->op2;
    if (fins->op2 == fleft->op2)  /* (i + j) - j ==> i */
      return fleft->op1;
  }
  return NEXTFOLD;
}

LJFOLD(SUB SUB any)
LJFOLDF(simplify_intsubsub_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1) {  /* (i - j) - i ==> 0 - j */
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      fins->op2 = fleft->op2;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB any SUB)
LJFOLDF(simplify_intsubsub_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1)  /* i - (i - j) ==> j */
      return fright->op2;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any ADD)
LJFOLDF(simplify_intsubadd_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1) {  /* i - (i + j) ==> 0 - j */
      fins->op2 = fright->op2;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
    if (fins->op1 == fright->op2) {  /* i - (j + i) ==> 0 - j */
      fins->op2 = fright->op1;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB ADD ADD)
LJFOLDF(simplify_intsubaddadd_cancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    PHIBARRIER(fright);
    if (fleft->op1 == fright->op1) {  /* (i + j1) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op1 == fright->op2) {  /* (i + j1) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op1) {  /* (j1 + i) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op2) {  /* (j1 + i) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BAND any KINT)
LJFOLD(BAND any KINT64)
LJFOLDF(simplify_band_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
                                     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i & 0 ==> 0 */
    return RIGHTFOLD;
  if (k == -1)  /* i & -1 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BOR any KINT)
LJFOLD(BOR any KINT64)
LJFOLDF(simplify_bor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
                                     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i | 0 ==> i */
    return LEFTFOLD;
  if (k == -1)  /* i | -1 ==> -1 */
    return RIGHTFOLD;
  return NEXTFOLD;
}

LJFOLD(BXOR any KINT)
LJFOLD(BXOR any KINT64)
LJFOLDF(simplify_bxor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
                                     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i xor 0 ==> i */
    return LEFTFOLD;
  if (k == -1) {  /* i xor -1 ==> ~i */
    fins->o = IR_BNOT;
    fins->op2 = 0;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BSHL any KINT)
LJFOLD(BSHR any KINT)
LJFOLD(BSAR any KINT)
LJFOLD(BROL any KINT)
LJFOLD(BROR any KINT)
LJFOLDF(simplify_shift_ik)
{
  int32_t mask = irt_is64(fins->t) ? 63 : 31;
  int32_t k = (fright->i & mask);
  if (k == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  if (k == 1 && fins->o == IR_BSHL) {  /* i << 1 ==> i + i */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  if (k != fright->i) {  /* i o k ==> i o (k & mask) */
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
#ifndef LJ_TARGET_UNIFYROT
  if (fins->o == IR_BROR) {  /* bror(i, k) ==> brol(i, (-k)&mask) */
    fins->o = IR_BROL;
    fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask);
    return RETRYFOLD;
  }
#endif
  return NEXTFOLD;
}

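/* For instance, on a 32 bit type the rotate rewrite above turns bror(i, 5)
** into brol(i, 27), since (-5) & 31 == 27.
*/
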
LJFOLD(BSHL any BAND)
LJFOLD(BSHR any BAND)
LJFOLD(BSAR any BAND)
LJFOLD(BROL any BAND)
LJFOLD(BROR any BAND)
LJFOLDF(simplify_shift_andk)
{
  IRIns *irk = IR(fright->op2);
  PHIBARRIER(fright);
  if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
      irk->o == IR_KINT) {  /* i o (j & mask) ==> i o j */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = irk->i & mask;
    if (k == mask) {
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BSHL KINT any)
LJFOLD(BSHR KINT any)
LJFOLD(BSHL KINT64 any)
LJFOLD(BSHR KINT64 any)
LJFOLDF(simplify_shift1_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
                                    (int64_t)ir_k64(fleft)->u64;
  if (k == 0)  /* 0 o i ==> 0 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSAR KINT any)
LJFOLD(BROL KINT any)
LJFOLD(BROR KINT any)
LJFOLD(BSAR KINT64 any)
LJFOLD(BROL KINT64 any)
LJFOLD(BROR KINT64 any)
LJFOLDF(simplify_shift2_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
                                    (int64_t)ir_k64(fleft)->u64;
  if (k == 0 || k == -1)  /* 0 o i ==> 0; -1 o i ==> -1 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSHL BAND KINT)
LJFOLD(BSHR BAND KINT)
LJFOLD(BROL BAND KINT)
LJFOLD(BROR BAND KINT)
LJFOLDF(simplify_shiftk_andk)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);
  if (irk->o == IR_KINT) {  /* (i & k1) o k2 ==> (i o k2) & (k1 o k2) */
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    fins->op1 = fleft->op1;
    fins->op1 = (IRRef1)lj_opt_fold(J);
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    fins->ot = IRTI(IR_BAND);
    return RETRYFOLD;
  } else if (irk->o == IR_KINT64) {
    uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, fright->i,
                                  (IROp)fins->o);
    IROpT ot = fleft->ot;
    fins->op1 = fleft->op1;
    fins->op1 = (IRRef1)lj_opt_fold(J);
    fins->op2 = (IRRef1)lj_ir_kint64(J, k);
    fins->ot = ot;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BAND BSHL KINT)
LJFOLD(BAND BSHR KINT)
LJFOLDF(simplify_andk_shiftk)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT &&
      kfold_intop(-1, irk->i, (IROp)fleft->o) == fright->i)
    return LEFTFOLD;  /* (i o k1) & k2 ==> i, if (-1 o k1) == k2 */
  return NEXTFOLD;
}

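/* Concretely: (i << 8) & 0xffffff00 folds to i << 8, because
** (-1 << 8) == 0xffffff00 already masks exactly the bits cleared by the
** shift, so the BAND is redundant.
*/
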
1765 LJFOLD(BAND BOR KINT)
LJFOLD(BOR BAND KINT)1766 LJFOLD(BOR BAND KINT)
1767 LJFOLDF(simplify_andor_k)
1768 {
1769 IRIns *irk = IR(fleft->op2);
1770 PHIBARRIER(fleft);
1771 if (irk->o == IR_KINT) {
1772 int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
1773 /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
1774 /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
1775 if (k == (fins->o == IR_BAND ? 0 : -1)) {
1776 fins->op1 = fleft->op1;
1777 return RETRYFOLD;
1778 }
1779 }
1780 return NEXTFOLD;
1781 }
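/* Examples: "(i | 0x30) & 0x0f" becomes "i & 0x0f" since
** (0x30 & 0x0f) == 0, and "(i & 0xfffffff0) | 0x0f" becomes "i | 0x0f"
** since (0xfffffff0 | 0x0f) == -1.
*/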
1782
1783 LJFOLD(BAND BOR KINT64)
1784 LJFOLD(BOR BAND KINT64)
1785 LJFOLDF(simplify_andor_k64)
1786 {
1787 #if LJ_HASFFI
1788 IRIns *irk = IR(fleft->op2);
1789 PHIBARRIER(fleft);
1790 if (irk->o == IR_KINT64) {
1791 uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
1792 (IROp)fins->o);
1793 /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
1794 /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
1795 if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) {
1796 fins->op1 = fleft->op1;
1797 return RETRYFOLD;
1798 }
1799 }
1800 return NEXTFOLD;
1801 #else
1802 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
1803 #endif
1804 }
1805
1806 /* -- Reassociation ------------------------------------------------------- */
1807
1808 LJFOLD(ADD ADD KINT)
1809 LJFOLD(MUL MUL KINT)
1810 LJFOLD(BAND BAND KINT)
1811 LJFOLD(BOR BOR KINT)
1812 LJFOLD(BXOR BXOR KINT)
1813 LJFOLDF(reassoc_intarith_k)
1814 {
1815 IRIns *irk = IR(fleft->op2);
1816 if (irk->o == IR_KINT) {
1817 int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
1818 if (k == irk->i) /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
1819 return LEFTFOLD;
1820 PHIBARRIER(fleft);
1821 fins->op1 = fleft->op1;
1822 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1823 return RETRYFOLD; /* (i o k1) o k2 ==> i o (k1 o k2) */
1824 }
1825 return NEXTFOLD;
1826 }
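/* Examples: "(i + 4) + 8" reassociates to "i + 12". For
** "(i | 0xff) | 0x0f" the combined constant equals k1 (0xff), so the
** left operand is returned as-is and no new instruction is emitted.
*/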
1827
1828 LJFOLD(ADD ADD KINT64)
1829 LJFOLD(MUL MUL KINT64)
1830 LJFOLD(BAND BAND KINT64)
1831 LJFOLD(BOR BOR KINT64)
1832 LJFOLD(BXOR BXOR KINT64)
1833 LJFOLDF(reassoc_intarith_k64)
1834 {
1835 #if LJ_HASFFI
1836 IRIns *irk = IR(fleft->op2);
1837 if (irk->o == IR_KINT64) {
1838 uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
1839 (IROp)fins->o);
1840 PHIBARRIER(fleft);
1841 fins->op1 = fleft->op1;
1842 fins->op2 = (IRRef1)lj_ir_kint64(J, k);
1843 return RETRYFOLD; /* (i o k1) o k2 ==> i o (k1 o k2) */
1844 }
1845 return NEXTFOLD;
1846 #else
1847 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
1848 #endif
1849 }
1850
1851 LJFOLD(BAND BAND any)
1852 LJFOLD(BOR BOR any)
1853 LJFOLDF(reassoc_dup)
1854 {
1855 if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
1856 return LEFTFOLD; /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
1857 return NEXTFOLD;
1858 }
1859
1860 LJFOLD(MIN MIN any)
1861 LJFOLD(MAX MAX any)
1862 LJFOLDF(reassoc_dup_minmax)
1863 {
1864 if (fins->op2 == fleft->op2)
1865 return LEFTFOLD; /* (a o b) o b ==> a o b */
1866 return NEXTFOLD;
1867 }
1868
1869 LJFOLD(BXOR BXOR any)
1870 LJFOLDF(reassoc_bxor)
1871 {
1872 PHIBARRIER(fleft);
1873 if (fins->op2 == fleft->op1) /* (a xor b) xor a ==> b */
1874 return fleft->op2;
1875 if (fins->op2 == fleft->op2) /* (a xor b) xor b ==> a */
1876 return fleft->op1;
1877 return NEXTFOLD;
1878 }
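/* Example: XOR cancellation -- "(a xor b) xor b" folds to the plain
** "a" reference without emitting anything.
*/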
1879
1880 LJFOLD(BSHL BSHL KINT)
1881 LJFOLD(BSHR BSHR KINT)
1882 LJFOLD(BSAR BSAR KINT)
1883 LJFOLD(BROL BROL KINT)
1884 LJFOLD(BROR BROR KINT)
1885 LJFOLDF(reassoc_shift)
1886 {
1887 IRIns *irk = IR(fleft->op2);
1888 PHIBARRIER(fleft); /* The (shift any KINT) rule covers k2 == 0 and more. */
1889 if (irk->o == IR_KINT) { /* (i o k1) o k2 ==> i o (k1 + k2) */
1890 int32_t mask = irt_is64(fins->t) ? 63 : 31;
1891 int32_t k = (irk->i & mask) + (fright->i & mask);
1892 if (k > mask) { /* Combined shift too wide? */
1893 if (fins->o == IR_BSHL || fins->o == IR_BSHR)
1894 return mask == 31 ? INTFOLD(0) : INT64FOLD(0);
1895 else if (fins->o == IR_BSAR)
1896 k = mask;
1897 else
1898 k &= mask;
1899 }
1900 fins->op1 = fleft->op1;
1901 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1902 return RETRYFOLD;
1903 }
1904 return NEXTFOLD;
1905 }
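/* Examples: "(i << 4) << 8" combines to "i << 12". When the summed
** count exceeds the width, logical shifts fold to 0 (e.g.
** "(i >> 16) >> 20"), BSAR saturates at the mask ("i sar 31" keeps
** only the sign), and rotate counts simply wrap around.
*/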
1906
1907 LJFOLD(MIN MIN KINT)
1908 LJFOLD(MAX MAX KINT)
1909 LJFOLDF(reassoc_minmax_k)
1910 {
1911 IRIns *irk = IR(fleft->op2);
1912 if (irk->o == IR_KINT) {
1913 int32_t a = irk->i;
1914 int32_t y = kfold_intop(a, fright->i, fins->o);
1915 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
1916 return LEFTFOLD;
1917 PHIBARRIER(fleft);
1918 fins->op1 = fleft->op1;
1919 fins->op2 = (IRRef1)lj_ir_kint(J, y);
1920 return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */
1921 }
1922 return NEXTFOLD;
1923 }
1924
1925 /* -- Array bounds check elimination -------------------------------------- */
1926
1927 /* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
1928 ** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
1929 ** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but that needs better disambiguation.
1930 */
1931 LJFOLD(ABC any ADD)
1932 LJFOLDF(abc_fwd)
1933 {
1934 if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
1935 if (irref_isk(fright->op2)) {
1936 IRIns *add2 = IR(fright->op1);
1937 if (add2->o == IR_ADD && irref_isk(add2->op2) &&
1938 IR(fright->op2)->i == -IR(add2->op2)->i) {
1939 IRRef ref = J->chain[IR_ABC];
1940 IRRef lim = add2->op1;
1941 if (fins->op1 > lim) lim = fins->op1;
1942 while (ref > lim) {
1943 IRIns *ir = IR(ref);
1944 if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
1945 return DROPFOLD;
1946 ref = ir->prev;
1947 }
1948 }
1949 }
1950 }
1951 return NEXTFOLD;
1952 }
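/* Illustrative case: forwarding t[i-1] can leave the recorder with
** ABC(asize, (i + -1) + 1). If a guard ABC(asize, i) is already on the
** trace, the reconstructed index proves the new check redundant.
*/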
1953
1954 /* Eliminate ABC for constants.
1955 ** ABC(asize, k1), ABC(asize, k2) ==> ABC(asize, max(k1, k2))
1956 ** The second ABC is always dropped; if k2 is higher, the first is patched to k2.
1957 */
1958 LJFOLD(ABC any KINT)
1959 LJFOLDF(abc_k)
1960 {
1961 if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
1962 IRRef ref = J->chain[IR_ABC];
1963 IRRef asize = fins->op1;
1964 while (ref > asize) {
1965 IRIns *ir = IR(ref);
1966 if (ir->op1 == asize && irref_isk(ir->op2)) {
1967 int32_t k = IR(ir->op2)->i;
1968 if (fright->i > k)
1969 ir->op2 = fins->op2;
1970 return DROPFOLD;
1971 }
1972 ref = ir->prev;
1973 }
1974 return EMITFOLD; /* Already performed CSE. */
1975 }
1976 return NEXTFOLD;
1977 }
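/* Example: after ABC(asize, 3), a later ABC(asize, 7) is dropped and
** the existing check is widened to 7, so a single guard covers both
** constant indexes.
*/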
1978
1979 /* Eliminate invariant ABC inside loop. */
1980 LJFOLD(ABC any any)
1981 LJFOLDF(abc_invar)
1982 {
1983 /* Invariant ABC marked as PTR. Drop if op1 is invariant, too. */
1984 if (!irt_isint(fins->t) && fins->op1 < J->chain[IR_LOOP] &&
1985 !irt_isphi(IR(fins->op1)->t))
1986 return DROPFOLD;
1987 return NEXTFOLD;
1988 }
1989
1990 /* -- Commutativity ------------------------------------------------------- */
1991
1992 /* The refs of commutative ops are canonicalized. Lower refs go to the right.
1993 ** Rationale behind this:
1994 ** - It (also) moves constants to the right.
1995 ** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
1996 ** - It helps CSE to find more matches.
1997 ** - The assembler generates better code with constants at the right.
1998 */
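/* Example of canonicalization: recording "1 + x" yields
** ADD(KINT 1, x); constants always have lower refs than instructions,
** so comm_swap turns this into ADD(x, KINT 1) before other rules run.
*/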
1999
2000 LJFOLD(ADD any any)
2001 LJFOLD(MUL any any)
2002 LJFOLD(ADDOV any any)
2003 LJFOLD(MULOV any any)
2004 LJFOLDF(comm_swap)
2005 {
2006 if (fins->op1 < fins->op2) { /* Move lower ref to the right. */
2007 IRRef1 tmp = fins->op1;
2008 fins->op1 = fins->op2;
2009 fins->op2 = tmp;
2010 return RETRYFOLD;
2011 }
2012 return NEXTFOLD;
2013 }
2014
2015 LJFOLD(EQ any any)
2016 LJFOLD(NE any any)
2017 LJFOLDF(comm_equal)
2018 {
2019 /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
2020 if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
2021 return CONDFOLD(fins->o == IR_EQ);
2022 return fold_comm_swap(J);
2023 }
2024
2025 LJFOLD(LT any any)
2026 LJFOLD(GE any any)
2027 LJFOLD(LE any any)
2028 LJFOLD(GT any any)
2029 LJFOLD(ULT any any)
2030 LJFOLD(UGE any any)
2031 LJFOLD(ULE any any)
2032 LJFOLD(UGT any any)
2033 LJFOLDF(comm_comp)
2034 {
2035 /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
2036 if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
2037 return CONDFOLD((fins->o ^ (fins->o >> 1)) & 1);
2038 if (fins->op1 < fins->op2) { /* Move lower ref to the right. */
2039 IRRef1 tmp = fins->op1;
2040 fins->op1 = fins->op2;
2041 fins->op2 = tmp;
2042 fins->o ^= 3; /* GT <-> LT, GE <-> LE, does not affect U */
2043 return RETRYFOLD;
2044 }
2045 return NEXTFOLD;
2046 }
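/* Sketch of the opcode arithmetic, assuming IR_LT..IR_GT and
** IR_ULT..IR_UGT are consecutive in lj_ir.h: "o ^= 3" swaps LT<->GT
** and GE<->LE (likewise for the unsigned variants), and
** "(o ^ (o >> 1)) & 1" is 1 exactly for the reflexively true
** comparisons, so "x <= x" is dropped while "x < x" fails the trace.
*/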
2047
2048 LJFOLD(BAND any any)
LJFOLD(BOR any any)2049 LJFOLD(BOR any any)
2050 LJFOLDF(comm_dup)
2051 {
2052 if (fins->op1 == fins->op2) /* x o x ==> x */
2053 return LEFTFOLD;
2054 return fold_comm_swap(J);
2055 }
2056
2057 LJFOLD(MIN any any)
LJFOLD(MAX any any)2058 LJFOLD(MAX any any)
2059 LJFOLDF(comm_dup_minmax)
2060 {
2061 if (fins->op1 == fins->op2) /* x o x ==> x */
2062 return LEFTFOLD;
2063 return NEXTFOLD;
2064 }
2065
2066 LJFOLD(BXOR any any)
2067 LJFOLDF(comm_bxor)
2068 {
2069 if (fins->op1 == fins->op2) /* i xor i ==> 0 */
2070 return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
2071 return fold_comm_swap(J);
2072 }
2073
2074 /* -- Simplification of compound expressions ------------------------------ */
2075
2076 static TRef kfold_xload(jit_State *J, IRIns *ir, const void *p)
2077 {
2078 int32_t k;
2079 switch (irt_type(ir->t)) {
2080 case IRT_NUM: return lj_ir_knum_u64(J, *(uint64_t *)p);
2081 case IRT_I8: k = (int32_t)*(int8_t *)p; break;
2082 case IRT_U8: k = (int32_t)*(uint8_t *)p; break;
2083 case IRT_I16: k = (int32_t)(int16_t)lj_getu16(p); break;
2084 case IRT_U16: k = (int32_t)(uint16_t)lj_getu16(p); break;
2085 case IRT_INT: case IRT_U32: k = (int32_t)lj_getu32(p); break;
2086 case IRT_I64: case IRT_U64: return lj_ir_kint64(J, *(uint64_t *)p);
2087 default: return 0;
2088 }
2089 return lj_ir_kint(J, k);
2090 }
2091
2092 /* Turn: string.sub(str, a, b) == kstr
2093 ** into: string.byte(str, a) == string.byte(kstr, 1) etc.
2094 ** Note: this creates unaligned XLOADs on x86/x64.
2095 */
2096 LJFOLD(EQ SNEW KGC)
2097 LJFOLD(NE SNEW KGC)
2098 LJFOLDF(merge_eqne_snew_kgc)
2099 {
2100 GCstr *kstr = ir_kstr(fright);
2101 int32_t len = (int32_t)kstr->len;
2102 lj_assertJ(irt_isstr(fins->t), "bad equality IR type");
2103
2104 #if LJ_TARGET_UNALIGNED
2105 #define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */
2106 #define FOLD_SNEW_TYPE8 IRT_I8 /* Creates shorter immediates. */
2107 #else
2108 #define FOLD_SNEW_MAX_LEN 1 /* Handle string lengths 0 or 1. */
2109 #define FOLD_SNEW_TYPE8 IRT_U8 /* Prefer unsigned loads. */
2110 #endif
2111
2112 PHIBARRIER(fleft);
2113 if (len <= FOLD_SNEW_MAX_LEN) {
2114 IROp op = (IROp)fins->o;
2115 IRRef strref = fleft->op1;
2116 if (IR(strref)->o != IR_STRREF)
2117 return NEXTFOLD;
2118 if (op == IR_EQ) {
2119 emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len));
2120 /* Caveat: fins/fleft/fright is no longer valid after emitir. */
2121 } else {
2122 /* NE is not expanded since this would need an OR of two conds. */
2123 if (!irref_isk(fleft->op2)) /* Only handle the constant length case. */
2124 return NEXTFOLD;
2125 if (IR(fleft->op2)->i != len)
2126 return DROPFOLD;
2127 }
2128 if (len > 0) {
2129 /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
2130 uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, FOLD_SNEW_TYPE8) :
2131 len == 2 ? IRT(IR_XLOAD, IRT_U16) :
2132 IRTI(IR_XLOAD));
2133 TRef tmp = emitir(ot, strref,
2134 IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
2135 TRef val = kfold_xload(J, IR(tref_ref(tmp)), strdata(kstr));
2136 if (len == 3)
2137 tmp = emitir(IRTI(IR_BAND), tmp,
2138 lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
2139 fins->op1 = (IRRef1)tmp;
2140 fins->op2 = (IRRef1)val;
2141 fins->ot = (IROpT)IRTGI(op);
2142 return RETRYFOLD;
2143 } else {
2144 return DROPFOLD;
2145 }
2146 }
2147 return NEXTFOLD;
2148 }
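/* Worked example (sketch): for 'string.sub(s, i, i+1) == "ab"' the EQ
** is replaced by a length guard plus a 16 bit XLOAD of the STRREF,
** compared against the constant derived from "ab" by kfold_xload() --
** one halfword compare instead of interning a temporary string.
*/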
2149
2150 /* -- Loads --------------------------------------------------------------- */
2151
2152 /* Loads cannot be folded or passed on to CSE in general.
2153 ** Alias analysis is needed to check for forwarding opportunities.
2154 **
2155 ** Caveat: *all* loads must be listed here or they end up at CSE!
2156 */
2157
2158 LJFOLD(ALOAD any)
2159 LJFOLDX(lj_opt_fwd_aload)
2160
2161 /* From HREF fwd (see below). Must eliminate, not supported by fwd/backend. */
2162 LJFOLD(HLOAD KKPTR)
2163 LJFOLDF(kfold_hload_kkptr)
2164 {
2165 UNUSED(J);
2166 lj_assertJ(ir_kptr(fleft) == niltvg(J2G(J)), "expected niltv");
2167 return TREF_NIL;
2168 }
2169
2170 LJFOLD(HLOAD any)
2171 LJFOLDX(lj_opt_fwd_hload)
2172
2173 LJFOLD(ULOAD any)
2174 LJFOLDX(lj_opt_fwd_uload)
2175
2176 LJFOLD(ALEN any any)
2177 LJFOLDX(lj_opt_fwd_alen)
2178
2179 /* Upvalue refs are really loads, but there are no corresponding stores.
2180 ** So CSE is ok for them, except for UREFO across a GC step (see below).
2181 ** If the referenced function is const, its upvalue addresses are const, too.
2182 ** This can be used to improve CSE by looking for the same address,
2183 ** even if the upvalues originate from a different function.
2184 */
2185 LJFOLD(UREFO KGC any)
2186 LJFOLD(UREFC KGC any)
2187 LJFOLDF(cse_uref)
2188 {
2189 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
2190 IRRef ref = J->chain[fins->o];
2191 GCfunc *fn = ir_kfunc(fleft);
2192 GCupval *uv = gco2uv(gcref(fn->l.uvptr[(fins->op2 >> 8)]));
2193 while (ref > 0) {
2194 IRIns *ir = IR(ref);
2195 if (irref_isk(ir->op1)) {
2196 GCfunc *fn2 = ir_kfunc(IR(ir->op1));
2197 if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
2198 if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
2199 break;
2200 return ref;
2201 }
2202 }
2203 ref = ir->prev;
2204 }
2205 }
2206 return EMITFOLD;
2207 }
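/* Example: two closures created from the same prototype share a
** GCupval for a common local; their UREFC(KGC f1, ...) and
** UREFC(KGC f2, ...) resolve to the same address, so the second ref
** CSEs to the first unless a GC step intervenes (UREFO only).
*/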
2208
2209 LJFOLD(HREFK any any)
2210 LJFOLDX(lj_opt_fwd_hrefk)
2211
2212 LJFOLD(HREF TNEW any)
2213 LJFOLDF(fwd_href_tnew)
2214 {
2215 if (lj_opt_fwd_href_nokey(J))
2216 return lj_ir_kkptr(J, niltvg(J2G(J)));
2217 return NEXTFOLD;
2218 }
2219
2220 LJFOLD(HREF TDUP KPRI)
2221 LJFOLD(HREF TDUP KGC)
2222 LJFOLD(HREF TDUP KNUM)
2223 LJFOLDF(fwd_href_tdup)
2224 {
2225 TValue keyv;
2226 lj_ir_kvalue(J->L, &keyv, fright);
2227 if (lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv) == niltvg(J2G(J)) &&
2228 lj_opt_fwd_href_nokey(J))
2229 return lj_ir_kkptr(J, niltvg(J2G(J)));
2230 return NEXTFOLD;
2231 }
2232
2233 /* We can safely FOLD/CSE array/hash refs and field loads, since there
2234 ** are no corresponding stores. But we need to check for any NEWREF with
2235 ** an aliased table, as it may invalidate all of the pointers and fields.
2236 ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
2237 ** FLOADs. And NEWREF itself is treated like a store (see below).
2238 ** LREF is constant (per trace) since coroutine switches are not inlined.
2239 */
2240 LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
2241 LJFOLDF(fload_tab_tnew_asize)
2242 {
2243 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
2244 return INTFOLD(fleft->op1);
2245 return NEXTFOLD;
2246 }
2247
2248 LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
2249 LJFOLDF(fload_tab_tnew_hmask)
2250 {
2251 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
2252 return INTFOLD((1 << fleft->op2)-1);
2253 return NEXTFOLD;
2254 }
2255
2256 LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
2257 LJFOLDF(fload_tab_tdup_asize)
2258 {
2259 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
2260 return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
2261 return NEXTFOLD;
2262 }
2263
2264 LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
2265 LJFOLDF(fload_tab_tdup_hmask)
2266 {
2267 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
2268 return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
2269 return NEXTFOLD;
2270 }
2271
2272 LJFOLD(HREF any any)
2273 LJFOLD(FLOAD any IRFL_TAB_ARRAY)
2274 LJFOLD(FLOAD any IRFL_TAB_NODE)
2275 LJFOLD(FLOAD any IRFL_TAB_ASIZE)
2276 LJFOLD(FLOAD any IRFL_TAB_HMASK)
2277 LJFOLDF(fload_tab_ah)
2278 {
2279 TRef tr = lj_opt_cse(J);
2280 return lj_opt_fwd_tptr(J, tref_ref(tr)) ? tr : EMITFOLD;
2281 }
2282
2283 /* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
2284 LJFOLD(FLOAD KGC IRFL_STR_LEN)
2285 LJFOLDF(fload_str_len_kgc)
2286 {
2287 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
2288 return INTFOLD((int32_t)ir_kstr(fleft)->len);
2289 return NEXTFOLD;
2290 }
2291
2292 LJFOLD(FLOAD SNEW IRFL_STR_LEN)
2293 LJFOLDF(fload_str_len_snew)
2294 {
2295 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
2296 PHIBARRIER(fleft);
2297 return fleft->op2;
2298 }
2299 return NEXTFOLD;
2300 }
2301
2302 LJFOLD(FLOAD TOSTR IRFL_STR_LEN)
2303 LJFOLDF(fload_str_len_tostr)
2304 {
2305 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
2306 return INTFOLD(1);
2307 return NEXTFOLD;
2308 }
2309
2310 LJFOLD(FLOAD any IRFL_SBUF_W)
2311 LJFOLD(FLOAD any IRFL_SBUF_E)
2312 LJFOLD(FLOAD any IRFL_SBUF_B)
2313 LJFOLD(FLOAD any IRFL_SBUF_L)
2314 LJFOLD(FLOAD any IRFL_SBUF_REF)
2315 LJFOLD(FLOAD any IRFL_SBUF_R)
2316 LJFOLDF(fload_sbuf)
2317 {
2318 TRef tr = lj_opt_fwd_fload(J);
2319 return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD;
2320 }
2321
2322 /* The fast function ID of function objects is immutable. */
2323 LJFOLD(FLOAD KGC IRFL_FUNC_FFID)
2324 LJFOLDF(fload_func_ffid_kgc)
2325 {
2326 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
2327 return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid);
2328 return NEXTFOLD;
2329 }
2330
2331 /* The C type ID of cdata objects is immutable. */
2332 LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
2333 LJFOLDF(fload_cdata_typeid_kgc)
2334 {
2335 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
2336 return INTFOLD((int32_t)ir_kcdata(fleft)->ctypeid);
2337 return NEXTFOLD;
2338 }
2339
2340 /* Get the contents of immutable cdata objects. */
2341 LJFOLD(FLOAD KGC IRFL_CDATA_PTR)
2342 LJFOLD(FLOAD KGC IRFL_CDATA_INT)
2343 LJFOLD(FLOAD KGC IRFL_CDATA_INT64)
2344 LJFOLDF(fload_cdata_int64_kgc)
2345 {
2346 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
2347 void *p = cdataptr(ir_kcdata(fleft));
2348 if (irt_is64(fins->t))
2349 return INT64FOLD(*(uint64_t *)p);
2350 else
2351 return INTFOLD(*(int32_t *)p);
2352 }
2353 return NEXTFOLD;
2354 }
2355
2356 LJFOLD(FLOAD CNEW IRFL_CDATA_CTYPEID)
2357 LJFOLD(FLOAD CNEWI IRFL_CDATA_CTYPEID)
2358 LJFOLDF(fload_cdata_typeid_cnew)
2359 {
2360 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
2361 return fleft->op1; /* No PHI barrier needed. CNEW/CNEWI op1 is const. */
2362 return NEXTFOLD;
2363 }
2364
2365 /* Pointer, int and int64 cdata objects are immutable. */
2366 LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR)
2367 LJFOLD(FLOAD CNEWI IRFL_CDATA_INT)
2368 LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64)
2369 LJFOLDF(fload_cdata_ptr_int64_cnew)
2370 {
2371 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
2372 return fleft->op2; /* Fold even across PHI to avoid allocations. */
2373 return NEXTFOLD;
2374 }
2375
2376 LJFOLD(FLOAD any IRFL_STR_LEN)
2377 LJFOLD(FLOAD any IRFL_FUNC_ENV)
2378 LJFOLD(FLOAD any IRFL_THREAD_ENV)
2379 LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
2380 LJFOLD(FLOAD any IRFL_CDATA_PTR)
2381 LJFOLD(FLOAD any IRFL_CDATA_INT)
2382 LJFOLD(FLOAD any IRFL_CDATA_INT64)
2383 LJFOLD(VLOAD any any) /* Vararg loads have no corresponding stores. */
2384 LJFOLDX(lj_opt_cse)
2385
2386 /* All other field loads need alias analysis. */
2387 LJFOLD(FLOAD any any)
2388 LJFOLDX(lj_opt_fwd_fload)
2389
2390 /* This is for LOOP only. Recording handles SLOADs internally. */
2391 LJFOLD(SLOAD any any)
2392 LJFOLDF(fwd_sload)
2393 {
2394 if ((fins->op2 & IRSLOAD_FRAME)) {
2395 TRef tr = lj_opt_cse(J);
2396 return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
2397 } else {
2398 lj_assertJ(J->slot[fins->op1] != 0, "uninitialized slot accessed");
2399 return J->slot[fins->op1];
2400 }
2401 }
2402
2403 /* Only fold for KKPTR. The pointer _and_ the contents must be const. */
2404 LJFOLD(XLOAD KKPTR any)
2405 LJFOLDF(xload_kptr)
2406 {
2407 TRef tr = kfold_xload(J, fins, ir_kptr(fleft));
2408 return tr ? tr : NEXTFOLD;
2409 }
2410
2411 LJFOLD(XLOAD any any)
2412 LJFOLDX(lj_opt_fwd_xload)
2413
2414 /* -- Frame handling ------------------------------------------------------ */
2415
2416 /* Prevent CSE of a REF_BASE operand across IR_RETF. */
2417 LJFOLD(SUB any BASE)
2418 LJFOLD(SUB BASE any)
2419 LJFOLD(EQ any BASE)
2420 LJFOLDF(fold_base)
2421 {
2422 return lj_opt_cselim(J, J->chain[IR_RETF]);
2423 }
2424
2425 /* -- Write barriers ------------------------------------------------------ */
2426
2427 /* Write barriers are amenable to CSE, but not across any incremental
2428 ** GC steps.
2429 **
2430 ** The same logic applies to open upvalue references, because a stack
2431 ** may be resized during a GC step (not the current stack, but maybe that
2432 ** of a coroutine).
2433 */
2434 LJFOLD(TBAR any)
2435 LJFOLD(OBAR any any)
2436 LJFOLD(UREFO any any)
2437 LJFOLDF(barrier_tab)
2438 {
2439 TRef tr = lj_opt_cse(J);
2440 if (gcstep_barrier(J, tref_ref(tr))) /* CSE across GC step? */
2441 return EMITFOLD; /* Raw emit. Assumes fins is left intact by CSE. */
2442 return tr;
2443 }
2444
2445 LJFOLD(TBAR TNEW)
2446 LJFOLD(TBAR TDUP)
2447 LJFOLDF(barrier_tnew_tdup)
2448 {
2449 /* New tables are always white and never need a barrier. */
2450 if (fins->op1 < J->chain[IR_LOOP]) /* Except across a GC step. */
2451 return NEXTFOLD;
2452 return DROPFOLD;
2453 }
2454
2455 /* -- Profiling ----------------------------------------------------------- */
2456
2457 LJFOLD(PROF any any)
2458 LJFOLDF(prof)
2459 {
2460 IRRef ref = J->chain[IR_PROF];
2461 if (ref+1 == J->cur.nins) /* Drop neighbouring IR_PROF. */
2462 return ref;
2463 return EMITFOLD;
2464 }
2465
2466 /* -- Stores and allocations ---------------------------------------------- */
2467
2468 /* Stores and allocations cannot be folded or passed on to CSE in general.
2469 ** But some stores can be eliminated with dead-store elimination (DSE).
2470 **
2471 ** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
2472 */
2473
2474 LJFOLD(ASTORE any any)
2475 LJFOLD(HSTORE any any)
2476 LJFOLDX(lj_opt_dse_ahstore)
2477
2478 LJFOLD(USTORE any any)
2479 LJFOLDX(lj_opt_dse_ustore)
2480
2481 LJFOLD(FSTORE any any)
2482 LJFOLDX(lj_opt_dse_fstore)
2483
2484 LJFOLD(XSTORE any any)
2485 LJFOLDX(lj_opt_dse_xstore)
2486
2487 LJFOLD(NEWREF any any) /* Treated like a store. */
2488 LJFOLD(TMPREF any any)
2489 LJFOLD(CALLA any any)
2490 LJFOLD(CALLL any any) /* Safeguard fallback. */
2491 LJFOLD(CALLS any any)
2492 LJFOLD(CALLXS any any)
2493 LJFOLD(XBAR)
2494 LJFOLD(RETF any any) /* Modifies BASE. */
2495 LJFOLD(TNEW any any)
2496 LJFOLD(TDUP any)
2497 LJFOLD(CNEW any any)
2498 LJFOLD(XSNEW any any)
2499 LJFOLDX(lj_ir_emit)
2500
2501 /* ------------------------------------------------------------------------ */
2502
2503 /* Every entry in the generated hash table is a 32 bit pattern:
2504 **
2505 ** xxxxxxxx iiiiiii lllllll rrrrrrrrrr
2506 **
2507 ** xxxxxxxx = 8 bit index into fold function table
2508 ** iiiiiii = 7 bit folded instruction opcode
2509 ** lllllll = 7 bit left instruction opcode
2510 ** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field
2511 */
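/* Illustrative packing: the rule LJFOLD(BOR BOR KINT) stores IR_BOR in
** the iiiiiii and lllllll fields and IR_KINT in rrrrrrrrrr, with
** xxxxxxxx selecting the shared fold function reassoc_intarith_k.
*/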
2512
2513 #include "lj_folddef.h"
2514
2515 /* ------------------------------------------------------------------------ */
2516
2517 /* Fold IR instruction. */
2518 TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
2519 {
2520 uint32_t key, any;
2521 IRRef ref;
2522
2523 if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
2524 lj_assertJ(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
2525 JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT,
2526 "bad JIT_F_OPT_DEFAULT");
2527 /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
2528 if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
2529 return lj_opt_cse(J);
2530
2531 /* No FOLD, forwarding or CSE? Emit raw IR for loads, except for SLOAD. */
2532 if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
2533 (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
2534 irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
2535 return lj_ir_emit(J);
2536
2537 /* No FOLD or DSE? Emit raw IR for stores. */
2538 if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_DSE)) !=
2539 (JIT_F_OPT_FOLD|JIT_F_OPT_DSE) &&
2540 irm_kind(lj_ir_mode[fins->o]) == IRM_S)
2541 return lj_ir_emit(J);
2542 }
2543
2544 /* Fold engine start/retry point. */
2545 retry:
2546 /* Construct key from opcode and operand opcodes (unless literal/none). */
2547 key = ((uint32_t)fins->o << 17);
2548 if (fins->op1 >= J->cur.nk) {
2549 key += (uint32_t)IR(fins->op1)->o << 10;
2550 *fleft = *IR(fins->op1);
2551 if (fins->op1 < REF_TRUE)
2552 fleft[1] = IR(fins->op1)[1];
2553 }
2554 if (fins->op2 >= J->cur.nk) {
2555 key += (uint32_t)IR(fins->op2)->o;
2556 *fright = *IR(fins->op2);
2557 if (fins->op2 < REF_TRUE)
2558 fright[1] = IR(fins->op2)[1];
2559 } else {
2560 key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */
2561 }
2562
2563 /* Check for a match in order from most specific to least specific. */
2564 any = 0;
2565 for (;;) {
2566 uint32_t k = key | (any & 0x1ffff);
2567 uint32_t h = fold_hashkey(k);
2568 uint32_t fh = fold_hash[h]; /* Lookup key in semi-perfect hash table. */
2569 if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
2570 ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
2571 if (ref != NEXTFOLD)
2572 break;
2573 }
2574 if (any == 0xfffff) /* Exhausted folding. Pass on to CSE. */
2575 return lj_opt_cse(J);
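/* Cycle the wildcard bits: any goes 0 -> 0xffc00 -> 0x003ff -> 0xfffff,
** which (masked with 0x1ffff) wildcards first the left field, then the
** right field, then both.
*/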
2576 any = (any | (any >> 10)) ^ 0xffc00;
2577 }
2578
2579 /* Return value processing, ordered by frequency. */
2580 if (LJ_LIKELY(ref >= MAX_FOLD))
2581 return TREF(ref, irt_t(IR(ref)->t));
2582 if (ref == RETRYFOLD)
2583 goto retry;
2584 if (ref == KINTFOLD)
2585 return lj_ir_kint(J, fins->i);
2586 if (ref == FAILFOLD)
2587 lj_trace_err(J, LJ_TRERR_GFAIL);
2588 lj_assertJ(ref == DROPFOLD, "bad fold result");
2589 return REF_DROP;
2590 }
2591
2592 /* -- Common-Subexpression Elimination ------------------------------------ */
2593
2594 /* CSE an IR instruction. This is very fast due to the skip-list chains. */
2595 TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
2596 {
2597 /* Avoid a narrow-to-wide store-to-load forwarding stall. */
2598 IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
2599 IROp op = fins->o;
2600 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
2601 /* Limited search for same operands in per-opcode chain. */
2602 IRRef ref = J->chain[op];
2603 IRRef lim = fins->op1;
2604 if (fins->op2 > lim) lim = fins->op2; /* Relies on lit < REF_BIAS. */
2605 while (ref > lim) {
2606 if (IR(ref)->op12 == op12)
2607 return TREF(ref, irt_t(IR(ref)->t)); /* Common subexpression found. */
2608 ref = IR(ref)->prev;
2609 }
2610 }
2611 /* Otherwise emit IR (inlined for speed). */
2612 {
2613 IRRef ref = lj_ir_nextins(J);
2614 IRIns *ir = IR(ref);
2615 ir->prev = J->chain[op];
2616 ir->op12 = op12;
2617 J->chain[op] = (IRRef1)ref;
2618 ir->o = fins->o;
2619 J->guardemit.irt |= fins->t.irt;
2620 return TREF(ref, irt_t((ir->t = fins->t)));
2621 }
2622 }
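/* Illustrative chain walk: J->chain[IR_ADD] holds the ref of the most
** recent ADD and each ADD's prev field links the one before it. The
** search stops at max(op1, op2): any instruction at or below that ref
** was emitted before the operands existed and cannot match.
*/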
2623
2624 /* CSE with explicit search limit. */
2625 TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
2626 {
2627 IRRef ref = J->chain[fins->o];
2628 IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
2629 while (ref > lim) {
2630 if (IR(ref)->op12 == op12)
2631 return ref;
2632 ref = IR(ref)->prev;
2633 }
2634 return lj_ir_emit(J);
2635 }
2636
2637 /* ------------------------------------------------------------------------ */
2638
2639 #undef IR
2640 #undef fins
2641 #undef fleft
2642 #undef fright
2643 #undef knumleft
2644 #undef knumright
2645 #undef emitir
2646
2647 #endif
2648