1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2  * vim: set ts=8 sts=2 et sw=2 tw=80:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "jit/x86-shared/Lowering-x86-shared.h"
8 
9 #include "mozilla/MathAlgorithms.h"
10 
11 #include "jit/Lowering.h"
12 #include "jit/MIR.h"
13 
14 #include "jit/shared/Lowering-shared-inl.h"
15 
16 using namespace js;
17 using namespace js::jit;
18 
19 using mozilla::Abs;
20 using mozilla::FloorLog2;
21 using mozilla::Maybe;
22 using mozilla::Nothing;
23 using mozilla::Some;
24 
25 LTableSwitch* LIRGeneratorX86Shared::newLTableSwitch(
26     const LAllocation& in, const LDefinition& inputCopy,
27     MTableSwitch* tableswitch) {
28   return new (alloc()) LTableSwitch(in, inputCopy, temp(), tableswitch);
29 }
30 
31 LTableSwitchV* LIRGeneratorX86Shared::newLTableSwitchV(
32     MTableSwitch* tableswitch) {
33   return new (alloc()) LTableSwitchV(useBox(tableswitch->getOperand(0)), temp(),
34                                      tempDouble(), temp(), tableswitch);
35 }
36 
37 void LIRGenerator::visitPowHalf(MPowHalf* ins) {
38   MDefinition* input = ins->input();
39   MOZ_ASSERT(input->type() == MIRType::Double);
40   LPowHalfD* lir = new (alloc()) LPowHalfD(useRegisterAtStart(input));
41   define(lir, ins);
42 }
43 
44 void LIRGeneratorX86Shared::lowerForShift(LInstructionHelper<1, 2, 0>* ins,
45                                           MDefinition* mir, MDefinition* lhs,
46                                           MDefinition* rhs) {
47   ins->setOperand(0, useRegisterAtStart(lhs));
48 
49   // The shift count must be a constant or, unless BMI2 is available, be in
50   // register ecx: x86 cannot use any other register as a variable shift count.
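  // (Illustrative sketch, not the exact code we emit: with BMI2 the SHLX,
  // SHRX and SARX forms take the count in any general-purpose register,
  // e.g. in AT&T syntax `shlxl %esi, %eax, %ebx` computes
  // ebx = eax << (esi & 31), so no fixed-ecx constraint is needed.)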
51   if (rhs->isConstant()) {
52     ins->setOperand(1, useOrConstantAtStart(rhs));
53   } else if (Assembler::HasBMI2() && !mir->isRotate()) {
54     ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
55                            ? useRegister(rhs)
56                            : useRegisterAtStart(rhs));
57   } else {
58     ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
59                            ? useFixed(rhs, ecx)
60                            : useFixedAtStart(rhs, ecx));
61   }
62 
63   defineReuseInput(ins, mir, 0);
64 }
65 
66 template <size_t Temps>
67 void LIRGeneratorX86Shared::lowerForShiftInt64(
68     LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
69     MDefinition* mir, MDefinition* lhs, MDefinition* rhs) {
70   ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
71 #if defined(JS_NUNBOX32)
72   if (mir->isRotate()) {
73     ins->setTemp(0, temp());
74   }
75 #endif
76 
77   static_assert(LShiftI64::Rhs == INT64_PIECES,
78                 "Assume Rhs is located at INT64_PIECES.");
79   static_assert(LRotateI64::Count == INT64_PIECES,
80                 "Assume Count is located at INT64_PIECES.");
81 
82   // The shift count must be a constant or, unless BMI2 is available, be in
83   // register ecx: x86 cannot use any other register as a variable shift count.
84   if (rhs->isConstant()) {
85     ins->setOperand(INT64_PIECES, useOrConstantAtStart(rhs));
86 #ifdef JS_CODEGEN_X64
87   } else if (Assembler::HasBMI2() && !mir->isRotate()) {
88     ins->setOperand(INT64_PIECES, useRegister(rhs));
89 #endif
90   } else {
91     // The operands are int64, but we only care about the lower 32 bits of
92     // the RHS. On 32-bit, the code below will load that part in ecx and
93     // will discard the upper half.
94     ensureDefined(rhs);
95     LUse use(ecx);
96     use.setVirtualRegister(rhs->virtualRegister());
97     ins->setOperand(INT64_PIECES, use);
98   }
99 
100   defineInt64ReuseInput(ins, mir, 0);
101 }
102 
103 template void LIRGeneratorX86Shared::lowerForShiftInt64(
104     LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
105     MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
106 template void LIRGeneratorX86Shared::lowerForShiftInt64(
107     LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 1>* ins,
108     MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
109 
110 void LIRGeneratorX86Shared::lowerForCompareI64AndBranch(
111     MTest* mir, MCompare* comp, JSOp op, MDefinition* left, MDefinition* right,
112     MBasicBlock* ifTrue, MBasicBlock* ifFalse) {
113   auto* lir = new (alloc())
114       LCompareI64AndBranch(comp, op, useInt64Register(left),
115                            useInt64OrConstant(right), ifTrue, ifFalse);
116   add(lir, mir);
117 }
118 
119 void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins,
120                                         MDefinition* mir, MDefinition* input) {
121   ins->setOperand(0, useRegisterAtStart(input));
122   defineReuseInput(ins, mir, 0);
123 }
124 
125 void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins,
126                                         MDefinition* mir, MDefinition* lhs,
127                                         MDefinition* rhs) {
128   ins->setOperand(0, useRegisterAtStart(lhs));
129   ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
130                          ? useOrConstant(rhs)
131                          : useOrConstantAtStart(rhs));
132   defineReuseInput(ins, mir, 0);
133 }
134 
135 template <size_t Temps>
136 void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins,
137                                         MDefinition* mir, MDefinition* lhs,
138                                         MDefinition* rhs) {
139   // Without AVX, we'll need to use the x86 encodings where one of the
140   // inputs must be the same location as the output.
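  // (Illustrative sketch: the legacy SSE form `addsd %xmm1, %xmm0` adds into
  // xmm0 and so destroys one input, whereas the AVX/VEX form
  // `vaddsd %xmm1, %xmm2, %xmm0` writes a separate destination register, so
  // no input needs to be reused.)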
141   if (!Assembler::HasAVX()) {
142     ins->setOperand(0, useRegisterAtStart(lhs));
143     ins->setOperand(
144         1, willHaveDifferentLIRNodes(lhs, rhs) ? use(rhs) : useAtStart(rhs));
145     defineReuseInput(ins, mir, 0);
146   } else {
147     ins->setOperand(0, useRegisterAtStart(lhs));
148     ins->setOperand(1, useAtStart(rhs));
149     define(ins, mir);
150   }
151 }
152 
153 template void LIRGeneratorX86Shared::lowerForFPU(
154     LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, MDefinition* lhs,
155     MDefinition* rhs);
156 template void LIRGeneratorX86Shared::lowerForFPU(
157     LInstructionHelper<1, 2, 1>* ins, MDefinition* mir, MDefinition* lhs,
158     MDefinition* rhs);
159 
160 void LIRGeneratorX86Shared::lowerForBitAndAndBranch(LBitAndAndBranch* baab,
161                                                     MInstruction* mir,
162                                                     MDefinition* lhs,
163                                                     MDefinition* rhs) {
164   baab->setOperand(0, useRegisterAtStart(lhs));
165   baab->setOperand(1, useRegisterOrConstantAtStart(rhs));
166   add(baab, mir);
167 }
168 
169 void LIRGeneratorX86Shared::lowerNegI(MInstruction* ins, MDefinition* input) {
170   defineReuseInput(new (alloc()) LNegI(useRegisterAtStart(input)), ins, 0);
171 }
172 
173 void LIRGeneratorX86Shared::lowerNegI64(MInstruction* ins, MDefinition* input) {
174   defineInt64ReuseInput(new (alloc()) LNegI64(useInt64RegisterAtStart(input)),
175                         ins, 0);
176 }
177 
178 void LIRGenerator::visitAbs(MAbs* ins) {
179   defineReuseInput(allocateAbs(ins, useRegisterAtStart(ins->input())), ins, 0);
180 }
181 
182 void LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs,
183                                       MDefinition* rhs) {
184   // Note: If we need a negative zero check, lhs is used twice.
185   LAllocation lhsCopy = mul->canBeNegativeZero() ? use(lhs) : LAllocation();
186   LMulI* lir = new (alloc())
187       LMulI(useRegisterAtStart(lhs),
188             willHaveDifferentLIRNodes(lhs, rhs) ? useOrConstant(rhs)
189                                                 : useOrConstantAtStart(rhs),
190             lhsCopy);
191   if (mul->fallible()) {
192     assignSnapshot(lir, mul->bailoutKind());
193   }
194   defineReuseInput(lir, mul, 0);
195 }
196 
197 void LIRGeneratorX86Shared::lowerDivI(MDiv* div) {
198   if (div->isUnsigned()) {
199     lowerUDiv(div);
200     return;
201   }
202 
203   // Division instructions are slow. Division by constant denominators can be
204   // rewritten to use other instructions.
205   if (div->rhs()->isConstant()) {
206     int32_t rhs = div->rhs()->toConstant()->toInt32();
207 
208     // Division by powers of two can be done by shifting, and division by
209     // other numbers can be done by a reciprocal multiplication technique.
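    // (Sketch, assuming the standard magic-number technique from Hacker's
    // Delight: e.g. a signed x / 3 can be computed as the high 32 bits of the
    // 64-bit product x * 0x55555556, plus 1 when x is negative. The actual
    // constants are chosen when generating code for LDivOrModConstantI.)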
210     int32_t shift = FloorLog2(Abs(rhs));
211     if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
212       LAllocation lhs = useRegisterAtStart(div->lhs());
213       LDivPowTwoI* lir;
214       // When the result is truncated and the remainder may be non-zero, we
215       // have to round the result toward 0. This requires an extra copy of the
216       // lhs so the result can be adjusted when the lhs is negative.
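      // (Sketch, assuming rhs == 8 and a possibly negative lhs; the real
      // sequence is emitted by the code generator:
      //    mov  lhs, tmp
      //    sar  $31, tmp    ; tmp = (lhs < 0) ? -1 : 0
      //    shr  $29, tmp    ; tmp = (lhs < 0) ? 7 : 0
      //    add  tmp, lhs
      //    sar  $3, lhs     ; now lhs = trunc(lhs / 8).)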
217       bool needRoundNeg = div->canBeNegativeDividend() && div->isTruncated();
218       if (!needRoundNeg) {
219         // Numerator is unsigned, so does not need adjusting.
220         lir = new (alloc()) LDivPowTwoI(lhs, lhs, shift, rhs < 0);
221       } else {
222         // Numerator might be signed, and needs adjusting, and an extra lhs copy
223         // is needed to round the result of the integer division towards zero.
224         lir = new (alloc())
225             LDivPowTwoI(lhs, useRegister(div->lhs()), shift, rhs < 0);
226       }
227       if (div->fallible()) {
228         assignSnapshot(lir, div->bailoutKind());
229       }
230       defineReuseInput(lir, div, 0);
231       return;
232     }
233     if (rhs != 0) {
234       LDivOrModConstantI* lir;
235       lir = new (alloc())
236           LDivOrModConstantI(useRegister(div->lhs()), rhs, tempFixed(eax));
237       if (div->fallible()) {
238         assignSnapshot(lir, div->bailoutKind());
239       }
240       defineFixed(lir, div, LAllocation(AnyRegister(edx)));
241       return;
242     }
243   }
244 
245   LDivI* lir = new (alloc())
246       LDivI(useRegister(div->lhs()), useRegister(div->rhs()), tempFixed(edx));
247   if (div->fallible()) {
248     assignSnapshot(lir, div->bailoutKind());
249   }
250   defineFixed(lir, div, LAllocation(AnyRegister(eax)));
251 }
252 
253 void LIRGeneratorX86Shared::lowerModI(MMod* mod) {
254   if (mod->isUnsigned()) {
255     lowerUMod(mod);
256     return;
257   }
258 
259   if (mod->rhs()->isConstant()) {
260     int32_t rhs = mod->rhs()->toConstant()->toInt32();
261     int32_t shift = FloorLog2(Abs(rhs));
262     if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
263       LModPowTwoI* lir =
264           new (alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
265       if (mod->fallible()) {
266         assignSnapshot(lir, mod->bailoutKind());
267       }
268       defineReuseInput(lir, mod, 0);
269       return;
270     }
271     if (rhs != 0) {
272       LDivOrModConstantI* lir;
273       lir = new (alloc())
274           LDivOrModConstantI(useRegister(mod->lhs()), rhs, tempFixed(edx));
275       if (mod->fallible()) {
276         assignSnapshot(lir, mod->bailoutKind());
277       }
278       defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
279       return;
280     }
281   }
282 
283   LModI* lir = new (alloc())
284       LModI(useRegister(mod->lhs()), useRegister(mod->rhs()), tempFixed(eax));
285   if (mod->fallible()) {
286     assignSnapshot(lir, mod->bailoutKind());
287   }
288   defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
289 }
290 
291 void LIRGenerator::visitWasmNeg(MWasmNeg* ins) {
292   switch (ins->type()) {
293     case MIRType::Int32:
294       defineReuseInput(new (alloc()) LNegI(useRegisterAtStart(ins->input())),
295                        ins, 0);
296       break;
297     case MIRType::Float32:
298       defineReuseInput(new (alloc()) LNegF(useRegisterAtStart(ins->input())),
299                        ins, 0);
300       break;
301     case MIRType::Double:
302       defineReuseInput(new (alloc()) LNegD(useRegisterAtStart(ins->input())),
303                        ins, 0);
304       break;
305     default:
306       MOZ_CRASH();
307   }
308 }
309 
310 void LIRGeneratorX86Shared::lowerWasmSelectI(MWasmSelect* select) {
311   auto* lir = new (alloc())
312       LWasmSelect(useRegisterAtStart(select->trueExpr()),
313                   useAny(select->falseExpr()), useRegister(select->condExpr()));
314   defineReuseInput(lir, select, LWasmSelect::TrueExprIndex);
315 }
316 
317 void LIRGeneratorX86Shared::lowerWasmSelectI64(MWasmSelect* select) {
318   auto* lir = new (alloc()) LWasmSelectI64(
319       useInt64RegisterAtStart(select->trueExpr()),
320       useInt64(select->falseExpr()), useRegister(select->condExpr()));
321   defineInt64ReuseInput(lir, select, LWasmSelectI64::TrueExprIndex);
322 }
323 
324 void LIRGenerator::visitAsmJSLoadHeap(MAsmJSLoadHeap* ins) {
325   MDefinition* base = ins->base();
326   MOZ_ASSERT(base->type() == MIRType::Int32);
327 
328   MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
329   MOZ_ASSERT_IF(ins->needsBoundsCheck(),
330                 boundsCheckLimit->type() == MIRType::Int32);
331 
332   // For simplicity, require a register if we're going to emit a bounds-check
333   // branch, so that we don't have special cases for constants. This should
334   // only happen in rare constant-folding cases since asm.js sets the minimum
335   // heap size to cover accesses at constant addresses.
336   LAllocation baseAlloc = ins->needsBoundsCheck()
337                               ? useRegisterAtStart(base)
338                               : useRegisterOrZeroAtStart(base);
339 
340   LAllocation limitAlloc = ins->needsBoundsCheck()
341                                ? useRegisterAtStart(boundsCheckLimit)
342                                : LAllocation();
343   LAllocation memoryBaseAlloc = ins->hasMemoryBase()
344                                     ? useRegisterAtStart(ins->memoryBase())
345                                     : LAllocation();
346 
347   auto* lir =
348       new (alloc()) LAsmJSLoadHeap(baseAlloc, limitAlloc, memoryBaseAlloc);
349   define(lir, ins);
350 }
351 
352 void LIRGenerator::visitAsmJSStoreHeap(MAsmJSStoreHeap* ins) {
353   MDefinition* base = ins->base();
354   MOZ_ASSERT(base->type() == MIRType::Int32);
355 
356   MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
357   MOZ_ASSERT_IF(ins->needsBoundsCheck(),
358                 boundsCheckLimit->type() == MIRType::Int32);
359 
360   // For simplicity, require a register if we're going to emit a bounds-check
361   // branch, so that we don't have special cases for constants. This should
362   // only happen in rare constant-folding cases since asm.js sets the minimum
363   // heap size to cover accesses at constant addresses.
364   LAllocation baseAlloc = ins->needsBoundsCheck()
365                               ? useRegisterAtStart(base)
366                               : useRegisterOrZeroAtStart(base);
367 
368   LAllocation limitAlloc = ins->needsBoundsCheck()
369                                ? useRegisterAtStart(boundsCheckLimit)
370                                : LAllocation();
371   LAllocation memoryBaseAlloc = ins->hasMemoryBase()
372                                     ? useRegisterAtStart(ins->memoryBase())
373                                     : LAllocation();
374 
375   LAsmJSStoreHeap* lir = nullptr;
376   switch (ins->access().type()) {
377     case Scalar::Int8:
378     case Scalar::Uint8:
379 #ifdef JS_CODEGEN_X86
380       // See comment for LIRGeneratorX86::useByteOpRegister.
381       lir = new (alloc()) LAsmJSStoreHeap(
382           baseAlloc, useFixed(ins->value(), eax), limitAlloc, memoryBaseAlloc);
383       break;
384 #endif
385     case Scalar::Int16:
386     case Scalar::Uint16:
387     case Scalar::Int32:
388     case Scalar::Uint32:
389     case Scalar::Float32:
390     case Scalar::Float64:
391       // For now, don't allow constant values. The immediate operand affects
392       // instruction layout which affects patching.
393       lir = new (alloc())
394           LAsmJSStoreHeap(baseAlloc, useRegisterAtStart(ins->value()),
395                           limitAlloc, memoryBaseAlloc);
396       break;
397     case Scalar::Int64:
398     case Scalar::Simd128:
399       MOZ_CRASH("NYI");
400     case Scalar::Uint8Clamped:
401     case Scalar::BigInt64:
402     case Scalar::BigUint64:
403     case Scalar::MaxTypedArrayViewType:
404       MOZ_CRASH("unexpected array type");
405   }
406   add(lir, ins);
407 }
408 
409 void LIRGeneratorX86Shared::lowerUDiv(MDiv* div) {
410   if (div->rhs()->isConstant()) {
411     uint32_t rhs = div->rhs()->toConstant()->toInt32();
412     int32_t shift = FloorLog2(rhs);
413 
414     LAllocation lhs = useRegisterAtStart(div->lhs());
415     if (rhs != 0 && uint32_t(1) << shift == rhs) {
416       LDivPowTwoI* lir = new (alloc()) LDivPowTwoI(lhs, lhs, shift, false);
417       if (div->fallible()) {
418         assignSnapshot(lir, div->bailoutKind());
419       }
420       defineReuseInput(lir, div, 0);
421     } else {
422       LUDivOrModConstant* lir = new (alloc())
423           LUDivOrModConstant(useRegister(div->lhs()), rhs, tempFixed(eax));
424       if (div->fallible()) {
425         assignSnapshot(lir, div->bailoutKind());
426       }
427       defineFixed(lir, div, LAllocation(AnyRegister(edx)));
428     }
429     return;
430   }
431 
432   LUDivOrMod* lir = new (alloc()) LUDivOrMod(
433       useRegister(div->lhs()), useRegister(div->rhs()), tempFixed(edx));
434   if (div->fallible()) {
435     assignSnapshot(lir, div->bailoutKind());
436   }
437   defineFixed(lir, div, LAllocation(AnyRegister(eax)));
438 }
439 
440 void LIRGeneratorX86Shared::lowerUMod(MMod* mod) {
441   if (mod->rhs()->isConstant()) {
442     uint32_t rhs = mod->rhs()->toConstant()->toInt32();
443     int32_t shift = FloorLog2(rhs);
444 
445     if (rhs != 0 && uint32_t(1) << shift == rhs) {
446       LModPowTwoI* lir =
447           new (alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
448       if (mod->fallible()) {
449         assignSnapshot(lir, mod->bailoutKind());
450       }
451       defineReuseInput(lir, mod, 0);
452     } else {
453       LUDivOrModConstant* lir = new (alloc())
454           LUDivOrModConstant(useRegister(mod->lhs()), rhs, tempFixed(edx));
455       if (mod->fallible()) {
456         assignSnapshot(lir, mod->bailoutKind());
457       }
458       defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
459     }
460     return;
461   }
462 
463   LUDivOrMod* lir = new (alloc()) LUDivOrMod(
464       useRegister(mod->lhs()), useRegister(mod->rhs()), tempFixed(eax));
465   if (mod->fallible()) {
466     assignSnapshot(lir, mod->bailoutKind());
467   }
468   defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
469 }
470 
471 void LIRGeneratorX86Shared::lowerUrshD(MUrsh* mir) {
472   MDefinition* lhs = mir->lhs();
473   MDefinition* rhs = mir->rhs();
474 
475   MOZ_ASSERT(lhs->type() == MIRType::Int32);
476   MOZ_ASSERT(rhs->type() == MIRType::Int32);
477   MOZ_ASSERT(mir->type() == MIRType::Double);
478 
479 #ifdef JS_CODEGEN_X64
480   static_assert(ecx == rcx);
481 #endif
482 
483   // Without BMI2, x86 can only shift by ecx.
484   LUse lhsUse = useRegisterAtStart(lhs);
485   LAllocation rhsAlloc;
486   if (rhs->isConstant()) {
487     rhsAlloc = useOrConstant(rhs);
488   } else if (Assembler::HasBMI2()) {
489     rhsAlloc = useRegister(rhs);
490   } else {
491     rhsAlloc = useFixed(rhs, ecx);
492   }
493 
494   LUrshD* lir = new (alloc()) LUrshD(lhsUse, rhsAlloc, tempCopy(lhs, 0));
495   define(lir, mir);
496 }
497 
498 void LIRGeneratorX86Shared::lowerPowOfTwoI(MPow* mir) {
499   int32_t base = mir->input()->toConstant()->toInt32();
500   MDefinition* power = mir->power();
501 
502   // The shift count must be in register ecx unless BMI2 is available:
503   // x86 cannot use any other register as a variable shift count.
504   LAllocation powerAlloc =
505       Assembler::HasBMI2() ? useRegister(power) : useFixed(power, ecx);
506   auto* lir = new (alloc()) LPowOfTwoI(base, powerAlloc);
507   assignSnapshot(lir, mir->bailoutKind());
508   define(lir, mir);
509 }
510 
511 void LIRGeneratorX86Shared::lowerBigIntLsh(MBigIntLsh* ins) {
512   // The shift count must be in register ecx unless BMI2 is available:
513   // x86 cannot use any other register as a variable shift count.
514   LDefinition shiftAlloc = Assembler::HasBMI2() ? temp() : tempFixed(ecx);
515   auto* lir =
516       new (alloc()) LBigIntLsh(useRegister(ins->lhs()), useRegister(ins->rhs()),
517                                temp(), shiftAlloc, temp());
518   define(lir, ins);
519   assignSafepoint(lir, ins);
520 }
521 
522 void LIRGeneratorX86Shared::lowerBigIntRsh(MBigIntRsh* ins) {
523   // The shift count must be in register ecx unless BMI2 is available:
524   // x86 cannot use any other register as a variable shift count.
525   LDefinition shiftAlloc = Assembler::HasBMI2() ? temp() : tempFixed(ecx);
526   auto* lir =
527       new (alloc()) LBigIntRsh(useRegister(ins->lhs()), useRegister(ins->rhs()),
528                                temp(), shiftAlloc, temp());
529   define(lir, ins);
530   assignSafepoint(lir, ins);
531 }
532 
533 void LIRGeneratorX86Shared::lowerWasmBuiltinTruncateToInt32(
534     MWasmBuiltinTruncateToInt32* ins) {
535   MDefinition* opd = ins->input();
536   MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32);
537 
538   LDefinition maybeTemp =
539       Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempDouble();
540   if (opd->type() == MIRType::Double) {
541     define(new (alloc()) LWasmBuiltinTruncateDToInt32(
542                useRegister(opd), useFixed(ins->tls(), WasmTlsReg), maybeTemp),
543            ins);
544     return;
545   }
546 
547   define(new (alloc()) LWasmBuiltinTruncateFToInt32(
548              useRegister(opd), useFixed(ins->tls(), WasmTlsReg), maybeTemp),
549          ins);
550 }
551 
552 void LIRGeneratorX86Shared::lowerTruncateDToInt32(MTruncateToInt32* ins) {
553   MDefinition* opd = ins->input();
554   MOZ_ASSERT(opd->type() == MIRType::Double);
555 
556   LDefinition maybeTemp =
557       Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempDouble();
558   define(new (alloc()) LTruncateDToInt32(useRegister(opd), maybeTemp), ins);
559 }
560 
561 void LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32* ins) {
562   MDefinition* opd = ins->input();
563   MOZ_ASSERT(opd->type() == MIRType::Float32);
564 
565   LDefinition maybeTemp =
566       Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempFloat32();
567   define(new (alloc()) LTruncateFToInt32(useRegister(opd), maybeTemp), ins);
568 }
569 
570 void LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(
571     MCompareExchangeTypedArrayElement* ins, bool useI386ByteRegisters) {
572   MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
573   MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
574 
575   MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
576   MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
577 
578   const LUse elements = useRegister(ins->elements());
579   const LAllocation index =
580       useRegisterOrIndexConstant(ins->index(), ins->arrayType());
581 
582   // If the target is a floating register then we need a temp at the
583   // lower level; that temp must be eax.
584   //
585   // Otherwise the target (if used) is an integer register, which
586   // must be eax.  If the target is not used the machine code will
587   // still clobber eax, so just pretend it's used.
588   //
589   // oldval must be in a register.
590   //
591   // newval must be in a register.  If the source is a byte array
592   // then newval must be a register that has a byte size: on x86
593   // this must be ebx, ecx, or edx (eax is taken for the output).
594   //
595   // Bug #1077036 describes some further optimization opportunities.
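  // (For reference: CMPXCHG implicitly compares the memory operand against
  // eax and, when the compare fails, loads the memory value into eax, which
  // is why the output below is pinned to eax in the integer case.)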
596 
597   bool fixedOutput = false;
598   LDefinition tempDef = LDefinition::BogusTemp();
599   LAllocation newval;
600   if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
601     tempDef = tempFixed(eax);
602     newval = useRegister(ins->newval());
603   } else {
604     fixedOutput = true;
605     if (useI386ByteRegisters && ins->isByteArray()) {
606       newval = useFixed(ins->newval(), ebx);
607     } else {
608       newval = useRegister(ins->newval());
609     }
610   }
611 
612   const LAllocation oldval = useRegister(ins->oldval());
613 
614   LCompareExchangeTypedArrayElement* lir =
615       new (alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval,
616                                                       newval, tempDef);
617 
618   if (fixedOutput) {
619     defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
620   } else {
621     define(lir, ins);
622   }
623 }
624 
625 void LIRGeneratorX86Shared::lowerAtomicExchangeTypedArrayElement(
626     MAtomicExchangeTypedArrayElement* ins, bool useI386ByteRegisters) {
627   MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);
628 
629   MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
630   MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
631 
632   const LUse elements = useRegister(ins->elements());
633   const LAllocation index =
634       useRegisterOrIndexConstant(ins->index(), ins->arrayType());
635   const LAllocation value = useRegister(ins->value());
636 
637   // The underlying instruction is XCHG, which can operate on any
638   // register.
639   //
640   // If the target is a floating register (for Uint32) then we need
641   // a temp into which to exchange.
642   //
643   // If the source is a byte array then we need a register that has
644   // a byte size; in this case -- on x86 only -- pin the output to
645   // an appropriate register and use that as a temp in the back-end.
646 
647   LDefinition tempDef = LDefinition::BogusTemp();
648   if (ins->arrayType() == Scalar::Uint32) {
649     MOZ_ASSERT(ins->type() == MIRType::Double);
650     tempDef = temp();
651   }
652 
653   LAtomicExchangeTypedArrayElement* lir = new (alloc())
654       LAtomicExchangeTypedArrayElement(elements, index, value, tempDef);
655 
656   if (useI386ByteRegisters && ins->isByteArray()) {
657     defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
658   } else {
659     define(lir, ins);
660   }
661 }
662 
663 void LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(
664     MAtomicTypedArrayElementBinop* ins, bool useI386ByteRegisters) {
665   MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
666   MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
667   MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
668 
669   MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
670   MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
671 
672   const LUse elements = useRegister(ins->elements());
673   const LAllocation index =
674       useRegisterOrIndexConstant(ins->index(), ins->arrayType());
675 
676   // Case 1: the result of the operation is not used.
677   //
678   // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
679   // LOCK OR, or LOCK XOR.  We can do this even for the Uint32 case.
680 
681   if (ins->isForEffect()) {
682     LAllocation value;
683     if (useI386ByteRegisters && ins->isByteArray() &&
684         !ins->value()->isConstant()) {
685       value = useFixed(ins->value(), ebx);
686     } else {
687       value = useRegisterOrConstant(ins->value());
688     }
689 
690     LAtomicTypedArrayElementBinopForEffect* lir = new (alloc())
691         LAtomicTypedArrayElementBinopForEffect(elements, index, value);
692 
693     add(lir, ins);
694     return;
695   }
696 
697   // Case 2: the result of the operation is used.
698   //
699   // For ADD and SUB we'll use XADD:
700   //
701   //    movl       src, output
702   //    lock xaddl output, mem
703   //
704   // For the 8-bit variants XADD needs a byte register for the output.
705   //
706   // For AND/OR/XOR we need to use a CMPXCHG loop:
707   //
708   //    movl          *mem, eax
709   // L: mov           eax, temp
710   //    andl          src, temp
711   //    lock cmpxchg  temp, mem  ; reads eax also
712   //    jnz           L
713   //    ; result in eax
714   //
715   // Note the placement of L, cmpxchg will update eax with *mem if
716   // *mem does not have the expected value, so reloading it at the
717   // top of the loop would be redundant.
718   //
719   // If the array is not a uint32 array then:
720   //  - eax should be the output (one result of the cmpxchg)
721   //  - there is a temp, which must have a byte register if
722   //    the array has 1-byte elements
723   //
724   // If the array is a uint32 array then:
725   //  - eax is the first temp
726   //  - we also need a second temp
727   //
728   // There are optimization opportunities:
729   //  - better register allocation in the x86 8-bit case, Bug #1077036.
730 
731   bool bitOp = !(ins->operation() == AtomicFetchAddOp ||
732                  ins->operation() == AtomicFetchSubOp);
733   bool fixedOutput = true;
734   bool reuseInput = false;
735   LDefinition tempDef1 = LDefinition::BogusTemp();
736   LDefinition tempDef2 = LDefinition::BogusTemp();
737   LAllocation value;
738 
739   if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
740     value = useRegisterOrConstant(ins->value());
741     fixedOutput = false;
742     if (bitOp) {
743       tempDef1 = tempFixed(eax);
744       tempDef2 = temp();
745     } else {
746       tempDef1 = temp();
747     }
748   } else if (useI386ByteRegisters && ins->isByteArray()) {
749     if (ins->value()->isConstant()) {
750       value = useRegisterOrConstant(ins->value());
751     } else {
752       value = useFixed(ins->value(), ebx);
753     }
754     if (bitOp) {
755       tempDef1 = tempFixed(ecx);
756     }
757   } else if (bitOp) {
758     value = useRegisterOrConstant(ins->value());
759     tempDef1 = temp();
760   } else if (ins->value()->isConstant()) {
761     fixedOutput = false;
762     value = useRegisterOrConstant(ins->value());
763   } else {
764     fixedOutput = false;
765     reuseInput = true;
766     value = useRegisterAtStart(ins->value());
767   }
768 
769   LAtomicTypedArrayElementBinop* lir = new (alloc())
770       LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);
771 
772   if (fixedOutput) {
773     defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
774   } else if (reuseInput) {
775     defineReuseInput(lir, ins, LAtomicTypedArrayElementBinop::valueOp);
776   } else {
777     define(lir, ins);
778   }
779 }
780 
781 void LIRGenerator::visitCopySign(MCopySign* ins) {
782   MDefinition* lhs = ins->lhs();
783   MDefinition* rhs = ins->rhs();
784 
785   MOZ_ASSERT(IsFloatingPointType(lhs->type()));
786   MOZ_ASSERT(lhs->type() == rhs->type());
787   MOZ_ASSERT(lhs->type() == ins->type());
788 
789   LInstructionHelper<1, 2, 2>* lir;
790   if (lhs->type() == MIRType::Double) {
791     lir = new (alloc()) LCopySignD();
792   } else {
793     lir = new (alloc()) LCopySignF();
794   }
795 
796   // As lowerForFPU, but we want rhs to be in a FP register too.
797   lir->setOperand(0, useRegisterAtStart(lhs));
798   if (!Assembler::HasAVX()) {
799     lir->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
800                            ? useRegister(rhs)
801                            : useRegisterAtStart(rhs));
802     defineReuseInput(lir, ins, 0);
803   } else {
804     lir->setOperand(1, useRegisterAtStart(rhs));
805     define(lir, ins);
806   }
807 }
808 
809 // These lowerings are really x86-shared but some Masm APIs are not yet
810 // available on x86.
811 
812 // Ternary and binary operators require the dest register to be the same as
813 // their first input register, leading to a pattern of useRegisterAtStart +
814 // defineReuseInput.
815 
816 void LIRGenerator::visitWasmBitselectSimd128(MWasmBitselectSimd128* ins) {
817 #ifdef ENABLE_WASM_SIMD
818   MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
819   MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128);
820   MOZ_ASSERT(ins->control()->type() == MIRType::Simd128);
821   MOZ_ASSERT(ins->type() == MIRType::Simd128);
822 
823   // Enforcing lhs == output avoids one setup move.  We would like to also
824   // enforce merging the control with the temp (with useRegisterAtStart(control)
825   // and tempCopy()), but the register allocator ignores those constraints
826   // at present.
827 
828   auto* lir = new (alloc()) LWasmBitselectSimd128(
829       useRegisterAtStart(ins->lhs()), useRegister(ins->rhs()),
830       useRegister(ins->control()), tempSimd128());
831   defineReuseInput(lir, ins, LWasmBitselectSimd128::LhsDest);
832 #else
833   MOZ_CRASH("No SIMD");
834 #endif
835 }
836 
837 void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) {
838 #ifdef ENABLE_WASM_SIMD
839   MDefinition* lhs = ins->lhs();
840   MDefinition* rhs = ins->rhs();
841   wasm::SimdOp op = ins->simdOp();
842 
843   MOZ_ASSERT(lhs->type() == MIRType::Simd128);
844   MOZ_ASSERT(rhs->type() == MIRType::Simd128);
845   MOZ_ASSERT(ins->type() == MIRType::Simd128);
846 
847   // Note MWasmBinarySimd128::foldsTo has already specialized operations that
848   // have a constant operand, so this takes care of more general cases of
849   // reordering, see ReorderCommutative.
850   if (ins->isCommutative()) {
851     ReorderCommutative(&lhs, &rhs, ins);
852   }
853 
854   // Swap operands and change operation if necessary, these are all x86/x64
855   // dependent transformations.  Except where noted, this is about avoiding
856   // unnecessary moves and fixups in the code generator macros.
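  // (Example of such a transformation: SSE provides PCMPGT but no signed
  // less-than compare, so i8x16.lt_s(a, b) is emitted as i8x16.gt_s(b, a)
  // with the operands swapped below.)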
857   bool swap = false;
858   switch (op) {
859     case wasm::SimdOp::V128AndNot: {
860       // Code generation requires the operands to be reversed.
861       swap = true;
862       break;
863     }
864     case wasm::SimdOp::I8x16LtS: {
865       swap = true;
866       op = wasm::SimdOp::I8x16GtS;
867       break;
868     }
869     case wasm::SimdOp::I8x16GeS: {
870       swap = true;
871       op = wasm::SimdOp::I8x16LeS;
872       break;
873     }
874     case wasm::SimdOp::I16x8LtS: {
875       swap = true;
876       op = wasm::SimdOp::I16x8GtS;
877       break;
878     }
879     case wasm::SimdOp::I16x8GeS: {
880       swap = true;
881       op = wasm::SimdOp::I16x8LeS;
882       break;
883     }
884     case wasm::SimdOp::I32x4LtS: {
885       swap = true;
886       op = wasm::SimdOp::I32x4GtS;
887       break;
888     }
889     case wasm::SimdOp::I32x4GeS: {
890       swap = true;
891       op = wasm::SimdOp::I32x4LeS;
892       break;
893     }
894     case wasm::SimdOp::F32x4Gt: {
895       swap = true;
896       op = wasm::SimdOp::F32x4Lt;
897       break;
898     }
899     case wasm::SimdOp::F32x4Ge: {
900       swap = true;
901       op = wasm::SimdOp::F32x4Le;
902       break;
903     }
904     case wasm::SimdOp::F64x2Gt: {
905       swap = true;
906       op = wasm::SimdOp::F64x2Lt;
907       break;
908     }
909     case wasm::SimdOp::F64x2Ge: {
910       swap = true;
911       op = wasm::SimdOp::F64x2Le;
912       break;
913     }
914     case wasm::SimdOp::F32x4PMin:
915     case wasm::SimdOp::F32x4PMax:
916     case wasm::SimdOp::F64x2PMin:
917     case wasm::SimdOp::F64x2PMax: {
918       // Code generation requires the operations to be reversed (the rhs is the
919       // output register).
920       swap = true;
921       break;
922     }
923     default:
924       break;
925   }
926   if (swap) {
927     MDefinition* tmp = lhs;
928     lhs = rhs;
929     rhs = tmp;
930   }
931 
932   // Allocate temp registers
933   LDefinition tempReg0 = LDefinition::BogusTemp();
934   LDefinition tempReg1 = LDefinition::BogusTemp();
935   switch (op) {
936     case wasm::SimdOp::I64x2Mul:
937       tempReg0 = tempSimd128();
938       break;
939     case wasm::SimdOp::F32x4Min:
940     case wasm::SimdOp::F32x4Max:
941     case wasm::SimdOp::F64x2Min:
942     case wasm::SimdOp::F64x2Max:
943     case wasm::SimdOp::I64x2LtS:
944     case wasm::SimdOp::I64x2GtS:
945     case wasm::SimdOp::I64x2LeS:
946     case wasm::SimdOp::I64x2GeS:
947       tempReg0 = tempSimd128();
948       tempReg1 = tempSimd128();
949       break;
950     default:
951       break;
952   }
953 
954   // For binary ops, the Masm API is usually (rhs, lhsDest) and requires
955   // AtStart+ReuseInput for the lhs.
956   //
957   // For a few ops, the API is actually (rhsDest, lhs) and the rules are the
958   // same but reversed.  We swapped operands above; they will be swapped
959   // again in the code generator to emit the right code.
960 
961   LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
962   LAllocation rhsAlloc = willHaveDifferentLIRNodes(lhs, rhs)
963                              ? useRegister(rhs)
964                              : useRegisterAtStart(rhs);
965   auto* lir = new (alloc())
966       LWasmBinarySimd128(op, lhsDestAlloc, rhsAlloc, tempReg0, tempReg1);
967   defineReuseInput(lir, ins, LWasmBinarySimd128::LhsDest);
968 #else
969   MOZ_CRASH("No SIMD");
970 #endif
971 }
972 
973 #ifdef ENABLE_WASM_SIMD
974 bool MWasmBitselectSimd128::specializeConstantMaskAsShuffle(
975     int8_t shuffle[16]) {
976   // Optimization when control vector is a mask with all 0 or all 1 per lane.
977   // On x86, there is no bitselect, blend operations will be a win,
978   // e.g. via PBLENDVB or PBLENDW.
979   SimdConstant constant =
980       static_cast<MWasmFloatConstant*>(control())->toSimd128();
981   const SimdConstant::I8x16& bytes = constant.asInt8x16();
982   for (int8_t i = 0; i < 16; i++) {
983     if (bytes[i] == -1) {
984       shuffle[i] = i + 16;
985     } else if (bytes[i] == 0) {
986       shuffle[i] = i;
987     } else {
988       return false;
989     }
990   }
991   return true;
992 }
993 #endif
994 
995 bool MWasmBinarySimd128::specializeForConstantRhs() {
996   // The order follows MacroAssembler.h, generally
997   switch (simdOp()) {
998     // Operations implemented by a single native instruction where it is
999     // plausible that the rhs (after commutation if available) could be a
1000     // constant.
1001     //
1002     // Swizzle is not here because it was handled earlier in the pipeline.
1003     //
1004     // Integer compares >= and < are not here because they are not supported in
1005     // the hardware.
1006     //
1007     // Floating compares are not here because our patching machinery can't
1008     // handle them yet.
1009     //
1010     // Floating-point min and max (including pmin and pmax) are not here because
1011     // they are not straightforward to implement.
1012     case wasm::SimdOp::I8x16Add:
1013     case wasm::SimdOp::I16x8Add:
1014     case wasm::SimdOp::I32x4Add:
1015     case wasm::SimdOp::I64x2Add:
1016     case wasm::SimdOp::I8x16Sub:
1017     case wasm::SimdOp::I16x8Sub:
1018     case wasm::SimdOp::I32x4Sub:
1019     case wasm::SimdOp::I64x2Sub:
1020     case wasm::SimdOp::I16x8Mul:
1021     case wasm::SimdOp::I32x4Mul:
1022     case wasm::SimdOp::I8x16AddSaturateS:
1023     case wasm::SimdOp::I8x16AddSaturateU:
1024     case wasm::SimdOp::I16x8AddSaturateS:
1025     case wasm::SimdOp::I16x8AddSaturateU:
1026     case wasm::SimdOp::I8x16SubSaturateS:
1027     case wasm::SimdOp::I8x16SubSaturateU:
1028     case wasm::SimdOp::I16x8SubSaturateS:
1029     case wasm::SimdOp::I16x8SubSaturateU:
1030     case wasm::SimdOp::I8x16MinS:
1031     case wasm::SimdOp::I8x16MinU:
1032     case wasm::SimdOp::I16x8MinS:
1033     case wasm::SimdOp::I16x8MinU:
1034     case wasm::SimdOp::I32x4MinS:
1035     case wasm::SimdOp::I32x4MinU:
1036     case wasm::SimdOp::I8x16MaxS:
1037     case wasm::SimdOp::I8x16MaxU:
1038     case wasm::SimdOp::I16x8MaxS:
1039     case wasm::SimdOp::I16x8MaxU:
1040     case wasm::SimdOp::I32x4MaxS:
1041     case wasm::SimdOp::I32x4MaxU:
1042     case wasm::SimdOp::V128And:
1043     case wasm::SimdOp::V128Or:
1044     case wasm::SimdOp::V128Xor:
1045     case wasm::SimdOp::I8x16Eq:
1046     case wasm::SimdOp::I8x16Ne:
1047     case wasm::SimdOp::I8x16GtS:
1048     case wasm::SimdOp::I8x16LeS:
1049     case wasm::SimdOp::I16x8Eq:
1050     case wasm::SimdOp::I16x8Ne:
1051     case wasm::SimdOp::I16x8GtS:
1052     case wasm::SimdOp::I16x8LeS:
1053     case wasm::SimdOp::I32x4Eq:
1054     case wasm::SimdOp::I32x4Ne:
1055     case wasm::SimdOp::I32x4GtS:
1056     case wasm::SimdOp::I32x4LeS:
1057     case wasm::SimdOp::F32x4Eq:
1058     case wasm::SimdOp::F32x4Ne:
1059     case wasm::SimdOp::F32x4Lt:
1060     case wasm::SimdOp::F32x4Le:
1061     case wasm::SimdOp::F64x2Eq:
1062     case wasm::SimdOp::F64x2Ne:
1063     case wasm::SimdOp::F64x2Lt:
1064     case wasm::SimdOp::F64x2Le:
1065     case wasm::SimdOp::I32x4DotSI16x8:
1066     case wasm::SimdOp::F32x4Add:
1067     case wasm::SimdOp::F64x2Add:
1068     case wasm::SimdOp::F32x4Sub:
1069     case wasm::SimdOp::F64x2Sub:
1070     case wasm::SimdOp::F32x4Div:
1071     case wasm::SimdOp::F64x2Div:
1072     case wasm::SimdOp::F32x4Mul:
1073     case wasm::SimdOp::F64x2Mul:
1074     case wasm::SimdOp::I8x16NarrowSI16x8:
1075     case wasm::SimdOp::I8x16NarrowUI16x8:
1076     case wasm::SimdOp::I16x8NarrowSI32x4:
1077     case wasm::SimdOp::I16x8NarrowUI32x4:
1078       return true;
1079     default:
1080       return false;
1081   }
1082 }
1083 
1084 void LIRGenerator::visitWasmBinarySimd128WithConstant(
1085     MWasmBinarySimd128WithConstant* ins) {
1086 #ifdef ENABLE_WASM_SIMD
1087   MDefinition* lhs = ins->lhs();
1088 
1089   MOZ_ASSERT(lhs->type() == MIRType::Simd128);
1090   MOZ_ASSERT(ins->type() == MIRType::Simd128);
1091 
1092   // Always beneficial to reuse the lhs register here, see discussion in
1093   // visitWasmBinarySimd128() and also code in specializeForConstantRhs().
1094 
1095   LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
1096   auto* lir =
1097       new (alloc()) LWasmBinarySimd128WithConstant(lhsDestAlloc, ins->rhs());
1098   defineReuseInput(lir, ins, LWasmBinarySimd128WithConstant::LhsDest);
1099 #else
1100   MOZ_CRASH("No SIMD");
1101 #endif
1102 }
1103 
1104 void LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) {
1105 #ifdef ENABLE_WASM_SIMD
1106   MDefinition* lhs = ins->lhs();
1107   MDefinition* rhs = ins->rhs();
1108 
1109   MOZ_ASSERT(lhs->type() == MIRType::Simd128);
1110   MOZ_ASSERT(rhs->type() == MIRType::Int32);
1111   MOZ_ASSERT(ins->type() == MIRType::Simd128);
1112 
1113   if (rhs->isConstant()) {
1114     int32_t shiftCountMask;
1115     switch (ins->simdOp()) {
1116       case wasm::SimdOp::I8x16Shl:
1117       case wasm::SimdOp::I8x16ShrU:
1118       case wasm::SimdOp::I8x16ShrS:
1119         shiftCountMask = 7;
1120         break;
1121       case wasm::SimdOp::I16x8Shl:
1122       case wasm::SimdOp::I16x8ShrU:
1123       case wasm::SimdOp::I16x8ShrS:
1124         shiftCountMask = 15;
1125         break;
1126       case wasm::SimdOp::I32x4Shl:
1127       case wasm::SimdOp::I32x4ShrU:
1128       case wasm::SimdOp::I32x4ShrS:
1129         shiftCountMask = 31;
1130         break;
1131       case wasm::SimdOp::I64x2Shl:
1132       case wasm::SimdOp::I64x2ShrU:
1133       case wasm::SimdOp::I64x2ShrS:
1134         shiftCountMask = 63;
1135         break;
1136       default:
1137         MOZ_CRASH("Unexpected shift operation");
1138     }
1139 
1140     int32_t shiftCount = rhs->toConstant()->toInt32() & shiftCountMask;
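    // (Per wasm semantics the count is taken modulo the lane width in bits,
    // e.g. an I32x4 shift by 35 is a shift by 35 & 31 == 3.)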
1141     if (shiftCount == shiftCountMask) {
1142       // Check whether the sign-replication optimization applies.
1143       // For some ops the input is reused.
1144       switch (ins->simdOp()) {
1145         case wasm::SimdOp::I8x16ShrS: {
1146           auto* lir =
1147               new (alloc()) LWasmSignReplicationSimd128(useRegister(lhs));
1148           define(lir, ins);
1149           return;
1150         }
1151         case wasm::SimdOp::I16x8ShrS:
1152         case wasm::SimdOp::I32x4ShrS:
1153         case wasm::SimdOp::I64x2ShrS: {
1154           auto* lir = new (alloc())
1155               LWasmSignReplicationSimd128(useRegisterAtStart(lhs));
1156           defineReuseInput(lir, ins, LWasmConstantShiftSimd128::Src);
1157           return;
1158         }
1159         default:
1160           break;
1161       }
1162     }
1163 
1164 #  ifdef DEBUG
1165     js::wasm::ReportSimdAnalysis("shift -> constant shift");
1166 #  endif
1167     // Almost always beneficial, and never detrimental, to reuse the input if
1168     // possible.
1169     auto* lir = new (alloc())
1170         LWasmConstantShiftSimd128(useRegisterAtStart(lhs), shiftCount);
1171     defineReuseInput(lir, ins, LWasmConstantShiftSimd128::Src);
1172     return;
1173   }
1174 
1175 #  ifdef DEBUG
1176   js::wasm::ReportSimdAnalysis("shift -> variable shift");
1177 #  endif
1178 
1179   LDefinition tempReg0 = LDefinition::BogusTemp();
1180   LDefinition tempReg1 = LDefinition::BogusTemp();
1181   switch (ins->simdOp()) {
1182     case wasm::SimdOp::I8x16Shl:
1183     case wasm::SimdOp::I8x16ShrS:
1184     case wasm::SimdOp::I8x16ShrU:
1185     case wasm::SimdOp::I64x2ShrS:
1186       tempReg0 = temp();
1187       tempReg1 = tempSimd128();
1188       break;
1189     default:
1190       tempReg0 = temp();
1191       break;
1192   }
1193 
1194   // Reusing the input if possible is never detrimental.
1195   LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
1196   LAllocation rhsAlloc = useRegisterAtStart(rhs);
1197   auto* lir = new (alloc())
1198       LWasmVariableShiftSimd128(lhsDestAlloc, rhsAlloc, tempReg0, tempReg1);
1199   defineReuseInput(lir, ins, LWasmVariableShiftSimd128::LhsDest);
1200 #else
1201   MOZ_CRASH("No SIMD");
1202 #endif
1203 }
1204 
1205 void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
1206 #ifdef ENABLE_WASM_SIMD
1207   MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
1208   MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128);
1209   MOZ_ASSERT(ins->type() == MIRType::Simd128);
1210 
1211   Shuffle s = AnalyzeShuffle(ins);
1212 #  ifdef DEBUG
1213   ReportShuffleSpecialization(s);
1214 #  endif
1215   switch (s.opd) {
1216     case Shuffle::Operand::LEFT:
1217     case Shuffle::Operand::RIGHT: {
1218       LAllocation src;
1219       // All permute operators currently favor reusing the input register so
1220       // we're not currently exercising code paths below that do not reuse.
1221       // Those paths have been exercised in the past however and are believed
1222       // to be correct.
1223       bool useAtStartAndReuse = false;
1224       switch (*s.permuteOp) {
1225         case LWasmPermuteSimd128::MOVE:
1226         case LWasmPermuteSimd128::BROADCAST_8x16:
1227         case LWasmPermuteSimd128::BROADCAST_16x8:
1228         case LWasmPermuteSimd128::PERMUTE_8x16:
1229         case LWasmPermuteSimd128::PERMUTE_16x8:
1230         case LWasmPermuteSimd128::PERMUTE_32x4:
1231         case LWasmPermuteSimd128::ROTATE_RIGHT_8x16:
1232         case LWasmPermuteSimd128::SHIFT_LEFT_8x16:
1233         case LWasmPermuteSimd128::SHIFT_RIGHT_8x16:
1234           useAtStartAndReuse = true;
1235           break;
1236         default:
1237           MOZ_CRASH("Unexpected operator");
1238       }
1239       if (s.opd == Shuffle::Operand::LEFT) {
1240         if (useAtStartAndReuse) {
1241           src = useRegisterAtStart(ins->lhs());
1242         } else {
1243           src = useRegister(ins->lhs());
1244         }
1245       } else {
1246         if (useAtStartAndReuse) {
1247           src = useRegisterAtStart(ins->rhs());
1248         } else {
1249           src = useRegister(ins->rhs());
1250         }
1251       }
1252       auto* lir =
1253           new (alloc()) LWasmPermuteSimd128(src, *s.permuteOp, s.control);
1254       if (useAtStartAndReuse) {
1255         defineReuseInput(lir, ins, LWasmPermuteSimd128::Src);
1256       } else {
1257         define(lir, ins);
1258       }
1259       break;
1260     }
1261     case Shuffle::Operand::BOTH:
1262     case Shuffle::Operand::BOTH_SWAPPED: {
1263       LDefinition temp = LDefinition::BogusTemp();
1264       switch (*s.shuffleOp) {
1265         case LWasmShuffleSimd128::BLEND_8x16:
1266           temp = tempFixed(xmm0);
1267           break;
1268         default:
1269           break;
1270       }
1271       LAllocation lhs;
1272       LAllocation rhs;
1273       if (s.opd == Shuffle::Operand::BOTH) {
1274         lhs = useRegisterAtStart(ins->lhs());
1275         rhs = useRegister(ins->rhs());
1276       } else {
1277         lhs = useRegisterAtStart(ins->rhs());
1278         rhs = useRegister(ins->lhs());
1279       }
1280       auto* lir = new (alloc())
1281           LWasmShuffleSimd128(lhs, rhs, temp, *s.shuffleOp, s.control);
1282       defineReuseInput(lir, ins, LWasmShuffleSimd128::LhsDest);
1283       break;
1284     }
1285   }
1286 #else
1287   MOZ_CRASH("No SIMD");
1288 #endif
1289 }
1290 
1291 void LIRGenerator::visitWasmReplaceLaneSimd128(MWasmReplaceLaneSimd128* ins) {
1292 #ifdef ENABLE_WASM_SIMD
1293   MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
1294   MOZ_ASSERT(ins->type() == MIRType::Simd128);
1295 
1296   // The Masm API is (rhs, lhsDest) and requires AtStart+ReuseInput for the lhs.
1297   // For type reasons, the rhs will never be the same as the lhs and is
1298   // therefore a plain Use.
1299 
1300   if (ins->rhs()->type() == MIRType::Int64) {
1301     auto* lir = new (alloc()) LWasmReplaceInt64LaneSimd128(
1302         useRegisterAtStart(ins->lhs()), useInt64Register(ins->rhs()));
1303     defineReuseInput(lir, ins, LWasmReplaceInt64LaneSimd128::LhsDest);
1304   } else {
1305     auto* lir = new (alloc()) LWasmReplaceLaneSimd128(
1306         useRegisterAtStart(ins->lhs()), useRegister(ins->rhs()));
1307     defineReuseInput(lir, ins, LWasmReplaceLaneSimd128::LhsDest);
1308   }
1309 #else
1310   MOZ_CRASH("No SIMD");
1311 #endif
1312 }
1313 
1314 void LIRGenerator::visitWasmScalarToSimd128(MWasmScalarToSimd128* ins) {
1315 #ifdef ENABLE_WASM_SIMD
1316   MOZ_ASSERT(ins->type() == MIRType::Simd128);
1317 
1318   switch (ins->input()->type()) {
1319     case MIRType::Int64: {
1320       // 64-bit integer splats.
1321       // Load-and-(sign|zero)extend.
1322       auto* lir = new (alloc())
1323           LWasmInt64ToSimd128(useInt64RegisterAtStart(ins->input()));
1324       define(lir, ins);
1325       break;
1326     }
1327     case MIRType::Float32:
1328     case MIRType::Double: {
1329       // Floating-point splats.
1330       // Ideally we save a move on SSE systems by reusing the input register,
1331       // but since the input and output register types differ, we can't.
1332       auto* lir =
1333           new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input()));
1334       define(lir, ins);
1335       break;
1336     }
1337     default: {
1338       // 32-bit integer splats.
1339       auto* lir =
1340           new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input()));
1341       define(lir, ins);
1342       break;
1343     }
1344   }
1345 #else
1346   MOZ_CRASH("No SIMD");
1347 #endif
1348 }
1349 
1350 void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
1351 #ifdef ENABLE_WASM_SIMD
1352   MOZ_ASSERT(ins->input()->type() == MIRType::Simd128);
1353   MOZ_ASSERT(ins->type() == MIRType::Simd128);
1354 
1355   bool useAtStart = false;
1356   bool reuseInput = false;
1357   LDefinition tempReg = LDefinition::BogusTemp();
1358   switch (ins->simdOp()) {
1359     case wasm::SimdOp::I8x16Neg:
1360     case wasm::SimdOp::I16x8Neg:
1361     case wasm::SimdOp::I32x4Neg:
1362     case wasm::SimdOp::I64x2Neg:
1363       // Prefer src != dest to avoid an unconditional src->temp move.
1364       MOZ_ASSERT(!useAtStart && !reuseInput);
1365       break;
1366     case wasm::SimdOp::F32x4Neg:
1367     case wasm::SimdOp::F64x2Neg:
1368     case wasm::SimdOp::F32x4Abs:
1369     case wasm::SimdOp::F64x2Abs:
1370     case wasm::SimdOp::V128Not:
1371     case wasm::SimdOp::F32x4Sqrt:
1372     case wasm::SimdOp::F64x2Sqrt:
1373     case wasm::SimdOp::I8x16Abs:
1374     case wasm::SimdOp::I16x8Abs:
1375     case wasm::SimdOp::I32x4Abs:
1376     case wasm::SimdOp::I64x2Abs:
1377     case wasm::SimdOp::I32x4TruncSSatF32x4:
1378     case wasm::SimdOp::F32x4ConvertUI32x4:
1379     case wasm::SimdOp::I16x8ExtAddPairwiseI8x16S:
1380     case wasm::SimdOp::I16x8ExtAddPairwiseI8x16U:
1381     case wasm::SimdOp::I32x4ExtAddPairwiseI16x8S:
1382     case wasm::SimdOp::I32x4ExtAddPairwiseI16x8U:
1383       // Prefer src == dest to avoid an unconditional src->dest move.
1384       useAtStart = true;
1385       reuseInput = true;
1386       break;
1387     case wasm::SimdOp::I32x4TruncUSatF32x4:
1388     case wasm::SimdOp::I32x4TruncSatF64x2SZero:
1389     case wasm::SimdOp::I32x4TruncSatF64x2UZero:
1390     case wasm::SimdOp::I8x16Popcnt:
1391       tempReg = tempSimd128();
1392       // Prefer src == dest to avoid an unconditional src->dest move.
1393       useAtStart = true;
1394       reuseInput = true;
1395       break;
1396     case wasm::SimdOp::I16x8WidenLowSI8x16:
1397     case wasm::SimdOp::I16x8WidenHighSI8x16:
1398     case wasm::SimdOp::I16x8WidenLowUI8x16:
1399     case wasm::SimdOp::I16x8WidenHighUI8x16:
1400     case wasm::SimdOp::I32x4WidenLowSI16x8:
1401     case wasm::SimdOp::I32x4WidenHighSI16x8:
1402     case wasm::SimdOp::I32x4WidenLowUI16x8:
1403     case wasm::SimdOp::I32x4WidenHighUI16x8:
1404     case wasm::SimdOp::I64x2WidenLowSI32x4:
1405     case wasm::SimdOp::I64x2WidenHighSI32x4:
1406     case wasm::SimdOp::I64x2WidenLowUI32x4:
1407     case wasm::SimdOp::I64x2WidenHighUI32x4:
1408     case wasm::SimdOp::F32x4ConvertSI32x4:
1409     case wasm::SimdOp::F32x4Ceil:
1410     case wasm::SimdOp::F32x4Floor:
1411     case wasm::SimdOp::F32x4Trunc:
1412     case wasm::SimdOp::F32x4Nearest:
1413     case wasm::SimdOp::F64x2Ceil:
1414     case wasm::SimdOp::F64x2Floor:
1415     case wasm::SimdOp::F64x2Trunc:
1416     case wasm::SimdOp::F64x2Nearest:
1417     case wasm::SimdOp::F32x4DemoteF64x2Zero:
1418     case wasm::SimdOp::F64x2PromoteLowF32x4:
1419     case wasm::SimdOp::F64x2ConvertLowI32x4S:
1420     case wasm::SimdOp::F64x2ConvertLowI32x4U:
1421       // Prefer src == dest to exert the lowest register pressure on the
1422       // surrounding code.
1423       useAtStart = true;
1424       MOZ_ASSERT(!reuseInput);
1425       break;
1426     default:
1427       MOZ_CRASH("Unary SimdOp not implemented");
1428   }
1429 
1430   LUse inputUse =
1431       useAtStart ? useRegisterAtStart(ins->input()) : useRegister(ins->input());
1432   LWasmUnarySimd128* lir = new (alloc()) LWasmUnarySimd128(inputUse, tempReg);
1433   if (reuseInput) {
1434     defineReuseInput(lir, ins, LWasmUnarySimd128::Src);
1435   } else {
1436     define(lir, ins);
1437   }
1438 #else
1439   MOZ_CRASH("No SIMD");
1440 #endif
1441 }
1442 
1443 void LIRGenerator::visitWasmLoadLaneSimd128(MWasmLoadLaneSimd128* ins) {
1444 #ifdef ENABLE_WASM_SIMD
1445   LUse base = useRegisterAtStart(ins->base());
1446   LUse inputUse = useRegisterAtStart(ins->value());
1447   LAllocation memoryBase = ins->hasMemoryBase()
1448                                ? useRegisterAtStart(ins->memoryBase())
1449                                : LAllocation();
1450   LWasmLoadLaneSimd128* lir = new (alloc()) LWasmLoadLaneSimd128(
1451       base, inputUse, LDefinition::BogusTemp(), memoryBase);
1452   defineReuseInput(lir, ins, LWasmLoadLaneSimd128::Src);
1453 #else
1454   MOZ_CRASH("No SIMD");
1455 #endif
1456 }
1457 
1458 void LIRGenerator::visitWasmStoreLaneSimd128(MWasmStoreLaneSimd128* ins) {
1459 #ifdef ENABLE_WASM_SIMD
1460   LUse base = useRegisterAtStart(ins->base());
1461   LUse input = useRegisterAtStart(ins->value());
1462   LAllocation memoryBase = ins->hasMemoryBase()
1463                                ? useRegisterAtStart(ins->memoryBase())
1464                                : LAllocation();
1465   LWasmStoreLaneSimd128* lir = new (alloc())
1466       LWasmStoreLaneSimd128(base, input, LDefinition::BogusTemp(), memoryBase);
1467   add(lir, ins);
1468 #else
1469   MOZ_CRASH("No SIMD");
1470 #endif
1471 }
1472 
1473 #ifdef ENABLE_WASM_SIMD
1474 
1475 bool LIRGeneratorX86Shared::canFoldReduceSimd128AndBranch(wasm::SimdOp op) {
1476   switch (op) {
1477     case wasm::SimdOp::V128AnyTrue:
1478     case wasm::SimdOp::I8x16AllTrue:
1479     case wasm::SimdOp::I16x8AllTrue:
1480     case wasm::SimdOp::I32x4AllTrue:
1481     case wasm::SimdOp::I64x2AllTrue:
1482     case wasm::SimdOp::I16x8Bitmask:
1483       return true;
1484     default:
1485       return false;
1486   }
1487 }
1488 
1489 bool LIRGeneratorX86Shared::canEmitWasmReduceSimd128AtUses(
1490     MWasmReduceSimd128* ins) {
1491   if (!ins->canEmitAtUses()) {
1492     return false;
1493   }
1494   // Only specific ops generating int32.
1495   if (ins->type() != MIRType::Int32) {
1496     return false;
1497   }
1498   if (!canFoldReduceSimd128AndBranch(ins->simdOp())) {
1499     return false;
1500   }
1501   // If never used then defer (it will be removed).
1502   MUseIterator iter(ins->usesBegin());
1503   if (iter == ins->usesEnd()) {
1504     return true;
1505   }
1506   // We require an MTest consumer.
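  // (For example, the boolean result of `if (v128.any_true(x))` can then be
  // folded directly into the branch when the test is emitted, instead of
  // being materialized in a register first.)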
1507   MNode* node = iter->consumer();
1508   if (!node->isDefinition() || !node->toDefinition()->isTest()) {
1509     return false;
1510   }
1511   // Defer only if there's only one use.
1512   iter++;
1513   return iter == ins->usesEnd();
1514 }
1515 
1516 #endif  // ENABLE_WASM_SIMD
1517 
1518 void LIRGenerator::visitWasmReduceSimd128(MWasmReduceSimd128* ins) {
1519 #ifdef ENABLE_WASM_SIMD
1520   if (canEmitWasmReduceSimd128AtUses(ins)) {
1521     emitAtUses(ins);
1522     return;
1523   }
1524 
1525   // Reductions (any_true, all_true, bitmask, extract_lane) uniformly prefer
1526   // useRegisterAtStart:
1527   //
1528   // - In most cases, the input type differs from the output type, so there's no
1529   //   conflict and it doesn't really matter.
1530   //
1531   // - For extract_lane(0) on F32x4 and F64x2, input == output results in zero
1532   //   code being generated.
1533   //
1534   // - For extract_lane(k > 0) on F32x4 and F64x2, allowing the input register
1535   //   to be targeted lowers register pressure if it's the last use of the
1536   //   input.
1537 
1538   if (ins->type() == MIRType::Int64) {
1539     auto* lir = new (alloc())
1540         LWasmReduceSimd128ToInt64(useRegisterAtStart(ins->input()));
1541     defineInt64(lir, ins);
1542   } else {
1543     // Ideally we would reuse the input register for floating extract_lane if
1544     // the lane is zero, but constraints in the register allocator require the
1545     // input and output register types to be the same.
1546     auto* lir = new (alloc()) LWasmReduceSimd128(
1547         useRegisterAtStart(ins->input()), LDefinition::BogusTemp());
1548     define(lir, ins);
1549   }
1550 #else
1551   MOZ_CRASH("No SIMD");
1552 #endif
1553 }
1554