1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2  * vim: set ts=8 sts=2 et sw=2 tw=80:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "jit/x64/Lowering-x64.h"
8 
9 #include "jit/Lowering.h"
10 #include "jit/MIR.h"
11 #include "jit/x64/Assembler-x64.h"
12 
13 #include "jit/shared/Lowering-shared-inl.h"
14 
15 using namespace js;
16 using namespace js::jit;
17 
18 using mozilla::Maybe;
19 using mozilla::Nothing;
20 using mozilla::Some;
21 
// Return a box allocation pinning |mir|'s Value to the fixed register |reg1|.
// On x64 a boxed Value fits in one 64-bit register, so only a single LUse is
// needed; the unnamed second Register parameter exists solely to keep the
// signature uniform with 32-bit platforms, which need a register pair.
LBoxAllocation LIRGeneratorX64::useBoxFixed(MDefinition* mir, Register reg1,
                                            Register, bool useAtStart) {
  MOZ_ASSERT(mir->type() == MIRType::Value);

  ensureDefined(mir);
  return LBoxAllocation(LUse(reg1, mir->virtualRegister(), useAtStart));
}
29 
// On x64 every GPR has an addressable low byte, so a byte-sized operation can
// use any register; this is just a plain register use.
LAllocation LIRGeneratorX64::useByteOpRegister(MDefinition* mir) {
  return useRegister(mir);
}
33 
// At-start variant of useByteOpRegister; no byte-register restriction on x64.
LAllocation LIRGeneratorX64::useByteOpRegisterAtStart(MDefinition* mir) {
  return useRegisterAtStart(mir);
}
37 
// Like useByteOpRegister, but allows the operand to be folded as a (non-double)
// constant; again no byte-register restriction applies on x64.
LAllocation LIRGeneratorX64::useByteOpRegisterOrNonDoubleConstant(
    MDefinition* mir) {
  return useRegisterOrNonDoubleConstant(mir);
}
42 
// Any temp register works for byte ops on x64 (no ax/bx/cx/dx restriction).
LDefinition LIRGeneratorX64::tempByteOpRegister() { return temp(); }
44 
// A plain GPR temp suffices to hold an unboxed payload on x64.
LDefinition LIRGeneratorX64::tempToUnbox() { return temp(); }
46 
// Lower a 64-bit ALU operation.  x86-family ALU instructions are two-address,
// so the lhs is used at-start and the output reuses it.  The rhs may be a
// constant; when lhs == rhs, the rhs use must also be at-start so that both
// uses resolve to the same allocation.
void LIRGeneratorX64::lowerForALUInt64(
    LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
    MDefinition* mir, MDefinition* lhs, MDefinition* rhs) {
  ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
  ins->setInt64Operand(INT64_PIECES, lhs != rhs
                                         ? useInt64OrConstant(rhs)
                                         : useInt64OrConstantAtStart(rhs));
  defineInt64ReuseInput(ins, mir, 0);
}
56 
// Lower a 64-bit multiply.  Same two-address shape as lowerForALUInt64: lhs
// is reused as the output, rhs may be a constant.
void LIRGeneratorX64::lowerForMulInt64(LMulI64* ins, MMul* mir,
                                       MDefinition* lhs, MDefinition* rhs) {
  // X64 doesn't need a temp for 64bit multiplication.
  ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
  ins->setInt64Operand(INT64_PIECES, lhs != rhs
                                         ? useInt64OrConstant(rhs)
                                         : useInt64OrConstantAtStart(rhs));
  defineInt64ReuseInput(ins, mir, 0);
}
66 
// Lower an MBox: produce a boxed Value from a typed payload.
void LIRGenerator::visitBox(MBox* box) {
  MDefinition* opd = box->getOperand(0);

  // If the operand is a constant, emit near its uses.
  if (opd->isConstant() && box->canEmitAtUses()) {
    emitAtUses(box);
    return;
  }

  if (opd->isConstant()) {
    // Materialize the constant Value directly; no input register needed.
    define(new (alloc()) LValue(opd->toConstant()->toJSValue()), box,
           LDefinition(LDefinition::BOX));
  } else {
    LBox* ins = new (alloc()) LBox(useRegister(opd), opd->type());
    define(ins, box, LDefinition(LDefinition::BOX));
  }
}
84 
// Lower an MUnbox: extract a typed payload from a boxed Value (or from an
// ObjectOrNull word).  Fallible unboxes get a snapshot so they can bail out.
void LIRGenerator::visitUnbox(MUnbox* unbox) {
  MDefinition* box = unbox->getOperand(0);

  if (box->type() == MIRType::ObjectOrNull) {
    LUnboxObjectOrNull* lir =
        new (alloc()) LUnboxObjectOrNull(useRegisterAtStart(box));
    if (unbox->fallible()) {
      assignSnapshot(lir, unbox->bailoutKind());
    }
    // The input word already holds the object pointer (or null), so the
    // output can reuse the input register.
    defineReuseInput(lir, unbox, 0);
    return;
  }

  MOZ_ASSERT(box->type() == MIRType::Value);

  LUnboxBase* lir;
  if (IsFloatingPointType(unbox->type())) {
    lir = new (alloc())
        LUnboxFloatingPoint(useRegisterAtStart(box), unbox->type());
  } else if (unbox->fallible()) {
    // If the unbox is fallible, load the Value in a register first to
    // avoid multiple loads.
    lir = new (alloc()) LUnbox(useRegisterAtStart(box));
  } else {
    lir = new (alloc()) LUnbox(useAtStart(box));
  }

  if (unbox->fallible()) {
    assignSnapshot(lir, unbox->bailoutKind());
  }

  define(lir, unbox);
}
118 
// Lower a return: the boxed return Value must live in JSReturnReg per the
// JIT calling convention.
void LIRGenerator::visitReturn(MReturn* ret) {
  MDefinition* opd = ret->getOperand(0);
  MOZ_ASSERT(opd->type() == MIRType::Value);

  LReturn* ins = new (alloc()) LReturn;
  ins->setOperand(0, useFixed(opd, JSReturnReg));
  add(ins);
}
127 
// A boxed Value is a single 64-bit word on x64, so an untyped phi input
// lowers exactly like a typed one.
void LIRGeneratorX64::lowerUntypedPhiInput(MPhi* phi, uint32_t inputPosition,
                                           LBlock* block, size_t lirIndex) {
  lowerTypedPhiInput(phi, inputPosition, block, lirIndex);
}
132 
// Int64 occupies a single register on x64, so an int64 phi is a typed phi.
void LIRGeneratorX64::defineInt64Phi(MPhi* phi, size_t lirIndex) {
  defineTypedPhi(phi, lirIndex);
}
136 
// See defineInt64Phi: int64 phi inputs need no special handling on x64.
void LIRGeneratorX64::lowerInt64PhiInput(MPhi* phi, uint32_t inputPosition,
                                         LBlock* block, size_t lirIndex) {
  lowerTypedPhiInput(phi, inputPosition, block, lirIndex);
}
141 
// Delegate to the shared lowering; x64 has no i386-style byte-register
// constraint, hence useI386ByteRegisters = false.
void LIRGenerator::visitCompareExchangeTypedArrayElement(
    MCompareExchangeTypedArrayElement* ins) {
  lowerCompareExchangeTypedArrayElement(ins,
                                        /* useI386ByteRegisters = */ false);
}
147 
// Shared lowering; no byte-register restriction on x64.
void LIRGenerator::visitAtomicExchangeTypedArrayElement(
    MAtomicExchangeTypedArrayElement* ins) {
  lowerAtomicExchangeTypedArrayElement(ins, /* useI386ByteRegisters = */ false);
}
152 
// Shared lowering; no byte-register restriction on x64.
void LIRGenerator::visitAtomicTypedArrayElementBinop(
    MAtomicTypedArrayElementBinop* ins) {
  lowerAtomicTypedArrayElementBinop(ins, /* useI386ByteRegisters = */ false);
}
157 
// Lower uint32 -> double conversion for wasm.
void LIRGenerator::visitWasmUnsignedToDouble(MWasmUnsignedToDouble* ins) {
  MOZ_ASSERT(ins->input()->type() == MIRType::Int32);
  LWasmUint32ToDouble* lir =
      new (alloc()) LWasmUint32ToDouble(useRegisterAtStart(ins->input()));
  define(lir, ins);
}
164 
// Lower uint32 -> float32 conversion for wasm.
void LIRGenerator::visitWasmUnsignedToFloat32(MWasmUnsignedToFloat32* ins) {
  MOZ_ASSERT(ins->input()->type() == MIRType::Int32);
  LWasmUint32ToFloat32* lir =
      new (alloc()) LWasmUint32ToFloat32(useRegisterAtStart(ins->input()));
  define(lir, ins);
}
171 
// Produce the wasm heap base pointer.  The empty LAllocation means the
// instruction needs no input; codegen reads the base from instance state.
void LIRGenerator::visitWasmHeapBase(MWasmHeapBase* ins) {
  auto* lir = new (alloc()) LWasmHeapBase(LAllocation());
  define(lir, ins);
}
176 
// Lower a wasm memory load.  Int64 results use the dedicated LWasmLoadI64;
// everything else goes through LWasmLoad.  A constant-zero base is allowed
// (useRegisterOrZeroAtStart) since it folds into the addressing mode.
void LIRGenerator::visitWasmLoad(MWasmLoad* ins) {
  MDefinition* base = ins->base();
  MOZ_ASSERT(base->type() == MIRType::Int32);

  if (ins->type() != MIRType::Int64) {
    auto* lir = new (alloc()) LWasmLoad(useRegisterOrZeroAtStart(base));
    define(lir, ins);
    return;
  }

  auto* lir = new (alloc()) LWasmLoadI64(useRegisterOrZeroAtStart(base));
  defineInt64(lir, ins);
}
190 
// Lower a wasm memory store.  The value allocation depends on the access
// type: small integers may be immediates, int64 only if it fits (no 64-bit
// immediate-to-memory encoding), and floats/SIMD always need a register.
void LIRGenerator::visitWasmStore(MWasmStore* ins) {
  MDefinition* base = ins->base();
  MOZ_ASSERT(base->type() == MIRType::Int32);

  MDefinition* value = ins->value();
  LAllocation valueAlloc;
  switch (ins->access().type()) {
    case Scalar::Int8:
    case Scalar::Uint8:
    case Scalar::Int16:
    case Scalar::Uint16:
    case Scalar::Int32:
    case Scalar::Uint32:
      valueAlloc = useRegisterOrConstantAtStart(value);
      break;
    case Scalar::Int64:
      // No way to encode an int64-to-memory move on x64.
      if (value->isConstant() && value->type() != MIRType::Int64) {
        valueAlloc = useOrConstantAtStart(value);
      } else {
        valueAlloc = useRegisterAtStart(value);
      }
      break;
    case Scalar::Float32:
    case Scalar::Float64:
      valueAlloc = useRegisterAtStart(value);
      break;
    case Scalar::Simd128:
#ifdef ENABLE_WASM_SIMD
      valueAlloc = useRegisterAtStart(value);
      break;
#else
      MOZ_CRASH("unexpected array type");
#endif
    case Scalar::BigInt64:
    case Scalar::BigUint64:
    case Scalar::Uint8Clamped:
    case Scalar::MaxTypedArrayViewType:
      MOZ_CRASH("unexpected array type");
  }

  LAllocation baseAlloc = useRegisterOrZeroAtStart(base);
  auto* lir = new (alloc()) LWasmStore(baseAlloc, valueAlloc);
  add(lir, ins);
}
236 
// Lower wasm CMPXCHG on the heap.  CMPXCHG implicitly uses and writes eax,
// so the output is pinned there.
void LIRGenerator::visitWasmCompareExchangeHeap(MWasmCompareExchangeHeap* ins) {
  MDefinition* base = ins->base();
  MOZ_ASSERT(base->type() == MIRType::Int32);

  // The output may not be used but will be clobbered regardless, so
  // pin the output to eax.
  //
  // The input values must both be in registers.

  const LAllocation oldval = useRegister(ins->oldValue());
  const LAllocation newval = useRegister(ins->newValue());

  LWasmCompareExchangeHeap* lir =
      new (alloc()) LWasmCompareExchangeHeap(useRegister(base), oldval, newval);

  defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
}
254 
// Lower wasm XCHG on the heap.  XCHG takes any register, so no fixed output.
void LIRGenerator::visitWasmAtomicExchangeHeap(MWasmAtomicExchangeHeap* ins) {
  MOZ_ASSERT(ins->base()->type() == MIRType::Int32);

  const LAllocation base = useRegister(ins->base());
  const LAllocation value = useRegister(ins->value());

  // The output may not be used but will be clobbered regardless,
  // so ignore the case where we're not using the value and just
  // use the output register as a temp.

  LWasmAtomicExchangeHeap* lir =
      new (alloc()) LWasmAtomicExchangeHeap(base, value);
  define(lir, ins);
}
269 
// Lower a wasm read-modify-write atomic (add/sub/and/or/xor) on the heap.
// Strategy depends on whether the result is used and which operation it is;
// see the case comments below.
void LIRGenerator::visitWasmAtomicBinopHeap(MWasmAtomicBinopHeap* ins) {
  MDefinition* base = ins->base();
  MOZ_ASSERT(base->type() == MIRType::Int32);

  // No support for 64-bit operations with constants at the masm level.

  bool canTakeConstant = ins->access().type() != Scalar::Int64;

  // Case 1: the result of the operation is not used.
  //
  // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
  // LOCK OR, or LOCK XOR.

  if (!ins->hasUses()) {
    LAllocation value = canTakeConstant ? useRegisterOrConstant(ins->value())
                                        : useRegister(ins->value());
    LWasmAtomicBinopHeapForEffect* lir =
        new (alloc()) LWasmAtomicBinopHeapForEffect(useRegister(base), value);
    add(lir, ins);
    return;
  }

  // Case 2: the result of the operation is used.
  //
  // For ADD and SUB we'll use XADD with word and byte ops as
  // appropriate.  Any output register can be used and if value is a
  // register it's best if it's the same as output:
  //
  //    movl       value, output  ; if value != output
  //    lock xaddl output, mem
  //
  // For AND/OR/XOR we need to use a CMPXCHG loop, and the output is
  // always in rax:
  //
  //    movl          *mem, rax
  // L: mov           rax, temp
  //    andl          value, temp
  //    lock cmpxchg  temp, mem  ; reads rax also
  //    jnz           L
  //    ; result in rax
  //
  // Note the placement of L, cmpxchg will update rax with *mem if
  // *mem does not have the expected value, so reloading it at the
  // top of the loop would be redundant.

  bool bitOp = !(ins->operation() == AtomicFetchAddOp ||
                 ins->operation() == AtomicFetchSubOp);
  bool reuseInput = false;
  LAllocation value;

  if (bitOp || ins->value()->isConstant()) {
    value = canTakeConstant ? useRegisterOrConstant(ins->value())
                            : useRegister(ins->value());
  } else {
    // XADD path: reuse the value register as the output.
    reuseInput = true;
    value = useRegisterAtStart(ins->value());
  }

  auto* lir = new (alloc()) LWasmAtomicBinopHeap(
      useRegister(base), value, bitOp ? temp() : LDefinition::BogusTemp());

  if (reuseInput) {
    defineReuseInput(lir, ins, LWasmAtomicBinopHeap::valueOp);
  } else if (bitOp) {
    // CMPXCHG loop leaves the result in rax.
    defineFixed(lir, ins, LAllocation(AnyRegister(rax)));
  } else {
    define(lir, ins);
  }
}
339 
// Lower substring extraction.  Needs two GPR temps plus a byte-op temp
// (any register on x64); may allocate, hence the safepoint.
void LIRGenerator::visitSubstr(MSubstr* ins) {
  LSubstr* lir = new (alloc())
      LSubstr(useRegister(ins->string()), useRegister(ins->begin()),
              useRegister(ins->length()), temp(), temp(), tempByteOpRegister());
  define(lir, ins);
  assignSafepoint(lir, ins);
}
347 
// Lower Math.random(): three GPR temps, result fixed in the double return reg.
void LIRGenerator::visitRandom(MRandom* ins) {
  LRandom* lir = new (alloc()) LRandom(temp(), temp(), temp());
  defineFixed(lir, ins, LFloatReg(ReturnDoubleReg));
}
352 
// Lower signed 64-bit division.  x86 IDIV computes quotient in rax and
// remainder in rdx, so the output is fixed to rax and rdx is a temp.
void LIRGeneratorX64::lowerDivI64(MDiv* div) {
  if (div->isUnsigned()) {
    lowerUDivI64(div);
    return;
  }

  LDivOrModI64* lir = new (alloc()) LDivOrModI64(
      useRegister(div->lhs()), useRegister(div->rhs()), tempFixed(rdx));
  defineInt64Fixed(lir, div, LInt64Allocation(LAllocation(AnyRegister(rax))));
}
363 
// Lower signed 64-bit modulus.  Mirror of lowerDivI64: the remainder lands
// in rdx, so the output is fixed to rdx and rax is the temp.
void LIRGeneratorX64::lowerModI64(MMod* mod) {
  if (mod->isUnsigned()) {
    lowerUModI64(mod);
    return;
  }

  LDivOrModI64* lir = new (alloc()) LDivOrModI64(
      useRegister(mod->lhs()), useRegister(mod->rhs()), tempFixed(rax));
  defineInt64Fixed(lir, mod, LInt64Allocation(LAllocation(AnyRegister(rdx))));
}
374 
// Lower unsigned 64-bit division: quotient fixed in rax, rdx as temp.
void LIRGeneratorX64::lowerUDivI64(MDiv* div) {
  LUDivOrModI64* lir = new (alloc()) LUDivOrModI64(
      useRegister(div->lhs()), useRegister(div->rhs()), tempFixed(rdx));
  defineInt64Fixed(lir, div, LInt64Allocation(LAllocation(AnyRegister(rax))));
}
380 
// Lower unsigned 64-bit modulus: remainder fixed in rdx, rax as temp.
void LIRGeneratorX64::lowerUModI64(MMod* mod) {
  LUDivOrModI64* lir = new (alloc()) LUDivOrModI64(
      useRegister(mod->lhs()), useRegister(mod->rhs()), tempFixed(rax));
  defineInt64Fixed(lir, mod, LInt64Allocation(LAllocation(AnyRegister(rdx))));
}
386 
// Lower float/double -> int64 truncation; the unsigned case needs an extra
// double temp for the range adjustment.
void LIRGenerator::visitWasmTruncateToInt64(MWasmTruncateToInt64* ins) {
  MDefinition* opd = ins->input();
  MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32);

  LDefinition maybeTemp =
      ins->isUnsigned() ? tempDouble() : LDefinition::BogusTemp();
  defineInt64(new (alloc()) LWasmTruncateToInt64(useRegister(opd), maybeTemp),
              ins);
}
396 
// Lower int64 -> float/double conversion; the unsigned case needs a GPR temp.
void LIRGenerator::visitInt64ToFloatingPoint(MInt64ToFloatingPoint* ins) {
  MDefinition* opd = ins->input();
  MOZ_ASSERT(opd->type() == MIRType::Int64);
  MOZ_ASSERT(IsFloatingPointType(ins->type()));

  LDefinition maybeTemp = ins->isUnsigned() ? temp() : LDefinition::BogusTemp();
  define(new (alloc()) LInt64ToFloatingPoint(useInt64Register(opd), maybeTemp),
         ins);
}
406 
// Lower int32 -> int64 extension (sign- or zero-extend chosen at codegen).
void LIRGenerator::visitExtendInt32ToInt64(MExtendInt32ToInt64* ins) {
  defineInt64(new (alloc()) LExtendInt32ToInt64(useAtStart(ins->input())), ins);
}
410 
// Lower in-place sign extension of a narrow value within an int64.
void LIRGenerator::visitSignExtendInt64(MSignExtendInt64* ins) {
  defineInt64(new (alloc())
                  LSignExtendInt64(useInt64RegisterAtStart(ins->input())),
              ins);
}
416 
417 #ifdef ENABLE_WASM_SIMD
418 
419 // These lowerings are really x86-shared but some Masm APIs are not yet
420 // available on x86.
421 
422 // Ternary and binary operators require the dest register to be the same as
423 // their first input register, leading to a pattern of useRegisterAtStart +
424 // defineReuseInput.
425 
// Lower v128.bitselect.  lhs is reused as the output (two-address SSE);
// a SIMD temp holds the intermediate mask computation.
void LIRGenerator::visitWasmBitselectSimd128(MWasmBitselectSimd128* ins) {
  MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->control()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  auto* lir = new (alloc()) LWasmBitselectSimd128(
      useRegisterAtStart(ins->lhs()), useRegister(ins->rhs()),
      useRegister(ins->control()), tempSimd128());
  defineReuseInput(lir, ins, LWasmBitselectSimd128::LhsDest);
}
437 
// Lower a binary SIMD operation.  The first input is reused as the output;
// some ops need one or two SIMD temps, and a few have op-specific quirks
// handled in the switch below.
void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) {
  MDefinition* lhs = ins->lhs();
  MDefinition* rhs = ins->rhs();

  MOZ_ASSERT(lhs->type() == MIRType::Simd128);
  MOZ_ASSERT(rhs->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  if (ins->isCommutative()) {
    ReorderCommutative(&lhs, &rhs, ins);
  }

  LDefinition tempReg0 = LDefinition::BogusTemp();
  LDefinition tempReg1 = LDefinition::BogusTemp();
  switch (ins->simdOp()) {
    case wasm::SimdOp::V128AndNot: {
      // x86/x64 specific: Code generation requires the operands to be reversed.
      MDefinition* tmp = lhs;
      lhs = rhs;
      rhs = tmp;
      break;
    }
    case wasm::SimdOp::F32x4Max:
    case wasm::SimdOp::F64x2Max:
    case wasm::SimdOp::V8x16Swizzle:
      tempReg0 = tempSimd128();
      break;
    case wasm::SimdOp::I8x16LtU:
    case wasm::SimdOp::I8x16GtU:
    case wasm::SimdOp::I8x16LeU:
    case wasm::SimdOp::I8x16GeU:
    case wasm::SimdOp::I16x8LtU:
    case wasm::SimdOp::I16x8GtU:
    case wasm::SimdOp::I16x8LeU:
    case wasm::SimdOp::I16x8GeU:
    case wasm::SimdOp::I32x4LtU:
    case wasm::SimdOp::I32x4GtU:
    case wasm::SimdOp::I32x4LeU:
    case wasm::SimdOp::I32x4GeU:
      // Unsigned comparisons are synthesized from signed ones and need
      // two scratch vectors.
      tempReg0 = tempSimd128();
      tempReg1 = tempSimd128();
      break;
    default:
      break;
  }

  // When lhs == rhs both uses must be at-start so they share an allocation.
  LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
  LAllocation rhsAlloc =
      lhs != rhs ? useRegister(rhs) : useRegisterAtStart(rhs);
  if (ins->simdOp() == wasm::SimdOp::I64x2Mul) {
    auto* lir =
        new (alloc()) LWasmI64x2Mul(lhsDestAlloc, rhsAlloc, tempInt64());
    defineReuseInput(lir, ins, LWasmI64x2Mul::LhsDest);
  } else {
    auto* lir = new (alloc())
        LWasmBinarySimd128(lhsDestAlloc, rhsAlloc, tempReg0, tempReg1);
    defineReuseInput(lir, ins, LWasmBinarySimd128::LhsDest);
  }
}
497 
// Lower a SIMD shift.  Constant shift counts are masked to the lane width
// and emitted as immediate-shift instructions; variable counts go through
// LWasmVariableShiftSimd128 with op-specific temps.
void LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) {
  MDefinition* lhs = ins->lhs();
  MDefinition* rhs = ins->rhs();

  MOZ_ASSERT(lhs->type() == MIRType::Simd128);
  MOZ_ASSERT(rhs->type() == MIRType::Int32);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  if (rhs->isConstant()) {
    LDefinition temp = LDefinition::BogusTemp();
    // Wasm semantics: the count is taken modulo the lane width.
    int32_t shiftCount = rhs->toConstant()->toInt32();
    switch (ins->simdOp()) {
      case wasm::SimdOp::I8x16Shl:
      case wasm::SimdOp::I8x16ShrU:
        shiftCount &= 7;
        break;
      case wasm::SimdOp::I8x16ShrS:
        shiftCount &= 7;
        temp = tempSimd128();
        break;
      case wasm::SimdOp::I16x8Shl:
      case wasm::SimdOp::I16x8ShrU:
      case wasm::SimdOp::I16x8ShrS:
        shiftCount &= 15;
        break;
      case wasm::SimdOp::I32x4Shl:
      case wasm::SimdOp::I32x4ShrU:
      case wasm::SimdOp::I32x4ShrS:
        shiftCount &= 31;
        break;
      case wasm::SimdOp::I64x2Shl:
      case wasm::SimdOp::I64x2ShrU:
      case wasm::SimdOp::I64x2ShrS:
        shiftCount &= 63;
        break;
      default:
        MOZ_CRASH("Unexpected shift operation");
    }
#  ifdef DEBUG
    js::wasm::ReportSimdAnalysis("shift -> constant shift");
#  endif
    auto* lir = new (alloc())
        LWasmConstantShiftSimd128(useRegisterAtStart(lhs), temp, shiftCount);
    defineReuseInput(lir, ins, LWasmConstantShiftSimd128::Src);
    return;
  }

#  ifdef DEBUG
  js::wasm::ReportSimdAnalysis("shift -> variable shift");
#  endif

  LDefinition tempReg0 = LDefinition::BogusTemp();
  LDefinition tempReg1 = LDefinition::BogusTemp();
  switch (ins->simdOp()) {
    case wasm::SimdOp::I64x2ShrS:
      break;
    case wasm::SimdOp::I8x16Shl:
    case wasm::SimdOp::I8x16ShrS:
    case wasm::SimdOp::I8x16ShrU:
      tempReg0 = temp();
      tempReg1 = tempSimd128();
      break;
    default:
      tempReg0 = temp();
      break;
  }

  // I64x2ShrS pins the count to ecx (the hardware shift-count register).
  LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
  LAllocation rhsAlloc = ins->simdOp() == wasm::SimdOp::I64x2ShrS
                             ? useFixed(rhs, ecx)
                             : useRegister(rhs);
  auto* lir = new (alloc())
      LWasmVariableShiftSimd128(lhsDestAlloc, rhsAlloc, tempReg0, tempReg1);
  defineReuseInput(lir, ins, LWasmVariableShiftSimd128::LhsDest);
}
573 
574 // Optimization of v8x16.shuffle.  The general byte shuffle+blend is very
575 // expensive (equivalent to at least a dozen instructions), and we want to avoid
576 // that if we can.  So look for special cases - there are many.
577 //
578 // The strategy is to sort the operation into one of three buckets depending
579 // on the shuffle pattern and inputs:
580 //
581 //  - single operand; shuffles on these values are rotations, reversals,
582 //    transpositions, and general permutations
583 //  - single-operand-with-interesting-constant (especially zero); shuffles on
584 //    these values are often byte shift or scatter operations
585 //  - dual operand; shuffles on these operations are blends, catenated
586 //    shifts, and (in the worst case) general shuffle+blends
587 //
588 // We're not trying to solve the general problem, only to lower reasonably
589 // expressed patterns that express common operations.  Producers that produce
590 // dense and convoluted patterns will end up with the general byte shuffle.
591 // Producers that produce simpler patterns that easily map to hardware will
592 // get faster code.
593 //
594 // In particular, these matchers do not try to combine transformations, so a
595 // shuffle that optimally is lowered to rotate + permute32x4 + rotate, say, is
596 // usually going to end up as a general byte shuffle.
597 
598 // Representation of the result of the analysis.
599 struct Shuffle {
600   enum class Operand {
601     // Both inputs, in the original lhs-rhs order
602     BOTH,
603     // Both inputs, but in rhs-lhs order
604     BOTH_SWAPPED,
605     // Only the lhs input
606     LEFT,
607     // Only the rhs input
608     RIGHT,
609   };
610 
611   Operand opd;
612   SimdConstant control;
613   Maybe<LWasmPermuteSimd128::Op> permuteOp;  // Single operands
614   Maybe<LWasmShuffleSimd128::Op> shuffleOp;  // Double operands
615 
permuteShuffle616   static Shuffle permute(Operand opd, SimdConstant control,
617                          LWasmPermuteSimd128::Op op) {
618     MOZ_ASSERT(opd == Operand::LEFT || opd == Operand::RIGHT);
619     Shuffle s{opd, control, Some(op), Nothing()};
620     return s;
621   }
622 
shuffleShuffle623   static Shuffle shuffle(Operand opd, SimdConstant control,
624                          LWasmShuffleSimd128::Op op) {
625     MOZ_ASSERT(opd == Operand::BOTH || opd == Operand::BOTH_SWAPPED);
626     Shuffle s{opd, control, Nothing(), Some(op)};
627     return s;
628   }
629 };
630 
631 // Reduce a 0..31 byte mask to a 0..15 word mask if possible and if so return
632 // true, updating *control.
// Reduce a 0..31 byte mask to a 0..15 word mask if possible and if so return
// true, updating *control.  Each byte pair must name an aligned, consecutive
// pair of source bytes (even byte first).
static bool ByteMaskToWordMask(SimdConstant* control) {
  const SimdConstant::I8x16& lanes = control->asInt8x16();
  int16_t controlWords[8];
  for (int i = 0; i < 16; i += 2) {
    if (!((lanes[i] & 1) == 0 && lanes[i + 1] == lanes[i] + 1)) {
      return false;
    }
    controlWords[i / 2] = lanes[i] / 2;
  }
  *control = SimdConstant::CreateX8(controlWords);
  return true;
}
645 
646 // Reduce a 0..31 byte mask to a 0..7 dword mask if possible and if so return
647 // true, updating *control.
// Reduce a 0..31 byte mask to a 0..7 dword mask if possible and if so return
// true, updating *control.  Each group of four bytes must name an aligned,
// consecutive run of four source bytes.
static bool ByteMaskToDWordMask(SimdConstant* control) {
  const SimdConstant::I8x16& lanes = control->asInt8x16();
  int32_t controlDWords[4];
  for (int i = 0; i < 16; i += 4) {
    if (!((lanes[i] & 3) == 0 && lanes[i + 1] == lanes[i] + 1 &&
          lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3)) {
      return false;
    }
    controlDWords[i / 4] = lanes[i] / 4;
  }
  *control = SimdConstant::CreateX4(controlDWords);
  return true;
}
661 
662 // Skip across consecutive values in lanes starting at i, returning the index
663 // after the last element.  Lane values must be <= len-1 ("masked").
664 //
665 // Since every element is a 1-element run, the return value is never the same as
666 // the starting i.
template <typename T>
// Returns the index one past the run of consecutive lane values starting at
// i; always advances at least one position.
static int ScanIncreasingMasked(const T* lanes, int i) {
  int len = int(16 / sizeof(T));
  MOZ_ASSERT(i < len);
  MOZ_ASSERT(lanes[i] <= len - 1);
  i++;
  while (i < len && lanes[i] == lanes[i - 1] + 1) {
    MOZ_ASSERT(lanes[i] <= len - 1);
    i++;
  }
  return i;
}
679 
680 // Skip across consecutive values in lanes starting at i, returning the index
681 // after the last element.  Lane values must be <= len*2-1 ("unmasked"); the
682 // values len-1 and len are not considered consecutive.
683 //
684 // Since every element is a 1-element run, the return value is never the same as
685 // the starting i.
template <typename T>
// Like ScanIncreasingMasked but for unmasked lanes (values up to len*2-1):
// a run must stay entirely within one operand's half, so len-1 followed by
// len does not count as consecutive.
static int ScanIncreasingUnmasked(const T* lanes, int i) {
  int len = int(16 / sizeof(T));
  MOZ_ASSERT(i < len);
  if (lanes[i] < len) {
    // Run within the first operand's lane range [0, len).
    i++;
    while (i < len && lanes[i] < len && lanes[i - 1] == lanes[i] - 1) {
      i++;
    }
  } else {
    // Run within the second operand's lane range [len, 2*len).
    i++;
    while (i < len && lanes[i] >= len && lanes[i - 1] == lanes[i] - 1) {
      i++;
    }
  }
  return i;
}
703 
704 // Skip lanes that equal v starting at i, returning the index just beyond the
705 // last of those.  There is no requirement that the initial lanes[i] == v.
template <typename T>
// Returns the index one past the run of lanes equal to v starting at i;
// lanes[i] itself need not equal v (then the return value is just i).
static int ScanConstant(const T* lanes, int v, int i) {
  int len = int(16 / sizeof(T));
  MOZ_ASSERT(i <= len);
  while (i < len && lanes[i] == v) {
    i++;
  }
  return i;
}
715 
716 // Mask lane values denoting rhs elements into lhs elements.
template <typename T>
// Strip the operand-select bit(s) from every lane index, reducing each value
// into the single-operand range [0, 16/sizeof(T)).
static void MaskLanes(T* result, const T* input) {
  constexpr int kLanes = int(16 / sizeof(T));
  for (int lane = 0; lane < kLanes; lane++) {
    result[lane] = input[lane] & (kLanes - 1);
  }
}
724 
725 // Apply a transformation to each lane value.
template <typename T>
// Produce one output lane per input lane by applying |f| to each lane value;
// the lane count follows the element size (16 bytes total).
static void MapLanes(T* result, const T* input, int (*f)(int)) {
  int count = int(16 / sizeof(T));
  int lane = 0;
  while (lane < count) {
    result[lane] = f(input[lane]);
    lane++;
  }
}
733 
734 // Recognize an identity permutation, assuming lanes is masked.
template <typename T>
// True when the masked lanes form one consecutive run starting at 0, i.e.
// lanes[k] == k for every lane: the shuffle is a no-op.
static bool IsIdentity(const T* lanes) {
  return ScanIncreasingMasked(lanes, 0) == int(16 / sizeof(T));
}
739 
740 // Recognize part of an identity permutation starting at start, with
741 // the first value of the permutation expected to be bias.
template <typename T>
// True when lanes[start..start+len) is the consecutive run
// bias, bias+1, ..., bias+len-1.
static bool IsIdentity(const T* lanes, int start, int len, int bias) {
  // The run must open with |bias| itself...
  if (lanes[start] != bias) {
    return false;
  }
  // ...and then increase by exactly one per lane.
  int pos = start + 1;
  while (pos < start + len) {
    if (lanes[pos] != lanes[pos - 1] + 1) {
      return false;
    }
    pos++;
  }
  return true;
}
754 
755 // We can permute by dwords if the mask is reducible to a dword mask, and in
756 // this case a single PSHUFD is enough.
// We can permute by dwords if the mask is reducible to a dword mask, and in
// this case a single PSHUFD is enough.  On success *control is rewritten as
// the dword mask; on failure it is left untouched (hence the tmp copy).
static bool TryPermute32x4(SimdConstant* control) {
  SimdConstant tmp = *control;
  if (!ByteMaskToDWordMask(&tmp)) {
    return false;
  }
  *control = tmp;
  return true;
}
765 
766 // Can we perform a byte rotate right?  We can use PALIGNR.  The shift count is
767 // just lanes[0], and *control is unchanged.
// Can we perform a byte rotate right?  We can use PALIGNR.  The shift count is
// just lanes[0], and *control is unchanged.  A rotation is exactly two
// consecutive runs where the second starts at source lane 0.
static bool TryRotateRight8x16(SimdConstant* control) {
  const SimdConstant::I8x16& lanes = control->asInt8x16();
  // Look for the first run of consecutive bytes.
  int i = ScanIncreasingMasked(lanes, 0);

  // If we reach the end of the vector, the vector must start at 0.
  if (i == 16) {
    return lanes[0] == 0;
  }

  // Second run must start at source lane zero
  if (lanes[i] != 0) {
    return false;
  }

  // Second run must end at the end of the lane vector.
  return ScanIncreasingMasked(lanes, i) == 16;
}
786 
787 // We can permute by words if the mask is reducible to a word mask, but the x64
788 // lowering is only efficient if we can permute the high and low quadwords
789 // separately, possibly after swapping quadwords.
// We can permute by words if the mask is reducible to a word mask, but the x64
// lowering is only efficient if we can permute the high and low quadwords
// separately, possibly after swapping quadwords.  On success the control is
// rewritten as a word mask whose lane 0 also carries the operation bits
// (SWAP_QWORDS / PERM_LOW / PERM_HIGH) in its high byte.
static bool TryPermute16x8(SimdConstant* control) {
  SimdConstant tmp = *control;
  if (!ByteMaskToWordMask(&tmp)) {
    return false;
  }
  const SimdConstant::I16x8& lanes = tmp.asInt16x8();
  SimdConstant::I16x8 mapped;
  // Each half of the mask must draw entirely from one source quadword:
  // map lanes to 0 (low qword source) or 1 (high qword source) and require
  // each half to be constant.
  MapLanes(mapped, lanes, [](int x) -> int { return x < 4 ? 0 : 1; });
  int i = ScanConstant(mapped, mapped[0], 0);
  if (i != 4) {
    return false;
  }
  i = ScanConstant(mapped, mapped[4], 4);
  if (i != 8) {
    return false;
  }
  // Now compute the operation bits.  `mapped` holds the adjusted lane mask.
  memcpy(mapped, lanes, sizeof(mapped));
  int16_t op = 0;
  if (mapped[0] > mapped[4]) {
    op |= LWasmPermuteSimd128::SWAP_QWORDS;
  }
  for (int i = 0; i < 8; i++) {
    mapped[i] &= 3;
  }
  if (!IsIdentity(mapped, 0, 4, 0)) {
    op |= LWasmPermuteSimd128::PERM_LOW;
  }
  if (!IsIdentity(mapped, 4, 4, 0)) {
    op |= LWasmPermuteSimd128::PERM_HIGH;
  }
  MOZ_ASSERT(op != 0);
  mapped[0] |= op << 8;
  *control = SimdConstant::CreateX8(mapped);
  return true;
}
826 
827 // A single word lane is copied into all the other lanes: PSHUF*W + PSHUFD.
TryBroadcast16x8(SimdConstant * control)828 static bool TryBroadcast16x8(SimdConstant* control) {
829   SimdConstant tmp = *control;
830   if (!ByteMaskToWordMask(&tmp)) {
831     return false;
832   }
833   const SimdConstant::I16x8& lanes = tmp.asInt16x8();
834   if (ScanConstant(lanes, lanes[0], 0) < 8) {
835     return false;
836   }
837   *control = tmp;
838   return true;
839 }
840 
// A single byte lane is copied into all the other lanes: PUNPCK*BW + PSHUF*W +
// PSHUFD.
TryBroadcast8x16(SimdConstant * control)843 static bool TryBroadcast8x16(SimdConstant* control) {
844   const SimdConstant::I8x16& lanes = control->asInt8x16();
845   if (ScanConstant(lanes, lanes[0], 0) < 16) {
846     return false;
847   }
848   return true;
849 }
850 
851 // Look for permutations of a single operand.
static LWasmPermuteSimd128::Op AnalyzePermute(SimdConstant* control) {
  // Lane indices are input-agnostic for single-operand permutations.
  SimdConstant::I8x16 controlBytes;
  MaskLanes(controlBytes, control->asInt8x16());

  // Get rid of no-ops immediately, so nobody else needs to check.
  if (IsIdentity(controlBytes)) {
    return LWasmPermuteSimd128::MOVE;
  }

  // Default control is the masked bytes.
  *control = SimdConstant::CreateX16(controlBytes);

  // Analysis order matters here and is architecture-dependent or even
  // microarchitecture-dependent: ideally the cheapest implementation first.
  // The Intel manual says that the cost of a PSHUFB is about five other
  // operations, so make that our cutoff.
  //
  // Word, dword, and qword reversals are handled optimally by general permutes.
  //
  // Byte reversals are probably best left to PSHUFB, no alternative rendition
  // seems to reliably go below five instructions.  (Discuss.)
  //
  // Word swaps within doublewords and dword swaps within quadwords are handled
  // optimally by general permutes.
  //
  // Dword and qword broadcasts are handled by dword permute.

  // Each TryXxx below may rewrite *control into its operation-specific
  // encoding on success, so the order also determines which encoding wins.
  if (TryPermute32x4(control)) {
    return LWasmPermuteSimd128::PERMUTE_32x4;
  }
  if (TryRotateRight8x16(control)) {
    return LWasmPermuteSimd128::ROTATE_RIGHT_8x16;
  }
  if (TryPermute16x8(control)) {
    return LWasmPermuteSimd128::PERMUTE_16x8;
  }
  if (TryBroadcast16x8(control)) {
    return LWasmPermuteSimd128::BROADCAST_16x8;
  }
  if (TryBroadcast8x16(control)) {
    return LWasmPermuteSimd128::BROADCAST_8x16;
  }

  // TODO: (From v8) Unzip and transpose generally have renditions that slightly
  // beat a general permute (three or four instructions)
  //
  // TODO: (From MacroAssemblerX86Shared::ShuffleX4): MOVLHPS and MOVHLPS can be
  // used when merging two values.
  //
  // TODO: Byteswap is MOV + PSLLW + PSRLW + POR, a small win over PSHUFB.

  // The default operation is to permute bytes with the default control.
  return LWasmPermuteSimd128::PERMUTE_8x16;
}
907 
908 // Can we shift the bytes left or right by a constant?  A shift is a run of
909 // lanes from the rhs (which is zero) on one end and a run of values from the
910 // lhs on the other end.
static Maybe<LWasmPermuteSimd128::Op> TryShift8x16(SimdConstant* control) {
  const SimdConstant::I8x16& lanes = control->asInt8x16();

  // Represent all zero lanes by 16
  SimdConstant::I8x16 zeroesMasked;
  MapLanes(zeroesMasked, lanes, [](int x) -> int { return x >= 16 ? 16 : x; });

  // A leading run of zero lanes would be produced by a left shift of that
  // many bytes.
  int i = ScanConstant(zeroesMasked, 16, 0);
  int shiftLeft = i;
  // After a left shift the lhs run must start at source lane 0.
  if (shiftLeft > 0 && lanes[shiftLeft] != 0) {
    return Nothing();
  }

  // Next comes an increasing run of lhs lanes; whatever remains after it
  // would be the zeroes produced by a right shift.
  i = ScanIncreasingUnmasked(zeroesMasked, i);
  int shiftRight = 16 - i;
  // A right shift discards the low bytes, so the run must end at lane 15.
  if (shiftRight > 0 && lanes[i - 1] != 15) {
    return Nothing();
  }

  // The trailing zeroes must extend to the end of the vector, and exactly one
  // of the two shift amounts must be nonzero.
  i = ScanConstant(zeroesMasked, 16, i);
  if (i < 16 || (shiftRight > 0 && shiftLeft > 0) ||
      (shiftRight == 0 && shiftLeft == 0)) {
    return Nothing();
  }

  // The control becomes the byte shift count, splatted across all lanes.
  if (shiftRight) {
    *control = SimdConstant::SplatX16(shiftRight);
    return Some(LWasmPermuteSimd128::SHIFT_RIGHT_8x16);
  }
  *control = SimdConstant::SplatX16(shiftLeft);
  return Some(LWasmPermuteSimd128::SHIFT_LEFT_8x16);
}
943 
AnalyzeShuffleWithZero(SimdConstant * control)944 static Maybe<LWasmPermuteSimd128::Op> AnalyzeShuffleWithZero(
945     SimdConstant* control) {
946   Maybe<LWasmPermuteSimd128::Op> op;
947   op = TryShift8x16(control);
948   if (op) {
949     return op;
950   }
951 
952   // TODO: Optimization opportunity? A byte-blend-with-zero is just a CONST;
953   // PAND.  This may beat the general byte blend code below.
954   return Nothing();
955 }
956 
957 // Concat: if the result is the suffix (high bytes) of the rhs in front of a
958 // prefix (low bytes) of the lhs then this is PALIGNR; ditto if the operands are
959 // swapped.
TryConcatRightShift8x16(SimdConstant * control,bool * swapOperands)960 static Maybe<LWasmShuffleSimd128::Op> TryConcatRightShift8x16(
961     SimdConstant* control, bool* swapOperands) {
962   const SimdConstant::I8x16& lanes = control->asInt8x16();
963   int i = ScanIncreasingUnmasked(lanes, 0);
964   MOZ_ASSERT(i < 16, "Single-operand run should have been handled elswhere");
965   // First run must end with 15 % 16
966   if ((lanes[i - 1] & 15) != 15) {
967     return Nothing();
968   }
969   // Second run must start with 0 % 16
970   if ((lanes[i] & 15) != 0) {
971     return Nothing();
972   }
973   // The two runs must come from different inputs
974   if ((lanes[i] & 16) == (lanes[i - 1] & 16)) {
975     return Nothing();
976   }
977   int suffixLength = i;
978 
979   i = ScanIncreasingUnmasked(lanes, i);
980   // Must end at the left end
981   if (i != 16) {
982     return Nothing();
983   }
984 
985   // If the suffix is from the lhs then swap the operands
986   if (lanes[0] < 16) {
987     *swapOperands = !*swapOperands;
988   }
989   *control = SimdConstant::SplatX16(suffixLength);
990   return Some(LWasmShuffleSimd128::CONCAT_RIGHT_SHIFT_8x16);
991 }
992 
993 // Blend words: if we pick words from both operands without a pattern but all
994 // the input words stay in their position then this is PBLENDW (immediate mask);
995 // this also handles all larger sizes on x64.
TryBlendInt16x8(SimdConstant * control)996 static Maybe<LWasmShuffleSimd128::Op> TryBlendInt16x8(SimdConstant* control) {
997   SimdConstant tmp(*control);
998   if (!ByteMaskToWordMask(&tmp)) {
999     return Nothing();
1000   }
1001   SimdConstant::I16x8 masked;
1002   MaskLanes(masked, tmp.asInt16x8());
1003   if (!IsIdentity(masked)) {
1004     return Nothing();
1005   }
1006   SimdConstant::I16x8 mapped;
1007   MapLanes(mapped, tmp.asInt16x8(),
1008            [](int x) -> int { return x < 8 ? 0 : -1; });
1009   *control = SimdConstant::CreateX8(mapped);
1010   return Some(LWasmShuffleSimd128::BLEND_16x8);
1011 }
1012 
1013 // Blend bytes: if we pick bytes ditto then this is a byte blend, which can be
1014 // handled with a CONST, PAND, PANDNOT, and POR.
1015 //
1016 // TODO: Optimization opportunity? If we pick all but one lanes from one with at
1017 // most one from the other then it could be a MOV + PEXRB + PINSRB (also if this
1018 // element is not in its source location).
TryBlendInt8x16(SimdConstant * control)1019 static Maybe<LWasmShuffleSimd128::Op> TryBlendInt8x16(SimdConstant* control) {
1020   SimdConstant::I8x16 masked;
1021   MaskLanes(masked, control->asInt8x16());
1022   if (!IsIdentity(masked)) {
1023     return Nothing();
1024   }
1025   SimdConstant::I8x16 mapped;
1026   MapLanes(mapped, control->asInt8x16(),
1027            [](int x) -> int { return x < 16 ? 0 : -1; });
1028   *control = SimdConstant::CreateX16(mapped);
1029   return Some(LWasmShuffleSimd128::BLEND_8x16);
1030 }
1031 
// Does `lanes` interleave `len` consecutive indices starting at `lhs` with
// `len` consecutive indices starting at `rhs`?
template <typename T>
static bool MatchInterleave(const T* lanes, int lhs, int rhs, int len) {
  for (int k = 0; k < len; k++) {
    // Even positions draw from the lhs run, odd positions from the rhs run.
    bool evenOk = lanes[2 * k] == lhs + k;
    bool oddOk = lanes[2 * k + 1] == rhs + k;
    if (!evenOk || !oddOk) {
      return false;
    }
  }
  return true;
}
1041 
1042 // Unpack/interleave:
1043 //  - if we interleave the low (bytes/words/doublewords) of the inputs into
1044 //    the output then this is UNPCKL*W (possibly with a swap of operands).
1045 //  - if we interleave the high ditto then it is UNPCKH*W (ditto)
1046 template <typename T>
TryInterleave(const T * lanes,int lhs,int rhs,bool * swapOperands,LWasmShuffleSimd128::Op lowOp,LWasmShuffleSimd128::Op highOp)1047 static Maybe<LWasmShuffleSimd128::Op> TryInterleave(
1048     const T* lanes, int lhs, int rhs, bool* swapOperands,
1049     LWasmShuffleSimd128::Op lowOp, LWasmShuffleSimd128::Op highOp) {
1050   int len = int(32 / (sizeof(T) * 4));
1051   if (MatchInterleave(lanes, lhs, rhs, len)) {
1052     return Some(lowOp);
1053   }
1054   if (MatchInterleave(lanes, rhs, lhs, len)) {
1055     *swapOperands = !*swapOperands;
1056     return Some(lowOp);
1057   }
1058   if (MatchInterleave(lanes, lhs + len, rhs + len, len)) {
1059     return Some(highOp);
1060   }
1061   if (MatchInterleave(lanes, rhs + len, lhs + len, len)) {
1062     *swapOperands = !*swapOperands;
1063     return Some(highOp);
1064   }
1065   return Nothing();
1066 }
1067 
TryInterleave32x4(SimdConstant * control,bool * swapOperands)1068 static Maybe<LWasmShuffleSimd128::Op> TryInterleave32x4(SimdConstant* control,
1069                                                         bool* swapOperands) {
1070   SimdConstant tmp = *control;
1071   if (!ByteMaskToDWordMask(&tmp)) {
1072     return Nothing();
1073   }
1074   const SimdConstant::I32x4& lanes = tmp.asInt32x4();
1075   return TryInterleave(lanes, 0, 4, swapOperands,
1076                        LWasmShuffleSimd128::INTERLEAVE_LOW_32x4,
1077                        LWasmShuffleSimd128::INTERLEAVE_HIGH_32x4);
1078 }
1079 
TryInterleave16x8(SimdConstant * control,bool * swapOperands)1080 static Maybe<LWasmShuffleSimd128::Op> TryInterleave16x8(SimdConstant* control,
1081                                                         bool* swapOperands) {
1082   SimdConstant tmp = *control;
1083   if (!ByteMaskToWordMask(&tmp)) {
1084     return Nothing();
1085   }
1086   const SimdConstant::I16x8& lanes = tmp.asInt16x8();
1087   return TryInterleave(lanes, 0, 8, swapOperands,
1088                        LWasmShuffleSimd128::INTERLEAVE_LOW_16x8,
1089                        LWasmShuffleSimd128::INTERLEAVE_HIGH_16x8);
1090 }
1091 
TryInterleave8x16(SimdConstant * control,bool * swapOperands)1092 static Maybe<LWasmShuffleSimd128::Op> TryInterleave8x16(SimdConstant* control,
1093                                                         bool* swapOperands) {
1094   const SimdConstant::I8x16& lanes = control->asInt8x16();
1095   return TryInterleave(lanes, 0, 16, swapOperands,
1096                        LWasmShuffleSimd128::INTERLEAVE_LOW_8x16,
1097                        LWasmShuffleSimd128::INTERLEAVE_HIGH_8x16);
1098 }
1099 
AnalyzeTwoArgShuffle(SimdConstant * control,bool * swapOperands)1100 static LWasmShuffleSimd128::Op AnalyzeTwoArgShuffle(SimdConstant* control,
1101                                                     bool* swapOperands) {
1102   Maybe<LWasmShuffleSimd128::Op> op;
1103   op = TryConcatRightShift8x16(control, swapOperands);
1104   if (!op) {
1105     op = TryBlendInt16x8(control);
1106   }
1107   if (!op) {
1108     op = TryBlendInt8x16(control);
1109   }
1110   if (!op) {
1111     op = TryInterleave32x4(control, swapOperands);
1112   }
1113   if (!op) {
1114     op = TryInterleave16x8(control, swapOperands);
1115   }
1116   if (!op) {
1117     op = TryInterleave8x16(control, swapOperands);
1118   }
1119   if (!op) {
1120     op = Some(LWasmShuffleSimd128::SHUFFLE_BLEND_8x16);
1121   }
1122   return *op;
1123 }
1124 
1125 // Reorder the operands if that seems useful, notably, move a constant to the
1126 // right hand side.  Rewrites the control to account for any move.
MaybeReorderShuffleOperands(MDefinition ** lhs,MDefinition ** rhs,SimdConstant * control)1127 static bool MaybeReorderShuffleOperands(MDefinition** lhs, MDefinition** rhs,
1128                                         SimdConstant* control) {
1129   if ((*lhs)->isWasmFloatConstant()) {
1130     MDefinition* tmp = *lhs;
1131     *lhs = *rhs;
1132     *rhs = tmp;
1133 
1134     int8_t controlBytes[16];
1135     const SimdConstant::I8x16& lanes = control->asInt8x16();
1136     for (unsigned i = 0; i < 16; i++) {
1137       controlBytes[i] = lanes[i] ^ 16;
1138     }
1139     *control = SimdConstant::CreateX16(controlBytes);
1140 
1141     return true;
1142   }
1143   return false;
1144 }
1145 
static Shuffle AnalyzeShuffle(MWasmShuffleSimd128* ins) {
  // Control may be updated, but only once we commit to an operation or when we
  // swap operands.
  SimdConstant control = ins->control();
  MDefinition* lhs = ins->lhs();
  MDefinition* rhs = ins->rhs();

  // If only one of the inputs is used, determine which.
  bool useLeft = true;
  bool useRight = true;
  if (lhs == rhs) {
    // Both operands are the same node, so this is really a single-operand
    // permute of the lhs.
    useRight = false;
  } else {
    // Lane indices 0..15 select from the lhs, 16..31 from the rhs.  If every
    // index falls on one side, the other operand is unused.
    bool allAbove = true;
    bool allBelow = true;
    const SimdConstant::I8x16& lanes = control.asInt8x16();
    for (unsigned i = 0; i < 16; i++) {
      allAbove = allAbove && lanes[i] >= 16;
      allBelow = allBelow && lanes[i] < 16;
    }
    if (allAbove) {
      useLeft = false;
    } else if (allBelow) {
      useRight = false;
    }
  }

  // Deal with one-ignored-input.
  if (!(useLeft && useRight)) {
    LWasmPermuteSimd128::Op op = AnalyzePermute(&control);
    return Shuffle::permute(
        useLeft ? Shuffle::Operand::LEFT : Shuffle::Operand::RIGHT, control,
        op);
  }

  // Move constants to rhs.
  bool swapOperands = MaybeReorderShuffleOperands(&lhs, &rhs, &control);

  // Deal with constant rhs.
  if (rhs->isWasmFloatConstant()) {
    SimdConstant rhsConstant = rhs->toWasmFloatConstant()->toSimd128();
    if (rhsConstant.isIntegerZero()) {
      // A zero rhs admits cheaper single-operand forms (e.g. byte shifts).
      Maybe<LWasmPermuteSimd128::Op> op = AnalyzeShuffleWithZero(&control);
      if (op) {
        return Shuffle::permute(
            swapOperands ? Shuffle::Operand::RIGHT : Shuffle::Operand::LEFT,
            control, *op);
      }
    }
  }

  // Two operands both of which are used.  If there's one constant operand it is
  // now on the rhs.
  LWasmShuffleSimd128::Op op = AnalyzeTwoArgShuffle(&control, &swapOperands);
  return Shuffle::shuffle(
      swapOperands ? Shuffle::Operand::BOTH_SWAPPED : Shuffle::Operand::BOTH,
      control, op);
}
1204 
1205 #  ifdef DEBUG
ReportShuffleSpecialization(const Shuffle & s)1206 static void ReportShuffleSpecialization(const Shuffle& s) {
1207   switch (s.opd) {
1208     case Shuffle::Operand::BOTH:
1209     case Shuffle::Operand::BOTH_SWAPPED:
1210       switch (*s.shuffleOp) {
1211         case LWasmShuffleSimd128::SHUFFLE_BLEND_8x16:
1212           js::wasm::ReportSimdAnalysis("shuffle -> shuffle+blend 8x16");
1213           break;
1214         case LWasmShuffleSimd128::BLEND_8x16:
1215           js::wasm::ReportSimdAnalysis("shuffle -> blend 8x16");
1216           break;
1217         case LWasmShuffleSimd128::BLEND_16x8:
1218           js::wasm::ReportSimdAnalysis("shuffle -> blend 16x8");
1219           break;
1220         case LWasmShuffleSimd128::CONCAT_RIGHT_SHIFT_8x16:
1221           js::wasm::ReportSimdAnalysis("shuffle -> concat+shift-right 8x16");
1222           break;
1223         case LWasmShuffleSimd128::INTERLEAVE_HIGH_8x16:
1224           js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 8x16");
1225           break;
1226         case LWasmShuffleSimd128::INTERLEAVE_HIGH_16x8:
1227           js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 16x8");
1228           break;
1229         case LWasmShuffleSimd128::INTERLEAVE_HIGH_32x4:
1230           js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 32x4");
1231           break;
1232         case LWasmShuffleSimd128::INTERLEAVE_LOW_8x16:
1233           js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 8x16");
1234           break;
1235         case LWasmShuffleSimd128::INTERLEAVE_LOW_16x8:
1236           js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 16x8");
1237           break;
1238         case LWasmShuffleSimd128::INTERLEAVE_LOW_32x4:
1239           js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 32x4");
1240           break;
1241         default:
1242           MOZ_CRASH("Unexpected shuffle op");
1243       }
1244       break;
1245     case Shuffle::Operand::LEFT:
1246     case Shuffle::Operand::RIGHT:
1247       switch (*s.permuteOp) {
1248         case LWasmPermuteSimd128::BROADCAST_8x16:
1249           js::wasm::ReportSimdAnalysis("shuffle -> broadcast 8x16");
1250           break;
1251         case LWasmPermuteSimd128::BROADCAST_16x8:
1252           js::wasm::ReportSimdAnalysis("shuffle -> broadcast 16x8");
1253           break;
1254         case LWasmPermuteSimd128::MOVE:
1255           js::wasm::ReportSimdAnalysis("shuffle -> move");
1256           break;
1257         case LWasmPermuteSimd128::PERMUTE_8x16:
1258           js::wasm::ReportSimdAnalysis("shuffle -> permute 8x16");
1259           break;
1260         case LWasmPermuteSimd128::PERMUTE_16x8: {
1261           int op = s.control.asInt16x8()[0] >> 8;
1262           char buf[256];
1263           sprintf(buf, "shuffle -> permute 16x8%s%s%s",
1264                   op & LWasmPermuteSimd128::SWAP_QWORDS ? " swap" : "",
1265                   op & LWasmPermuteSimd128::PERM_HIGH ? " high" : "",
1266                   op & LWasmPermuteSimd128::PERM_LOW ? " low" : "");
1267           js::wasm::ReportSimdAnalysis(buf);
1268           break;
1269         }
1270         case LWasmPermuteSimd128::PERMUTE_32x4:
1271           js::wasm::ReportSimdAnalysis("shuffle -> permute 32x4");
1272           break;
1273         case LWasmPermuteSimd128::ROTATE_RIGHT_8x16:
1274           js::wasm::ReportSimdAnalysis("shuffle -> rotate-right 8x16");
1275           break;
1276         case LWasmPermuteSimd128::SHIFT_LEFT_8x16:
1277           js::wasm::ReportSimdAnalysis("shuffle -> shift-left 8x16");
1278           break;
1279         case LWasmPermuteSimd128::SHIFT_RIGHT_8x16:
1280           js::wasm::ReportSimdAnalysis("shuffle -> shift-right 8x16");
1281           break;
1282         default:
1283           MOZ_CRASH("Unexpected permute op");
1284       }
1285       break;
1286   }
1287 }
1288 #  endif
1289 
void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
  MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  // Specialize the shuffle to one of the permute/shuffle operations analyzed
  // above; the analysis may also rewrite the control vector.
  Shuffle s = AnalyzeShuffle(ins);
#  ifdef DEBUG
  ReportShuffleSpecialization(s);
#  endif
  switch (s.opd) {
    case Shuffle::Operand::LEFT:
    case Shuffle::Operand::RIGHT: {
      LAllocation src;
      // MOVE reuses its input as the output (defineReuseInput below), so the
      // input must be used at-start; other permutes use a plain register use.
      if (s.opd == Shuffle::Operand::LEFT) {
        if (*s.permuteOp == LWasmPermuteSimd128::MOVE) {
          src = useRegisterAtStart(ins->lhs());
        } else {
          src = useRegister(ins->lhs());
        }
      } else {
        if (*s.permuteOp == LWasmPermuteSimd128::MOVE) {
          src = useRegisterAtStart(ins->rhs());
        } else {
          src = useRegister(ins->rhs());
        }
      }
      auto* lir =
          new (alloc()) LWasmPermuteSimd128(src, *s.permuteOp, s.control);
      if (*s.permuteOp == LWasmPermuteSimd128::MOVE) {
        defineReuseInput(lir, ins, LWasmPermuteSimd128::Src);
      } else {
        define(lir, ins);
      }
      break;
    }
    case Shuffle::Operand::BOTH:
    case Shuffle::Operand::BOTH_SWAPPED: {
      // Only the byte-granularity blends need a scratch vector register.
      LDefinition temp = LDefinition::BogusTemp();
      switch (*s.shuffleOp) {
        case LWasmShuffleSimd128::SHUFFLE_BLEND_8x16:
        case LWasmShuffleSimd128::BLEND_8x16:
          temp = tempSimd128();
          break;
        default:
          break;
      }
      // The (possibly swapped) lhs is reused as the output register; for
      // BOTH_SWAPPED the operands were exchanged by the analysis.
      LAllocation lhs;
      LAllocation rhs;
      if (s.opd == Shuffle::Operand::BOTH) {
        lhs = useRegisterAtStart(ins->lhs());
        rhs = useRegister(ins->rhs());
      } else {
        lhs = useRegisterAtStart(ins->rhs());
        rhs = useRegister(ins->lhs());
      }
      auto* lir = new (alloc())
          LWasmShuffleSimd128(lhs, rhs, temp, *s.shuffleOp, s.control);
      defineReuseInput(lir, ins, LWasmShuffleSimd128::LhsDest);
      break;
    }
  }
}
1352 
visitWasmReplaceLaneSimd128(MWasmReplaceLaneSimd128 * ins)1353 void LIRGenerator::visitWasmReplaceLaneSimd128(MWasmReplaceLaneSimd128* ins) {
1354   MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
1355   MOZ_ASSERT(ins->type() == MIRType::Simd128);
1356 
1357   if (ins->rhs()->type() == MIRType::Int64) {
1358     auto* lir = new (alloc()) LWasmReplaceInt64LaneSimd128(
1359         useRegisterAtStart(ins->lhs()), useInt64Register(ins->rhs()));
1360     defineReuseInput(lir, ins, LWasmReplaceInt64LaneSimd128::LhsDest);
1361   } else {
1362     auto* lir = new (alloc()) LWasmReplaceLaneSimd128(
1363         useRegisterAtStart(ins->lhs()), useRegister(ins->rhs()));
1364     defineReuseInput(lir, ins, LWasmReplaceLaneSimd128::LhsDest);
1365   }
1366 }
1367 
1368 // For unary operations we currently avoid using useRegisterAtStart() and
1369 // reusing the input for the output, as that frequently leads to longer code
1370 // sequences as we end up using scratch to hold an intermediate result.
1371 
visitWasmScalarToSimd128(MWasmScalarToSimd128 * ins)1372 void LIRGenerator::visitWasmScalarToSimd128(MWasmScalarToSimd128* ins) {
1373   MOZ_ASSERT(ins->type() == MIRType::Simd128);
1374 
1375   if (ins->input()->type() == MIRType::Int64) {
1376     auto* lir =
1377         new (alloc()) LWasmInt64ToSimd128(useInt64Register(ins->input()));
1378     define(lir, ins);
1379   } else {
1380     auto* lir = new (alloc()) LWasmScalarToSimd128(useRegister(ins->input()));
1381     define(lir, ins);
1382   }
1383 }
1384 
visitWasmUnarySimd128(MWasmUnarySimd128 * ins)1385 void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
1386   MOZ_ASSERT(ins->input()->type() == MIRType::Simd128);
1387   MOZ_ASSERT(ins->type() == MIRType::Simd128);
1388 
1389   LDefinition tempReg = LDefinition::BogusTemp();
1390   switch (ins->simdOp()) {
1391     case wasm::SimdOp::I32x4TruncUSatF32x4:
1392       tempReg = tempSimd128();
1393       break;
1394     default:
1395       break;
1396   }
1397 
1398   LWasmUnarySimd128* lir =
1399       new (alloc()) LWasmUnarySimd128(useRegister(ins->input()), tempReg);
1400   define(lir, ins);
1401 }
1402 
visitWasmReduceSimd128(MWasmReduceSimd128 * ins)1403 void LIRGenerator::visitWasmReduceSimd128(MWasmReduceSimd128* ins) {
1404   if (ins->type() == MIRType::Int64) {
1405     auto* lir =
1406         new (alloc()) LWasmReduceSimd128ToInt64(useRegister(ins->input()));
1407     defineInt64(lir, ins);
1408   } else {
1409     auto* lir = new (alloc()) LWasmReduceSimd128(useRegister(ins->input()));
1410     define(lir, ins);
1411   }
1412 }
1413 
1414 #endif  // ENABLE_WASM_SIMD
1415