/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "jit/x64/Lowering-x64.h"

#include "jit/Lowering.h"
#include "jit/MIR.h"
#include "jit/x64/Assembler-x64.h"

#include "jit/shared/Lowering-shared-inl.h"

using namespace js;
using namespace js::jit;

using mozilla::Maybe;
using mozilla::Nothing;
using mozilla::Some;

LBoxAllocation LIRGeneratorX64::useBoxFixed(MDefinition* mir, Register reg1,
                                            Register, bool useAtStart) {
  MOZ_ASSERT(mir->type() == MIRType::Value);

  ensureDefined(mir);
  return LBoxAllocation(LUse(reg1, mir->virtualRegister(), useAtStart));
}

LAllocation LIRGeneratorX64::useByteOpRegister(MDefinition* mir) {
  return useRegister(mir);
}

LAllocation LIRGeneratorX64::useByteOpRegisterAtStart(MDefinition* mir) {
  return useRegisterAtStart(mir);
}

LAllocation LIRGeneratorX64::useByteOpRegisterOrNonDoubleConstant(
    MDefinition* mir) {
  return useRegisterOrNonDoubleConstant(mir);
}

LDefinition LIRGeneratorX64::tempByteOpRegister() { return temp(); }

LDefinition LIRGeneratorX64::tempToUnbox() { return temp(); }

void LIRGeneratorX64::lowerForALUInt64(
    LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
    MDefinition* mir, MDefinition* lhs, MDefinition* rhs) {
  ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
  ins->setInt64Operand(INT64_PIECES, lhs != rhs
                                         ? useInt64OrConstant(rhs)
                                         : useInt64OrConstantAtStart(rhs));
  defineInt64ReuseInput(ins, mir, 0);
}

void LIRGeneratorX64::lowerForMulInt64(LMulI64* ins, MMul* mir,
                                       MDefinition* lhs, MDefinition* rhs) {
  // X64 doesn't need a temp for 64-bit multiplication.
  ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
  ins->setInt64Operand(INT64_PIECES, lhs != rhs
                                         ? useInt64OrConstant(rhs)
                                         : useInt64OrConstantAtStart(rhs));
  defineInt64ReuseInput(ins, mir, 0);
}

void LIRGenerator::visitBox(MBox* box) {
  MDefinition* opd = box->getOperand(0);

  // If the operand is a constant, emit near its uses.
  if (opd->isConstant() && box->canEmitAtUses()) {
    emitAtUses(box);
    return;
  }

  if (opd->isConstant()) {
    define(new (alloc()) LValue(opd->toConstant()->toJSValue()), box,
           LDefinition(LDefinition::BOX));
  } else {
    LBox* ins = new (alloc()) LBox(useRegister(opd), opd->type());
    define(ins, box, LDefinition(LDefinition::BOX));
  }
}

void LIRGenerator::visitUnbox(MUnbox* unbox) {
  MDefinition* box = unbox->getOperand(0);

  if (box->type() == MIRType::ObjectOrNull) {
    LUnboxObjectOrNull* lir =
        new (alloc()) LUnboxObjectOrNull(useRegisterAtStart(box));
    if (unbox->fallible()) {
      assignSnapshot(lir, unbox->bailoutKind());
    }
    defineReuseInput(lir, unbox, 0);
    return;
  }

  MOZ_ASSERT(box->type() == MIRType::Value);

  LUnboxBase* lir;
  if (IsFloatingPointType(unbox->type())) {
    lir = new (alloc())
        LUnboxFloatingPoint(useRegisterAtStart(box), unbox->type());
  } else if (unbox->fallible()) {
    // If the unbox is fallible, load the Value in a register first to
    // avoid multiple loads.
    lir = new (alloc()) LUnbox(useRegisterAtStart(box));
  } else {
    lir = new (alloc()) LUnbox(useAtStart(box));
  }

  if (unbox->fallible()) {
    assignSnapshot(lir, unbox->bailoutKind());
  }

  define(lir, unbox);
}

void LIRGenerator::visitReturn(MReturn* ret) {
  MDefinition* opd = ret->getOperand(0);
  MOZ_ASSERT(opd->type() == MIRType::Value);

  LReturn* ins = new (alloc()) LReturn;
  ins->setOperand(0, useFixed(opd, JSReturnReg));
  add(ins);
}

void LIRGeneratorX64::lowerUntypedPhiInput(MPhi* phi, uint32_t inputPosition,
                                           LBlock* block, size_t lirIndex) {
  lowerTypedPhiInput(phi, inputPosition, block, lirIndex);
}

void LIRGeneratorX64::defineInt64Phi(MPhi* phi, size_t lirIndex) {
  defineTypedPhi(phi, lirIndex);
}

void LIRGeneratorX64::lowerInt64PhiInput(MPhi* phi, uint32_t inputPosition,
                                         LBlock* block, size_t lirIndex) {
  lowerTypedPhiInput(phi, inputPosition, block, lirIndex);
}

void LIRGenerator::visitCompareExchangeTypedArrayElement(
    MCompareExchangeTypedArrayElement* ins) {
  lowerCompareExchangeTypedArrayElement(ins,
                                        /* useI386ByteRegisters = */ false);
}

void LIRGenerator::visitAtomicExchangeTypedArrayElement(
    MAtomicExchangeTypedArrayElement* ins) {
  lowerAtomicExchangeTypedArrayElement(ins, /* useI386ByteRegisters = */ false);
}

void LIRGenerator::visitAtomicTypedArrayElementBinop(
    MAtomicTypedArrayElementBinop* ins) {
  lowerAtomicTypedArrayElementBinop(ins, /* useI386ByteRegisters = */ false);
}

void LIRGenerator::visitWasmUnsignedToDouble(MWasmUnsignedToDouble* ins) {
  MOZ_ASSERT(ins->input()->type() == MIRType::Int32);
  LWasmUint32ToDouble* lir =
      new (alloc()) LWasmUint32ToDouble(useRegisterAtStart(ins->input()));
  define(lir, ins);
}

void LIRGenerator::visitWasmUnsignedToFloat32(MWasmUnsignedToFloat32* ins) {
  MOZ_ASSERT(ins->input()->type() == MIRType::Int32);
  LWasmUint32ToFloat32* lir =
      new (alloc()) LWasmUint32ToFloat32(useRegisterAtStart(ins->input()));
  define(lir, ins);
}

void LIRGenerator::visitWasmHeapBase(MWasmHeapBase* ins) {
  auto* lir = new (alloc()) LWasmHeapBase(LAllocation());
  define(lir, ins);
}

void LIRGenerator::visitWasmLoad(MWasmLoad* ins) {
  MDefinition* base = ins->base();
  MOZ_ASSERT(base->type() == MIRType::Int32);

  if (ins->type() != MIRType::Int64) {
    auto* lir = new (alloc()) LWasmLoad(useRegisterOrZeroAtStart(base));
    define(lir, ins);
    return;
  }

  auto* lir = new (alloc()) LWasmLoadI64(useRegisterOrZeroAtStart(base));
  defineInt64(lir, ins);
}

void LIRGenerator::visitWasmStore(MWasmStore* ins) {
  MDefinition* base = ins->base();
  MOZ_ASSERT(base->type() == MIRType::Int32);

  MDefinition* value = ins->value();
  LAllocation valueAlloc;
  switch (ins->access().type()) {
    case Scalar::Int8:
    case Scalar::Uint8:
    case Scalar::Int16:
    case Scalar::Uint16:
    case Scalar::Int32:
    case Scalar::Uint32:
      valueAlloc = useRegisterOrConstantAtStart(value);
      break;
    case Scalar::Int64:
      // There is no way to encode an int64 constant-to-memory move on x64,
      // so int64 constants must be loaded into a register first.
      if (value->isConstant() && value->type() != MIRType::Int64) {
        valueAlloc = useOrConstantAtStart(value);
      } else {
        valueAlloc = useRegisterAtStart(value);
      }
      break;
    case Scalar::Float32:
    case Scalar::Float64:
      valueAlloc = useRegisterAtStart(value);
      break;
    case Scalar::Simd128:
#ifdef ENABLE_WASM_SIMD
      valueAlloc = useRegisterAtStart(value);
      break;
#else
      MOZ_CRASH("unexpected array type");
#endif
    case Scalar::BigInt64:
    case Scalar::BigUint64:
    case Scalar::Uint8Clamped:
    case Scalar::MaxTypedArrayViewType:
      MOZ_CRASH("unexpected array type");
  }

  LAllocation baseAlloc = useRegisterOrZeroAtStart(base);
  auto* lir = new (alloc()) LWasmStore(baseAlloc, valueAlloc);
  add(lir, ins);
}

void LIRGenerator::visitWasmCompareExchangeHeap(MWasmCompareExchangeHeap* ins) {
  MDefinition* base = ins->base();
  MOZ_ASSERT(base->type() == MIRType::Int32);

  // The output may not be used but will be clobbered regardless, so
  // pin the output to eax.
  //
  // The input values must both be in registers.

  const LAllocation oldval = useRegister(ins->oldValue());
  const LAllocation newval = useRegister(ins->newValue());

  LWasmCompareExchangeHeap* lir =
      new (alloc()) LWasmCompareExchangeHeap(useRegister(base), oldval, newval);

  defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
}

void LIRGenerator::visitWasmAtomicExchangeHeap(MWasmAtomicExchangeHeap* ins) {
  MOZ_ASSERT(ins->base()->type() == MIRType::Int32);

  const LAllocation base = useRegister(ins->base());
  const LAllocation value = useRegister(ins->value());

  // The output may not be used but will be clobbered regardless,
  // so ignore the case where we're not using the value and just
  // use the output register as a temp.

  LWasmAtomicExchangeHeap* lir =
      new (alloc()) LWasmAtomicExchangeHeap(base, value);
  define(lir, ins);
}

void LIRGenerator::visitWasmAtomicBinopHeap(MWasmAtomicBinopHeap* ins) {
  MDefinition* base = ins->base();
  MOZ_ASSERT(base->type() == MIRType::Int32);

  // No support for 64-bit operations with constants at the masm level.

  bool canTakeConstant = ins->access().type() != Scalar::Int64;

  // Case 1: the result of the operation is not used.
  //
  // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
  // LOCK OR, or LOCK XOR.

  if (!ins->hasUses()) {
    LAllocation value = canTakeConstant ? useRegisterOrConstant(ins->value())
                                        : useRegister(ins->value());
    LWasmAtomicBinopHeapForEffect* lir =
        new (alloc()) LWasmAtomicBinopHeapForEffect(useRegister(base), value);
    add(lir, ins);
    return;
  }

  // Case 2: the result of the operation is used.
  //
  // For ADD and SUB we'll use XADD with word and byte ops as
  // appropriate. Any output register can be used and if value is a
  // register it's best if it's the same as output:
  //
  //    movl value, output   ; if value != output
  //    lock xaddl output, mem
  //
  // For AND/OR/XOR we need to use a CMPXCHG loop, and the output is
  // always in rax:
  //
  //    movl *mem, rax
  // L: mov rax, temp
  //    andl value, temp
  //    lock cmpxchg temp, mem   ; reads rax also
  //    jnz L
  //    ; result in rax
  //
  // Note the placement of L, cmpxchg will update rax with *mem if
  // *mem does not have the expected value, so reloading it at the
  // top of the loop would be redundant.

  bool bitOp = !(ins->operation() == AtomicFetchAddOp ||
                 ins->operation() == AtomicFetchSubOp);
  bool reuseInput = false;
  LAllocation value;

  if (bitOp || ins->value()->isConstant()) {
    value = canTakeConstant ? useRegisterOrConstant(ins->value())
                            : useRegister(ins->value());
  } else {
    reuseInput = true;
    value = useRegisterAtStart(ins->value());
  }

  auto* lir = new (alloc()) LWasmAtomicBinopHeap(
      useRegister(base), value, bitOp ? temp() : LDefinition::BogusTemp());

  if (reuseInput) {
    defineReuseInput(lir, ins, LWasmAtomicBinopHeap::valueOp);
  } else if (bitOp) {
    defineFixed(lir, ins, LAllocation(AnyRegister(rax)));
  } else {
    define(lir, ins);
  }
}

void LIRGenerator::visitSubstr(MSubstr* ins) {
  LSubstr* lir = new (alloc())
      LSubstr(useRegister(ins->string()), useRegister(ins->begin()),
              useRegister(ins->length()), temp(), temp(), tempByteOpRegister());
  define(lir, ins);
  assignSafepoint(lir, ins);
}

void LIRGenerator::visitRandom(MRandom* ins) {
  LRandom* lir = new (alloc()) LRandom(temp(), temp(), temp());
  defineFixed(lir, ins, LFloatReg(ReturnDoubleReg));
}

void LIRGeneratorX64::lowerDivI64(MDiv* div) {
  if (div->isUnsigned()) {
    lowerUDivI64(div);
    return;
  }

  LDivOrModI64* lir = new (alloc()) LDivOrModI64(
      useRegister(div->lhs()), useRegister(div->rhs()), tempFixed(rdx));
  defineInt64Fixed(lir, div, LInt64Allocation(LAllocation(AnyRegister(rax))));
}

void LIRGeneratorX64::lowerModI64(MMod* mod) {
  if (mod->isUnsigned()) {
    lowerUModI64(mod);
    return;
  }

  LDivOrModI64* lir = new (alloc()) LDivOrModI64(
      useRegister(mod->lhs()), useRegister(mod->rhs()), tempFixed(rax));
  defineInt64Fixed(lir, mod, LInt64Allocation(LAllocation(AnyRegister(rdx))));
}

void LIRGeneratorX64::lowerUDivI64(MDiv* div) {
  LUDivOrModI64* lir = new (alloc()) LUDivOrModI64(
      useRegister(div->lhs()), useRegister(div->rhs()), tempFixed(rdx));
  defineInt64Fixed(lir, div, LInt64Allocation(LAllocation(AnyRegister(rax))));
}

void LIRGeneratorX64::lowerUModI64(MMod* mod) {
  LUDivOrModI64* lir = new (alloc()) LUDivOrModI64(
      useRegister(mod->lhs()), useRegister(mod->rhs()), tempFixed(rax));
  defineInt64Fixed(lir, mod, LInt64Allocation(LAllocation(AnyRegister(rdx))));
}

void LIRGenerator::visitWasmTruncateToInt64(MWasmTruncateToInt64* ins) {
  MDefinition* opd = ins->input();
  MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32);

  LDefinition maybeTemp =
      ins->isUnsigned() ? tempDouble() : LDefinition::BogusTemp();
  defineInt64(new (alloc()) LWasmTruncateToInt64(useRegister(opd), maybeTemp),
              ins);
}

void LIRGenerator::visitInt64ToFloatingPoint(MInt64ToFloatingPoint* ins) {
  MDefinition* opd = ins->input();
  MOZ_ASSERT(opd->type() == MIRType::Int64);
  MOZ_ASSERT(IsFloatingPointType(ins->type()));

  LDefinition maybeTemp = ins->isUnsigned() ? temp() : LDefinition::BogusTemp();
  define(new (alloc()) LInt64ToFloatingPoint(useInt64Register(opd), maybeTemp),
         ins);
}

void LIRGenerator::visitExtendInt32ToInt64(MExtendInt32ToInt64* ins) {
  defineInt64(new (alloc()) LExtendInt32ToInt64(useAtStart(ins->input())), ins);
}

void LIRGenerator::visitSignExtendInt64(MSignExtendInt64* ins) {
  defineInt64(new (alloc())
                  LSignExtendInt64(useInt64RegisterAtStart(ins->input())),
              ins);
}

#ifdef ENABLE_WASM_SIMD

// These lowerings are really x86-shared but some Masm APIs are not yet
// available on x86.

// Ternary and binary operators require the dest register to be the same as
// their first input register, leading to a pattern of useRegisterAtStart +
// defineReuseInput.

void LIRGenerator::visitWasmBitselectSimd128(MWasmBitselectSimd128* ins) {
  MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->control()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  auto* lir = new (alloc()) LWasmBitselectSimd128(
      useRegisterAtStart(ins->lhs()), useRegister(ins->rhs()),
      useRegister(ins->control()), tempSimd128());
  defineReuseInput(lir, ins, LWasmBitselectSimd128::LhsDest);
}

void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) {
  MDefinition* lhs = ins->lhs();
  MDefinition* rhs = ins->rhs();

  MOZ_ASSERT(lhs->type() == MIRType::Simd128);
  MOZ_ASSERT(rhs->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  if (ins->isCommutative()) {
    ReorderCommutative(&lhs, &rhs, ins);
  }

  LDefinition tempReg0 = LDefinition::BogusTemp();
  LDefinition tempReg1 = LDefinition::BogusTemp();
  switch (ins->simdOp()) {
    case wasm::SimdOp::V128AndNot: {
      // x86/x64 specific: Code generation requires the operands to be
      // reversed.
      MDefinition* tmp = lhs;
      lhs = rhs;
      rhs = tmp;
      break;
    }
    case wasm::SimdOp::F32x4Max:
    case wasm::SimdOp::F64x2Max:
    case wasm::SimdOp::V8x16Swizzle:
      tempReg0 = tempSimd128();
      break;
    case wasm::SimdOp::I8x16LtU:
    case wasm::SimdOp::I8x16GtU:
    case wasm::SimdOp::I8x16LeU:
    case wasm::SimdOp::I8x16GeU:
    case wasm::SimdOp::I16x8LtU:
    case wasm::SimdOp::I16x8GtU:
    case wasm::SimdOp::I16x8LeU:
    case wasm::SimdOp::I16x8GeU:
    case wasm::SimdOp::I32x4LtU:
    case wasm::SimdOp::I32x4GtU:
    case wasm::SimdOp::I32x4LeU:
    case wasm::SimdOp::I32x4GeU:
      tempReg0 = tempSimd128();
      tempReg1 = tempSimd128();
      break;
    default:
      break;
  }

  LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
  LAllocation rhsAlloc =
      lhs != rhs ? useRegister(rhs) : useRegisterAtStart(rhs);
  if (ins->simdOp() == wasm::SimdOp::I64x2Mul) {
    auto* lir =
        new (alloc()) LWasmI64x2Mul(lhsDestAlloc, rhsAlloc, tempInt64());
    defineReuseInput(lir, ins, LWasmI64x2Mul::LhsDest);
  } else {
    auto* lir = new (alloc())
        LWasmBinarySimd128(lhsDestAlloc, rhsAlloc, tempReg0, tempReg1);
    defineReuseInput(lir, ins, LWasmBinarySimd128::LhsDest);
  }
}

void LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) {
  MDefinition* lhs = ins->lhs();
  MDefinition* rhs = ins->rhs();

  MOZ_ASSERT(lhs->type() == MIRType::Simd128);
  MOZ_ASSERT(rhs->type() == MIRType::Int32);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  if (rhs->isConstant()) {
    LDefinition temp = LDefinition::BogusTemp();
    int32_t shiftCount = rhs->toConstant()->toInt32();
    switch (ins->simdOp()) {
      case wasm::SimdOp::I8x16Shl:
      case wasm::SimdOp::I8x16ShrU:
        shiftCount &= 7;
        break;
      case wasm::SimdOp::I8x16ShrS:
        shiftCount &= 7;
        temp = tempSimd128();
        break;
      case wasm::SimdOp::I16x8Shl:
      case wasm::SimdOp::I16x8ShrU:
      case wasm::SimdOp::I16x8ShrS:
        shiftCount &= 15;
        break;
      case wasm::SimdOp::I32x4Shl:
      case wasm::SimdOp::I32x4ShrU:
      case wasm::SimdOp::I32x4ShrS:
        shiftCount &= 31;
        break;
      case wasm::SimdOp::I64x2Shl:
      case wasm::SimdOp::I64x2ShrU:
      case wasm::SimdOp::I64x2ShrS:
        shiftCount &= 63;
        break;
      default:
        MOZ_CRASH("Unexpected shift operation");
    }
# ifdef DEBUG
    js::wasm::ReportSimdAnalysis("shift -> constant shift");
# endif
    auto* lir = new (alloc())
        LWasmConstantShiftSimd128(useRegisterAtStart(lhs), temp, shiftCount);
    defineReuseInput(lir, ins, LWasmConstantShiftSimd128::Src);
    return;
  }

# ifdef DEBUG
  js::wasm::ReportSimdAnalysis("shift -> variable shift");
# endif

  LDefinition tempReg0 = LDefinition::BogusTemp();
  LDefinition tempReg1 = LDefinition::BogusTemp();
  switch (ins->simdOp()) {
    case wasm::SimdOp::I64x2ShrS:
      break;
    case wasm::SimdOp::I8x16Shl:
    case wasm::SimdOp::I8x16ShrS:
    case wasm::SimdOp::I8x16ShrU:
      tempReg0 = temp();
      tempReg1 = tempSimd128();
      break;
    default:
      tempReg0 = temp();
      break;
  }

  LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
  LAllocation rhsAlloc = ins->simdOp() == wasm::SimdOp::I64x2ShrS
                             ? useFixed(rhs, ecx)
                             : useRegister(rhs);
  auto* lir = new (alloc())
      LWasmVariableShiftSimd128(lhsDestAlloc, rhsAlloc, tempReg0, tempReg1);
  defineReuseInput(lir, ins, LWasmVariableShiftSimd128::LhsDest);
}

// Optimization of v8x16.shuffle. The general byte shuffle+blend is very
// expensive (equivalent to at least a dozen instructions), and we want to
// avoid that if we can. So look for special cases - there are many.
//
// The strategy is to sort the operation into one of three buckets depending
// on the shuffle pattern and inputs:
//
//  - single operand; shuffles on these values are rotations, reversals,
//    transpositions, and general permutations
//  - single-operand-with-interesting-constant (especially zero); shuffles on
//    these values are often byte shift or scatter operations
//  - dual operand; shuffles on these values are blends, catenated shifts,
//    and (in the worst case) general shuffle+blends
//
// We're not trying to solve the general problem, only to lower reasonably
// expressed patterns that express common operations. Producers that produce
// dense and convoluted patterns will end up with the general byte shuffle.
// Producers that produce simpler patterns that easily map to hardware will
// get faster code.
//
// In particular, these matchers do not try to combine transformations, so a
// shuffle that optimally is lowered to rotate + permute32x4 + rotate, say,
// is usually going to end up as a general byte shuffle.
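//
// To make the buckets concrete (writing controls as byte lanes, where lanes
// 0..15 select from the lhs and 16..31 from the rhs), a few illustrative
// patterns and where they land:
//
//  - {1,2,...,15,0} uses one operand only: a byte rotate (PALIGNR).
//  - {16,16,16,0,1,...,12} against a zeroed rhs: a byte shift left by 3.
//  - {0,16,1,17,...,7,23} uses both operands: a low byte interleave
//    (PUNPCKLBW).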

// Representation of the result of the analysis.
struct Shuffle {
  enum class Operand {
    // Both inputs, in the original lhs-rhs order
    BOTH,
    // Both inputs, but in rhs-lhs order
    BOTH_SWAPPED,
    // Only the lhs input
    LEFT,
    // Only the rhs input
    RIGHT,
  };

  Operand opd;
  SimdConstant control;
  Maybe<LWasmPermuteSimd128::Op> permuteOp;  // Single operands
  Maybe<LWasmShuffleSimd128::Op> shuffleOp;  // Double operands

  static Shuffle permute(Operand opd, SimdConstant control,
                         LWasmPermuteSimd128::Op op) {
    MOZ_ASSERT(opd == Operand::LEFT || opd == Operand::RIGHT);
    Shuffle s{opd, control, Some(op), Nothing()};
    return s;
  }

  static Shuffle shuffle(Operand opd, SimdConstant control,
                         LWasmShuffleSimd128::Op op) {
    MOZ_ASSERT(opd == Operand::BOTH || opd == Operand::BOTH_SWAPPED);
    Shuffle s{opd, control, Nothing(), Some(op)};
    return s;
  }
};

// Reduce a 0..31 byte mask to a 0..15 word mask if possible and if so return
// true, updating *control.
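// For example (worked through the loop below), the byte mask
//   {2,3, 0,1, 6,7, 4,5, 10,11, 8,9, 14,15, 12,13}
// reduces to the word mask {1, 0, 3, 2, 5, 4, 7, 6}, while a mask that
// splits a byte pair, such as one beginning {1,2, ...}, does not reduce.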
static bool ByteMaskToWordMask(SimdConstant* control) {
  const SimdConstant::I8x16& lanes = control->asInt8x16();
  int16_t controlWords[8];
  for (int i = 0; i < 16; i += 2) {
    if (!((lanes[i] & 1) == 0 && lanes[i + 1] == lanes[i] + 1)) {
      return false;
    }
    controlWords[i / 2] = lanes[i] / 2;
  }
  *control = SimdConstant::CreateX8(controlWords);
  return true;
}

// Reduce a 0..31 byte mask to a 0..7 dword mask if possible and if so return
// true, updating *control.
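// For example, {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11} reduces to the
// dword mask {1, 0, 3, 2}.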
static bool ByteMaskToDWordMask(SimdConstant* control) {
  const SimdConstant::I8x16& lanes = control->asInt8x16();
  int32_t controlDWords[4];
  for (int i = 0; i < 16; i += 4) {
    if (!((lanes[i] & 3) == 0 && lanes[i + 1] == lanes[i] + 1 &&
          lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3)) {
      return false;
    }
    controlDWords[i / 4] = lanes[i] / 4;
  }
  *control = SimdConstant::CreateX4(controlDWords);
  return true;
}

// Skip across consecutive values in lanes starting at i, returning the index
// after the last element. Lane values must be <= len-1 ("masked").
//
// Since every element is a 1-element run, the return value is never the same
// as the starting i.
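// For example, for byte lanes {4,5,6,7, 0,1, ...} a scan from i=0 returns 4:
// the run 4,5,6,7 ends where the lane value falls back to 0.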
template <typename T>
static int ScanIncreasingMasked(const T* lanes, int i) {
  int len = int(16 / sizeof(T));
  MOZ_ASSERT(i < len);
  MOZ_ASSERT(lanes[i] <= len - 1);
  i++;
  while (i < len && lanes[i] == lanes[i - 1] + 1) {
    MOZ_ASSERT(lanes[i] <= len - 1);
    i++;
  }
  return i;
}

// Skip across consecutive values in lanes starting at i, returning the index
// after the last element. Lane values must be <= len*2-1 ("unmasked"); the
// values len-1 and len are not considered consecutive.
//
// Since every element is a 1-element run, the return value is never the same
// as the starting i.
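// For example, for byte lanes {14,15, 16,17, ...} a scan from i=0 returns 2:
// lhs lane 15 followed by rhs lane 16 does not extend the run.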
template <typename T>
static int ScanIncreasingUnmasked(const T* lanes, int i) {
  int len = int(16 / sizeof(T));
  MOZ_ASSERT(i < len);
  if (lanes[i] < len) {
    i++;
    while (i < len && lanes[i] < len && lanes[i - 1] == lanes[i] - 1) {
      i++;
    }
  } else {
    i++;
    while (i < len && lanes[i] >= len && lanes[i - 1] == lanes[i] - 1) {
      i++;
    }
  }
  return i;
}

// Skip lanes that equal v starting at i, returning the index just beyond the
// last of those. There is no requirement that the initial lanes[i] == v.
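// For example, for word lanes {5,5,5, 2, ...} ScanConstant(lanes, 5, 0)
// returns 3, and ScanConstant(lanes, 7, 0) returns 0.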
template <typename T>
static int ScanConstant(const T* lanes, int v, int i) {
  int len = int(16 / sizeof(T));
  MOZ_ASSERT(i <= len);
  while (i < len && lanes[i] == v) {
    i++;
  }
  return i;
}

// Mask lane values denoting rhs elements into lhs elements.
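// For example, for word lanes the control {0,9,2,11,4,13,6,15} masks to
// {0,1,2,3,4,5,6,7}: rhs lane k appears as len+k unmasked and as k masked.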
template <typename T>
static void MaskLanes(T* result, const T* input) {
  int len = int(16 / sizeof(T));
  for (int i = 0; i < len; i++) {
    result[i] = input[i] & (len - 1);
  }
}

// Apply a transformation to each lane value.
template <typename T>
static void MapLanes(T* result, const T* input, int (*f)(int)) {
  int len = int(16 / sizeof(T));
  for (int i = 0; i < len; i++) {
    result[i] = f(input[i]);
  }
}

// Recognize an identity permutation, assuming lanes is masked.
template <typename T>
static bool IsIdentity(const T* lanes) {
  return ScanIncreasingMasked(lanes, 0) == int(16 / sizeof(T));
}

// Recognize part of an identity permutation starting at start, with
// the first value of the permutation expected to be bias.
template <typename T>
static bool IsIdentity(const T* lanes, int start, int len, int bias) {
  if (lanes[start] != bias) {
    return false;
  }
  for (int i = start + 1; i < start + len; i++) {
    if (lanes[i] != lanes[i - 1] + 1) {
      return false;
    }
  }
  return true;
}

// We can permute by dwords if the mask is reducible to a dword mask, and in
// this case a single PSHUFD is enough.
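// For example, the byte mask {8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7}
// reduces to the dword mask {2, 3, 0, 1}, which maps directly onto a PSHUFD
// immediate.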
static bool TryPermute32x4(SimdConstant* control) {
  SimdConstant tmp = *control;
  if (!ByteMaskToDWordMask(&tmp)) {
    return false;
  }
  *control = tmp;
  return true;
}

// Can we perform a byte rotate right? We can use PALIGNR. The shift count is
// just lanes[0], and *control is unchanged.
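// For example, {13,14,15, 0,1,...,12} rotates right by 13: the first run
// 13,14,15 is followed by a second run starting at lane 0 that fills out the
// vector, and lanes[0] = 13 becomes the PALIGNR count.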
static bool TryRotateRight8x16(SimdConstant* control) {
  const SimdConstant::I8x16& lanes = control->asInt8x16();
  // Look for the first run of consecutive bytes.
  int i = ScanIncreasingMasked(lanes, 0);

  // If we reach the end of the vector, the vector must start at 0.
  if (i == 16) {
    return lanes[0] == 0;
  }

  // Second run must start at source lane zero.
  if (lanes[i] != 0) {
    return false;
  }

  // Second run must end at the end of the lane vector.
  return ScanIncreasingMasked(lanes, i) == 16;
}

// We can permute by words if the mask is reducible to a word mask, but the x64
// lowering is only efficient if we can permute the high and low quadwords
// separately, possibly after swapping quadwords.
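// For example, a control reducing to the word mask {1,0,3,2, 4,5,6,7}
// permutes only the low quadword (PERM_LOW), and {4,5,6,7, 0,1,2,3} only
// swaps the quadwords (SWAP_QWORDS); the operation bits computed below are
// stashed in the high byte of the first output lane.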
static bool TryPermute16x8(SimdConstant* control) {
  SimdConstant tmp = *control;
  if (!ByteMaskToWordMask(&tmp)) {
    return false;
  }
  const SimdConstant::I16x8& lanes = tmp.asInt16x8();
  SimdConstant::I16x8 mapped;
  MapLanes(mapped, lanes, [](int x) -> int { return x < 4 ? 0 : 1; });
  int i = ScanConstant(mapped, mapped[0], 0);
  if (i != 4) {
    return false;
  }
  i = ScanConstant(mapped, mapped[4], 4);
  if (i != 8) {
    return false;
  }
  // Now compute the operation bits. `mapped` holds the adjusted lane mask.
  memcpy(mapped, lanes, sizeof(mapped));
  int16_t op = 0;
  if (mapped[0] > mapped[4]) {
    op |= LWasmPermuteSimd128::SWAP_QWORDS;
  }
  for (int i = 0; i < 8; i++) {
    mapped[i] &= 3;
  }
  if (!IsIdentity(mapped, 0, 4, 0)) {
    op |= LWasmPermuteSimd128::PERM_LOW;
  }
  if (!IsIdentity(mapped, 4, 4, 0)) {
    op |= LWasmPermuteSimd128::PERM_HIGH;
  }
  MOZ_ASSERT(op != 0);
  mapped[0] |= op << 8;
  *control = SimdConstant::CreateX8(mapped);
  return true;
}

// A single word lane is copied into all the other lanes: PSHUF*W + PSHUFD.
static bool TryBroadcast16x8(SimdConstant* control) {
  SimdConstant tmp = *control;
  if (!ByteMaskToWordMask(&tmp)) {
    return false;
  }
  const SimdConstant::I16x8& lanes = tmp.asInt16x8();
  if (ScanConstant(lanes, lanes[0], 0) < 8) {
    return false;
  }
  *control = tmp;
  return true;
}

// A single byte lane is copied into all the other lanes: PUNPCK*BW +
// PSHUF*W + PSHUFD.
static bool TryBroadcast8x16(SimdConstant* control) {
  const SimdConstant::I8x16& lanes = control->asInt8x16();
  if (ScanConstant(lanes, lanes[0], 0) < 16) {
    return false;
  }
  return true;
}

// Look for permutations of a single operand.
static LWasmPermuteSimd128::Op AnalyzePermute(SimdConstant* control) {
  // Lane indices are input-agnostic for single-operand permutations.
  SimdConstant::I8x16 controlBytes;
  MaskLanes(controlBytes, control->asInt8x16());

  // Get rid of no-ops immediately, so nobody else needs to check.
  if (IsIdentity(controlBytes)) {
    return LWasmPermuteSimd128::MOVE;
  }

  // Default control is the masked bytes.
  *control = SimdConstant::CreateX16(controlBytes);

  // Analysis order matters here and is architecture-dependent or even
  // microarchitecture-dependent: ideally the cheapest implementation first.
  // The Intel manual says that the cost of a PSHUFB is about five other
  // operations, so make that our cutoff.
  //
  // Word, dword, and qword reversals are handled optimally by general
  // permutes.
  //
  // Byte reversals are probably best left to PSHUFB, no alternative rendition
  // seems to reliably go below five instructions. (Discuss.)
  //
  // Word swaps within doublewords and dword swaps within quadwords are
  // handled optimally by general permutes.
  //
  // Dword and qword broadcasts are handled by dword permute.

  if (TryPermute32x4(control)) {
    return LWasmPermuteSimd128::PERMUTE_32x4;
  }
  if (TryRotateRight8x16(control)) {
    return LWasmPermuteSimd128::ROTATE_RIGHT_8x16;
  }
  if (TryPermute16x8(control)) {
    return LWasmPermuteSimd128::PERMUTE_16x8;
  }
  if (TryBroadcast16x8(control)) {
    return LWasmPermuteSimd128::BROADCAST_16x8;
  }
  if (TryBroadcast8x16(control)) {
    return LWasmPermuteSimd128::BROADCAST_8x16;
  }

  // TODO: (From v8) Unzip and transpose generally have renditions that
  // slightly beat a general permute (three or four instructions)
  //
  // TODO: (From MacroAssemblerX86Shared::ShuffleX4): MOVLHPS and MOVHLPS can
  // be used when merging two values.
  //
  // TODO: Byteswap is MOV + PSLLW + PSRLW + POR, a small win over PSHUFB.

  // The default operation is to permute bytes with the default control.
  return LWasmPermuteSimd128::PERMUTE_8x16;
}

// Can we shift the bytes left or right by a constant? A shift is a run of
// lanes from the rhs (which is zero) on one end and a run of values from the
// lhs on the other end.
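// For example (writing Z for any lane selecting a zero, i.e. index >= 16),
// {Z,Z,Z, 0,1,...,12} is a shift left by 3 and {3,4,...,15, Z,Z,Z} is a
// shift right by 3, while {Z,Z, 0,1, Z, ...} is rejected because the zeroes
// are not all at one end.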
static Maybe<LWasmPermuteSimd128::Op> TryShift8x16(SimdConstant* control) {
  const SimdConstant::I8x16& lanes = control->asInt8x16();

  // Represent all zero lanes by 16.
  SimdConstant::I8x16 zeroesMasked;
  MapLanes(zeroesMasked, lanes, [](int x) -> int { return x >= 16 ? 16 : x; });

  int i = ScanConstant(zeroesMasked, 16, 0);
  int shiftLeft = i;
  if (shiftLeft > 0 && lanes[shiftLeft] != 0) {
    return Nothing();
  }

  i = ScanIncreasingUnmasked(zeroesMasked, i);
  int shiftRight = 16 - i;
  if (shiftRight > 0 && lanes[i - 1] != 15) {
    return Nothing();
  }

  i = ScanConstant(zeroesMasked, 16, i);
  if (i < 16 || (shiftRight > 0 && shiftLeft > 0) ||
      (shiftRight == 0 && shiftLeft == 0)) {
    return Nothing();
  }

  if (shiftRight) {
    *control = SimdConstant::SplatX16(shiftRight);
    return Some(LWasmPermuteSimd128::SHIFT_RIGHT_8x16);
  }
  *control = SimdConstant::SplatX16(shiftLeft);
  return Some(LWasmPermuteSimd128::SHIFT_LEFT_8x16);
}

static Maybe<LWasmPermuteSimd128::Op> AnalyzeShuffleWithZero(
    SimdConstant* control) {
  Maybe<LWasmPermuteSimd128::Op> op;
  op = TryShift8x16(control);
  if (op) {
    return op;
  }

  // TODO: Optimization opportunity? A byte-blend-with-zero is just a CONST;
  // PAND. This may beat the general byte blend code below.
  return Nothing();
}

// Concat: if the result is the suffix (high bytes) of the rhs in front of a
// prefix (low bytes) of the lhs then this is PALIGNR; ditto if the operands
// are swapped.
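// For example, {24,25,...,31, 0,1,...,7} takes the high eight bytes of the
// rhs followed by the low eight bytes of the lhs: PALIGNR with a shift of 8,
// and no operand swap since the suffix already comes from the rhs.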
static Maybe<LWasmShuffleSimd128::Op> TryConcatRightShift8x16(
    SimdConstant* control, bool* swapOperands) {
  const SimdConstant::I8x16& lanes = control->asInt8x16();
  int i = ScanIncreasingUnmasked(lanes, 0);
  MOZ_ASSERT(i < 16, "Single-operand run should have been handled elsewhere");
  // The first run must end with 15 % 16.
  if ((lanes[i - 1] & 15) != 15) {
    return Nothing();
  }
  // The second run must start with 0 % 16.
  if ((lanes[i] & 15) != 0) {
    return Nothing();
  }
  // The two runs must come from different inputs.
  if ((lanes[i] & 16) == (lanes[i - 1] & 16)) {
    return Nothing();
  }
  int suffixLength = i;

  i = ScanIncreasingUnmasked(lanes, i);
  // The second run must end at the left end of the lane vector.
  if (i != 16) {
    return Nothing();
  }

  // If the suffix is from the lhs then swap the operands.
  if (lanes[0] < 16) {
    *swapOperands = !*swapOperands;
  }
  *control = SimdConstant::SplatX16(suffixLength);
  return Some(LWasmShuffleSimd128::CONCAT_RIGHT_SHIFT_8x16);
}

// Blend words: if we pick words from both operands without a pattern but all
// the input words stay in their position then this is PBLENDW (immediate
// mask); this also handles all larger sizes on x64.
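// For example, a control reducing to the word mask {0,9,2,11,4,13,6,7}
// takes words 1, 3, and 5 from the rhs and the rest from the lhs; the
// rewritten control {0,-1,0,-1,0,-1,0,0} marks the rhs lanes for the code
// generator.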
static Maybe<LWasmShuffleSimd128::Op> TryBlendInt16x8(SimdConstant* control) {
  SimdConstant tmp(*control);
  if (!ByteMaskToWordMask(&tmp)) {
    return Nothing();
  }
  SimdConstant::I16x8 masked;
  MaskLanes(masked, tmp.asInt16x8());
  if (!IsIdentity(masked)) {
    return Nothing();
  }
  SimdConstant::I16x8 mapped;
  MapLanes(mapped, tmp.asInt16x8(),
           [](int x) -> int { return x < 8 ? 0 : -1; });
  *control = SimdConstant::CreateX8(mapped);
  return Some(LWasmShuffleSimd128::BLEND_16x8);
}

// Blend bytes: if we pick bytes from both operands and every byte stays in
// its input position then this is a byte blend, which can be handled with a
// CONST, PAND, PANDNOT, and POR.
//
// TODO: Optimization opportunity? If we pick all but one lane from one input
// with at most one from the other then it could be a MOV + PEXTRB + PINSRB
// (also if this element is not in its source location).
static Maybe<LWasmShuffleSimd128::Op> TryBlendInt8x16(SimdConstant* control) {
  SimdConstant::I8x16 masked;
  MaskLanes(masked, control->asInt8x16());
  if (!IsIdentity(masked)) {
    return Nothing();
  }
  SimdConstant::I8x16 mapped;
  MapLanes(mapped, control->asInt8x16(),
           [](int x) -> int { return x < 16 ? 0 : -1; });
  *control = SimdConstant::CreateX16(mapped);
  return Some(LWasmShuffleSimd128::BLEND_8x16);
}

template <typename T>
static bool MatchInterleave(const T* lanes, int lhs, int rhs, int len) {
  for (int i = 0; i < len; i++) {
    if (lanes[i * 2] != lhs + i || lanes[i * 2 + 1] != rhs + i) {
      return false;
    }
  }
  return true;
}

// Unpack/interleave:
//  - if we interleave the low (bytes/words/doublewords) of the inputs into
//    the output then this is UNPCKL*W (possibly with a swap of operands).
//  - if we interleave the high ditto then it is UNPCKH*W (ditto).
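// For example, for word lanes, {0,8, 1,9, 2,10, 3,11} is a low interleave
// (INTERLEAVE_LOW_16x8) and {12,4, 13,5, 14,6, 15,7} is a high interleave
// with the operands swapped (INTERLEAVE_HIGH_16x8).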
template <typename T>
static Maybe<LWasmShuffleSimd128::Op> TryInterleave(
    const T* lanes, int lhs, int rhs, bool* swapOperands,
    LWasmShuffleSimd128::Op lowOp, LWasmShuffleSimd128::Op highOp) {
  int len = int(32 / (sizeof(T) * 4));
  if (MatchInterleave(lanes, lhs, rhs, len)) {
    return Some(lowOp);
  }
  if (MatchInterleave(lanes, rhs, lhs, len)) {
    *swapOperands = !*swapOperands;
    return Some(lowOp);
  }
  if (MatchInterleave(lanes, lhs + len, rhs + len, len)) {
    return Some(highOp);
  }
  if (MatchInterleave(lanes, rhs + len, lhs + len, len)) {
    *swapOperands = !*swapOperands;
    return Some(highOp);
  }
  return Nothing();
}

static Maybe<LWasmShuffleSimd128::Op> TryInterleave32x4(SimdConstant* control,
                                                        bool* swapOperands) {
  SimdConstant tmp = *control;
  if (!ByteMaskToDWordMask(&tmp)) {
    return Nothing();
  }
  const SimdConstant::I32x4& lanes = tmp.asInt32x4();
  return TryInterleave(lanes, 0, 4, swapOperands,
                       LWasmShuffleSimd128::INTERLEAVE_LOW_32x4,
                       LWasmShuffleSimd128::INTERLEAVE_HIGH_32x4);
}

static Maybe<LWasmShuffleSimd128::Op> TryInterleave16x8(SimdConstant* control,
                                                        bool* swapOperands) {
  SimdConstant tmp = *control;
  if (!ByteMaskToWordMask(&tmp)) {
    return Nothing();
  }
  const SimdConstant::I16x8& lanes = tmp.asInt16x8();
  return TryInterleave(lanes, 0, 8, swapOperands,
                       LWasmShuffleSimd128::INTERLEAVE_LOW_16x8,
                       LWasmShuffleSimd128::INTERLEAVE_HIGH_16x8);
}

static Maybe<LWasmShuffleSimd128::Op> TryInterleave8x16(SimdConstant* control,
                                                        bool* swapOperands) {
  const SimdConstant::I8x16& lanes = control->asInt8x16();
  return TryInterleave(lanes, 0, 16, swapOperands,
                       LWasmShuffleSimd128::INTERLEAVE_LOW_8x16,
                       LWasmShuffleSimd128::INTERLEAVE_HIGH_8x16);
}

static LWasmShuffleSimd128::Op AnalyzeTwoArgShuffle(SimdConstant* control,
                                                    bool* swapOperands) {
  Maybe<LWasmShuffleSimd128::Op> op;
  op = TryConcatRightShift8x16(control, swapOperands);
  if (!op) {
    op = TryBlendInt16x8(control);
  }
  if (!op) {
    op = TryBlendInt8x16(control);
  }
  if (!op) {
    op = TryInterleave32x4(control, swapOperands);
  }
  if (!op) {
    op = TryInterleave16x8(control, swapOperands);
  }
  if (!op) {
    op = TryInterleave8x16(control, swapOperands);
  }
  if (!op) {
    op = Some(LWasmShuffleSimd128::SHUFFLE_BLEND_8x16);
  }
  return *op;
}

// Reorder the operands if that seems useful, notably, move a constant to the
// right hand side. Rewrites the control to account for any move.
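// For example, if the lhs is the constant, the operands are exchanged and
// every control lane is XORed with 16, so {0,16, 1,17, ...} becomes
// {16,0, 17,1, ...}: the XOR flips which input each lane selects from.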
static bool MaybeReorderShuffleOperands(MDefinition** lhs, MDefinition** rhs,
                                        SimdConstant* control) {
  if ((*lhs)->isWasmFloatConstant()) {
    MDefinition* tmp = *lhs;
    *lhs = *rhs;
    *rhs = tmp;

    int8_t controlBytes[16];
    const SimdConstant::I8x16& lanes = control->asInt8x16();
    for (unsigned i = 0; i < 16; i++) {
      controlBytes[i] = lanes[i] ^ 16;
    }
    *control = SimdConstant::CreateX16(controlBytes);

    return true;
  }
  return false;
}

static Shuffle AnalyzeShuffle(MWasmShuffleSimd128* ins) {
  // Control may be updated, but only once we commit to an operation or when
  // we swap operands.
  SimdConstant control = ins->control();
  MDefinition* lhs = ins->lhs();
  MDefinition* rhs = ins->rhs();

  // If only one of the inputs is used, determine which.
  bool useLeft = true;
  bool useRight = true;
  if (lhs == rhs) {
    useRight = false;
  } else {
    bool allAbove = true;
    bool allBelow = true;
    const SimdConstant::I8x16& lanes = control.asInt8x16();
    for (unsigned i = 0; i < 16; i++) {
      allAbove = allAbove && lanes[i] >= 16;
      allBelow = allBelow && lanes[i] < 16;
    }
    if (allAbove) {
      useLeft = false;
    } else if (allBelow) {
      useRight = false;
    }
  }

  // Deal with one-ignored-input.
  if (!(useLeft && useRight)) {
    LWasmPermuteSimd128::Op op = AnalyzePermute(&control);
    return Shuffle::permute(
        useLeft ? Shuffle::Operand::LEFT : Shuffle::Operand::RIGHT, control,
        op);
  }

  // Move constants to rhs.
  bool swapOperands = MaybeReorderShuffleOperands(&lhs, &rhs, &control);

  // Deal with constant rhs.
  if (rhs->isWasmFloatConstant()) {
    SimdConstant rhsConstant = rhs->toWasmFloatConstant()->toSimd128();
    if (rhsConstant.isIntegerZero()) {
      Maybe<LWasmPermuteSimd128::Op> op = AnalyzeShuffleWithZero(&control);
      if (op) {
        return Shuffle::permute(
            swapOperands ? Shuffle::Operand::RIGHT : Shuffle::Operand::LEFT,
            control, *op);
      }
    }
  }

  // Two operands both of which are used. If there's one constant operand it
  // is now on the rhs.
  LWasmShuffleSimd128::Op op = AnalyzeTwoArgShuffle(&control, &swapOperands);
  return Shuffle::shuffle(
      swapOperands ? Shuffle::Operand::BOTH_SWAPPED : Shuffle::Operand::BOTH,
      control, op);
}

# ifdef DEBUG
static void ReportShuffleSpecialization(const Shuffle& s) {
  switch (s.opd) {
    case Shuffle::Operand::BOTH:
    case Shuffle::Operand::BOTH_SWAPPED:
      switch (*s.shuffleOp) {
        case LWasmShuffleSimd128::SHUFFLE_BLEND_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> shuffle+blend 8x16");
          break;
        case LWasmShuffleSimd128::BLEND_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> blend 8x16");
          break;
        case LWasmShuffleSimd128::BLEND_16x8:
          js::wasm::ReportSimdAnalysis("shuffle -> blend 16x8");
          break;
        case LWasmShuffleSimd128::CONCAT_RIGHT_SHIFT_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> concat+shift-right 8x16");
          break;
        case LWasmShuffleSimd128::INTERLEAVE_HIGH_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 8x16");
          break;
        case LWasmShuffleSimd128::INTERLEAVE_HIGH_16x8:
          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 16x8");
          break;
        case LWasmShuffleSimd128::INTERLEAVE_HIGH_32x4:
          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 32x4");
          break;
        case LWasmShuffleSimd128::INTERLEAVE_LOW_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 8x16");
          break;
        case LWasmShuffleSimd128::INTERLEAVE_LOW_16x8:
          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 16x8");
          break;
        case LWasmShuffleSimd128::INTERLEAVE_LOW_32x4:
          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 32x4");
          break;
        default:
          MOZ_CRASH("Unexpected shuffle op");
      }
      break;
    case Shuffle::Operand::LEFT:
    case Shuffle::Operand::RIGHT:
      switch (*s.permuteOp) {
        case LWasmPermuteSimd128::BROADCAST_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> broadcast 8x16");
          break;
        case LWasmPermuteSimd128::BROADCAST_16x8:
          js::wasm::ReportSimdAnalysis("shuffle -> broadcast 16x8");
          break;
        case LWasmPermuteSimd128::MOVE:
          js::wasm::ReportSimdAnalysis("shuffle -> move");
          break;
        case LWasmPermuteSimd128::PERMUTE_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> permute 8x16");
          break;
        case LWasmPermuteSimd128::PERMUTE_16x8: {
          int op = s.control.asInt16x8()[0] >> 8;
          char buf[256];
          sprintf(buf, "shuffle -> permute 16x8%s%s%s",
                  op & LWasmPermuteSimd128::SWAP_QWORDS ? " swap" : "",
                  op & LWasmPermuteSimd128::PERM_HIGH ? " high" : "",
                  op & LWasmPermuteSimd128::PERM_LOW ? " low" : "");
          js::wasm::ReportSimdAnalysis(buf);
          break;
        }
        case LWasmPermuteSimd128::PERMUTE_32x4:
          js::wasm::ReportSimdAnalysis("shuffle -> permute 32x4");
          break;
        case LWasmPermuteSimd128::ROTATE_RIGHT_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> rotate-right 8x16");
          break;
        case LWasmPermuteSimd128::SHIFT_LEFT_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> shift-left 8x16");
          break;
        case LWasmPermuteSimd128::SHIFT_RIGHT_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> shift-right 8x16");
          break;
        default:
          MOZ_CRASH("Unexpected permute op");
      }
      break;
  }
}
# endif

void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
  MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  Shuffle s = AnalyzeShuffle(ins);
# ifdef DEBUG
  ReportShuffleSpecialization(s);
# endif
  switch (s.opd) {
    case Shuffle::Operand::LEFT:
    case Shuffle::Operand::RIGHT: {
      LAllocation src;
      if (s.opd == Shuffle::Operand::LEFT) {
        if (*s.permuteOp == LWasmPermuteSimd128::MOVE) {
          src = useRegisterAtStart(ins->lhs());
        } else {
          src = useRegister(ins->lhs());
        }
      } else {
        if (*s.permuteOp == LWasmPermuteSimd128::MOVE) {
          src = useRegisterAtStart(ins->rhs());
        } else {
          src = useRegister(ins->rhs());
        }
      }
      auto* lir =
          new (alloc()) LWasmPermuteSimd128(src, *s.permuteOp, s.control);
      if (*s.permuteOp == LWasmPermuteSimd128::MOVE) {
        defineReuseInput(lir, ins, LWasmPermuteSimd128::Src);
      } else {
        define(lir, ins);
      }
      break;
    }
    case Shuffle::Operand::BOTH:
    case Shuffle::Operand::BOTH_SWAPPED: {
      LDefinition temp = LDefinition::BogusTemp();
      switch (*s.shuffleOp) {
        case LWasmShuffleSimd128::SHUFFLE_BLEND_8x16:
        case LWasmShuffleSimd128::BLEND_8x16:
          temp = tempSimd128();
          break;
        default:
          break;
      }
      LAllocation lhs;
      LAllocation rhs;
      if (s.opd == Shuffle::Operand::BOTH) {
        lhs = useRegisterAtStart(ins->lhs());
        rhs = useRegister(ins->rhs());
      } else {
        lhs = useRegisterAtStart(ins->rhs());
        rhs = useRegister(ins->lhs());
      }
      auto* lir = new (alloc())
          LWasmShuffleSimd128(lhs, rhs, temp, *s.shuffleOp, s.control);
      defineReuseInput(lir, ins, LWasmShuffleSimd128::LhsDest);
      break;
    }
  }
}

void LIRGenerator::visitWasmReplaceLaneSimd128(MWasmReplaceLaneSimd128* ins) {
  MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  if (ins->rhs()->type() == MIRType::Int64) {
    auto* lir = new (alloc()) LWasmReplaceInt64LaneSimd128(
        useRegisterAtStart(ins->lhs()), useInt64Register(ins->rhs()));
    defineReuseInput(lir, ins, LWasmReplaceInt64LaneSimd128::LhsDest);
  } else {
    auto* lir = new (alloc()) LWasmReplaceLaneSimd128(
        useRegisterAtStart(ins->lhs()), useRegister(ins->rhs()));
    defineReuseInput(lir, ins, LWasmReplaceLaneSimd128::LhsDest);
  }
}

// For unary operations we currently avoid using useRegisterAtStart() and
// reusing the input for the output, as that frequently leads to longer code
// sequences as we end up using scratch to hold an intermediate result.

void LIRGenerator::visitWasmScalarToSimd128(MWasmScalarToSimd128* ins) {
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  if (ins->input()->type() == MIRType::Int64) {
    auto* lir =
        new (alloc()) LWasmInt64ToSimd128(useInt64Register(ins->input()));
    define(lir, ins);
  } else {
    auto* lir = new (alloc()) LWasmScalarToSimd128(useRegister(ins->input()));
    define(lir, ins);
  }
}

void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
  MOZ_ASSERT(ins->input()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  LDefinition tempReg = LDefinition::BogusTemp();
  switch (ins->simdOp()) {
    case wasm::SimdOp::I32x4TruncUSatF32x4:
      tempReg = tempSimd128();
      break;
    default:
      break;
  }

  LWasmUnarySimd128* lir =
      new (alloc()) LWasmUnarySimd128(useRegister(ins->input()), tempReg);
  define(lir, ins);
}

void LIRGenerator::visitWasmReduceSimd128(MWasmReduceSimd128* ins) {
  if (ins->type() == MIRType::Int64) {
    auto* lir =
        new (alloc()) LWasmReduceSimd128ToInt64(useRegister(ins->input()));
    defineInt64(lir, ins);
  } else {
    auto* lir = new (alloc()) LWasmReduceSimd128(useRegister(ins->input()));
    define(lir, ins);
  }
}

#endif  // ENABLE_WASM_SIMD