1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "jit/x86-shared/Lowering-x86-shared.h"
8
9 #include "mozilla/MathAlgorithms.h"
10
11 #include "jit/Lowering.h"
12 #include "jit/MIR.h"
13
14 #include "jit/shared/Lowering-shared-inl.h"
15
16 using namespace js;
17 using namespace js::jit;
18
19 using mozilla::Abs;
20 using mozilla::FloorLog2;
21 using mozilla::Maybe;
22 using mozilla::Nothing;
23 using mozilla::Some;
24
LTableSwitch* LIRGeneratorX86Shared::newLTableSwitch(
26 const LAllocation& in, const LDefinition& inputCopy,
27 MTableSwitch* tableswitch) {
28 return new (alloc()) LTableSwitch(in, inputCopy, temp(), tableswitch);
29 }
30
LTableSwitchV* LIRGeneratorX86Shared::newLTableSwitchV(
32 MTableSwitch* tableswitch) {
33 return new (alloc()) LTableSwitchV(useBox(tableswitch->getOperand(0)), temp(),
34 tempDouble(), temp(), tableswitch);
35 }
36
void LIRGenerator::visitPowHalf(MPowHalf* ins) {
38 MDefinition* input = ins->input();
39 MOZ_ASSERT(input->type() == MIRType::Double);
40 LPowHalfD* lir = new (alloc()) LPowHalfD(useRegisterAtStart(input));
41 define(lir, ins);
42 }
43
void LIRGeneratorX86Shared::lowerForShift(LInstructionHelper<1, 2, 0>* ins,
45 MDefinition* mir, MDefinition* lhs,
46 MDefinition* rhs) {
47 ins->setOperand(0, useRegisterAtStart(lhs));
48
  // The shift count must be a constant or, unless BMI2 is available, be in
  // register ecx: without BMI2, x86 can only shift by a count held in ecx.
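  // For reference, the two instruction shapes this chooses between are
  // roughly the following (illustrative, not the exact code emitted here):
  //
  //   shl  eax, cl            ; legacy form, count implicitly in ecx/cl
  //   shlx eax, eax, esi      ; BMI2 form, count in any GPR
  //
  // BMI2 has no variable-count rotate (RORX only takes an immediate count),
  // which is why rotates keep the ecx constraint below.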
51 if (rhs->isConstant()) {
52 ins->setOperand(1, useOrConstantAtStart(rhs));
53 } else if (Assembler::HasBMI2() && !mir->isRotate()) {
54 ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
55 ? useRegister(rhs)
56 : useRegisterAtStart(rhs));
57 } else {
58 ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
59 ? useFixed(rhs, ecx)
60 : useFixedAtStart(rhs, ecx));
61 }
62
63 defineReuseInput(ins, mir, 0);
64 }
65
66 template <size_t Temps>
void LIRGeneratorX86Shared::lowerForShiftInt64(
68 LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
69 MDefinition* mir, MDefinition* lhs, MDefinition* rhs) {
70 ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
71 #if defined(JS_NUNBOX32)
72 if (mir->isRotate()) {
73 ins->setTemp(0, temp());
74 }
75 #endif
76
77 static_assert(LShiftI64::Rhs == INT64_PIECES,
78 "Assume Rhs is located at INT64_PIECES.");
79 static_assert(LRotateI64::Count == INT64_PIECES,
80 "Assume Count is located at INT64_PIECES.");
81
  // The shift count must be a constant or, unless BMI2 is available, be in
  // register ecx: without BMI2, x86 can only shift by a count held in ecx.
84 if (rhs->isConstant()) {
85 ins->setOperand(INT64_PIECES, useOrConstantAtStart(rhs));
86 #ifdef JS_CODEGEN_X64
87 } else if (Assembler::HasBMI2() && !mir->isRotate()) {
88 ins->setOperand(INT64_PIECES, useRegister(rhs));
89 #endif
90 } else {
91 // The operands are int64, but we only care about the lower 32 bits of
92 // the RHS. On 32-bit, the code below will load that part in ecx and
93 // will discard the upper half.
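    // (Only the low six bits of the shift count are significant for a 64-bit
    // shift or rotate, and those always live in the low half of the rhs,
    // which is the part that ends up in ecx.)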
94 ensureDefined(rhs);
95 LUse use(ecx);
96 use.setVirtualRegister(rhs->virtualRegister());
97 ins->setOperand(INT64_PIECES, use);
98 }
99
100 defineInt64ReuseInput(ins, mir, 0);
101 }
102
103 template void LIRGeneratorX86Shared::lowerForShiftInt64(
104 LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
105 MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
106 template void LIRGeneratorX86Shared::lowerForShiftInt64(
107 LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 1>* ins,
108 MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
109
void LIRGeneratorX86Shared::lowerForCompareI64AndBranch(
111 MTest* mir, MCompare* comp, JSOp op, MDefinition* left, MDefinition* right,
112 MBasicBlock* ifTrue, MBasicBlock* ifFalse) {
113 auto* lir = new (alloc())
114 LCompareI64AndBranch(comp, op, useInt64Register(left),
115 useInt64OrConstant(right), ifTrue, ifFalse);
116 add(lir, mir);
117 }
118
void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins,
120 MDefinition* mir, MDefinition* input) {
121 ins->setOperand(0, useRegisterAtStart(input));
122 defineReuseInput(ins, mir, 0);
123 }
124
void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins,
126 MDefinition* mir, MDefinition* lhs,
127 MDefinition* rhs) {
128 ins->setOperand(0, useRegisterAtStart(lhs));
129 ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
130 ? useOrConstant(rhs)
131 : useOrConstantAtStart(rhs));
132 defineReuseInput(ins, mir, 0);
133 }
134
135 template <size_t Temps>
void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins,
137 MDefinition* mir, MDefinition* lhs,
138 MDefinition* rhs) {
139 // Without AVX, we'll need to use the x86 encodings where one of the
140 // inputs must be the same location as the output.
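  // Illustrative contrast (not the exact instructions emitted here):
  //
  //   addsd  xmm0, xmm1        ; SSE: two-operand, xmm0 is input and output
  //   vaddsd xmm0, xmm1, xmm2  ; AVX: three-operand, dest need not alias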
141 if (!Assembler::HasAVX()) {
142 ins->setOperand(0, useRegisterAtStart(lhs));
143 ins->setOperand(
144 1, willHaveDifferentLIRNodes(lhs, rhs) ? use(rhs) : useAtStart(rhs));
145 defineReuseInput(ins, mir, 0);
146 } else {
147 ins->setOperand(0, useRegisterAtStart(lhs));
148 ins->setOperand(1, useAtStart(rhs));
149 define(ins, mir);
150 }
151 }
152
153 template void LIRGeneratorX86Shared::lowerForFPU(
154 LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, MDefinition* lhs,
155 MDefinition* rhs);
156 template void LIRGeneratorX86Shared::lowerForFPU(
157 LInstructionHelper<1, 2, 1>* ins, MDefinition* mir, MDefinition* lhs,
158 MDefinition* rhs);
159
void LIRGeneratorX86Shared::lowerForBitAndAndBranch(LBitAndAndBranch* baab,
161 MInstruction* mir,
162 MDefinition* lhs,
163 MDefinition* rhs) {
164 baab->setOperand(0, useRegisterAtStart(lhs));
165 baab->setOperand(1, useRegisterOrConstantAtStart(rhs));
166 add(baab, mir);
167 }
168
void LIRGeneratorX86Shared::lowerNegI(MInstruction* ins, MDefinition* input) {
170 defineReuseInput(new (alloc()) LNegI(useRegisterAtStart(input)), ins, 0);
171 }
172
void LIRGeneratorX86Shared::lowerNegI64(MInstruction* ins, MDefinition* input) {
174 defineInt64ReuseInput(new (alloc()) LNegI64(useInt64RegisterAtStart(input)),
175 ins, 0);
176 }
177
void LIRGenerator::visitAbs(MAbs* ins) {
179 defineReuseInput(allocateAbs(ins, useRegisterAtStart(ins->input())), ins, 0);
180 }
181
void LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs,
183 MDefinition* rhs) {
184 // Note: If we need a negative zero check, lhs is used twice.
185 LAllocation lhsCopy = mul->canBeNegativeZero() ? use(lhs) : LAllocation();
186 LMulI* lir = new (alloc())
187 LMulI(useRegisterAtStart(lhs),
188 willHaveDifferentLIRNodes(lhs, rhs) ? useOrConstant(rhs)
189 : useOrConstantAtStart(rhs),
190 lhsCopy);
191 if (mul->fallible()) {
192 assignSnapshot(lir, mul->bailoutKind());
193 }
194 defineReuseInput(lir, mul, 0);
195 }
196
void LIRGeneratorX86Shared::lowerDivI(MDiv* div) {
198 if (div->isUnsigned()) {
199 lowerUDiv(div);
200 return;
201 }
202
203 // Division instructions are slow. Division by constant denominators can be
204 // rewritten to use other instructions.
205 if (div->rhs()->isConstant()) {
206 int32_t rhs = div->rhs()->toConstant()->toInt32();
207
208 // Division by powers of two can be done by shifting, and division by
209 // other numbers can be done by a reciprocal multiplication technique.
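    // Sketch of the reciprocal technique (the exact constants and the sign
    // fixups for signed division come from the code generator, not from
    // here): an unsigned division by 5 can be computed, for any uint32_t
    // lhs, as
    //
    //   quotient = uint32_t((uint64_t(lhs) * 0xCCCCCCCD) >> 34)
    //
    // where 0xCCCCCCCD == ceil(2^34 / 5), i.e. a widening multiply by a
    // precomputed reciprocal followed by a shift. The fixed eax temp and edx
    // output on LDivOrModConstantI below reflect the x86 widening multiply,
    // which reads eax and leaves the high half of the product in edx.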
210 int32_t shift = FloorLog2(Abs(rhs));
211 if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
212 LAllocation lhs = useRegisterAtStart(div->lhs());
213 LDivPowTwoI* lir;
      // When truncating and the remainder may be non-zero, we have to round
      // the result toward 0. This requires an extra register to round up or
      // down depending on whether the left-hand side is negative.
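      // Sketch of the adjustment for d = 2^k (the code generator emits the
      // real thing): an arithmetic shift alone rounds toward minus infinity,
      // so for a possibly-negative lhs the usual fix is
      //
      //   adjusted = lhs + ((lhs >> 31) & (d - 1));  // add d-1 only if lhs < 0
      //   quotient = adjusted >> k;                  // now rounds toward 0
      //
      // and the extra lhs copy below feeds that adjustment.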
217 bool needRoundNeg = div->canBeNegativeDividend() && div->isTruncated();
218 if (!needRoundNeg) {
219 // Numerator is unsigned, so does not need adjusting.
220 lir = new (alloc()) LDivPowTwoI(lhs, lhs, shift, rhs < 0);
221 } else {
222 // Numerator might be signed, and needs adjusting, and an extra lhs copy
223 // is needed to round the result of the integer division towards zero.
224 lir = new (alloc())
225 LDivPowTwoI(lhs, useRegister(div->lhs()), shift, rhs < 0);
226 }
227 if (div->fallible()) {
228 assignSnapshot(lir, div->bailoutKind());
229 }
230 defineReuseInput(lir, div, 0);
231 return;
232 }
233 if (rhs != 0) {
234 LDivOrModConstantI* lir;
235 lir = new (alloc())
236 LDivOrModConstantI(useRegister(div->lhs()), rhs, tempFixed(eax));
237 if (div->fallible()) {
238 assignSnapshot(lir, div->bailoutKind());
239 }
240 defineFixed(lir, div, LAllocation(AnyRegister(edx)));
241 return;
242 }
243 }
244
245 LDivI* lir = new (alloc())
246 LDivI(useRegister(div->lhs()), useRegister(div->rhs()), tempFixed(edx));
247 if (div->fallible()) {
248 assignSnapshot(lir, div->bailoutKind());
249 }
250 defineFixed(lir, div, LAllocation(AnyRegister(eax)));
251 }
252
void LIRGeneratorX86Shared::lowerModI(MMod* mod) {
254 if (mod->isUnsigned()) {
255 lowerUMod(mod);
256 return;
257 }
258
259 if (mod->rhs()->isConstant()) {
260 int32_t rhs = mod->rhs()->toConstant()->toInt32();
261 int32_t shift = FloorLog2(Abs(rhs));
262 if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
263 LModPowTwoI* lir =
264 new (alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
265 if (mod->fallible()) {
266 assignSnapshot(lir, mod->bailoutKind());
267 }
268 defineReuseInput(lir, mod, 0);
269 return;
270 }
271 if (rhs != 0) {
272 LDivOrModConstantI* lir;
273 lir = new (alloc())
274 LDivOrModConstantI(useRegister(mod->lhs()), rhs, tempFixed(edx));
275 if (mod->fallible()) {
276 assignSnapshot(lir, mod->bailoutKind());
277 }
278 defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
279 return;
280 }
281 }
282
283 LModI* lir = new (alloc())
284 LModI(useRegister(mod->lhs()), useRegister(mod->rhs()), tempFixed(eax));
285 if (mod->fallible()) {
286 assignSnapshot(lir, mod->bailoutKind());
287 }
288 defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
289 }
290
void LIRGenerator::visitWasmNeg(MWasmNeg* ins) {
292 switch (ins->type()) {
293 case MIRType::Int32:
294 defineReuseInput(new (alloc()) LNegI(useRegisterAtStart(ins->input())),
295 ins, 0);
296 break;
297 case MIRType::Float32:
298 defineReuseInput(new (alloc()) LNegF(useRegisterAtStart(ins->input())),
299 ins, 0);
300 break;
301 case MIRType::Double:
302 defineReuseInput(new (alloc()) LNegD(useRegisterAtStart(ins->input())),
303 ins, 0);
304 break;
305 default:
306 MOZ_CRASH();
307 }
308 }
309
void LIRGeneratorX86Shared::lowerWasmSelectI(MWasmSelect* select) {
311 auto* lir = new (alloc())
312 LWasmSelect(useRegisterAtStart(select->trueExpr()),
313 useAny(select->falseExpr()), useRegister(select->condExpr()));
314 defineReuseInput(lir, select, LWasmSelect::TrueExprIndex);
315 }
316
void LIRGeneratorX86Shared::lowerWasmSelectI64(MWasmSelect* select) {
318 auto* lir = new (alloc()) LWasmSelectI64(
319 useInt64RegisterAtStart(select->trueExpr()),
320 useInt64(select->falseExpr()), useRegister(select->condExpr()));
321 defineInt64ReuseInput(lir, select, LWasmSelectI64::TrueExprIndex);
322 }
323
void LIRGenerator::visitAsmJSLoadHeap(MAsmJSLoadHeap* ins) {
325 MDefinition* base = ins->base();
326 MOZ_ASSERT(base->type() == MIRType::Int32);
327
328 MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
329 MOZ_ASSERT_IF(ins->needsBoundsCheck(),
330 boundsCheckLimit->type() == MIRType::Int32);
331
332 // For simplicity, require a register if we're going to emit a bounds-check
333 // branch, so that we don't have special cases for constants. This should
  // only happen in rare constant-folding cases since asm.js sets the minimum
  // heap size based on the constant offsets used to access the heap.
336 LAllocation baseAlloc = ins->needsBoundsCheck()
337 ? useRegisterAtStart(base)
338 : useRegisterOrZeroAtStart(base);
339
340 LAllocation limitAlloc = ins->needsBoundsCheck()
341 ? useRegisterAtStart(boundsCheckLimit)
342 : LAllocation();
343 LAllocation memoryBaseAlloc = ins->hasMemoryBase()
344 ? useRegisterAtStart(ins->memoryBase())
345 : LAllocation();
346
347 auto* lir =
348 new (alloc()) LAsmJSLoadHeap(baseAlloc, limitAlloc, memoryBaseAlloc);
349 define(lir, ins);
350 }
351
void LIRGenerator::visitAsmJSStoreHeap(MAsmJSStoreHeap* ins) {
353 MDefinition* base = ins->base();
354 MOZ_ASSERT(base->type() == MIRType::Int32);
355
356 MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
357 MOZ_ASSERT_IF(ins->needsBoundsCheck(),
358 boundsCheckLimit->type() == MIRType::Int32);
359
360 // For simplicity, require a register if we're going to emit a bounds-check
361 // branch, so that we don't have special cases for constants. This should
  // only happen in rare constant-folding cases since asm.js sets the minimum
  // heap size based on the constant offsets used to access the heap.
364 LAllocation baseAlloc = ins->needsBoundsCheck()
365 ? useRegisterAtStart(base)
366 : useRegisterOrZeroAtStart(base);
367
368 LAllocation limitAlloc = ins->needsBoundsCheck()
369 ? useRegisterAtStart(boundsCheckLimit)
370 : LAllocation();
371 LAllocation memoryBaseAlloc = ins->hasMemoryBase()
372 ? useRegisterAtStart(ins->memoryBase())
373 : LAllocation();
374
375 LAsmJSStoreHeap* lir = nullptr;
376 switch (ins->access().type()) {
377 case Scalar::Int8:
378 case Scalar::Uint8:
379 #ifdef JS_CODEGEN_X86
380 // See comment for LIRGeneratorX86::useByteOpRegister.
381 lir = new (alloc()) LAsmJSStoreHeap(
382 baseAlloc, useFixed(ins->value(), eax), limitAlloc, memoryBaseAlloc);
383 break;
384 #endif
385 case Scalar::Int16:
386 case Scalar::Uint16:
387 case Scalar::Int32:
388 case Scalar::Uint32:
389 case Scalar::Float32:
390 case Scalar::Float64:
391 // For now, don't allow constant values. The immediate operand affects
392 // instruction layout which affects patching.
393 lir = new (alloc())
394 LAsmJSStoreHeap(baseAlloc, useRegisterAtStart(ins->value()),
395 limitAlloc, memoryBaseAlloc);
396 break;
397 case Scalar::Int64:
398 case Scalar::Simd128:
399 MOZ_CRASH("NYI");
400 case Scalar::Uint8Clamped:
401 case Scalar::BigInt64:
402 case Scalar::BigUint64:
403 case Scalar::MaxTypedArrayViewType:
404 MOZ_CRASH("unexpected array type");
405 }
406 add(lir, ins);
407 }
408
void LIRGeneratorX86Shared::lowerUDiv(MDiv* div) {
410 if (div->rhs()->isConstant()) {
411 uint32_t rhs = div->rhs()->toConstant()->toInt32();
412 int32_t shift = FloorLog2(rhs);
413
414 LAllocation lhs = useRegisterAtStart(div->lhs());
415 if (rhs != 0 && uint32_t(1) << shift == rhs) {
416 LDivPowTwoI* lir = new (alloc()) LDivPowTwoI(lhs, lhs, shift, false);
417 if (div->fallible()) {
418 assignSnapshot(lir, div->bailoutKind());
419 }
420 defineReuseInput(lir, div, 0);
421 } else {
422 LUDivOrModConstant* lir = new (alloc())
423 LUDivOrModConstant(useRegister(div->lhs()), rhs, tempFixed(eax));
424 if (div->fallible()) {
425 assignSnapshot(lir, div->bailoutKind());
426 }
427 defineFixed(lir, div, LAllocation(AnyRegister(edx)));
428 }
429 return;
430 }
431
432 LUDivOrMod* lir = new (alloc()) LUDivOrMod(
433 useRegister(div->lhs()), useRegister(div->rhs()), tempFixed(edx));
434 if (div->fallible()) {
435 assignSnapshot(lir, div->bailoutKind());
436 }
437 defineFixed(lir, div, LAllocation(AnyRegister(eax)));
438 }
439
void LIRGeneratorX86Shared::lowerUMod(MMod* mod) {
441 if (mod->rhs()->isConstant()) {
442 uint32_t rhs = mod->rhs()->toConstant()->toInt32();
443 int32_t shift = FloorLog2(rhs);
444
445 if (rhs != 0 && uint32_t(1) << shift == rhs) {
446 LModPowTwoI* lir =
447 new (alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
448 if (mod->fallible()) {
449 assignSnapshot(lir, mod->bailoutKind());
450 }
451 defineReuseInput(lir, mod, 0);
452 } else {
453 LUDivOrModConstant* lir = new (alloc())
454 LUDivOrModConstant(useRegister(mod->lhs()), rhs, tempFixed(edx));
455 if (mod->fallible()) {
456 assignSnapshot(lir, mod->bailoutKind());
457 }
458 defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
459 }
460 return;
461 }
462
463 LUDivOrMod* lir = new (alloc()) LUDivOrMod(
464 useRegister(mod->lhs()), useRegister(mod->rhs()), tempFixed(eax));
465 if (mod->fallible()) {
466 assignSnapshot(lir, mod->bailoutKind());
467 }
468 defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
469 }
470
void LIRGeneratorX86Shared::lowerUrshD(MUrsh* mir) {
472 MDefinition* lhs = mir->lhs();
473 MDefinition* rhs = mir->rhs();
474
475 MOZ_ASSERT(lhs->type() == MIRType::Int32);
476 MOZ_ASSERT(rhs->type() == MIRType::Int32);
477 MOZ_ASSERT(mir->type() == MIRType::Double);
478
479 #ifdef JS_CODEGEN_X64
480 static_assert(ecx == rcx);
481 #endif
482
483 // Without BMI2, x86 can only shift by ecx.
484 LUse lhsUse = useRegisterAtStart(lhs);
485 LAllocation rhsAlloc;
486 if (rhs->isConstant()) {
487 rhsAlloc = useOrConstant(rhs);
488 } else if (Assembler::HasBMI2()) {
489 rhsAlloc = useRegister(rhs);
490 } else {
491 rhsAlloc = useFixed(rhs, ecx);
492 }
493
494 LUrshD* lir = new (alloc()) LUrshD(lhsUse, rhsAlloc, tempCopy(lhs, 0));
495 define(lir, mir);
496 }
497
void LIRGeneratorX86Shared::lowerPowOfTwoI(MPow* mir) {
499 int32_t base = mir->input()->toConstant()->toInt32();
500 MDefinition* power = mir->power();
501
  // The shift count must be in register ecx unless BMI2 is available:
  // without BMI2, x86 can only shift by a count held in ecx.
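  // Since the base is a constant power of two, base**power is itself a
  // shift, e.g. 8**3 == 1 << (3 * log2(8)) == 1 << 9 == 512, which is
  // presumably why the power ends up as a shift count here (and why the
  // snapshot is needed, for negative powers or overflowing results).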
504 LAllocation powerAlloc =
505 Assembler::HasBMI2() ? useRegister(power) : useFixed(power, ecx);
506 auto* lir = new (alloc()) LPowOfTwoI(base, powerAlloc);
507 assignSnapshot(lir, mir->bailoutKind());
508 define(lir, mir);
509 }
510
void LIRGeneratorX86Shared::lowerBigIntLsh(MBigIntLsh* ins) {
  // The shift count must be in register ecx unless BMI2 is available:
  // without BMI2, x86 can only shift by a count held in ecx.
514 LDefinition shiftAlloc = Assembler::HasBMI2() ? temp() : tempFixed(ecx);
515 auto* lir =
516 new (alloc()) LBigIntLsh(useRegister(ins->lhs()), useRegister(ins->rhs()),
517 temp(), shiftAlloc, temp());
518 define(lir, ins);
519 assignSafepoint(lir, ins);
520 }
521
void LIRGeneratorX86Shared::lowerBigIntRsh(MBigIntRsh* ins) {
  // The shift count must be in register ecx unless BMI2 is available:
  // without BMI2, x86 can only shift by a count held in ecx.
525 LDefinition shiftAlloc = Assembler::HasBMI2() ? temp() : tempFixed(ecx);
526 auto* lir =
527 new (alloc()) LBigIntRsh(useRegister(ins->lhs()), useRegister(ins->rhs()),
528 temp(), shiftAlloc, temp());
529 define(lir, ins);
530 assignSafepoint(lir, ins);
531 }
532
void LIRGeneratorX86Shared::lowerWasmBuiltinTruncateToInt32(
534 MWasmBuiltinTruncateToInt32* ins) {
535 MDefinition* opd = ins->input();
536 MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32);
537
538 LDefinition maybeTemp =
539 Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempDouble();
540 if (opd->type() == MIRType::Double) {
541 define(new (alloc()) LWasmBuiltinTruncateDToInt32(
542 useRegister(opd), useFixed(ins->tls(), WasmTlsReg), maybeTemp),
543 ins);
544 return;
545 }
546
547 define(new (alloc()) LWasmBuiltinTruncateFToInt32(
548 useRegister(opd), useFixed(ins->tls(), WasmTlsReg), maybeTemp),
549 ins);
550 }
551
void LIRGeneratorX86Shared::lowerTruncateDToInt32(MTruncateToInt32* ins) {
553 MDefinition* opd = ins->input();
554 MOZ_ASSERT(opd->type() == MIRType::Double);
555
556 LDefinition maybeTemp =
557 Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempDouble();
558 define(new (alloc()) LTruncateDToInt32(useRegister(opd), maybeTemp), ins);
559 }
560
void LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32* ins) {
562 MDefinition* opd = ins->input();
563 MOZ_ASSERT(opd->type() == MIRType::Float32);
564
565 LDefinition maybeTemp =
566 Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempFloat32();
567 define(new (alloc()) LTruncateFToInt32(useRegister(opd), maybeTemp), ins);
568 }
569
void LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(
571 MCompareExchangeTypedArrayElement* ins, bool useI386ByteRegisters) {
572 MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
573 MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
574
575 MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
576 MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
577
578 const LUse elements = useRegister(ins->elements());
579 const LAllocation index =
580 useRegisterOrIndexConstant(ins->index(), ins->arrayType());
581
582 // If the target is a floating register then we need a temp at the
583 // lower level; that temp must be eax.
584 //
585 // Otherwise the target (if used) is an integer register, which
586 // must be eax. If the target is not used the machine code will
587 // still clobber eax, so just pretend it's used.
588 //
589 // oldval must be in a register.
590 //
591 // newval must be in a register. If the source is a byte array
592 // then newval must be a register that has a byte size: on x86
593 // this must be ebx, ecx, or edx (eax is taken for the output).
594 //
595 // Bug #1077036 describes some further optimization opportunities.
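  // For reference, the underlying instruction is
  //
  //   lock cmpxchg newval, mem   ; compares eax with mem; on a match stores
  //                              ; newval, otherwise loads mem into eax
  //
  // so eax is implicitly both the expected value and the observed result,
  // which is where the fixed-eax constraints above come from.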
596
597 bool fixedOutput = false;
598 LDefinition tempDef = LDefinition::BogusTemp();
599 LAllocation newval;
600 if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
601 tempDef = tempFixed(eax);
602 newval = useRegister(ins->newval());
603 } else {
604 fixedOutput = true;
605 if (useI386ByteRegisters && ins->isByteArray()) {
606 newval = useFixed(ins->newval(), ebx);
607 } else {
608 newval = useRegister(ins->newval());
609 }
610 }
611
612 const LAllocation oldval = useRegister(ins->oldval());
613
614 LCompareExchangeTypedArrayElement* lir =
615 new (alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval,
616 newval, tempDef);
617
618 if (fixedOutput) {
619 defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
620 } else {
621 define(lir, ins);
622 }
623 }
624
void LIRGeneratorX86Shared::lowerAtomicExchangeTypedArrayElement(
626 MAtomicExchangeTypedArrayElement* ins, bool useI386ByteRegisters) {
627 MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);
628
629 MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
630 MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
631
632 const LUse elements = useRegister(ins->elements());
633 const LAllocation index =
634 useRegisterOrIndexConstant(ins->index(), ins->arrayType());
635 const LAllocation value = useRegister(ins->value());
636
637 // The underlying instruction is XCHG, which can operate on any
638 // register.
639 //
640 // If the target is a floating register (for Uint32) then we need
641 // a temp into which to exchange.
642 //
643 // If the source is a byte array then we need a register that has
644 // a byte size; in this case -- on x86 only -- pin the output to
645 // an appropriate register and use that as a temp in the back-end.
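  // (XCHG with a memory operand is implicitly locked, so no LOCK prefix is
  // needed, and unlike CMPXCHG it has no implicit eax operand, which is why
  // no fixed register shows up here apart from the byte-register issue.)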
646
647 LDefinition tempDef = LDefinition::BogusTemp();
648 if (ins->arrayType() == Scalar::Uint32) {
649 MOZ_ASSERT(ins->type() == MIRType::Double);
650 tempDef = temp();
651 }
652
653 LAtomicExchangeTypedArrayElement* lir = new (alloc())
654 LAtomicExchangeTypedArrayElement(elements, index, value, tempDef);
655
656 if (useI386ByteRegisters && ins->isByteArray()) {
657 defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
658 } else {
659 define(lir, ins);
660 }
661 }
662
void LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(
664 MAtomicTypedArrayElementBinop* ins, bool useI386ByteRegisters) {
665 MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
666 MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
667 MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
668
669 MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
670 MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
671
672 const LUse elements = useRegister(ins->elements());
673 const LAllocation index =
674 useRegisterOrIndexConstant(ins->index(), ins->arrayType());
675
676 // Case 1: the result of the operation is not used.
677 //
678 // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
679 // LOCK OR, or LOCK XOR. We can do this even for the Uint32 case.
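  // e.g. a single "lock andl src, mem"; the old value is not produced, so no
  // output register needs to be defined in this case.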
680
681 if (ins->isForEffect()) {
682 LAllocation value;
683 if (useI386ByteRegisters && ins->isByteArray() &&
684 !ins->value()->isConstant()) {
685 value = useFixed(ins->value(), ebx);
686 } else {
687 value = useRegisterOrConstant(ins->value());
688 }
689
690 LAtomicTypedArrayElementBinopForEffect* lir = new (alloc())
691 LAtomicTypedArrayElementBinopForEffect(elements, index, value);
692
693 add(lir, ins);
694 return;
695 }
696
697 // Case 2: the result of the operation is used.
698 //
699 // For ADD and SUB we'll use XADD:
700 //
701 // movl src, output
702 // lock xaddl output, mem
703 //
704 // For the 8-bit variants XADD needs a byte register for the output.
705 //
706 // For AND/OR/XOR we need to use a CMPXCHG loop:
707 //
708 // movl *mem, eax
709 // L: mov eax, temp
710 // andl src, temp
711 // lock cmpxchg temp, mem ; reads eax also
712 // jnz L
713 // ; result in eax
714 //
  // Note the placement of L: cmpxchg will update eax with *mem if
716 // *mem does not have the expected value, so reloading it at the
717 // top of the loop would be redundant.
718 //
719 // If the array is not a uint32 array then:
720 // - eax should be the output (one result of the cmpxchg)
721 // - there is a temp, which must have a byte register if
  //    the array has 1-byte elements
723 //
724 // If the array is a uint32 array then:
725 // - eax is the first temp
726 // - we also need a second temp
727 //
728 // There are optimization opportunities:
729 // - better register allocation in the x86 8-bit case, Bug #1077036.
730
731 bool bitOp = !(ins->operation() == AtomicFetchAddOp ||
732 ins->operation() == AtomicFetchSubOp);
733 bool fixedOutput = true;
734 bool reuseInput = false;
735 LDefinition tempDef1 = LDefinition::BogusTemp();
736 LDefinition tempDef2 = LDefinition::BogusTemp();
737 LAllocation value;
738
739 if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
740 value = useRegisterOrConstant(ins->value());
741 fixedOutput = false;
742 if (bitOp) {
743 tempDef1 = tempFixed(eax);
744 tempDef2 = temp();
745 } else {
746 tempDef1 = temp();
747 }
748 } else if (useI386ByteRegisters && ins->isByteArray()) {
749 if (ins->value()->isConstant()) {
750 value = useRegisterOrConstant(ins->value());
751 } else {
752 value = useFixed(ins->value(), ebx);
753 }
754 if (bitOp) {
755 tempDef1 = tempFixed(ecx);
756 }
757 } else if (bitOp) {
758 value = useRegisterOrConstant(ins->value());
759 tempDef1 = temp();
760 } else if (ins->value()->isConstant()) {
761 fixedOutput = false;
762 value = useRegisterOrConstant(ins->value());
763 } else {
764 fixedOutput = false;
765 reuseInput = true;
766 value = useRegisterAtStart(ins->value());
767 }
768
769 LAtomicTypedArrayElementBinop* lir = new (alloc())
770 LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);
771
772 if (fixedOutput) {
773 defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
774 } else if (reuseInput) {
775 defineReuseInput(lir, ins, LAtomicTypedArrayElementBinop::valueOp);
776 } else {
777 define(lir, ins);
778 }
779 }
780
void LIRGenerator::visitCopySign(MCopySign* ins) {
782 MDefinition* lhs = ins->lhs();
783 MDefinition* rhs = ins->rhs();
784
785 MOZ_ASSERT(IsFloatingPointType(lhs->type()));
786 MOZ_ASSERT(lhs->type() == rhs->type());
787 MOZ_ASSERT(lhs->type() == ins->type());
788
789 LInstructionHelper<1, 2, 2>* lir;
790 if (lhs->type() == MIRType::Double) {
791 lir = new (alloc()) LCopySignD();
792 } else {
793 lir = new (alloc()) LCopySignF();
794 }
795
796 // As lowerForFPU, but we want rhs to be in a FP register too.
797 lir->setOperand(0, useRegisterAtStart(lhs));
798 if (!Assembler::HasAVX()) {
799 lir->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
800 ? useRegister(rhs)
801 : useRegisterAtStart(rhs));
802 defineReuseInput(lir, ins, 0);
803 } else {
804 lir->setOperand(1, useRegisterAtStart(rhs));
805 define(lir, ins);
806 }
807 }
808
809 // These lowerings are really x86-shared but some Masm APIs are not yet
810 // available on x86.
811
812 // Ternary and binary operators require the dest register to be the same as
813 // their first input register, leading to a pattern of useRegisterAtStart +
814 // defineReuseInput.
815
void LIRGenerator::visitWasmBitselectSimd128(MWasmBitselectSimd128* ins) {
817 #ifdef ENABLE_WASM_SIMD
818 MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
819 MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128);
820 MOZ_ASSERT(ins->control()->type() == MIRType::Simd128);
821 MOZ_ASSERT(ins->type() == MIRType::Simd128);
822
823 // Enforcing lhs == output avoids one setup move. We would like to also
  // enforce merging the control with the temp (with useRegisterAtStart(control)
825 // and tempCopy()), but the register allocator ignores those constraints
826 // at present.
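  // For reference, wasm's v128.bitselect computes, bit by bit,
  //
  //   result = (lhs & control) | (rhs & ~control)
  //
  // and without a native bitselect instruction x86 builds this out of
  // and/andn/or style sequences, which is presumably what the simd128 temp
  // is for.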
827
828 auto* lir = new (alloc()) LWasmBitselectSimd128(
829 useRegisterAtStart(ins->lhs()), useRegister(ins->rhs()),
830 useRegister(ins->control()), tempSimd128());
831 defineReuseInput(lir, ins, LWasmBitselectSimd128::LhsDest);
832 #else
833 MOZ_CRASH("No SIMD");
834 #endif
835 }
836
void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) {
838 #ifdef ENABLE_WASM_SIMD
839 MDefinition* lhs = ins->lhs();
840 MDefinition* rhs = ins->rhs();
841 wasm::SimdOp op = ins->simdOp();
842
843 MOZ_ASSERT(lhs->type() == MIRType::Simd128);
844 MOZ_ASSERT(rhs->type() == MIRType::Simd128);
845 MOZ_ASSERT(ins->type() == MIRType::Simd128);
846
847 // Note MWasmBinarySimd128::foldsTo has already specialized operations that
848 // have a constant operand, so this takes care of more general cases of
849 // reordering, see ReorderCommutative.
850 if (ins->isCommutative()) {
851 ReorderCommutative(&lhs, &rhs, ins);
852 }
853
  // Swap operands and change operation if necessary; these are all x86/x64
855 // dependent transformations. Except where noted, this is about avoiding
856 // unnecessary moves and fixups in the code generator macros.
857 bool swap = false;
858 switch (op) {
859 case wasm::SimdOp::V128AndNot: {
860 // Code generation requires the operands to be reversed.
861 swap = true;
862 break;
863 }
864 case wasm::SimdOp::I8x16LtS: {
865 swap = true;
866 op = wasm::SimdOp::I8x16GtS;
867 break;
868 }
869 case wasm::SimdOp::I8x16GeS: {
870 swap = true;
871 op = wasm::SimdOp::I8x16LeS;
872 break;
873 }
874 case wasm::SimdOp::I16x8LtS: {
875 swap = true;
876 op = wasm::SimdOp::I16x8GtS;
877 break;
878 }
879 case wasm::SimdOp::I16x8GeS: {
880 swap = true;
881 op = wasm::SimdOp::I16x8LeS;
882 break;
883 }
884 case wasm::SimdOp::I32x4LtS: {
885 swap = true;
886 op = wasm::SimdOp::I32x4GtS;
887 break;
888 }
889 case wasm::SimdOp::I32x4GeS: {
890 swap = true;
891 op = wasm::SimdOp::I32x4LeS;
892 break;
893 }
894 case wasm::SimdOp::F32x4Gt: {
895 swap = true;
896 op = wasm::SimdOp::F32x4Lt;
897 break;
898 }
899 case wasm::SimdOp::F32x4Ge: {
900 swap = true;
901 op = wasm::SimdOp::F32x4Le;
902 break;
903 }
904 case wasm::SimdOp::F64x2Gt: {
905 swap = true;
906 op = wasm::SimdOp::F64x2Lt;
907 break;
908 }
909 case wasm::SimdOp::F64x2Ge: {
910 swap = true;
911 op = wasm::SimdOp::F64x2Le;
912 break;
913 }
914 case wasm::SimdOp::F32x4PMin:
915 case wasm::SimdOp::F32x4PMax:
916 case wasm::SimdOp::F64x2PMin:
917 case wasm::SimdOp::F64x2PMax: {
      // Code generation requires the operands to be reversed (the rhs is the
919 // output register).
920 swap = true;
921 break;
922 }
923 default:
924 break;
925 }
926 if (swap) {
927 MDefinition* tmp = lhs;
928 lhs = rhs;
929 rhs = tmp;
930 }
931
932 // Allocate temp registers
933 LDefinition tempReg0 = LDefinition::BogusTemp();
934 LDefinition tempReg1 = LDefinition::BogusTemp();
935 switch (op) {
936 case wasm::SimdOp::I64x2Mul:
937 tempReg0 = tempSimd128();
938 break;
939 case wasm::SimdOp::F32x4Min:
940 case wasm::SimdOp::F32x4Max:
941 case wasm::SimdOp::F64x2Min:
942 case wasm::SimdOp::F64x2Max:
943 case wasm::SimdOp::I64x2LtS:
944 case wasm::SimdOp::I64x2GtS:
945 case wasm::SimdOp::I64x2LeS:
946 case wasm::SimdOp::I64x2GeS:
947 tempReg0 = tempSimd128();
948 tempReg1 = tempSimd128();
949 break;
950 default:
951 break;
952 }
953
  // For binary ops, the Masm API is usually (rhs, lhsDest) and requires
955 // AtStart+ReuseInput for the lhs.
956 //
  // For a few ops, the API is actually (rhsDest, lhs) and the rules are the
  // same but reversed. We swapped operands above; they will be swapped
959 // again in the code generator to emit the right code.
960
961 LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
962 LAllocation rhsAlloc = willHaveDifferentLIRNodes(lhs, rhs)
963 ? useRegister(rhs)
964 : useRegisterAtStart(rhs);
965 auto* lir = new (alloc())
966 LWasmBinarySimd128(op, lhsDestAlloc, rhsAlloc, tempReg0, tempReg1);
967 defineReuseInput(lir, ins, LWasmBinarySimd128::LhsDest);
968 #else
969 MOZ_CRASH("No SIMD");
970 #endif
971 }
972
973 #ifdef ENABLE_WASM_SIMD
bool MWasmBitselectSimd128::specializeConstantMaskAsShuffle(
975 int8_t shuffle[16]) {
976 // Optimization when control vector is a mask with all 0 or all 1 per lane.
  // On x86 there is no bitselect instruction, so blend operations are a win,
978 // e.g. via PBLENDVB or PBLENDW.
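  // e.g. a control vector whose bytes alternate 0 and -1 yields the shuffle
  // pattern {0, 17, 2, 19, ...}: lane indices 0..15 select from one operand
  // and 16..31 from the other, so the bitselect becomes a single byte
  // shuffle.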
979 SimdConstant constant =
980 static_cast<MWasmFloatConstant*>(control())->toSimd128();
981 const SimdConstant::I8x16& bytes = constant.asInt8x16();
982 for (int8_t i = 0; i < 16; i++) {
983 if (bytes[i] == -1) {
984 shuffle[i] = i + 16;
985 } else if (bytes[i] == 0) {
986 shuffle[i] = i;
987 } else {
988 return false;
989 }
990 }
991 return true;
992 }
993 #endif
994
bool MWasmBinarySimd128::specializeForConstantRhs() {
996 // The order follows MacroAssembler.h, generally
997 switch (simdOp()) {
998 // Operations implemented by a single native instruction where it is
999 // plausible that the rhs (after commutation if available) could be a
1000 // constant.
1001 //
1002 // Swizzle is not here because it was handled earlier in the pipeline.
1003 //
1004 // Integer compares >= and < are not here because they are not supported in
1005 // the hardware.
1006 //
1007 // Floating compares are not here because our patching machinery can't
1008 // handle them yet.
1009 //
1010 // Floating-point min and max (including pmin and pmax) are not here because
1011 // they are not straightforward to implement.
1012 case wasm::SimdOp::I8x16Add:
1013 case wasm::SimdOp::I16x8Add:
1014 case wasm::SimdOp::I32x4Add:
1015 case wasm::SimdOp::I64x2Add:
1016 case wasm::SimdOp::I8x16Sub:
1017 case wasm::SimdOp::I16x8Sub:
1018 case wasm::SimdOp::I32x4Sub:
1019 case wasm::SimdOp::I64x2Sub:
1020 case wasm::SimdOp::I16x8Mul:
1021 case wasm::SimdOp::I32x4Mul:
1022 case wasm::SimdOp::I8x16AddSaturateS:
1023 case wasm::SimdOp::I8x16AddSaturateU:
1024 case wasm::SimdOp::I16x8AddSaturateS:
1025 case wasm::SimdOp::I16x8AddSaturateU:
1026 case wasm::SimdOp::I8x16SubSaturateS:
1027 case wasm::SimdOp::I8x16SubSaturateU:
1028 case wasm::SimdOp::I16x8SubSaturateS:
1029 case wasm::SimdOp::I16x8SubSaturateU:
1030 case wasm::SimdOp::I8x16MinS:
1031 case wasm::SimdOp::I8x16MinU:
1032 case wasm::SimdOp::I16x8MinS:
1033 case wasm::SimdOp::I16x8MinU:
1034 case wasm::SimdOp::I32x4MinS:
1035 case wasm::SimdOp::I32x4MinU:
1036 case wasm::SimdOp::I8x16MaxS:
1037 case wasm::SimdOp::I8x16MaxU:
1038 case wasm::SimdOp::I16x8MaxS:
1039 case wasm::SimdOp::I16x8MaxU:
1040 case wasm::SimdOp::I32x4MaxS:
1041 case wasm::SimdOp::I32x4MaxU:
1042 case wasm::SimdOp::V128And:
1043 case wasm::SimdOp::V128Or:
1044 case wasm::SimdOp::V128Xor:
1045 case wasm::SimdOp::I8x16Eq:
1046 case wasm::SimdOp::I8x16Ne:
1047 case wasm::SimdOp::I8x16GtS:
1048 case wasm::SimdOp::I8x16LeS:
1049 case wasm::SimdOp::I16x8Eq:
1050 case wasm::SimdOp::I16x8Ne:
1051 case wasm::SimdOp::I16x8GtS:
1052 case wasm::SimdOp::I16x8LeS:
1053 case wasm::SimdOp::I32x4Eq:
1054 case wasm::SimdOp::I32x4Ne:
1055 case wasm::SimdOp::I32x4GtS:
1056 case wasm::SimdOp::I32x4LeS:
1057 case wasm::SimdOp::F32x4Eq:
1058 case wasm::SimdOp::F32x4Ne:
1059 case wasm::SimdOp::F32x4Lt:
1060 case wasm::SimdOp::F32x4Le:
1061 case wasm::SimdOp::F64x2Eq:
1062 case wasm::SimdOp::F64x2Ne:
1063 case wasm::SimdOp::F64x2Lt:
1064 case wasm::SimdOp::F64x2Le:
1065 case wasm::SimdOp::I32x4DotSI16x8:
1066 case wasm::SimdOp::F32x4Add:
1067 case wasm::SimdOp::F64x2Add:
1068 case wasm::SimdOp::F32x4Sub:
1069 case wasm::SimdOp::F64x2Sub:
1070 case wasm::SimdOp::F32x4Div:
1071 case wasm::SimdOp::F64x2Div:
1072 case wasm::SimdOp::F32x4Mul:
1073 case wasm::SimdOp::F64x2Mul:
1074 case wasm::SimdOp::I8x16NarrowSI16x8:
1075 case wasm::SimdOp::I8x16NarrowUI16x8:
1076 case wasm::SimdOp::I16x8NarrowSI32x4:
1077 case wasm::SimdOp::I16x8NarrowUI32x4:
1078 return true;
1079 default:
1080 return false;
1081 }
1082 }
1083
void LIRGenerator::visitWasmBinarySimd128WithConstant(
1085 MWasmBinarySimd128WithConstant* ins) {
1086 #ifdef ENABLE_WASM_SIMD
1087 MDefinition* lhs = ins->lhs();
1088
1089 MOZ_ASSERT(lhs->type() == MIRType::Simd128);
1090 MOZ_ASSERT(ins->type() == MIRType::Simd128);
1091
1092 // Always beneficial to reuse the lhs register here, see discussion in
1093 // visitWasmBinarySimd128() and also code in specializeForConstantRhs().
1094
1095 LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
1096 auto* lir =
1097 new (alloc()) LWasmBinarySimd128WithConstant(lhsDestAlloc, ins->rhs());
1098 defineReuseInput(lir, ins, LWasmBinarySimd128WithConstant::LhsDest);
1099 #else
1100 MOZ_CRASH("No SIMD");
1101 #endif
1102 }
1103
void LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) {
1105 #ifdef ENABLE_WASM_SIMD
1106 MDefinition* lhs = ins->lhs();
1107 MDefinition* rhs = ins->rhs();
1108
1109 MOZ_ASSERT(lhs->type() == MIRType::Simd128);
1110 MOZ_ASSERT(rhs->type() == MIRType::Int32);
1111 MOZ_ASSERT(ins->type() == MIRType::Simd128);
1112
1113 if (rhs->isConstant()) {
1114 int32_t shiftCountMask;
1115 switch (ins->simdOp()) {
1116 case wasm::SimdOp::I8x16Shl:
1117 case wasm::SimdOp::I8x16ShrU:
1118 case wasm::SimdOp::I8x16ShrS:
1119 shiftCountMask = 7;
1120 break;
1121 case wasm::SimdOp::I16x8Shl:
1122 case wasm::SimdOp::I16x8ShrU:
1123 case wasm::SimdOp::I16x8ShrS:
1124 shiftCountMask = 15;
1125 break;
1126 case wasm::SimdOp::I32x4Shl:
1127 case wasm::SimdOp::I32x4ShrU:
1128 case wasm::SimdOp::I32x4ShrS:
1129 shiftCountMask = 31;
1130 break;
1131 case wasm::SimdOp::I64x2Shl:
1132 case wasm::SimdOp::I64x2ShrU:
1133 case wasm::SimdOp::I64x2ShrS:
1134 shiftCountMask = 63;
1135 break;
1136 default:
1137 MOZ_CRASH("Unexpected shift operation");
1138 }
1139
1140 int32_t shiftCount = rhs->toConstant()->toInt32() & shiftCountMask;
1141 if (shiftCount == shiftCountMask) {
      // Check whether the sign-replication optimization can be applied.
      // For some ops the input is reused.
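      // (Shifting a lane right arithmetically by its width minus one copies
      // the sign bit into every bit, so each lane becomes all-zeros or
      // all-ones; LWasmSignReplicationSimd128 produces that pattern directly,
      // e.g. via a compare against zero, instead of doing a real shift.)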
1144 switch (ins->simdOp()) {
1145 case wasm::SimdOp::I8x16ShrS: {
1146 auto* lir =
1147 new (alloc()) LWasmSignReplicationSimd128(useRegister(lhs));
1148 define(lir, ins);
1149 return;
1150 }
1151 case wasm::SimdOp::I16x8ShrS:
1152 case wasm::SimdOp::I32x4ShrS:
1153 case wasm::SimdOp::I64x2ShrS: {
1154 auto* lir = new (alloc())
1155 LWasmSignReplicationSimd128(useRegisterAtStart(lhs));
1156 defineReuseInput(lir, ins, LWasmConstantShiftSimd128::Src);
1157 return;
1158 }
1159 default:
1160 break;
1161 }
1162 }
1163
1164 # ifdef DEBUG
1165 js::wasm::ReportSimdAnalysis("shift -> constant shift");
1166 # endif
1167 // Almost always beneficial, and never detrimental, to reuse the input if
1168 // possible.
1169 auto* lir = new (alloc())
1170 LWasmConstantShiftSimd128(useRegisterAtStart(lhs), shiftCount);
1171 defineReuseInput(lir, ins, LWasmConstantShiftSimd128::Src);
1172 return;
1173 }
1174
1175 # ifdef DEBUG
1176 js::wasm::ReportSimdAnalysis("shift -> variable shift");
1177 # endif
1178
1179 LDefinition tempReg0 = LDefinition::BogusTemp();
1180 LDefinition tempReg1 = LDefinition::BogusTemp();
1181 switch (ins->simdOp()) {
1182 case wasm::SimdOp::I8x16Shl:
1183 case wasm::SimdOp::I8x16ShrS:
1184 case wasm::SimdOp::I8x16ShrU:
1185 case wasm::SimdOp::I64x2ShrS:
1186 tempReg0 = temp();
1187 tempReg1 = tempSimd128();
1188 break;
1189 default:
1190 tempReg0 = temp();
1191 break;
1192 }
1193
1194 // Reusing the input if possible is never detrimental.
1195 LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
1196 LAllocation rhsAlloc = useRegisterAtStart(rhs);
1197 auto* lir = new (alloc())
1198 LWasmVariableShiftSimd128(lhsDestAlloc, rhsAlloc, tempReg0, tempReg1);
1199 defineReuseInput(lir, ins, LWasmVariableShiftSimd128::LhsDest);
1200 #else
1201 MOZ_CRASH("No SIMD");
1202 #endif
1203 }
1204
void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
1206 #ifdef ENABLE_WASM_SIMD
1207 MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
1208 MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128);
1209 MOZ_ASSERT(ins->type() == MIRType::Simd128);
1210
1211 Shuffle s = AnalyzeShuffle(ins);
1212 # ifdef DEBUG
1213 ReportShuffleSpecialization(s);
1214 # endif
1215 switch (s.opd) {
1216 case Shuffle::Operand::LEFT:
1217 case Shuffle::Operand::RIGHT: {
1218 LAllocation src;
1219 // All permute operators currently favor reusing the input register so
1220 // we're not currently exercising code paths below that do not reuse.
1221 // Those paths have been exercised in the past however and are believed
1222 // to be correct.
1223 bool useAtStartAndReuse = false;
1224 switch (*s.permuteOp) {
1225 case LWasmPermuteSimd128::MOVE:
1226 case LWasmPermuteSimd128::BROADCAST_8x16:
1227 case LWasmPermuteSimd128::BROADCAST_16x8:
1228 case LWasmPermuteSimd128::PERMUTE_8x16:
1229 case LWasmPermuteSimd128::PERMUTE_16x8:
1230 case LWasmPermuteSimd128::PERMUTE_32x4:
1231 case LWasmPermuteSimd128::ROTATE_RIGHT_8x16:
1232 case LWasmPermuteSimd128::SHIFT_LEFT_8x16:
1233 case LWasmPermuteSimd128::SHIFT_RIGHT_8x16:
1234 useAtStartAndReuse = true;
1235 break;
1236 default:
1237 MOZ_CRASH("Unexpected operator");
1238 }
1239 if (s.opd == Shuffle::Operand::LEFT) {
1240 if (useAtStartAndReuse) {
1241 src = useRegisterAtStart(ins->lhs());
1242 } else {
1243 src = useRegister(ins->lhs());
1244 }
1245 } else {
1246 if (useAtStartAndReuse) {
1247 src = useRegisterAtStart(ins->rhs());
1248 } else {
1249 src = useRegister(ins->rhs());
1250 }
1251 }
1252 auto* lir =
1253 new (alloc()) LWasmPermuteSimd128(src, *s.permuteOp, s.control);
1254 if (useAtStartAndReuse) {
1255 defineReuseInput(lir, ins, LWasmPermuteSimd128::Src);
1256 } else {
1257 define(lir, ins);
1258 }
1259 break;
1260 }
1261 case Shuffle::Operand::BOTH:
1262 case Shuffle::Operand::BOTH_SWAPPED: {
1263 LDefinition temp = LDefinition::BogusTemp();
1264 switch (*s.shuffleOp) {
1265 case LWasmShuffleSimd128::BLEND_8x16:
1266 temp = tempFixed(xmm0);
1267 break;
1268 default:
1269 break;
1270 }
1271 LAllocation lhs;
1272 LAllocation rhs;
1273 if (s.opd == Shuffle::Operand::BOTH) {
1274 lhs = useRegisterAtStart(ins->lhs());
1275 rhs = useRegister(ins->rhs());
1276 } else {
1277 lhs = useRegisterAtStart(ins->rhs());
1278 rhs = useRegister(ins->lhs());
1279 }
1280 auto* lir = new (alloc())
1281 LWasmShuffleSimd128(lhs, rhs, temp, *s.shuffleOp, s.control);
1282 defineReuseInput(lir, ins, LWasmShuffleSimd128::LhsDest);
1283 break;
1284 }
1285 }
1286 #else
1287 MOZ_CRASH("No SIMD");
1288 #endif
1289 }
1290
void LIRGenerator::visitWasmReplaceLaneSimd128(MWasmReplaceLaneSimd128* ins) {
1292 #ifdef ENABLE_WASM_SIMD
1293 MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
1294 MOZ_ASSERT(ins->type() == MIRType::Simd128);
1295
1296 // The Masm API is (rhs, lhsDest) and requires AtStart+ReuseInput for the lhs.
1297 // For type reasons, the rhs will never be the same as the lhs and is
1298 // therefore a plain Use.
1299
1300 if (ins->rhs()->type() == MIRType::Int64) {
1301 auto* lir = new (alloc()) LWasmReplaceInt64LaneSimd128(
1302 useRegisterAtStart(ins->lhs()), useInt64Register(ins->rhs()));
1303 defineReuseInput(lir, ins, LWasmReplaceInt64LaneSimd128::LhsDest);
1304 } else {
1305 auto* lir = new (alloc()) LWasmReplaceLaneSimd128(
1306 useRegisterAtStart(ins->lhs()), useRegister(ins->rhs()));
1307 defineReuseInput(lir, ins, LWasmReplaceLaneSimd128::LhsDest);
1308 }
1309 #else
1310 MOZ_CRASH("No SIMD");
1311 #endif
1312 }
1313
void LIRGenerator::visitWasmScalarToSimd128(MWasmScalarToSimd128* ins) {
1315 #ifdef ENABLE_WASM_SIMD
1316 MOZ_ASSERT(ins->type() == MIRType::Simd128);
1317
1318 switch (ins->input()->type()) {
1319 case MIRType::Int64: {
1320 // 64-bit integer splats.
1321 // Load-and-(sign|zero)extend.
1322 auto* lir = new (alloc())
1323 LWasmInt64ToSimd128(useInt64RegisterAtStart(ins->input()));
1324 define(lir, ins);
1325 break;
1326 }
1327 case MIRType::Float32:
1328 case MIRType::Double: {
1329 // Floating-point splats.
1330 // Ideally we save a move on SSE systems by reusing the input register,
1331 // but since the input and output register types differ, we can't.
1332 auto* lir =
1333 new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input()));
1334 define(lir, ins);
1335 break;
1336 }
1337 default: {
1338 // 32-bit integer splats.
1339 auto* lir =
1340 new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input()));
1341 define(lir, ins);
1342 break;
1343 }
1344 }
1345 #else
1346 MOZ_CRASH("No SIMD");
1347 #endif
1348 }
1349
void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
1351 #ifdef ENABLE_WASM_SIMD
1352 MOZ_ASSERT(ins->input()->type() == MIRType::Simd128);
1353 MOZ_ASSERT(ins->type() == MIRType::Simd128);
1354
1355 bool useAtStart = false;
1356 bool reuseInput = false;
1357 LDefinition tempReg = LDefinition::BogusTemp();
1358 switch (ins->simdOp()) {
1359 case wasm::SimdOp::I8x16Neg:
1360 case wasm::SimdOp::I16x8Neg:
1361 case wasm::SimdOp::I32x4Neg:
1362 case wasm::SimdOp::I64x2Neg:
1363 // Prefer src != dest to avoid an unconditional src->temp move.
1364 MOZ_ASSERT(!useAtStart && !reuseInput);
1365 break;
1366 case wasm::SimdOp::F32x4Neg:
1367 case wasm::SimdOp::F64x2Neg:
1368 case wasm::SimdOp::F32x4Abs:
1369 case wasm::SimdOp::F64x2Abs:
1370 case wasm::SimdOp::V128Not:
1371 case wasm::SimdOp::F32x4Sqrt:
1372 case wasm::SimdOp::F64x2Sqrt:
1373 case wasm::SimdOp::I8x16Abs:
1374 case wasm::SimdOp::I16x8Abs:
1375 case wasm::SimdOp::I32x4Abs:
1376 case wasm::SimdOp::I64x2Abs:
1377 case wasm::SimdOp::I32x4TruncSSatF32x4:
1378 case wasm::SimdOp::F32x4ConvertUI32x4:
1379 case wasm::SimdOp::I16x8ExtAddPairwiseI8x16S:
1380 case wasm::SimdOp::I16x8ExtAddPairwiseI8x16U:
1381 case wasm::SimdOp::I32x4ExtAddPairwiseI16x8S:
1382 case wasm::SimdOp::I32x4ExtAddPairwiseI16x8U:
1383 // Prefer src == dest to avoid an unconditional src->dest move.
1384 useAtStart = true;
1385 reuseInput = true;
1386 break;
1387 case wasm::SimdOp::I32x4TruncUSatF32x4:
1388 case wasm::SimdOp::I32x4TruncSatF64x2SZero:
1389 case wasm::SimdOp::I32x4TruncSatF64x2UZero:
1390 case wasm::SimdOp::I8x16Popcnt:
1391 tempReg = tempSimd128();
1392 // Prefer src == dest to avoid an unconditional src->dest move.
1393 useAtStart = true;
1394 reuseInput = true;
1395 break;
1396 case wasm::SimdOp::I16x8WidenLowSI8x16:
1397 case wasm::SimdOp::I16x8WidenHighSI8x16:
1398 case wasm::SimdOp::I16x8WidenLowUI8x16:
1399 case wasm::SimdOp::I16x8WidenHighUI8x16:
1400 case wasm::SimdOp::I32x4WidenLowSI16x8:
1401 case wasm::SimdOp::I32x4WidenHighSI16x8:
1402 case wasm::SimdOp::I32x4WidenLowUI16x8:
1403 case wasm::SimdOp::I32x4WidenHighUI16x8:
1404 case wasm::SimdOp::I64x2WidenLowSI32x4:
1405 case wasm::SimdOp::I64x2WidenHighSI32x4:
1406 case wasm::SimdOp::I64x2WidenLowUI32x4:
1407 case wasm::SimdOp::I64x2WidenHighUI32x4:
1408 case wasm::SimdOp::F32x4ConvertSI32x4:
1409 case wasm::SimdOp::F32x4Ceil:
1410 case wasm::SimdOp::F32x4Floor:
1411 case wasm::SimdOp::F32x4Trunc:
1412 case wasm::SimdOp::F32x4Nearest:
1413 case wasm::SimdOp::F64x2Ceil:
1414 case wasm::SimdOp::F64x2Floor:
1415 case wasm::SimdOp::F64x2Trunc:
1416 case wasm::SimdOp::F64x2Nearest:
1417 case wasm::SimdOp::F32x4DemoteF64x2Zero:
1418 case wasm::SimdOp::F64x2PromoteLowF32x4:
1419 case wasm::SimdOp::F64x2ConvertLowI32x4S:
1420 case wasm::SimdOp::F64x2ConvertLowI32x4U:
1421 // Prefer src == dest to exert the lowest register pressure on the
1422 // surrounding code.
1423 useAtStart = true;
1424 MOZ_ASSERT(!reuseInput);
1425 break;
1426 default:
1427 MOZ_CRASH("Unary SimdOp not implemented");
1428 }
1429
1430 LUse inputUse =
1431 useAtStart ? useRegisterAtStart(ins->input()) : useRegister(ins->input());
1432 LWasmUnarySimd128* lir = new (alloc()) LWasmUnarySimd128(inputUse, tempReg);
1433 if (reuseInput) {
1434 defineReuseInput(lir, ins, LWasmUnarySimd128::Src);
1435 } else {
1436 define(lir, ins);
1437 }
1438 #else
1439 MOZ_CRASH("No SIMD");
1440 #endif
1441 }
1442
void LIRGenerator::visitWasmLoadLaneSimd128(MWasmLoadLaneSimd128* ins) {
1444 #ifdef ENABLE_WASM_SIMD
1445 LUse base = useRegisterAtStart(ins->base());
1446 LUse inputUse = useRegisterAtStart(ins->value());
1447 LAllocation memoryBase = ins->hasMemoryBase()
1448 ? useRegisterAtStart(ins->memoryBase())
1449 : LAllocation();
1450 LWasmLoadLaneSimd128* lir = new (alloc()) LWasmLoadLaneSimd128(
1451 base, inputUse, LDefinition::BogusTemp(), memoryBase);
1452 defineReuseInput(lir, ins, LWasmLoadLaneSimd128::Src);
1453 #else
1454 MOZ_CRASH("No SIMD");
1455 #endif
1456 }
1457
void LIRGenerator::visitWasmStoreLaneSimd128(MWasmStoreLaneSimd128* ins) {
1459 #ifdef ENABLE_WASM_SIMD
1460 LUse base = useRegisterAtStart(ins->base());
1461 LUse input = useRegisterAtStart(ins->value());
1462 LAllocation memoryBase = ins->hasMemoryBase()
1463 ? useRegisterAtStart(ins->memoryBase())
1464 : LAllocation();
1465 LWasmStoreLaneSimd128* lir = new (alloc())
1466 LWasmStoreLaneSimd128(base, input, LDefinition::BogusTemp(), memoryBase);
1467 add(lir, ins);
1468 #else
1469 MOZ_CRASH("No SIMD");
1470 #endif
1471 }
1472
1473 #ifdef ENABLE_WASM_SIMD
1474
bool LIRGeneratorX86Shared::canFoldReduceSimd128AndBranch(wasm::SimdOp op) {
1476 switch (op) {
1477 case wasm::SimdOp::V128AnyTrue:
1478 case wasm::SimdOp::I8x16AllTrue:
1479 case wasm::SimdOp::I16x8AllTrue:
1480 case wasm::SimdOp::I32x4AllTrue:
1481 case wasm::SimdOp::I64x2AllTrue:
1482 case wasm::SimdOp::I16x8Bitmask:
1483 return true;
1484 default:
1485 return false;
1486 }
1487 }
1488
bool LIRGeneratorX86Shared::canEmitWasmReduceSimd128AtUses(
1490 MWasmReduceSimd128* ins) {
1491 if (!ins->canEmitAtUses()) {
1492 return false;
1493 }
1494 // Only specific ops generating int32.
1495 if (ins->type() != MIRType::Int32) {
1496 return false;
1497 }
1498 if (!canFoldReduceSimd128AndBranch(ins->simdOp())) {
1499 return false;
1500 }
1501 // If never used then defer (it will be removed).
1502 MUseIterator iter(ins->usesBegin());
1503 if (iter == ins->usesEnd()) {
1504 return true;
1505 }
1506 // We require an MTest consumer.
1507 MNode* node = iter->consumer();
1508 if (!node->isDefinition() || !node->toDefinition()->isTest()) {
1509 return false;
1510 }
1511 // Defer only if there's only one use.
1512 iter++;
1513 return iter == ins->usesEnd();
1514 }
1515
1516 #endif // ENABLE_WASM_SIMD
1517
void LIRGenerator::visitWasmReduceSimd128(MWasmReduceSimd128* ins) {
1519 #ifdef ENABLE_WASM_SIMD
1520 if (canEmitWasmReduceSimd128AtUses(ins)) {
1521 emitAtUses(ins);
1522 return;
1523 }
1524
1525 // Reductions (any_true, all_true, bitmask, extract_lane) uniformly prefer
1526 // useRegisterAtStart:
1527 //
1528 // - In most cases, the input type differs from the output type, so there's no
1529 // conflict and it doesn't really matter.
1530 //
1531 // - For extract_lane(0) on F32x4 and F64x2, input == output results in zero
1532 // code being generated.
1533 //
1534 // - For extract_lane(k > 0) on F32x4 and F64x2, allowing the input register
1535 // to be targeted lowers register pressure if it's the last use of the
1536 // input.
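  // For example, extract_lane 0 of an F64x2 value needs no code at all when
  // input == output, because the scalar double already sits in the low 64
  // bits of the xmm register.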
1537
1538 if (ins->type() == MIRType::Int64) {
1539 auto* lir = new (alloc())
1540 LWasmReduceSimd128ToInt64(useRegisterAtStart(ins->input()));
1541 defineInt64(lir, ins);
1542 } else {
1543 // Ideally we would reuse the input register for floating extract_lane if
1544 // the lane is zero, but constraints in the register allocator require the
1545 // input and output register types to be the same.
1546 auto* lir = new (alloc()) LWasmReduceSimd128(
1547 useRegisterAtStart(ins->input()), LDefinition::BogusTemp());
1548 define(lir, ins);
1549 }
1550 #else
1551 MOZ_CRASH("No SIMD");
1552 #endif
1553 }
1554