/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "jit/x86-shared/Lowering-x86-shared.h"

#include "mozilla/MathAlgorithms.h"

#include "jit/MIR.h"

#include "jit/shared/Lowering-shared-inl.h"

using namespace js;
using namespace js::jit;

using mozilla::Abs;
using mozilla::FloorLog2;
using mozilla::Swap;

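// Table switch on an Int32 input; the temp is scratch for the back-end's
// jump-table dispatch.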
LTableSwitch*
LIRGeneratorX86Shared::newLTableSwitch(const LAllocation& in, const LDefinition& inputCopy,
                                       MTableSwitch* tableswitch)
{
    return new(alloc()) LTableSwitch(in, inputCopy, temp(), tableswitch);
}

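// Table switch on a boxed Value input: the temps give the back-end scratch
// registers for unboxing the index (including a double temp) and for the
// jump-table dispatch.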
LTableSwitchV*
LIRGeneratorX86Shared::newLTableSwitchV(MTableSwitch* tableswitch)
{
    return new(alloc()) LTableSwitchV(temp(), tempDouble(), temp(), tableswitch);
}

void
LIRGeneratorX86Shared::visitGuardShape(MGuardShape* ins)
{
    MOZ_ASSERT(ins->obj()->type() == MIRType_Object);

    LGuardShape* guard = new(alloc()) LGuardShape(useRegisterAtStart(ins->obj()));
    assignSnapshot(guard, ins->bailoutKind());
    add(guard, ins);
    redefine(ins, ins->obj());
}

void
LIRGeneratorX86Shared::visitGuardObjectGroup(MGuardObjectGroup* ins)
{
    MOZ_ASSERT(ins->obj()->type() == MIRType_Object);

    LGuardObjectGroup* guard = new(alloc()) LGuardObjectGroup(useRegisterAtStart(ins->obj()));
    assignSnapshot(guard, ins->bailoutKind());
    add(guard, ins);
    redefine(ins, ins->obj());
}

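// Math.pow(x, 0.5) on a double. This is not plain sqrt(x): pow(-Infinity, 0.5)
// must return Infinity, so codegen special-cases that input.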
void
LIRGeneratorX86Shared::visitPowHalf(MPowHalf* ins)
{
    MDefinition* input = ins->input();
    MOZ_ASSERT(input->type() == MIRType_Double);
    LPowHalfD* lir = new(alloc()) LPowHalfD(useRegisterAtStart(input));
    define(lir, ins);
}

void
LIRGeneratorX86Shared::lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                                     MDefinition* lhs, MDefinition* rhs)
{
    ins->setOperand(0, useRegisterAtStart(lhs));

    // The shift count must be a constant or live in ecx: x86 variable
    // shifts only accept the count in cl.
    if (rhs->isConstant())
        ins->setOperand(1, useOrConstantAtStart(rhs));
    else
        ins->setOperand(1, lhs != rhs ? useFixed(rhs, ecx) : useFixedAtStart(rhs, ecx));

    defineReuseInput(ins, mir, 0);
}

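// Unary integer ops (e.g. NOT, NEG) are two-address on x86: the output must
// be the same register as the input, hence defineReuseInput.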
void
LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                                   MDefinition* input)
{
    ins->setOperand(0, useRegisterAtStart(input));
    defineReuseInput(ins, mir, 0);
}

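// Binary integer ops are likewise two-address: the output aliases the lhs,
// while the rhs may be any register or an immediate.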
void
LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                                   MDefinition* lhs, MDefinition* rhs)
{
    ins->setOperand(0, useRegisterAtStart(lhs));
    ins->setOperand(1, lhs != rhs ? useOrConstant(rhs) : useOrConstantAtStart(rhs));
    defineReuseInput(ins, mir, 0);
}

template<size_t Temps>
void
LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir,
                                   MDefinition* lhs, MDefinition* rhs)
{
    // Without AVX, we'll need to use the x86 encodings where one of the
    // inputs must be the same location as the output.
    //
    // :TODO: (Bug 1132894) Note, we might have to allocate a different
    // register if the MIRType of the reused operand differs from the MIRType
    // of the returned value, as MUST_REUSE_INPUT is not yet capable of reusing
    // the same register but with a different register type.
    if (!Assembler::HasAVX() && mir->type() == lhs->type()) {
        ins->setOperand(0, useRegisterAtStart(lhs));
        ins->setOperand(1, lhs != rhs ? use(rhs) : useAtStart(rhs));
        defineReuseInput(ins, mir, 0);
    } else {
        ins->setOperand(0, useRegisterAtStart(lhs));
        ins->setOperand(1, useAtStart(rhs));
        define(ins, mir);
    }
}

template void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                                                 MDefinition* lhs, MDefinition* rhs);
template void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 1>* ins, MDefinition* mir,
                                                 MDefinition* lhs, MDefinition* rhs);

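// Integer SIMD comparisons (PCMPEQD/PCMPGTD) follow the two-address
// integer-ALU pattern, so reuse that lowering.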
void
LIRGeneratorX86Shared::lowerForCompIx4(LSimdBinaryCompIx4* ins, MSimdBinaryComp* mir,
                                       MDefinition* lhs, MDefinition* rhs)
{
    lowerForALU(ins, mir, lhs, rhs);
}

void
LIRGeneratorX86Shared::lowerForCompFx4(LSimdBinaryCompFx4* ins, MSimdBinaryComp* mir,
                                       MDefinition* lhs, MDefinition* rhs)
{
    // Swap the operands around to fit the instructions that x86 actually has.
    // We do this here, before register allocation, so that we don't need
    // temporaries and copying afterwards.
    switch (mir->operation()) {
      case MSimdBinaryComp::greaterThan:
      case MSimdBinaryComp::greaterThanOrEqual:
        mir->reverse();
        Swap(lhs, rhs);
        break;
      default:
        break;
    }

    lowerForFPU(ins, mir, lhs, rhs);
}

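// Fused bitwise-and-and-branch: emits a TEST (or AND) that only sets the
// flags, so the LIR node defines no output register.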
void
LIRGeneratorX86Shared::lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir,
                                               MDefinition* lhs, MDefinition* rhs)
{
    baab->setOperand(0, useRegisterAtStart(lhs));
    baab->setOperand(1, useRegisterOrConstantAtStart(rhs));
    add(baab, mir);
}

void
LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs)
{
    // Note: If we need a negative zero check, lhs is used twice.
    LAllocation lhsCopy = mul->canBeNegativeZero() ? use(lhs) : LAllocation();
    LMulI* lir = new(alloc()) LMulI(useRegisterAtStart(lhs), useOrConstant(rhs), lhsCopy);
    if (mul->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineReuseInput(lir, mul, 0);
}

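// Signed division. x86 IDIV takes its dividend in edx:eax and leaves the
// quotient in eax and the remainder in edx, which dictates the fixed
// registers and temps chosen below.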
void
LIRGeneratorX86Shared::lowerDivI(MDiv* div)
{
    if (div->isUnsigned()) {
        lowerUDiv(div);
        return;
    }

    // Division instructions are slow. Division by constant denominators can be
    // rewritten to use other instructions.
    if (div->rhs()->isConstant()) {
        int32_t rhs = div->rhs()->toConstant()->value().toInt32();

        // Division by powers of two can be done by shifting, and division by
        // other numbers can be done by a reciprocal multiplication technique.
        int32_t shift = FloorLog2(Abs(rhs));
        if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
            LAllocation lhs = useRegisterAtStart(div->lhs());
            LDivPowTwoI* lir;
            if (!div->canBeNegativeDividend()) {
                // Numerator is unsigned, so it does not need adjusting.
                lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift, rhs < 0);
            } else {
                // Numerator is signed and needs adjusting, so an extra
                // lhs copy register is needed.
                lir = new(alloc()) LDivPowTwoI(lhs, useRegister(div->lhs()), shift, rhs < 0);
            }
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, div, 0);
            return;
        }
        if (rhs != 0) {
            LDivOrModConstantI* lir;
            lir = new(alloc()) LDivOrModConstantI(useRegister(div->lhs()), rhs, tempFixed(eax));
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, div, LAllocation(AnyRegister(edx)));
            return;
        }
    }

    LDivI* lir = new(alloc()) LDivI(useRegister(div->lhs()), useRegister(div->rhs()),
                                    tempFixed(edx));
    if (div->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, div, LAllocation(AnyRegister(eax)));
}

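// Signed modulus: same IDIV-based pattern as division, but the result is
// read from edx (the remainder register), with eax as the clobbered temp.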
void
LIRGeneratorX86Shared::lowerModI(MMod* mod)
{
    if (mod->isUnsigned()) {
        lowerUMod(mod);
        return;
    }

    if (mod->rhs()->isConstant()) {
        int32_t rhs = mod->rhs()->toConstant()->value().toInt32();
        int32_t shift = FloorLog2(Abs(rhs));
        if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
            LModPowTwoI* lir = new(alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, mod, 0);
            return;
        }
        if (rhs != 0) {
            LDivOrModConstantI* lir;
            lir = new(alloc()) LDivOrModConstantI(useRegister(mod->lhs()), rhs, tempFixed(edx));
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
            return;
        }
    }

    LModI* lir = new(alloc()) LModI(useRegister(mod->lhs()),
                                    useRegister(mod->rhs()),
                                    tempFixed(eax));
    if (mod->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
}

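// Negation is two-address as well: integer NEG overwrites its operand, and
// float negation XORs the sign bit in place.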
void
LIRGeneratorX86Shared::visitAsmJSNeg(MAsmJSNeg* ins)
{
    switch (ins->type()) {
      case MIRType_Int32:
        defineReuseInput(new(alloc()) LNegI(useRegisterAtStart(ins->input())), ins, 0);
        break;
      case MIRType_Float32:
        defineReuseInput(new(alloc()) LNegF(useRegisterAtStart(ins->input())), ins, 0);
        break;
      case MIRType_Double:
        defineReuseInput(new(alloc()) LNegD(useRegisterAtStart(ins->input())), ins, 0);
        break;
      default:
        MOZ_CRASH();
    }
}

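// Unsigned division uses DIV, with the same edx:eax register conventions as
// the signed case; power-of-two and other constant divisors get fast paths.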
void
LIRGeneratorX86Shared::lowerUDiv(MDiv* div)
{
    if (div->rhs()->isConstant()) {
        uint32_t rhs = div->rhs()->toConstant()->value().toInt32();
        int32_t shift = FloorLog2(rhs);

        LAllocation lhs = useRegisterAtStart(div->lhs());
        if (rhs != 0 && uint32_t(1) << shift == rhs) {
            LDivPowTwoI* lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift, false);
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, div, 0);
        } else {
            LUDivOrModConstant* lir = new(alloc()) LUDivOrModConstant(useRegister(div->lhs()),
                                                                      rhs, tempFixed(eax));
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, div, LAllocation(AnyRegister(edx)));
        }
        return;
    }

    LUDivOrMod* lir = new(alloc()) LUDivOrMod(useRegister(div->lhs()),
                                              useRegister(div->rhs()),
                                              tempFixed(edx));
    if (div->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, div, LAllocation(AnyRegister(eax)));
}

void
LIRGeneratorX86Shared::lowerUMod(MMod* mod)
{
    if (mod->rhs()->isConstant()) {
        uint32_t rhs = mod->rhs()->toConstant()->value().toInt32();
        int32_t shift = FloorLog2(rhs);

        if (rhs != 0 && uint32_t(1) << shift == rhs) {
            LModPowTwoI* lir = new(alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, mod, 0);
        } else {
            LUDivOrModConstant* lir = new(alloc()) LUDivOrModConstant(useRegister(mod->lhs()),
                                                                      rhs, tempFixed(edx));
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
        }
        return;
    }

    LUDivOrMod* lir = new(alloc()) LUDivOrMod(useRegister(mod->lhs()),
                                              useRegister(mod->rhs()),
                                              tempFixed(eax));
    if (mod->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
}

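// Lower x >>> y producing a double, needed when the result may exceed
// INT32_MAX. The shift count is pinned to ecx like other shifts, and
// tempCopy provides a scratch copy of lhs for the back-end to shift.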
void
LIRGeneratorX86Shared::lowerUrshD(MUrsh* mir)
{
    MDefinition* lhs = mir->lhs();
    MDefinition* rhs = mir->rhs();

    MOZ_ASSERT(lhs->type() == MIRType_Int32);
    MOZ_ASSERT(rhs->type() == MIRType_Int32);
    MOZ_ASSERT(mir->type() == MIRType_Double);

#ifdef JS_CODEGEN_X64
    MOZ_ASSERT(ecx == rcx);
#endif

    LUse lhsUse = useRegisterAtStart(lhs);
    LAllocation rhsAlloc = rhs->isConstant() ? useOrConstant(rhs) : useFixed(rhs, ecx);

    LUrshD* lir = new(alloc()) LUrshD(lhsUse, rhsAlloc, tempCopy(lhs, 0));
    define(lir, mir);
}

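// Truncating double -> int32 (and the Float32 variant below). The slow path
// can use SSE3's FISTTP to truncate directly; without SSE3 the fallback
// sequence needs a scratch FP register, hence the conditional temp.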
void
LIRGeneratorX86Shared::lowerTruncateDToInt32(MTruncateToInt32* ins)
{
    MDefinition* opd = ins->input();
    MOZ_ASSERT(opd->type() == MIRType_Double);

    LDefinition maybeTemp = Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempDouble();
    define(new(alloc()) LTruncateDToInt32(useRegister(opd), maybeTemp), ins);
}

void
LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32* ins)
{
    MDefinition* opd = ins->input();
    MOZ_ASSERT(opd->type() == MIRType_Float32);

    LDefinition maybeTemp = Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempFloat32();
    define(new(alloc()) LTruncateFToInt32(useRegister(opd), maybeTemp), ins);
}

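// The underlying instruction is LOCK CMPXCHG: it expects the old value in
// eax and leaves the value previously in memory in eax, which is why eax is
// pinned in the cases below.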
void
LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement* ins,
                                                             bool useI386ByteRegisters)
{
    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float64);

    MOZ_ASSERT(ins->elements()->type() == MIRType_Elements);
    MOZ_ASSERT(ins->index()->type() == MIRType_Int32);

    const LUse elements = useRegister(ins->elements());
    const LAllocation index = useRegisterOrConstant(ins->index());

    // If the target is a floating register then we need a temp at the
    // lower level; that temp must be eax.
    //
    // Otherwise the target (if used) is an integer register, which
    // must be eax.  If the target is not used the machine code will
    // still clobber eax, so just pretend it's used.
    //
    // oldval must be in a register.
    //
    // newval must be in a register.  If the source is a byte array
    // then newval must be a register that has a byte size: on x86
    // this must be ebx, ecx, or edx (eax is taken for the output).
    //
    // Bug #1077036 describes some further optimization opportunities.

    bool fixedOutput = false;
    LDefinition tempDef = LDefinition::BogusTemp();
    LAllocation newval;
    if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
        tempDef = tempFixed(eax);
        newval = useRegister(ins->newval());
    } else {
        fixedOutput = true;
        if (useI386ByteRegisters && ins->isByteArray())
            newval = useFixed(ins->newval(), ebx);
        else
            newval = useRegister(ins->newval());
    }

    const LAllocation oldval = useRegister(ins->oldval());

    LCompareExchangeTypedArrayElement* lir =
        new(alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval, newval, tempDef);

    if (fixedOutput)
        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
    else
        define(lir, ins);
}

void
LIRGeneratorX86Shared::lowerAtomicExchangeTypedArrayElement(MAtomicExchangeTypedArrayElement* ins,
                                                            bool useI386ByteRegisters)
{
    MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);

    MOZ_ASSERT(ins->elements()->type() == MIRType_Elements);
    MOZ_ASSERT(ins->index()->type() == MIRType_Int32);

    const LUse elements = useRegister(ins->elements());
    const LAllocation index = useRegisterOrConstant(ins->index());
    const LAllocation value = useRegister(ins->value());

    // The underlying instruction is XCHG, which can operate on any
    // register.
    //
    // If the target is a floating register (for Uint32) then we need
    // a temp into which to exchange.
    //
    // If the source is a byte array then we need a register that has
    // a byte size; in this case -- on x86 only -- pin the output to
    // an appropriate register and use that as a temp in the back-end.

    LDefinition tempDef = LDefinition::BogusTemp();
    if (ins->arrayType() == Scalar::Uint32) {
        // This restriction is bug 1077305.
        MOZ_ASSERT(ins->type() == MIRType_Double);
        tempDef = temp();
    }

    LAtomicExchangeTypedArrayElement* lir =
        new(alloc()) LAtomicExchangeTypedArrayElement(elements, index, value, tempDef);

    if (useI386ByteRegisters && ins->isByteArray())
        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
    else
        define(lir, ins);
}

void
LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop* ins,
                                                         bool useI386ByteRegisters)
{
    MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float64);

    MOZ_ASSERT(ins->elements()->type() == MIRType_Elements);
    MOZ_ASSERT(ins->index()->type() == MIRType_Int32);

    const LUse elements = useRegister(ins->elements());
    const LAllocation index = useRegisterOrConstant(ins->index());

    // Case 1: the result of the operation is not used.
    //
    // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
    // LOCK OR, or LOCK XOR.  We can do this even for the Uint32 case.

    if (!ins->hasUses()) {
        LAllocation value;
        if (useI386ByteRegisters && ins->isByteArray() && !ins->value()->isConstant())
            value = useFixed(ins->value(), ebx);
        else
            value = useRegisterOrConstant(ins->value());

        LAtomicTypedArrayElementBinopForEffect* lir =
            new(alloc()) LAtomicTypedArrayElementBinopForEffect(elements, index, value);

        add(lir, ins);
        return;
    }

    // Case 2: the result of the operation is used.
    //
    // For ADD and SUB we'll use XADD:
    //
    //    movl       src, output
    //    lock xaddl output, mem
    //
    // For the 8-bit variants XADD needs a byte register for the output.
    //
    // For AND/OR/XOR we need to use a CMPXCHG loop:
    //
    //    movl          *mem, eax
    // L: mov           eax, temp
    //    andl          src, temp
    //    lock cmpxchg  temp, mem  ; reads eax also
    //    jnz           L
    //    ; result in eax
    //
    // Note the placement of L: cmpxchg will update eax with *mem if
    // *mem does not have the expected value, so reloading it at the
    // top of the loop would be redundant.
    //
    // If the array is not a uint32 array then:
    //  - eax should be the output (one result of the cmpxchg)
    //  - there is a temp, which must have a byte register if
    //    the array has 1-byte elements
    //
    // If the array is a uint32 array then:
    //  - eax is the first temp
    //  - we also need a second temp
    //
    // There are optimization opportunities:
    //  - better register allocation in the x86 8-bit case, Bug #1077036.

    bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
    bool fixedOutput = true;
    bool reuseInput = false;
    LDefinition tempDef1 = LDefinition::BogusTemp();
    LDefinition tempDef2 = LDefinition::BogusTemp();
    LAllocation value;

    if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
        value = useRegisterOrConstant(ins->value());
        fixedOutput = false;
        if (bitOp) {
            tempDef1 = tempFixed(eax);
            tempDef2 = temp();
        } else {
            tempDef1 = temp();
        }
    } else if (useI386ByteRegisters && ins->isByteArray()) {
        if (ins->value()->isConstant())
            value = useRegisterOrConstant(ins->value());
        else
            value = useFixed(ins->value(), ebx);
        if (bitOp)
            tempDef1 = tempFixed(ecx);
    } else if (bitOp) {
        value = useRegisterOrConstant(ins->value());
        tempDef1 = temp();
    } else if (ins->value()->isConstant()) {
        fixedOutput = false;
        value = useRegisterOrConstant(ins->value());
    } else {
        fixedOutput = false;
        reuseInput = true;
        value = useRegisterAtStart(ins->value());
    }

    LAtomicTypedArrayElementBinop* lir =
        new(alloc()) LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);

    if (fixedOutput)
        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
    else if (reuseInput)
        defineReuseInput(lir, ins, LAtomicTypedArrayElementBinop::valueOp);
    else
        define(lir, ins);
}

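// Int32x4 multiply has no single instruction before SSE4.1's PMULLD; the
// fallback (PMULUDQ plus shuffles) needs the extra temp allocated below.
// Float32x4 max/minNum/maxNum also need a temp for their NaN handling.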
void
LIRGeneratorX86Shared::visitSimdBinaryArith(MSimdBinaryArith* ins)
{
    MOZ_ASSERT(IsSimdType(ins->lhs()->type()));
    MOZ_ASSERT(IsSimdType(ins->rhs()->type()));
    MOZ_ASSERT(IsSimdType(ins->type()));

    MDefinition* lhs = ins->lhs();
    MDefinition* rhs = ins->rhs();

    if (ins->isCommutative())
        ReorderCommutative(&lhs, &rhs, ins);

    if (ins->type() == MIRType_Int32x4) {
        LSimdBinaryArithIx4* lir = new(alloc()) LSimdBinaryArithIx4();
        bool needsTemp = ins->operation() == MSimdBinaryArith::Op_mul && !MacroAssembler::HasSSE41();
        lir->setTemp(0, needsTemp ? temp(LDefinition::INT32X4) : LDefinition::BogusTemp());
        lowerForFPU(lir, ins, lhs, rhs);
        return;
    }

    MOZ_ASSERT(ins->type() == MIRType_Float32x4, "unknown simd type on binary arith operation");

    LSimdBinaryArithFx4* lir = new(alloc()) LSimdBinaryArithFx4();

    bool needsTemp = ins->operation() == MSimdBinaryArith::Op_max ||
                     ins->operation() == MSimdBinaryArith::Op_minNum ||
                     ins->operation() == MSimdBinaryArith::Op_maxNum;
    lir->setTemp(0, needsTemp ? temp(LDefinition::FLOAT32X4) : LDefinition::BogusTemp());

    lowerForFPU(lir, ins, lhs, rhs);
}

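// SIMD select lowers to a bitwise blend (AND/ANDN/OR) of the two values
// under the mask; the float temp holds an intermediate of that blend.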
void
LIRGeneratorX86Shared::visitSimdSelect(MSimdSelect* ins)
{
    MOZ_ASSERT(IsSimdType(ins->type()));
    MOZ_ASSERT(ins->type() == MIRType_Int32x4 || ins->type() == MIRType_Float32x4,
               "Unknown SIMD kind when doing bitwise operations");

    LSimdSelect* lins = new(alloc()) LSimdSelect;
    MDefinition* r0 = ins->getOperand(0);
    MDefinition* r1 = ins->getOperand(1);
    MDefinition* r2 = ins->getOperand(2);

    lins->setOperand(0, useRegister(r0));
    lins->setOperand(1, useRegister(r1));
    lins->setOperand(2, useRegister(r2));
    lins->setTemp(0, temp(LDefinition::FLOAT32X4));

    define(lins, ins);
}

void
LIRGeneratorX86Shared::visitSimdSplatX4(MSimdSplatX4* ins)
{
    LAllocation x = useRegisterAtStart(ins->getOperand(0));
    LSimdSplatX4* lir = new(alloc()) LSimdSplatX4(x);

    switch (ins->type()) {
      case MIRType_Int32x4:
        define(lir, ins);
        break;
      case MIRType_Float32x4:
        // (Non-AVX) codegen actually wants the input and the output to be in
        // the same register, but we can't currently use defineReuseInput
        // because they have different types (scalar vs vector), so a spill slot
        // for one may not be suitable for the other.
        define(lir, ins);
        break;
      default:
        MOZ_CRASH("Unknown SIMD kind");
    }
}

void
LIRGeneratorX86Shared::visitSimdValueX4(MSimdValueX4* ins)
{
    if (ins->type() == MIRType_Float32x4) {
        // Ideally, x would be used at start and reused for the output, however
        // register allocation currently doesn't permit us to tie together two
        // virtual registers with different types.
        LAllocation x = useRegister(ins->getOperand(0));
        LAllocation y = useRegister(ins->getOperand(1));
        LAllocation z = useRegister(ins->getOperand(2));
        LAllocation w = useRegister(ins->getOperand(3));
        LDefinition t = temp(LDefinition::FLOAT32X4);
        define(new (alloc()) LSimdValueFloat32x4(x, y, z, w, t), ins);
    } else {
        MOZ_ASSERT(ins->type() == MIRType_Int32x4);

        // No defineReuseInput => useAtStart for everyone.
        LAllocation x = useRegisterAtStart(ins->getOperand(0));
        LAllocation y = useRegisterAtStart(ins->getOperand(1));
        LAllocation z = useRegisterAtStart(ins->getOperand(2));
        LAllocation w = useRegisterAtStart(ins->getOperand(3));
        define(new(alloc()) LSimdValueInt32x4(x, y, z, w), ins);
    }
}