/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "jit/x86-shared/Lowering-x86-shared.h"

#include "mozilla/MathAlgorithms.h"

#include "jit/MIR.h"

#include "jit/shared/Lowering-shared-inl.h"

using namespace js;
using namespace js::jit;

using mozilla::Abs;
using mozilla::FloorLog2;
using mozilla::Swap;

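// Table switch on an Int32 input; the temp is scratch for the back-end's
// jump-table dispatch.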
LTableSwitch*
LIRGeneratorX86Shared::newLTableSwitch(const LAllocation& in, const LDefinition& inputCopy,
                                       MTableSwitch* tableswitch)
{
    return new(alloc()) LTableSwitch(in, inputCopy, temp(), tableswitch);
}

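// Table switch on a boxed Value input: the temps give the back-end scratch
// registers for unboxing the index (including a double temp) and for the
// jump-table dispatch.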
LTableSwitchV*
LIRGeneratorX86Shared::newLTableSwitchV(MTableSwitch* tableswitch)
{
    return new(alloc()) LTableSwitchV(temp(), tempDouble(), temp(), tableswitch);
}

void
LIRGeneratorX86Shared::visitGuardShape(MGuardShape* ins)
{
    MOZ_ASSERT(ins->obj()->type() == MIRType_Object);

    LGuardShape* guard = new(alloc()) LGuardShape(useRegisterAtStart(ins->obj()));
    assignSnapshot(guard, ins->bailoutKind());
    add(guard, ins);
    redefine(ins, ins->obj());
}

void
LIRGeneratorX86Shared::visitGuardObjectGroup(MGuardObjectGroup* ins)
{
    MOZ_ASSERT(ins->obj()->type() == MIRType_Object);

    LGuardObjectGroup* guard = new(alloc()) LGuardObjectGroup(useRegisterAtStart(ins->obj()));
    assignSnapshot(guard, ins->bailoutKind());
    add(guard, ins);
    redefine(ins, ins->obj());
}

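// Math.pow(x, 0.5) on a double. This is not plain sqrt(x): pow(-Infinity, 0.5)
// must return Infinity, so codegen special-cases that input.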
void
LIRGeneratorX86Shared::visitPowHalf(MPowHalf* ins)
{
    MDefinition* input = ins->input();
    MOZ_ASSERT(input->type() == MIRType_Double);
    LPowHalfD* lir = new(alloc()) LPowHalfD(useRegisterAtStart(input));
    define(lir, ins);
}

void
LIRGeneratorX86Shared::lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                                     MDefinition* lhs, MDefinition* rhs)
{
    ins->setOperand(0, useRegisterAtStart(lhs));

    // The shift count must be a constant or live in ecx: x86 variable
    // shifts only accept the count in cl.
    if (rhs->isConstant())
        ins->setOperand(1, useOrConstantAtStart(rhs));
    else
        ins->setOperand(1, lhs != rhs ? useFixed(rhs, ecx) : useFixedAtStart(rhs, ecx));

    defineReuseInput(ins, mir, 0);
}

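// Unary integer ops (e.g. NOT, NEG) are two-address on x86: the output must
// be the same register as the input, hence defineReuseInput.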
void
LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                                   MDefinition* input)
{
    ins->setOperand(0, useRegisterAtStart(input));
    defineReuseInput(ins, mir, 0);
}

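// Binary integer ops are likewise two-address: the output aliases the lhs,
// while the rhs may be any register or an immediate.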
void
LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                                   MDefinition* lhs, MDefinition* rhs)
{
    ins->setOperand(0, useRegisterAtStart(lhs));
    ins->setOperand(1, lhs != rhs ? useOrConstant(rhs) : useOrConstantAtStart(rhs));
    defineReuseInput(ins, mir, 0);
}

template<size_t Temps>
void
LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir,
                                   MDefinition* lhs, MDefinition* rhs)
{
    // Without AVX, we'll need to use the x86 encodings where one of the
    // inputs must be the same location as the output.
    //
    // :TODO: (Bug 1132894) Note, we might have to allocate a different
    // register if the MIRType of the reused operand differs from the MIRType
    // of the returned value, as MUST_REUSE_INPUT is not yet capable of reusing
    // the same register but with a different register type.
    if (!Assembler::HasAVX() && mir->type() == lhs->type()) {
        ins->setOperand(0, useRegisterAtStart(lhs));
        ins->setOperand(1, lhs != rhs ? use(rhs) : useAtStart(rhs));
        defineReuseInput(ins, mir, 0);
    } else {
        ins->setOperand(0, useRegisterAtStart(lhs));
        ins->setOperand(1, useAtStart(rhs));
        define(ins, mir);
    }
}

template void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                                                 MDefinition* lhs, MDefinition* rhs);
template void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 1>* ins, MDefinition* mir,
                                                 MDefinition* lhs, MDefinition* rhs);

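// Integer SIMD comparisons (PCMPEQD/PCMPGTD) follow the two-address
// integer-ALU pattern, so reuse that lowering.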
void
LIRGeneratorX86Shared::lowerForCompIx4(LSimdBinaryCompIx4* ins, MSimdBinaryComp* mir,
                                       MDefinition* lhs, MDefinition* rhs)
{
    lowerForALU(ins, mir, lhs, rhs);
}

void
LIRGeneratorX86Shared::lowerForCompFx4(LSimdBinaryCompFx4* ins, MSimdBinaryComp* mir,
                                       MDefinition* lhs, MDefinition* rhs)
{
    // Swap the operands around to fit the instructions that x86 actually has.
    // We do this here, before register allocation, so that we don't need
    // temporaries and copying afterwards.
    switch (mir->operation()) {
      case MSimdBinaryComp::greaterThan:
      case MSimdBinaryComp::greaterThanOrEqual:
        mir->reverse();
        Swap(lhs, rhs);
        break;
      default:
        break;
    }

    lowerForFPU(ins, mir, lhs, rhs);
}

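// Fused bitwise-and-and-branch: emits a TEST (or AND) that only sets the
// flags, so the LIR node defines no output register.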
void
LIRGeneratorX86Shared::lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir,
                                               MDefinition* lhs, MDefinition* rhs)
{
    baab->setOperand(0, useRegisterAtStart(lhs));
    baab->setOperand(1, useRegisterOrConstantAtStart(rhs));
    add(baab, mir);
}

void
LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs)
{
    // Note: If we need a negative zero check, lhs is used twice.
    LAllocation lhsCopy = mul->canBeNegativeZero() ? use(lhs) : LAllocation();
    LMulI* lir = new(alloc()) LMulI(useRegisterAtStart(lhs), useOrConstant(rhs), lhsCopy);
    if (mul->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineReuseInput(lir, mul, 0);
}

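// Signed division. x86 IDIV takes its dividend in edx:eax and leaves the
// quotient in eax and the remainder in edx, which dictates the fixed
// registers and temps chosen below.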
void
LIRGeneratorX86Shared::lowerDivI(MDiv* div)
{
    if (div->isUnsigned()) {
        lowerUDiv(div);
        return;
    }

    // Division instructions are slow. Division by constant denominators can be
    // rewritten to use other instructions.
    if (div->rhs()->isConstant()) {
        int32_t rhs = div->rhs()->toConstant()->value().toInt32();

        // Division by powers of two can be done by shifting, and division by
        // other numbers can be done by a reciprocal multiplication technique.
        int32_t shift = FloorLog2(Abs(rhs));
        if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
            LAllocation lhs = useRegisterAtStart(div->lhs());
            LDivPowTwoI* lir;
            if (!div->canBeNegativeDividend()) {
                // Numerator is unsigned, so it does not need adjusting.
                lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift, rhs < 0);
            } else {
                // Numerator is signed and needs adjusting, so an extra
                // lhs copy register is needed.
                lir = new(alloc()) LDivPowTwoI(lhs, useRegister(div->lhs()), shift, rhs < 0);
            }
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, div, 0);
            return;
        }
        if (rhs != 0) {
            LDivOrModConstantI* lir;
            lir = new(alloc()) LDivOrModConstantI(useRegister(div->lhs()), rhs, tempFixed(eax));
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, div, LAllocation(AnyRegister(edx)));
            return;
        }
    }

    LDivI* lir = new(alloc()) LDivI(useRegister(div->lhs()), useRegister(div->rhs()),
                                    tempFixed(edx));
    if (div->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, div, LAllocation(AnyRegister(eax)));
}

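// Signed modulus: same IDIV-based pattern as division, but the result is
// read from edx (the remainder register), with eax as the clobbered temp.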
void
LIRGeneratorX86Shared::lowerModI(MMod* mod)
{
    if (mod->isUnsigned()) {
        lowerUMod(mod);
        return;
    }

    if (mod->rhs()->isConstant()) {
        int32_t rhs = mod->rhs()->toConstant()->value().toInt32();
        int32_t shift = FloorLog2(Abs(rhs));
        if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
            LModPowTwoI* lir = new(alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, mod, 0);
            return;
        }
        if (rhs != 0) {
            LDivOrModConstantI* lir;
            lir = new(alloc()) LDivOrModConstantI(useRegister(mod->lhs()), rhs, tempFixed(edx));
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
            return;
        }
    }

    LModI* lir = new(alloc()) LModI(useRegister(mod->lhs()),
                                    useRegister(mod->rhs()),
                                    tempFixed(eax));
    if (mod->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
}

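// Negation is two-address as well: integer NEG overwrites its operand, and
// float negation XORs the sign bit in place.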
void
LIRGeneratorX86Shared::visitAsmJSNeg(MAsmJSNeg* ins)
{
    switch (ins->type()) {
      case MIRType_Int32:
        defineReuseInput(new(alloc()) LNegI(useRegisterAtStart(ins->input())), ins, 0);
        break;
      case MIRType_Float32:
        defineReuseInput(new(alloc()) LNegF(useRegisterAtStart(ins->input())), ins, 0);
        break;
      case MIRType_Double:
        defineReuseInput(new(alloc()) LNegD(useRegisterAtStart(ins->input())), ins, 0);
        break;
      default:
        MOZ_CRASH();
    }
}

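// Unsigned division uses DIV, with the same edx:eax register conventions as
// the signed case; power-of-two and other constant divisors get fast paths.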
void
LIRGeneratorX86Shared::lowerUDiv(MDiv* div)
{
    if (div->rhs()->isConstant()) {
        uint32_t rhs = div->rhs()->toConstant()->value().toInt32();
        int32_t shift = FloorLog2(rhs);

        LAllocation lhs = useRegisterAtStart(div->lhs());
        if (rhs != 0 && uint32_t(1) << shift == rhs) {
            LDivPowTwoI* lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift, false);
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, div, 0);
        } else {
            LUDivOrModConstant* lir = new(alloc()) LUDivOrModConstant(useRegister(div->lhs()),
                                                                      rhs, tempFixed(eax));
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, div, LAllocation(AnyRegister(edx)));
        }
        return;
    }

    LUDivOrMod* lir = new(alloc()) LUDivOrMod(useRegister(div->lhs()),
                                              useRegister(div->rhs()),
                                              tempFixed(edx));
    if (div->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, div, LAllocation(AnyRegister(eax)));
}

void
LIRGeneratorX86Shared::lowerUMod(MMod* mod)
{
    if (mod->rhs()->isConstant()) {
        uint32_t rhs = mod->rhs()->toConstant()->value().toInt32();
        int32_t shift = FloorLog2(rhs);

        if (rhs != 0 && uint32_t(1) << shift == rhs) {
            LModPowTwoI* lir = new(alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, mod, 0);
        } else {
            LUDivOrModConstant* lir = new(alloc()) LUDivOrModConstant(useRegister(mod->lhs()),
                                                                      rhs, tempFixed(edx));
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
        }
        return;
    }

    LUDivOrMod* lir = new(alloc()) LUDivOrMod(useRegister(mod->lhs()),
                                              useRegister(mod->rhs()),
                                              tempFixed(eax));
    if (mod->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
}

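// Lower x >>> y producing a double, needed when the result may exceed
// INT32_MAX. The shift count is pinned to ecx like other shifts, and
// tempCopy provides a scratch copy of lhs for the back-end to shift.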
void
LIRGeneratorX86Shared::lowerUrshD(MUrsh* mir)
{
    MDefinition* lhs = mir->lhs();
    MDefinition* rhs = mir->rhs();

    MOZ_ASSERT(lhs->type() == MIRType_Int32);
    MOZ_ASSERT(rhs->type() == MIRType_Int32);
    MOZ_ASSERT(mir->type() == MIRType_Double);

#ifdef JS_CODEGEN_X64
    MOZ_ASSERT(ecx == rcx);
#endif

    LUse lhsUse = useRegisterAtStart(lhs);
    LAllocation rhsAlloc = rhs->isConstant() ? useOrConstant(rhs) : useFixed(rhs, ecx);

    LUrshD* lir = new(alloc()) LUrshD(lhsUse, rhsAlloc, tempCopy(lhs, 0));
    define(lir, mir);
}

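// Truncating double -> int32 (and the Float32 variant below). The slow path
// can use SSE3's FISTTP to truncate directly; without SSE3 the fallback
// sequence needs a scratch FP register, hence the conditional temp.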
void
LIRGeneratorX86Shared::lowerTruncateDToInt32(MTruncateToInt32* ins)
{
    MDefinition* opd = ins->input();
    MOZ_ASSERT(opd->type() == MIRType_Double);

    LDefinition maybeTemp = Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempDouble();
    define(new(alloc()) LTruncateDToInt32(useRegister(opd), maybeTemp), ins);
}

void
LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32* ins)
{
    MDefinition* opd = ins->input();
    MOZ_ASSERT(opd->type() == MIRType_Float32);

    LDefinition maybeTemp = Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempFloat32();
    define(new(alloc()) LTruncateFToInt32(useRegister(opd), maybeTemp), ins);
}

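// The underlying instruction is LOCK CMPXCHG: it expects the old value in
// eax and leaves the value previously in memory in eax, which is why eax is
// pinned in the cases below.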
void
LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement* ins,
                                                             bool useI386ByteRegisters)
{
    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float64);

    MOZ_ASSERT(ins->elements()->type() == MIRType_Elements);
    MOZ_ASSERT(ins->index()->type() == MIRType_Int32);

    const LUse elements = useRegister(ins->elements());
    const LAllocation index = useRegisterOrConstant(ins->index());

    // If the target is a floating register then we need a temp at the
    // lower level; that temp must be eax.
    //
    // Otherwise the target (if used) is an integer register, which
    // must be eax.  If the target is not used the machine code will
    // still clobber eax, so just pretend it's used.
    //
    // oldval must be in a register.
    //
    // newval must be in a register.  If the source is a byte array
    // then newval must be a register that has a byte size: on x86
    // this must be ebx, ecx, or edx (eax is taken for the output).
    //
    // Bug #1077036 describes some further optimization opportunities.

    bool fixedOutput = false;
    LDefinition tempDef = LDefinition::BogusTemp();
    LAllocation newval;
    if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
        tempDef = tempFixed(eax);
        newval = useRegister(ins->newval());
    } else {
        fixedOutput = true;
        if (useI386ByteRegisters && ins->isByteArray())
            newval = useFixed(ins->newval(), ebx);
        else
            newval = useRegister(ins->newval());
    }

    const LAllocation oldval = useRegister(ins->oldval());

    LCompareExchangeTypedArrayElement* lir =
        new(alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval, newval, tempDef);

    if (fixedOutput)
        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
    else
        define(lir, ins);
}

void
LIRGeneratorX86Shared::lowerAtomicExchangeTypedArrayElement(MAtomicExchangeTypedArrayElement* ins,
                                                            bool useI386ByteRegisters)
{
    MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);

    MOZ_ASSERT(ins->elements()->type() == MIRType_Elements);
    MOZ_ASSERT(ins->index()->type() == MIRType_Int32);

    const LUse elements = useRegister(ins->elements());
    const LAllocation index = useRegisterOrConstant(ins->index());
    const LAllocation value = useRegister(ins->value());

    // The underlying instruction is XCHG, which can operate on any
    // register.
    //
    // If the target is a floating register (for Uint32) then we need
    // a temp into which to exchange.
    //
    // If the source is a byte array then we need a register that has
    // a byte size; in this case -- on x86 only -- pin the output to
    // an appropriate register and use that as a temp in the back-end.

    LDefinition tempDef = LDefinition::BogusTemp();
    if (ins->arrayType() == Scalar::Uint32) {
        // This restriction is bug 1077305.
        MOZ_ASSERT(ins->type() == MIRType_Double);
        tempDef = temp();
    }

    LAtomicExchangeTypedArrayElement* lir =
        new(alloc()) LAtomicExchangeTypedArrayElement(elements, index, value, tempDef);

    if (useI386ByteRegisters && ins->isByteArray())
        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
    else
        define(lir, ins);
}

void
LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop* ins,
                                                         bool useI386ByteRegisters)
{
    MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float64);

    MOZ_ASSERT(ins->elements()->type() == MIRType_Elements);
    MOZ_ASSERT(ins->index()->type() == MIRType_Int32);

    const LUse elements = useRegister(ins->elements());
    const LAllocation index = useRegisterOrConstant(ins->index());

    // Case 1: the result of the operation is not used.
    //
    // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
    // LOCK OR, or LOCK XOR.  We can do this even for the Uint32 case.

    if (!ins->hasUses()) {
        LAllocation value;
        if (useI386ByteRegisters && ins->isByteArray() && !ins->value()->isConstant())
            value = useFixed(ins->value(), ebx);
        else
            value = useRegisterOrConstant(ins->value());

        LAtomicTypedArrayElementBinopForEffect* lir =
            new(alloc()) LAtomicTypedArrayElementBinopForEffect(elements, index, value);

        add(lir, ins);
        return;
    }

    // Case 2: the result of the operation is used.
    //
    // For ADD and SUB we'll use XADD:
    //
    //    movl       src, output
    //    lock xaddl output, mem
    //
    // For the 8-bit variants XADD needs a byte register for the output.
    //
    // For AND/OR/XOR we need to use a CMPXCHG loop:
    //
    //    movl          *mem, eax
    // L: mov           eax, temp
    //    andl          src, temp
    //    lock cmpxchg  temp, mem  ; reads eax also
    //    jnz           L
    //    ; result in eax
    //
    // Note the placement of L: cmpxchg will update eax with *mem if
    // *mem does not have the expected value, so reloading it at the
    // top of the loop would be redundant.
    //
    // If the array is not a uint32 array then:
    //  - eax should be the output (one result of the cmpxchg)
    //  - there is a temp, which must have a byte register if
    //    the array has 1-byte elements
    //
    // If the array is a uint32 array then:
    //  - eax is the first temp
    //  - we also need a second temp
    //
    // There are optimization opportunities:
    //  - better register allocation in the x86 8-bit case, Bug #1077036.

    bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
    bool fixedOutput = true;
    bool reuseInput = false;
    LDefinition tempDef1 = LDefinition::BogusTemp();
    LDefinition tempDef2 = LDefinition::BogusTemp();
    LAllocation value;

    if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
        value = useRegisterOrConstant(ins->value());
        fixedOutput = false;
        if (bitOp) {
            tempDef1 = tempFixed(eax);
            tempDef2 = temp();
        } else {
            tempDef1 = temp();
        }
    } else if (useI386ByteRegisters && ins->isByteArray()) {
        if (ins->value()->isConstant())
            value = useRegisterOrConstant(ins->value());
        else
            value = useFixed(ins->value(), ebx);
        if (bitOp)
            tempDef1 = tempFixed(ecx);
    } else if (bitOp) {
        value = useRegisterOrConstant(ins->value());
        tempDef1 = temp();
    } else if (ins->value()->isConstant()) {
        fixedOutput = false;
        value = useRegisterOrConstant(ins->value());
    } else {
        fixedOutput = false;
        reuseInput = true;
        value = useRegisterAtStart(ins->value());
    }

    LAtomicTypedArrayElementBinop* lir =
        new(alloc()) LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);

    if (fixedOutput)
        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
    else if (reuseInput)
        defineReuseInput(lir, ins, LAtomicTypedArrayElementBinop::valueOp);
    else
        define(lir, ins);
}

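// Int32x4 multiply has no single instruction before SSE4.1's PMULLD; the
// fallback (PMULUDQ plus shuffles) needs the extra temp allocated below.
// Float32x4 max/minNum/maxNum also need a temp for their NaN handling.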
void
LIRGeneratorX86Shared::visitSimdBinaryArith(MSimdBinaryArith* ins)
{
    MOZ_ASSERT(IsSimdType(ins->lhs()->type()));
    MOZ_ASSERT(IsSimdType(ins->rhs()->type()));
    MOZ_ASSERT(IsSimdType(ins->type()));

    MDefinition* lhs = ins->lhs();
    MDefinition* rhs = ins->rhs();

    if (ins->isCommutative())
        ReorderCommutative(&lhs, &rhs, ins);

    if (ins->type() == MIRType_Int32x4) {
        LSimdBinaryArithIx4* lir = new(alloc()) LSimdBinaryArithIx4();
        bool needsTemp = ins->operation() == MSimdBinaryArith::Op_mul && !MacroAssembler::HasSSE41();
        lir->setTemp(0, needsTemp ? temp(LDefinition::INT32X4) : LDefinition::BogusTemp());
        lowerForFPU(lir, ins, lhs, rhs);
        return;
    }

    MOZ_ASSERT(ins->type() == MIRType_Float32x4, "unknown simd type on binary arith operation");

    LSimdBinaryArithFx4* lir = new(alloc()) LSimdBinaryArithFx4();

    bool needsTemp = ins->operation() == MSimdBinaryArith::Op_max ||
                     ins->operation() == MSimdBinaryArith::Op_minNum ||
                     ins->operation() == MSimdBinaryArith::Op_maxNum;
    lir->setTemp(0, needsTemp ? temp(LDefinition::FLOAT32X4) : LDefinition::BogusTemp());

    lowerForFPU(lir, ins, lhs, rhs);
}

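// SIMD select lowers to a bitwise blend (AND/ANDN/OR) of the two values
// under the mask; the float temp holds an intermediate of that blend.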
void
LIRGeneratorX86Shared::visitSimdSelect(MSimdSelect* ins)
{
    MOZ_ASSERT(IsSimdType(ins->type()));
    MOZ_ASSERT(ins->type() == MIRType_Int32x4 || ins->type() == MIRType_Float32x4,
               "Unknown SIMD kind when doing bitwise operations");

    LSimdSelect* lins = new(alloc()) LSimdSelect;
    MDefinition* r0 = ins->getOperand(0);
    MDefinition* r1 = ins->getOperand(1);
    MDefinition* r2 = ins->getOperand(2);

    lins->setOperand(0, useRegister(r0));
    lins->setOperand(1, useRegister(r1));
    lins->setOperand(2, useRegister(r2));
    lins->setTemp(0, temp(LDefinition::FLOAT32X4));

    define(lins, ins);
}

void
LIRGeneratorX86Shared::visitSimdSplatX4(MSimdSplatX4* ins)
{
    LAllocation x = useRegisterAtStart(ins->getOperand(0));
    LSimdSplatX4* lir = new(alloc()) LSimdSplatX4(x);

    switch (ins->type()) {
      case MIRType_Int32x4:
        define(lir, ins);
        break;
      case MIRType_Float32x4:
        // (Non-AVX) codegen actually wants the input and the output to be in
        // the same register, but we can't currently use defineReuseInput
        // because they have different types (scalar vs vector), so a spill slot
        // for one may not be suitable for the other.
        define(lir, ins);
        break;
      default:
        MOZ_CRASH("Unknown SIMD kind");
    }
}

void
LIRGeneratorX86Shared::visitSimdValueX4(MSimdValueX4* ins)
{
    if (ins->type() == MIRType_Float32x4) {
        // Ideally, x would be used at start and reused for the output, however
        // register allocation currently doesn't permit us to tie together two
        // virtual registers with different types.
        LAllocation x = useRegister(ins->getOperand(0));
        LAllocation y = useRegister(ins->getOperand(1));
        LAllocation z = useRegister(ins->getOperand(2));
        LAllocation w = useRegister(ins->getOperand(3));
        LDefinition t = temp(LDefinition::FLOAT32X4);
        define(new (alloc()) LSimdValueFloat32x4(x, y, z, w, t), ins);
    } else {
        MOZ_ASSERT(ins->type() == MIRType_Int32x4);

        // No defineReuseInput => useAtStart for everyone.
        LAllocation x = useRegisterAtStart(ins->getOperand(0));
        LAllocation y = useRegisterAtStart(ins->getOperand(1));
        LAllocation z = useRegisterAtStart(ins->getOperand(2));
        LAllocation w = useRegisterAtStart(ins->getOperand(3));
        define(new(alloc()) LSimdValueInt32x4(x, y, z, w), ins);
    }
}