1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "jit/x86-shared/Lowering-x86-shared.h"
8
9 #include "mozilla/MathAlgorithms.h"
10
11 #include "jit/MIR.h"
12
13 #include "jit/shared/Lowering-shared-inl.h"
14
15 using namespace js;
16 using namespace js::jit;
17
18 using mozilla::Abs;
19 using mozilla::FloorLog2;
20 using mozilla::Swap;
21
22 LTableSwitch*
newLTableSwitch(const LAllocation & in,const LDefinition & inputCopy,MTableSwitch * tableswitch)23 LIRGeneratorX86Shared::newLTableSwitch(const LAllocation& in, const LDefinition& inputCopy,
24 MTableSwitch* tableswitch)
25 {
26 return new(alloc()) LTableSwitch(in, inputCopy, temp(), tableswitch);
27 }
28
29 LTableSwitchV*
newLTableSwitchV(MTableSwitch * tableswitch)30 LIRGeneratorX86Shared::newLTableSwitchV(MTableSwitch* tableswitch)
31 {
32 return new(alloc()) LTableSwitchV(temp(), tempDouble(), temp(), tableswitch);
33 }
34
35 void
visitGuardShape(MGuardShape * ins)36 LIRGeneratorX86Shared::visitGuardShape(MGuardShape* ins)
37 {
38 MOZ_ASSERT(ins->obj()->type() == MIRType_Object);
39
40 LGuardShape* guard = new(alloc()) LGuardShape(useRegisterAtStart(ins->obj()));
41 assignSnapshot(guard, ins->bailoutKind());
42 add(guard, ins);
43 redefine(ins, ins->obj());
44 }
45
46 void
visitGuardObjectGroup(MGuardObjectGroup * ins)47 LIRGeneratorX86Shared::visitGuardObjectGroup(MGuardObjectGroup* ins)
48 {
49 MOZ_ASSERT(ins->obj()->type() == MIRType_Object);
50
51 LGuardObjectGroup* guard = new(alloc()) LGuardObjectGroup(useRegisterAtStart(ins->obj()));
52 assignSnapshot(guard, ins->bailoutKind());
53 add(guard, ins);
54 redefine(ins, ins->obj());
55 }
56
57 void
visitPowHalf(MPowHalf * ins)58 LIRGeneratorX86Shared::visitPowHalf(MPowHalf* ins)
59 {
60 MDefinition* input = ins->input();
61 MOZ_ASSERT(input->type() == MIRType_Double);
62 LPowHalfD* lir = new(alloc()) LPowHalfD(useRegisterAtStart(input));
63 define(lir, ins);
64 }
65
66 void
lowerForShift(LInstructionHelper<1,2,0> * ins,MDefinition * mir,MDefinition * lhs,MDefinition * rhs)67 LIRGeneratorX86Shared::lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
68 MDefinition* lhs, MDefinition* rhs)
69 {
70 ins->setOperand(0, useRegisterAtStart(lhs));
71
72 // shift operator should be constant or in register ecx
73 // x86 can't shift a non-ecx register
74 if (rhs->isConstant())
75 ins->setOperand(1, useOrConstantAtStart(rhs));
76 else
77 ins->setOperand(1, lhs != rhs ? useFixed(rhs, ecx) : useFixedAtStart(rhs, ecx));
78
79 defineReuseInput(ins, mir, 0);
80 }
81
82 void
lowerForALU(LInstructionHelper<1,1,0> * ins,MDefinition * mir,MDefinition * input)83 LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
84 MDefinition* input)
85 {
86 ins->setOperand(0, useRegisterAtStart(input));
87 defineReuseInput(ins, mir, 0);
88 }
89
90 void
lowerForALU(LInstructionHelper<1,2,0> * ins,MDefinition * mir,MDefinition * lhs,MDefinition * rhs)91 LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
92 MDefinition* lhs, MDefinition* rhs)
93 {
94 ins->setOperand(0, useRegisterAtStart(lhs));
95 ins->setOperand(1, lhs != rhs ? useOrConstant(rhs) : useOrConstantAtStart(rhs));
96 defineReuseInput(ins, mir, 0);
97 }
98
99 template<size_t Temps>
100 void
lowerForFPU(LInstructionHelper<1,2,Temps> * ins,MDefinition * mir,MDefinition * lhs,MDefinition * rhs)101 LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
102 {
103 // Without AVX, we'll need to use the x86 encodings where one of the
104 // inputs must be the same location as the output.
105 //
106 // :TODO: (Bug 1132894) Note, we might have to allocate a different
107 // registers if the MIRType of the reused operand differs from the MIRType
108 // of returned value, as MUST_REUSE_INPUT is not yet capable of reusing the
109 // same register but with a different register type.
110 if (!Assembler::HasAVX() && mir->type() == lhs->type()) {
111 ins->setOperand(0, useRegisterAtStart(lhs));
112 ins->setOperand(1, lhs != rhs ? use(rhs) : useAtStart(rhs));
113 defineReuseInput(ins, mir, 0);
114 } else {
115 ins->setOperand(0, useRegisterAtStart(lhs));
116 ins->setOperand(1, useAtStart(rhs));
117 define(ins, mir);
118 }
119 }
120
121 template void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
122 MDefinition* lhs, MDefinition* rhs);
123 template void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 1>* ins, MDefinition* mir,
124 MDefinition* lhs, MDefinition* rhs);
125
126 void
lowerForCompIx4(LSimdBinaryCompIx4 * ins,MSimdBinaryComp * mir,MDefinition * lhs,MDefinition * rhs)127 LIRGeneratorX86Shared::lowerForCompIx4(LSimdBinaryCompIx4* ins, MSimdBinaryComp* mir, MDefinition* lhs, MDefinition* rhs)
128 {
129 lowerForALU(ins, mir, lhs, rhs);
130 }
131
132 void
lowerForCompFx4(LSimdBinaryCompFx4 * ins,MSimdBinaryComp * mir,MDefinition * lhs,MDefinition * rhs)133 LIRGeneratorX86Shared::lowerForCompFx4(LSimdBinaryCompFx4* ins, MSimdBinaryComp* mir, MDefinition* lhs, MDefinition* rhs)
134 {
135 // Swap the operands around to fit the instructions that x86 actually has.
136 // We do this here, before register allocation, so that we don't need
137 // temporaries and copying afterwards.
138 switch (mir->operation()) {
139 case MSimdBinaryComp::greaterThan:
140 case MSimdBinaryComp::greaterThanOrEqual:
141 mir->reverse();
142 Swap(lhs, rhs);
143 break;
144 default:
145 break;
146 }
147
148 lowerForFPU(ins, mir, lhs, rhs);
149 }
150
151 void
lowerForBitAndAndBranch(LBitAndAndBranch * baab,MInstruction * mir,MDefinition * lhs,MDefinition * rhs)152 LIRGeneratorX86Shared::lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir,
153 MDefinition* lhs, MDefinition* rhs)
154 {
155 baab->setOperand(0, useRegisterAtStart(lhs));
156 baab->setOperand(1, useRegisterOrConstantAtStart(rhs));
157 add(baab, mir);
158 }
159
160 void
lowerMulI(MMul * mul,MDefinition * lhs,MDefinition * rhs)161 LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs)
162 {
163 // Note: If we need a negative zero check, lhs is used twice.
164 LAllocation lhsCopy = mul->canBeNegativeZero() ? use(lhs) : LAllocation();
165 LMulI* lir = new(alloc()) LMulI(useRegisterAtStart(lhs), useOrConstant(rhs), lhsCopy);
166 if (mul->fallible())
167 assignSnapshot(lir, Bailout_DoubleOutput);
168 defineReuseInput(lir, mul, 0);
169 }
170
171 void
lowerDivI(MDiv * div)172 LIRGeneratorX86Shared::lowerDivI(MDiv* div)
173 {
174 if (div->isUnsigned()) {
175 lowerUDiv(div);
176 return;
177 }
178
179 // Division instructions are slow. Division by constant denominators can be
180 // rewritten to use other instructions.
181 if (div->rhs()->isConstant()) {
182 int32_t rhs = div->rhs()->toConstant()->value().toInt32();
183
184 // Division by powers of two can be done by shifting, and division by
185 // other numbers can be done by a reciprocal multiplication technique.
186 int32_t shift = FloorLog2(Abs(rhs));
187 if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
188 LAllocation lhs = useRegisterAtStart(div->lhs());
189 LDivPowTwoI* lir;
190 if (!div->canBeNegativeDividend()) {
191 // Numerator is unsigned, so does not need adjusting.
192 lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift, rhs < 0);
193 } else {
194 // Numerator is signed, and needs adjusting, and an extra
195 // lhs copy register is needed.
196 lir = new(alloc()) LDivPowTwoI(lhs, useRegister(div->lhs()), shift, rhs < 0);
197 }
198 if (div->fallible())
199 assignSnapshot(lir, Bailout_DoubleOutput);
200 defineReuseInput(lir, div, 0);
201 return;
202 }
203 if (rhs != 0) {
204 LDivOrModConstantI* lir;
205 lir = new(alloc()) LDivOrModConstantI(useRegister(div->lhs()), rhs, tempFixed(eax));
206 if (div->fallible())
207 assignSnapshot(lir, Bailout_DoubleOutput);
208 defineFixed(lir, div, LAllocation(AnyRegister(edx)));
209 return;
210 }
211 }
212
213 LDivI* lir = new(alloc()) LDivI(useRegister(div->lhs()), useRegister(div->rhs()),
214 tempFixed(edx));
215 if (div->fallible())
216 assignSnapshot(lir, Bailout_DoubleOutput);
217 defineFixed(lir, div, LAllocation(AnyRegister(eax)));
218 }
219
220 void
lowerModI(MMod * mod)221 LIRGeneratorX86Shared::lowerModI(MMod* mod)
222 {
223 if (mod->isUnsigned()) {
224 lowerUMod(mod);
225 return;
226 }
227
228 if (mod->rhs()->isConstant()) {
229 int32_t rhs = mod->rhs()->toConstant()->value().toInt32();
230 int32_t shift = FloorLog2(Abs(rhs));
231 if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
232 LModPowTwoI* lir = new(alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
233 if (mod->fallible())
234 assignSnapshot(lir, Bailout_DoubleOutput);
235 defineReuseInput(lir, mod, 0);
236 return;
237 }
238 if (rhs != 0) {
239 LDivOrModConstantI* lir;
240 lir = new(alloc()) LDivOrModConstantI(useRegister(mod->lhs()), rhs, tempFixed(edx));
241 if (mod->fallible())
242 assignSnapshot(lir, Bailout_DoubleOutput);
243 defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
244 return;
245 }
246 }
247
248 LModI* lir = new(alloc()) LModI(useRegister(mod->lhs()),
249 useRegister(mod->rhs()),
250 tempFixed(eax));
251 if (mod->fallible())
252 assignSnapshot(lir, Bailout_DoubleOutput);
253 defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
254 }
255
256 void
visitAsmJSNeg(MAsmJSNeg * ins)257 LIRGeneratorX86Shared::visitAsmJSNeg(MAsmJSNeg* ins)
258 {
259 switch (ins->type()) {
260 case MIRType_Int32:
261 defineReuseInput(new(alloc()) LNegI(useRegisterAtStart(ins->input())), ins, 0);
262 break;
263 case MIRType_Float32:
264 defineReuseInput(new(alloc()) LNegF(useRegisterAtStart(ins->input())), ins, 0);
265 break;
266 case MIRType_Double:
267 defineReuseInput(new(alloc()) LNegD(useRegisterAtStart(ins->input())), ins, 0);
268 break;
269 default:
270 MOZ_CRASH();
271 }
272 }
273
274 void
lowerUDiv(MDiv * div)275 LIRGeneratorX86Shared::lowerUDiv(MDiv* div)
276 {
277 if (div->rhs()->isConstant()) {
278 uint32_t rhs = div->rhs()->toConstant()->value().toInt32();
279 int32_t shift = FloorLog2(rhs);
280
281 LAllocation lhs = useRegisterAtStart(div->lhs());
282 if (rhs != 0 && uint32_t(1) << shift == rhs) {
283 LDivPowTwoI* lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift, false);
284 if (div->fallible())
285 assignSnapshot(lir, Bailout_DoubleOutput);
286 defineReuseInput(lir, div, 0);
287 } else {
288 LUDivOrModConstant* lir = new(alloc()) LUDivOrModConstant(useRegister(div->lhs()),
289 rhs, tempFixed(eax));
290 if (div->fallible())
291 assignSnapshot(lir, Bailout_DoubleOutput);
292 defineFixed(lir, div, LAllocation(AnyRegister(edx)));
293 }
294 return;
295 }
296
297 LUDivOrMod* lir = new(alloc()) LUDivOrMod(useRegister(div->lhs()),
298 useRegister(div->rhs()),
299 tempFixed(edx));
300 if (div->fallible())
301 assignSnapshot(lir, Bailout_DoubleOutput);
302 defineFixed(lir, div, LAllocation(AnyRegister(eax)));
303 }
304
305 void
lowerUMod(MMod * mod)306 LIRGeneratorX86Shared::lowerUMod(MMod* mod)
307 {
308 if (mod->rhs()->isConstant()) {
309 uint32_t rhs = mod->rhs()->toConstant()->value().toInt32();
310 int32_t shift = FloorLog2(rhs);
311
312 if (rhs != 0 && uint32_t(1) << shift == rhs) {
313 LModPowTwoI* lir = new(alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
314 if (mod->fallible())
315 assignSnapshot(lir, Bailout_DoubleOutput);
316 defineReuseInput(lir, mod, 0);
317 } else {
318 LUDivOrModConstant* lir = new(alloc()) LUDivOrModConstant(useRegister(mod->lhs()),
319 rhs, tempFixed(edx));
320 if (mod->fallible())
321 assignSnapshot(lir, Bailout_DoubleOutput);
322 defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
323 }
324 return;
325 }
326
327 LUDivOrMod* lir = new(alloc()) LUDivOrMod(useRegister(mod->lhs()),
328 useRegister(mod->rhs()),
329 tempFixed(eax));
330 if (mod->fallible())
331 assignSnapshot(lir, Bailout_DoubleOutput);
332 defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
333 }
334
335 void
lowerUrshD(MUrsh * mir)336 LIRGeneratorX86Shared::lowerUrshD(MUrsh* mir)
337 {
338 MDefinition* lhs = mir->lhs();
339 MDefinition* rhs = mir->rhs();
340
341 MOZ_ASSERT(lhs->type() == MIRType_Int32);
342 MOZ_ASSERT(rhs->type() == MIRType_Int32);
343 MOZ_ASSERT(mir->type() == MIRType_Double);
344
345 #ifdef JS_CODEGEN_X64
346 MOZ_ASSERT(ecx == rcx);
347 #endif
348
349 LUse lhsUse = useRegisterAtStart(lhs);
350 LAllocation rhsAlloc = rhs->isConstant() ? useOrConstant(rhs) : useFixed(rhs, ecx);
351
352 LUrshD* lir = new(alloc()) LUrshD(lhsUse, rhsAlloc, tempCopy(lhs, 0));
353 define(lir, mir);
354 }
355
356 void
lowerTruncateDToInt32(MTruncateToInt32 * ins)357 LIRGeneratorX86Shared::lowerTruncateDToInt32(MTruncateToInt32* ins)
358 {
359 MDefinition* opd = ins->input();
360 MOZ_ASSERT(opd->type() == MIRType_Double);
361
362 LDefinition maybeTemp = Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempDouble();
363 define(new(alloc()) LTruncateDToInt32(useRegister(opd), maybeTemp), ins);
364 }
365
366 void
lowerTruncateFToInt32(MTruncateToInt32 * ins)367 LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32* ins)
368 {
369 MDefinition* opd = ins->input();
370 MOZ_ASSERT(opd->type() == MIRType_Float32);
371
372 LDefinition maybeTemp = Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempFloat32();
373 define(new(alloc()) LTruncateFToInt32(useRegister(opd), maybeTemp), ins);
374 }
375
376 void
lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement * ins,bool useI386ByteRegisters)377 LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement* ins,
378 bool useI386ByteRegisters)
379 {
380 MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
381 MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
382
383 MOZ_ASSERT(ins->elements()->type() == MIRType_Elements);
384 MOZ_ASSERT(ins->index()->type() == MIRType_Int32);
385
386 const LUse elements = useRegister(ins->elements());
387 const LAllocation index = useRegisterOrConstant(ins->index());
388
389 // If the target is a floating register then we need a temp at the
390 // lower level; that temp must be eax.
391 //
392 // Otherwise the target (if used) is an integer register, which
393 // must be eax. If the target is not used the machine code will
394 // still clobber eax, so just pretend it's used.
395 //
396 // oldval must be in a register.
397 //
398 // newval must be in a register. If the source is a byte array
399 // then newval must be a register that has a byte size: on x86
400 // this must be ebx, ecx, or edx (eax is taken for the output).
401 //
402 // Bug #1077036 describes some further optimization opportunities.
403
404 bool fixedOutput = false;
405 LDefinition tempDef = LDefinition::BogusTemp();
406 LAllocation newval;
407 if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
408 tempDef = tempFixed(eax);
409 newval = useRegister(ins->newval());
410 } else {
411 fixedOutput = true;
412 if (useI386ByteRegisters && ins->isByteArray())
413 newval = useFixed(ins->newval(), ebx);
414 else
415 newval = useRegister(ins->newval());
416 }
417
418 const LAllocation oldval = useRegister(ins->oldval());
419
420 LCompareExchangeTypedArrayElement* lir =
421 new(alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval, newval, tempDef);
422
423 if (fixedOutput)
424 defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
425 else
426 define(lir, ins);
427 }
428
429 void
lowerAtomicExchangeTypedArrayElement(MAtomicExchangeTypedArrayElement * ins,bool useI386ByteRegisters)430 LIRGeneratorX86Shared::lowerAtomicExchangeTypedArrayElement(MAtomicExchangeTypedArrayElement* ins,
431 bool useI386ByteRegisters)
432 {
433 MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);
434
435 MOZ_ASSERT(ins->elements()->type() == MIRType_Elements);
436 MOZ_ASSERT(ins->index()->type() == MIRType_Int32);
437
438 const LUse elements = useRegister(ins->elements());
439 const LAllocation index = useRegisterOrConstant(ins->index());
440 const LAllocation value = useRegister(ins->value());
441
442 // The underlying instruction is XCHG, which can operate on any
443 // register.
444 //
445 // If the target is a floating register (for Uint32) then we need
446 // a temp into which to exchange.
447 //
448 // If the source is a byte array then we need a register that has
449 // a byte size; in this case -- on x86 only -- pin the output to
450 // an appropriate register and use that as a temp in the back-end.
451
452 LDefinition tempDef = LDefinition::BogusTemp();
453 if (ins->arrayType() == Scalar::Uint32) {
454 // This restriction is bug 1077305.
455 MOZ_ASSERT(ins->type() == MIRType_Double);
456 tempDef = temp();
457 }
458
459 LAtomicExchangeTypedArrayElement* lir =
460 new(alloc()) LAtomicExchangeTypedArrayElement(elements, index, value, tempDef);
461
462 if (useI386ByteRegisters && ins->isByteArray())
463 defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
464 else
465 define(lir, ins);
466 }
467
468 void
lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop * ins,bool useI386ByteRegisters)469 LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop* ins,
470 bool useI386ByteRegisters)
471 {
472 MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
473 MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
474 MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
475
476 MOZ_ASSERT(ins->elements()->type() == MIRType_Elements);
477 MOZ_ASSERT(ins->index()->type() == MIRType_Int32);
478
479 const LUse elements = useRegister(ins->elements());
480 const LAllocation index = useRegisterOrConstant(ins->index());
481
482 // Case 1: the result of the operation is not used.
483 //
484 // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
485 // LOCK OR, or LOCK XOR. We can do this even for the Uint32 case.
486
487 if (!ins->hasUses()) {
488 LAllocation value;
489 if (useI386ByteRegisters && ins->isByteArray() && !ins->value()->isConstant())
490 value = useFixed(ins->value(), ebx);
491 else
492 value = useRegisterOrConstant(ins->value());
493
494 LAtomicTypedArrayElementBinopForEffect* lir =
495 new(alloc()) LAtomicTypedArrayElementBinopForEffect(elements, index, value);
496
497 add(lir, ins);
498 return;
499 }
500
501 // Case 2: the result of the operation is used.
502 //
503 // For ADD and SUB we'll use XADD:
504 //
505 // movl src, output
506 // lock xaddl output, mem
507 //
508 // For the 8-bit variants XADD needs a byte register for the output.
509 //
510 // For AND/OR/XOR we need to use a CMPXCHG loop:
511 //
512 // movl *mem, eax
513 // L: mov eax, temp
514 // andl src, temp
515 // lock cmpxchg temp, mem ; reads eax also
516 // jnz L
517 // ; result in eax
518 //
519 // Note the placement of L, cmpxchg will update eax with *mem if
520 // *mem does not have the expected value, so reloading it at the
521 // top of the loop would be redundant.
522 //
523 // If the array is not a uint32 array then:
524 // - eax should be the output (one result of the cmpxchg)
525 // - there is a temp, which must have a byte register if
526 // the array has 1-byte elements elements
527 //
528 // If the array is a uint32 array then:
529 // - eax is the first temp
530 // - we also need a second temp
531 //
532 // There are optimization opportunities:
533 // - better register allocation in the x86 8-bit case, Bug #1077036.
534
535 bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
536 bool fixedOutput = true;
537 bool reuseInput = false;
538 LDefinition tempDef1 = LDefinition::BogusTemp();
539 LDefinition tempDef2 = LDefinition::BogusTemp();
540 LAllocation value;
541
542 if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
543 value = useRegisterOrConstant(ins->value());
544 fixedOutput = false;
545 if (bitOp) {
546 tempDef1 = tempFixed(eax);
547 tempDef2 = temp();
548 } else {
549 tempDef1 = temp();
550 }
551 } else if (useI386ByteRegisters && ins->isByteArray()) {
552 if (ins->value()->isConstant())
553 value = useRegisterOrConstant(ins->value());
554 else
555 value = useFixed(ins->value(), ebx);
556 if (bitOp)
557 tempDef1 = tempFixed(ecx);
558 } else if (bitOp) {
559 value = useRegisterOrConstant(ins->value());
560 tempDef1 = temp();
561 } else if (ins->value()->isConstant()) {
562 fixedOutput = false;
563 value = useRegisterOrConstant(ins->value());
564 } else {
565 fixedOutput = false;
566 reuseInput = true;
567 value = useRegisterAtStart(ins->value());
568 }
569
570 LAtomicTypedArrayElementBinop* lir =
571 new(alloc()) LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);
572
573 if (fixedOutput)
574 defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
575 else if (reuseInput)
576 defineReuseInput(lir, ins, LAtomicTypedArrayElementBinop::valueOp);
577 else
578 define(lir, ins);
579 }
580
581 void
visitSimdBinaryArith(MSimdBinaryArith * ins)582 LIRGeneratorX86Shared::visitSimdBinaryArith(MSimdBinaryArith* ins)
583 {
584 MOZ_ASSERT(IsSimdType(ins->lhs()->type()));
585 MOZ_ASSERT(IsSimdType(ins->rhs()->type()));
586 MOZ_ASSERT(IsSimdType(ins->type()));
587
588 MDefinition* lhs = ins->lhs();
589 MDefinition* rhs = ins->rhs();
590
591 if (ins->isCommutative())
592 ReorderCommutative(&lhs, &rhs, ins);
593
594 if (ins->type() == MIRType_Int32x4) {
595 LSimdBinaryArithIx4* lir = new(alloc()) LSimdBinaryArithIx4();
596 bool needsTemp = ins->operation() == MSimdBinaryArith::Op_mul && !MacroAssembler::HasSSE41();
597 lir->setTemp(0, needsTemp ? temp(LDefinition::INT32X4) : LDefinition::BogusTemp());
598 lowerForFPU(lir, ins, lhs, rhs);
599 return;
600 }
601
602 MOZ_ASSERT(ins->type() == MIRType_Float32x4, "unknown simd type on binary arith operation");
603
604 LSimdBinaryArithFx4* lir = new(alloc()) LSimdBinaryArithFx4();
605
606 bool needsTemp = ins->operation() == MSimdBinaryArith::Op_max ||
607 ins->operation() == MSimdBinaryArith::Op_minNum ||
608 ins->operation() == MSimdBinaryArith::Op_maxNum;
609 lir->setTemp(0, needsTemp ? temp(LDefinition::FLOAT32X4) : LDefinition::BogusTemp());
610
611 lowerForFPU(lir, ins, lhs, rhs);
612 }
613
614 void
visitSimdSelect(MSimdSelect * ins)615 LIRGeneratorX86Shared::visitSimdSelect(MSimdSelect* ins)
616 {
617 MOZ_ASSERT(IsSimdType(ins->type()));
618 MOZ_ASSERT(ins->type() == MIRType_Int32x4 || ins->type() == MIRType_Float32x4,
619 "Unknown SIMD kind when doing bitwise operations");
620
621 LSimdSelect* lins = new(alloc()) LSimdSelect;
622 MDefinition* r0 = ins->getOperand(0);
623 MDefinition* r1 = ins->getOperand(1);
624 MDefinition* r2 = ins->getOperand(2);
625
626 lins->setOperand(0, useRegister(r0));
627 lins->setOperand(1, useRegister(r1));
628 lins->setOperand(2, useRegister(r2));
629 lins->setTemp(0, temp(LDefinition::FLOAT32X4));
630
631 define(lins, ins);
632 }
633
634 void
visitSimdSplatX4(MSimdSplatX4 * ins)635 LIRGeneratorX86Shared::visitSimdSplatX4(MSimdSplatX4* ins)
636 {
637 LAllocation x = useRegisterAtStart(ins->getOperand(0));
638 LSimdSplatX4* lir = new(alloc()) LSimdSplatX4(x);
639
640 switch (ins->type()) {
641 case MIRType_Int32x4:
642 define(lir, ins);
643 break;
644 case MIRType_Float32x4:
645 // (Non-AVX) codegen actually wants the input and the output to be in
646 // the same register, but we can't currently use defineReuseInput
647 // because they have different types (scalar vs vector), so a spill slot
648 // for one may not be suitable for the other.
649 define(lir, ins);
650 break;
651 default:
652 MOZ_CRASH("Unknown SIMD kind");
653 }
654 }
655
656 void
visitSimdValueX4(MSimdValueX4 * ins)657 LIRGeneratorX86Shared::visitSimdValueX4(MSimdValueX4* ins)
658 {
659 if (ins->type() == MIRType_Float32x4) {
660 // Ideally, x would be used at start and reused for the output, however
661 // register allocation currently doesn't permit us to tie together two
662 // virtual registers with different types.
663 LAllocation x = useRegister(ins->getOperand(0));
664 LAllocation y = useRegister(ins->getOperand(1));
665 LAllocation z = useRegister(ins->getOperand(2));
666 LAllocation w = useRegister(ins->getOperand(3));
667 LDefinition t = temp(LDefinition::FLOAT32X4);
668 define(new (alloc()) LSimdValueFloat32x4(x, y, z, w, t), ins);
669 } else {
670 MOZ_ASSERT(ins->type() == MIRType_Int32x4);
671
672 // No defineReuseInput => useAtStart for everyone.
673 LAllocation x = useRegisterAtStart(ins->getOperand(0));
674 LAllocation y = useRegisterAtStart(ins->getOperand(1));
675 LAllocation z = useRegisterAtStart(ins->getOperand(2));
676 LAllocation w = useRegisterAtStart(ins->getOperand(3));
677 define(new(alloc()) LSimdValueInt32x4(x, y, z, w), ins);
678 }
679 }
680