/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef jit_x86_shared_MacroAssembler_x86_shared_h
#define jit_x86_shared_MacroAssembler_x86_shared_h

#include "mozilla/Casting.h"

#if defined(JS_CODEGEN_X86)
#include "jit/x86/Assembler-x86.h"
#elif defined(JS_CODEGEN_X64)
#include "jit/x64/Assembler-x64.h"
#endif

namespace js {
namespace jit {

class MacroAssembler;

class MacroAssemblerX86Shared : public Assembler {
 private:
  // Perform a downcast. Should be removed by Bug 996602.
  MacroAssembler& asMasm();
  const MacroAssembler& asMasm() const;

 public:
  typedef Vector<CodeOffset, 0, SystemAllocPolicy> UsesVector;

 protected:
  // For Double, Float and SimdData, make the move ctors explicit so that MSVC
  // knows what to use instead of copying these data structures.
  template <class T>
  struct Constant {
    typedef T Pod;

    T value;
    UsesVector uses;

    explicit Constant(const T& value) : value(value) {}
    Constant(Constant<T>&& other)
        : value(other.value), uses(mozilla::Move(other.uses)) {}
    explicit Constant(const Constant<T>&) = delete;
  };

  // Containers use SystemAllocPolicy since wasm releases memory after each
  // function is compiled, and these need to live until after all functions
  // are compiled.
  using Double = Constant<double>;
  Vector<Double, 0, SystemAllocPolicy> doubles_;
  typedef HashMap<double, size_t, DefaultHasher<double>, SystemAllocPolicy>
      DoubleMap;
  DoubleMap doubleMap_;

  using Float = Constant<float>;
  Vector<Float, 0, SystemAllocPolicy> floats_;
  typedef HashMap<float, size_t, DefaultHasher<float>, SystemAllocPolicy>
      FloatMap;
  FloatMap floatMap_;

  struct SimdData : public Constant<SimdConstant> {
    explicit SimdData(SimdConstant d) : Constant<SimdConstant>(d) {}
    SimdData(SimdData&& d) : Constant<SimdConstant>(mozilla::Move(d)) {}
    explicit SimdData(const SimdData&) = delete;
    SimdConstant::Type type() const { return value.type(); }
  };

  Vector<SimdData, 0, SystemAllocPolicy> simds_;
  typedef HashMap<SimdConstant, size_t, SimdConstant, SystemAllocPolicy>
      SimdMap;
  SimdMap simdMap_;

  template <class T, class Map>
  T* getConstant(const typename T::Pod& value, Map& map,
                 Vector<T, 0, SystemAllocPolicy>& vec);

  Float* getFloat(float f);
  Double* getDouble(double d);
  SimdData* getSimdData(const SimdConstant& v);

 public:
  using Assembler::call;

  MacroAssemblerX86Shared() {}

  bool appendRawCode(const uint8_t* code, size_t numBytes) {
    return masm.appendRawCode(code, numBytes);
  }

  // Evaluate srcDest = minmax<isMax>{Float32,Double}(srcDest, second).
  // Checks for NaN if canBeNaN is true.
  void minMaxDouble(FloatRegister srcDest, FloatRegister second, bool canBeNaN,
                    bool isMax);
  void minMaxFloat32(FloatRegister srcDest, FloatRegister second, bool canBeNaN,
                     bool isMax);

  void compareDouble(DoubleCondition cond, FloatRegister lhs,
                     FloatRegister rhs) {
    if (cond & DoubleConditionBitInvert)
      vucomisd(lhs, rhs);
    else
      vucomisd(rhs, lhs);
  }

  void compareFloat(DoubleCondition cond, FloatRegister lhs,
                    FloatRegister rhs) {
    if (cond & DoubleConditionBitInvert)
      vucomiss(lhs, rhs);
    else
      vucomiss(rhs, lhs);
  }

  void branchNegativeZero(FloatRegister reg, Register scratch, Label* label,
                          bool maybeNonZero = true);
  void branchNegativeZeroFloat32(FloatRegister reg, Register scratch,
                                 Label* label);

  void move32(Imm32 imm, Register dest) {
    // Use the ImmWord version of mov to register, which has special
    // optimizations. Casting to uint32_t here ensures that the value
    // is zero-extended.
    mov(ImmWord(uint32_t(imm.value)), dest);
  }
  void move32(Imm32 imm, const Operand& dest) { movl(imm, dest); }
  void move32(Register src, Register dest) { movl(src, dest); }
  void move32(Register src, const Operand& dest) { movl(src, dest); }
  void test32(Register lhs, Register rhs) { testl(rhs, lhs); }
  void test32(const Address& addr, Imm32 imm) { testl(imm, Operand(addr)); }
  void test32(const Operand lhs, Imm32 imm) { testl(imm, lhs); }
  void test32(Register lhs, Imm32 rhs) { testl(rhs, lhs); }
  void cmp32(Register lhs, Imm32 rhs) { cmpl(rhs, lhs); }
  void cmp32(Register lhs, Register rhs) { cmpl(rhs, lhs); }
  void cmp32(const Address& lhs, Register rhs) { cmp32(Operand(lhs), rhs); }
  void cmp32(const Address& lhs, Imm32 rhs) { cmp32(Operand(lhs), rhs); }
  void cmp32(const Operand& lhs, Imm32 rhs) { cmpl(rhs, lhs); }
  void cmp32(const Operand& lhs, Register rhs) { cmpl(rhs, lhs); }
  void cmp32(Register lhs, const Operand& rhs) { cmpl(rhs, lhs); }
  CodeOffset cmp32WithPatch(Register lhs, Imm32 rhs) {
    return cmplWithPatch(rhs, lhs);
  }
  void atomic_inc32(const Operand& addr) { lock_incl(addr); }
  void atomic_dec32(const Operand& addr) { lock_decl(addr); }

  void storeLoadFence() {
    // This implementation follows Linux.
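    // On pre-SSE2 hardware there is no MFENCE, so fall back to a
    // LOCK-prefixed read-modify-write of the top of the stack
    // (lock addl $0, (%esp)); any locked instruction acts as a full
    // memory barrier on x86.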
    if (HasSSE2())
      masm.mfence();
    else
      lock_addl(Imm32(0), Operand(Address(esp, 0)));
  }

  void branch16(Condition cond, Register lhs, Register rhs, Label* label) {
    cmpw(rhs, lhs);
    j(cond, label);
  }
  void branchTest16(Condition cond, Register lhs, Register rhs, Label* label) {
    testw(rhs, lhs);
    j(cond, label);
  }

  void jump(Label* label) { jmp(label); }
  void jump(JitCode* code) { jmp(code); }
  void jump(TrampolinePtr code) { jmp(ImmPtr(code.value)); }
  void jump(RepatchLabel* label) { jmp(label); }
  void jump(Register reg) { jmp(Operand(reg)); }
  void jump(const Address& addr) { jmp(Operand(addr)); }
  void jump(wasm::OldTrapDesc target) { jmp(target); }

  void convertInt32ToDouble(Register src, FloatRegister dest) {
    // vcvtsi2sd and friends write only part of their output register, which
    // causes slowdowns on out-of-order processors. Explicitly break
    // dependencies with vxorpd (and vxorps elsewhere), which are handled
    // specially in modern CPUs, for this purpose. See sections 8.14, 9.8,
    // 10.8, 12.9, 13.16, 14.14, and 15.8 of Agner's Microarchitecture
    // document.
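    //
    // Concretely, the register-to-register case below emits roughly:
    //   vxorpd    dest, dest, dest   ; break the false output dependency
    //   vcvtsi2sd src, dest, dest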
    zeroDouble(dest);
    vcvtsi2sd(src, dest, dest);
  }
  void convertInt32ToDouble(const Address& src, FloatRegister dest) {
    convertInt32ToDouble(Operand(src), dest);
  }
  void convertInt32ToDouble(const BaseIndex& src, FloatRegister dest) {
    convertInt32ToDouble(Operand(src), dest);
  }
  void convertInt32ToDouble(const Operand& src, FloatRegister dest) {
    // Clear the output register first to break dependencies; see above.
    zeroDouble(dest);
    vcvtsi2sd(Operand(src), dest, dest);
  }
  void convertInt32ToFloat32(Register src, FloatRegister dest) {
    // Clear the output register first to break dependencies; see above.
    zeroFloat32(dest);
    vcvtsi2ss(src, dest, dest);
  }
  void convertInt32ToFloat32(const Address& src, FloatRegister dest) {
    convertInt32ToFloat32(Operand(src), dest);
  }
  void convertInt32ToFloat32(const Operand& src, FloatRegister dest) {
    // Clear the output register first to break dependencies; see above.
    zeroFloat32(dest);
    vcvtsi2ss(src, dest, dest);
  }
  Condition testDoubleTruthy(bool truthy, FloatRegister reg) {
    ScratchDoubleScope scratch(asMasm());
    zeroDouble(scratch);
    vucomisd(reg, scratch);
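    // vucomisd sets ZF when reg compares equal to +0.0 (which -0.0 does) and
    // also when the comparison is unordered (reg is NaN), so the Zero
    // condition covers exactly the falsy double values.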
    return truthy ? NonZero : Zero;
  }

  // Class which ensures that registers used in byte ops are compatible with
  // such instructions, even if the original register passed in wasn't. This
  // only applies to x86, as on x64 all registers are valid single byte regs.
  // This doesn't lead to great code but helps to simplify code generation.
  //
  // Note that this can currently only be used in cases where the register is
  // read from by the guarded instruction, not written to.
  class AutoEnsureByteRegister {
    MacroAssemblerX86Shared* masm;
    Register original_;
    Register substitute_;

   public:
    template <typename T>
    AutoEnsureByteRegister(MacroAssemblerX86Shared* masm, T address,
                           Register reg)
        : masm(masm), original_(reg) {
      AllocatableGeneralRegisterSet singleByteRegs(Registers::SingleByteRegs);
      if (singleByteRegs.has(reg)) {
        substitute_ = reg;
      } else {
        MOZ_ASSERT(address.base != StackPointer);
        do {
          substitute_ = singleByteRegs.takeAny();
        } while (Operand(address).containsReg(substitute_));

        masm->push(substitute_);
        masm->mov(reg, substitute_);
      }
    }

    ~AutoEnsureByteRegister() {
      if (original_ != substitute_) masm->pop(substitute_);
    }

    Register reg() { return substitute_; }
  };
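
  // Usage sketch (mirroring store8 below): wrap the value register before a
  // byte-sized access so that, on x86, a register without a low-byte encoding
  // is transparently swapped for one that has it:
  //
  //   AutoEnsureByteRegister ensure(this, dest, src);
  //   movb(ensure.reg(), Operand(dest));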

  void load8ZeroExtend(const Operand& src, Register dest) { movzbl(src, dest); }
  void load8ZeroExtend(const Address& src, Register dest) {
    movzbl(Operand(src), dest);
  }
  void load8ZeroExtend(const BaseIndex& src, Register dest) {
    movzbl(Operand(src), dest);
  }
  void load8SignExtend(const Operand& src, Register dest) { movsbl(src, dest); }
  void load8SignExtend(const Address& src, Register dest) {
    movsbl(Operand(src), dest);
  }
  void load8SignExtend(const BaseIndex& src, Register dest) {
    movsbl(Operand(src), dest);
  }
  template <typename T>
  void store8(Imm32 src, const T& dest) {
    movb(src, Operand(dest));
  }
  template <typename T>
  void store8(Register src, const T& dest) {
    AutoEnsureByteRegister ensure(this, dest, src);
    movb(ensure.reg(), Operand(dest));
  }
  void load16ZeroExtend(const Operand& src, Register dest) {
    movzwl(src, dest);
  }
  void load16ZeroExtend(const Address& src, Register dest) {
    movzwl(Operand(src), dest);
  }
  void load16ZeroExtend(const BaseIndex& src, Register dest) {
    movzwl(Operand(src), dest);
  }
  template <typename S, typename T>
  void store16(const S& src, const T& dest) {
    movw(src, Operand(dest));
  }
  void load16SignExtend(const Operand& src, Register dest) {
    movswl(src, dest);
  }
  void load16SignExtend(const Address& src, Register dest) {
    movswl(Operand(src), dest);
  }
  void load16SignExtend(const BaseIndex& src, Register dest) {
    movswl(Operand(src), dest);
  }
  void load32(const Address& address, Register dest) {
    movl(Operand(address), dest);
  }
  void load32(const BaseIndex& src, Register dest) { movl(Operand(src), dest); }
  void load32(const Operand& src, Register dest) { movl(src, dest); }
  template <typename S, typename T>
  void store32(const S& src, const T& dest) {
    movl(src, Operand(dest));
  }
  template <typename S, typename T>
  void store32_NoSecondScratch(const S& src, const T& dest) {
    store32(src, dest);
  }
  void loadDouble(const Address& src, FloatRegister dest) { vmovsd(src, dest); }
  void loadDouble(const BaseIndex& src, FloatRegister dest) {
    vmovsd(src, dest);
  }
  void loadDouble(const Operand& src, FloatRegister dest) {
    switch (src.kind()) {
      case Operand::MEM_REG_DISP:
        loadDouble(src.toAddress(), dest);
        break;
      case Operand::MEM_SCALE:
        loadDouble(src.toBaseIndex(), dest);
        break;
      default:
        MOZ_CRASH("unexpected operand kind");
    }
  }
  void moveDouble(FloatRegister src, FloatRegister dest) {
    // Use vmovapd instead of vmovsd to avoid dependencies.
    vmovapd(src, dest);
  }
  void zeroDouble(FloatRegister reg) { vxorpd(reg, reg, reg); }
  void zeroFloat32(FloatRegister reg) { vxorps(reg, reg, reg); }
  void convertFloat32ToDouble(FloatRegister src, FloatRegister dest) {
    vcvtss2sd(src, dest, dest);
  }
  void convertDoubleToFloat32(FloatRegister src, FloatRegister dest) {
    vcvtsd2ss(src, dest, dest);
  }

  void loadInt32x4(const Address& addr, FloatRegister dest) {
    vmovdqa(Operand(addr), dest);
  }
  void loadFloat32x4(const Address& addr, FloatRegister dest) {
    vmovaps(Operand(addr), dest);
  }
  void storeInt32x4(FloatRegister src, const Address& addr) {
    vmovdqa(src, Operand(addr));
  }
  void storeFloat32x4(FloatRegister src, const Address& addr) {
    vmovaps(src, Operand(addr));
  }

  void convertFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest) {
    // Note that if the conversion fails (because the converted result is
    // larger than the maximum signed int32, less than the least signed int32,
    // or NaN), this will return the undefined integer value (0x80000000).
    vcvttps2dq(src, dest);
  }
  void convertInt32x4ToFloat32x4(FloatRegister src, FloatRegister dest) {
    vcvtdq2ps(src, dest);
  }

  void bitwiseAndSimd128(const Operand& src, FloatRegister dest) {
    // TODO Using the "ps" variant for all types incurs a domain crossing
    // penalty for integer types and double.
    vandps(src, dest, dest);
  }
  void bitwiseAndNotSimd128(const Operand& src, FloatRegister dest) {
    vandnps(src, dest, dest);
  }
  void bitwiseOrSimd128(const Operand& src, FloatRegister dest) {
    vorps(src, dest, dest);
  }
  void bitwiseXorSimd128(const Operand& src, FloatRegister dest) {
    vxorps(src, dest, dest);
  }
  void zeroSimd128Float(FloatRegister dest) { vxorps(dest, dest, dest); }
  void zeroSimd128Int(FloatRegister dest) { vpxor(dest, dest, dest); }

  template <class T, class Reg>
  inline void loadScalar(const Operand& src, Reg dest);
  template <class T, class Reg>
  inline void storeScalar(Reg src, const Address& dest);
  template <class T>
  inline void loadAlignedVector(const Address& src, FloatRegister dest);
  template <class T>
  inline void storeAlignedVector(FloatRegister src, const Address& dest);

  void loadInt32x1(const Address& src, FloatRegister dest) {
    vmovd(Operand(src), dest);
  }
  void loadInt32x1(const BaseIndex& src, FloatRegister dest) {
    vmovd(Operand(src), dest);
  }
  void loadInt32x2(const Address& src, FloatRegister dest) {
    vmovq(Operand(src), dest);
  }
  void loadInt32x2(const BaseIndex& src, FloatRegister dest) {
    vmovq(Operand(src), dest);
  }
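
  // Load three consecutive int32 lanes: vmovq picks up lanes x and y, vmovd
  // loads lane z into a scratch register, and vmovlhps folds the scratch
  // register's low half into the destination's high half (the unused w lane
  // should end up zero, since vmovd zero-extends the scratch register).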
  void loadInt32x3(const BaseIndex& src, FloatRegister dest) {
    BaseIndex srcZ(src);
    srcZ.offset += 2 * sizeof(int32_t);

    ScratchSimd128Scope scratch(asMasm());
    vmovq(Operand(src), dest);
    vmovd(Operand(srcZ), scratch);
    vmovlhps(scratch, dest, dest);
  }
  void loadInt32x3(const Address& src, FloatRegister dest) {
    Address srcZ(src);
    srcZ.offset += 2 * sizeof(int32_t);

    ScratchSimd128Scope scratch(asMasm());
    vmovq(Operand(src), dest);
    vmovd(Operand(srcZ), scratch);
    vmovlhps(scratch, dest, dest);
  }

  void loadAlignedSimd128Int(const Address& src, FloatRegister dest) {
    vmovdqa(Operand(src), dest);
  }
  void loadAlignedSimd128Int(const Operand& src, FloatRegister dest) {
    vmovdqa(src, dest);
  }
  void storeAlignedSimd128Int(FloatRegister src, const Address& dest) {
    vmovdqa(src, Operand(dest));
  }
  void moveSimd128Int(FloatRegister src, FloatRegister dest) {
    vmovdqa(src, dest);
  }
  FloatRegister reusedInputInt32x4(FloatRegister src, FloatRegister dest) {
    if (HasAVX()) return src;
    moveSimd128Int(src, dest);
    return dest;
  }
  FloatRegister reusedInputAlignedInt32x4(const Operand& src,
                                          FloatRegister dest) {
    if (HasAVX() && src.kind() == Operand::FPREG)
      return FloatRegister::FromCode(src.fpu());
    loadAlignedSimd128Int(src, dest);
    return dest;
  }
  void loadUnalignedSimd128Int(const Address& src, FloatRegister dest) {
    vmovdqu(Operand(src), dest);
  }
  void loadUnalignedSimd128Int(const BaseIndex& src, FloatRegister dest) {
    vmovdqu(Operand(src), dest);
  }
  void loadUnalignedSimd128Int(const Operand& src, FloatRegister dest) {
    vmovdqu(src, dest);
  }

  void storeInt32x1(FloatRegister src, const Address& dest) {
    vmovd(src, Operand(dest));
  }
  void storeInt32x1(FloatRegister src, const BaseIndex& dest) {
    vmovd(src, Operand(dest));
  }
  void storeInt32x2(FloatRegister src, const Address& dest) {
    vmovq(src, Operand(dest));
  }
  void storeInt32x2(FloatRegister src, const BaseIndex& dest) {
    vmovq(src, Operand(dest));
  }
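
  // Store three consecutive int32 lanes: vmovq writes lanes x and y, then
  // vmovhlps brings the source's high pair down into a scratch register so
  // that lane z can be written with a 32-bit vmovd.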
  void storeInt32x3(FloatRegister src, const Address& dest) {
    Address destZ(dest);
    destZ.offset += 2 * sizeof(int32_t);
    vmovq(src, Operand(dest));
    ScratchSimd128Scope scratch(asMasm());
    vmovhlps(src, scratch, scratch);
    vmovd(scratch, Operand(destZ));
  }
  void storeInt32x3(FloatRegister src, const BaseIndex& dest) {
    BaseIndex destZ(dest);
    destZ.offset += 2 * sizeof(int32_t);
    vmovq(src, Operand(dest));
    ScratchSimd128Scope scratch(asMasm());
    vmovhlps(src, scratch, scratch);
    vmovd(scratch, Operand(destZ));
  }

  void storeUnalignedSimd128Int(FloatRegister src, const Address& dest) {
    vmovdqu(src, Operand(dest));
  }
  void storeUnalignedSimd128Int(FloatRegister src, const BaseIndex& dest) {
    vmovdqu(src, Operand(dest));
  }
  void storeUnalignedSimd128Int(FloatRegister src, const Operand& dest) {
    vmovdqu(src, dest);
  }
  void packedEqualInt32x4(const Operand& src, FloatRegister dest) {
    vpcmpeqd(src, dest, dest);
  }
  void packedGreaterThanInt32x4(const Operand& src, FloatRegister dest) {
    vpcmpgtd(src, dest, dest);
  }
  void packedAddInt8(const Operand& src, FloatRegister dest) {
    vpaddb(src, dest, dest);
  }
  void packedSubInt8(const Operand& src, FloatRegister dest) {
    vpsubb(src, dest, dest);
  }
  void packedAddInt16(const Operand& src, FloatRegister dest) {
    vpaddw(src, dest, dest);
  }
  void packedSubInt16(const Operand& src, FloatRegister dest) {
    vpsubw(src, dest, dest);
  }
  void packedAddInt32(const Operand& src, FloatRegister dest) {
    vpaddd(src, dest, dest);
  }
  void packedSubInt32(const Operand& src, FloatRegister dest) {
    vpsubd(src, dest, dest);
  }
  void packedRcpApproximationFloat32x4(const Operand& src, FloatRegister dest) {
    // This function computes an approximation of the result; it might need a
    // fix-up if the spec requires a given precision for this operation.
    // TODO See also bug 1068028.
    vrcpps(src, dest);
  }
  void packedRcpSqrtApproximationFloat32x4(const Operand& src,
                                           FloatRegister dest) {
    // TODO See comment above. See also bug 1068028.
    vrsqrtps(src, dest);
  }
  void packedSqrtFloat32x4(const Operand& src, FloatRegister dest) {
    vsqrtps(src, dest);
  }

  void packedLeftShiftByScalarInt16x8(FloatRegister src, FloatRegister dest) {
    vpsllw(src, dest, dest);
  }
  void packedLeftShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
    vpsllw(count, dest, dest);
  }
  void packedRightShiftByScalarInt16x8(FloatRegister src, FloatRegister dest) {
    vpsraw(src, dest, dest);
  }
  void packedRightShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
    vpsraw(count, dest, dest);
  }
  void packedUnsignedRightShiftByScalarInt16x8(FloatRegister src,
                                               FloatRegister dest) {
    vpsrlw(src, dest, dest);
  }
  void packedUnsignedRightShiftByScalarInt16x8(Imm32 count,
                                               FloatRegister dest) {
    vpsrlw(count, dest, dest);
  }

  void packedLeftShiftByScalarInt32x4(FloatRegister src, FloatRegister dest) {
    vpslld(src, dest, dest);
  }
  void packedLeftShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
    vpslld(count, dest, dest);
  }
  void packedRightShiftByScalarInt32x4(FloatRegister src, FloatRegister dest) {
    vpsrad(src, dest, dest);
  }
  void packedRightShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
    vpsrad(count, dest, dest);
  }
  void packedUnsignedRightShiftByScalarInt32x4(FloatRegister src,
                                               FloatRegister dest) {
    vpsrld(src, dest, dest);
  }
  void packedUnsignedRightShiftByScalarInt32x4(Imm32 count,
                                               FloatRegister dest) {
    vpsrld(count, dest, dest);
  }

  void loadFloat32x3(const Address& src, FloatRegister dest) {
    Address srcZ(src);
    srcZ.offset += 2 * sizeof(float);
    vmovsd(src, dest);
    ScratchSimd128Scope scratch(asMasm());
    vmovss(srcZ, scratch);
    vmovlhps(scratch, dest, dest);
  }
  void loadFloat32x3(const BaseIndex& src, FloatRegister dest) {
    BaseIndex srcZ(src);
    srcZ.offset += 2 * sizeof(float);
    vmovsd(src, dest);
    ScratchSimd128Scope scratch(asMasm());
    vmovss(srcZ, scratch);
    vmovlhps(scratch, dest, dest);
  }

  void loadAlignedSimd128Float(const Address& src, FloatRegister dest) {
    vmovaps(Operand(src), dest);
  }
  void loadAlignedSimd128Float(const Operand& src, FloatRegister dest) {
    vmovaps(src, dest);
  }

  void storeAlignedSimd128Float(FloatRegister src, const Address& dest) {
    vmovaps(src, Operand(dest));
  }
  void moveSimd128Float(FloatRegister src, FloatRegister dest) {
    vmovaps(src, dest);
  }
  FloatRegister reusedInputFloat32x4(FloatRegister src, FloatRegister dest) {
    if (HasAVX()) return src;
    moveSimd128Float(src, dest);
    return dest;
  }
  FloatRegister reusedInputAlignedFloat32x4(const Operand& src,
                                            FloatRegister dest) {
    if (HasAVX() && src.kind() == Operand::FPREG)
      return FloatRegister::FromCode(src.fpu());
    loadAlignedSimd128Float(src, dest);
    return dest;
  }
  void loadUnalignedSimd128Float(const Address& src, FloatRegister dest) {
    vmovups(Operand(src), dest);
  }
  void loadUnalignedSimd128Float(const BaseIndex& src, FloatRegister dest) {
    vmovdqu(Operand(src), dest);
  }
  void loadUnalignedSimd128Float(const Operand& src, FloatRegister dest) {
    vmovups(src, dest);
  }
  void storeUnalignedSimd128Float(FloatRegister src, const Address& dest) {
    vmovups(src, Operand(dest));
  }
  void storeUnalignedSimd128Float(FloatRegister src, const BaseIndex& dest) {
    vmovups(src, Operand(dest));
  }
  void storeUnalignedSimd128Float(FloatRegister src, const Operand& dest) {
    vmovups(src, dest);
  }
  void packedAddFloat32(const Operand& src, FloatRegister dest) {
    vaddps(src, dest, dest);
  }
  void packedSubFloat32(const Operand& src, FloatRegister dest) {
    vsubps(src, dest, dest);
  }
  void packedMulFloat32(const Operand& src, FloatRegister dest) {
    vmulps(src, dest, dest);
  }
  void packedDivFloat32(const Operand& src, FloatRegister dest) {
    vdivps(src, dest, dest);
  }

  static uint32_t ComputeShuffleMask(uint32_t x = 0, uint32_t y = 1,
                                     uint32_t z = 2, uint32_t w = 3) {
    MOZ_ASSERT(x < 4 && y < 4 && z < 4 && w < 4);
    uint32_t r = (w << 6) | (z << 4) | (y << 2) | (x << 0);
    MOZ_ASSERT(r < 256);
    return r;
  }
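
  // Worked example: ComputeShuffleMask(3, 2, 1, 0) == 0b00011011 (0x1B),
  // which reverses the four 32-bit lanes when used with vpshufd or vshufps.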

  void shuffleInt32(uint32_t mask, FloatRegister src, FloatRegister dest) {
    vpshufd(mask, src, dest);
  }
  void moveLowInt32(FloatRegister src, Register dest) { vmovd(src, dest); }

  void moveHighPairToLowPairFloat32(FloatRegister src, FloatRegister dest) {
    vmovhlps(src, dest, dest);
  }
  void shuffleFloat32(uint32_t mask, FloatRegister src, FloatRegister dest) {
    // The x86 shuffle instruction reads two words from the dest operand and
    // two words from the src operand. To simplify things, just clobber the
    // output with the input and then apply the instruction to it.
    // Note: this is useAtStart-safe because src isn't read afterwards.
    FloatRegister srcCopy = reusedInputFloat32x4(src, dest);
    vshufps(mask, srcCopy, srcCopy, dest);
  }
  void shuffleMix(uint32_t mask, const Operand& src, FloatRegister dest) {
    // Note this uses vshufps, which incurs a cross-domain penalty on CPUs
    // where that applies, but that's the way clang and gcc do it.
    vshufps(mask, src, dest, dest);
  }

  void moveFloatAsDouble(Register src, FloatRegister dest) {
    vmovd(src, dest);
    vcvtss2sd(dest, dest, dest);
  }
  void loadFloatAsDouble(const Address& src, FloatRegister dest) {
    vmovss(src, dest);
    vcvtss2sd(dest, dest, dest);
  }
  void loadFloatAsDouble(const BaseIndex& src, FloatRegister dest) {
    vmovss(src, dest);
    vcvtss2sd(dest, dest, dest);
  }
  void loadFloatAsDouble(const Operand& src, FloatRegister dest) {
    loadFloat32(src, dest);
    vcvtss2sd(dest, dest, dest);
  }
  void loadFloat32(const Address& src, FloatRegister dest) {
    vmovss(src, dest);
  }
  void loadFloat32(const BaseIndex& src, FloatRegister dest) {
    vmovss(src, dest);
  }
  void loadFloat32(const Operand& src, FloatRegister dest) {
    switch (src.kind()) {
      case Operand::MEM_REG_DISP:
        loadFloat32(src.toAddress(), dest);
        break;
      case Operand::MEM_SCALE:
        loadFloat32(src.toBaseIndex(), dest);
        break;
      default:
        MOZ_CRASH("unexpected operand kind");
    }
  }
  void moveFloat32(FloatRegister src, FloatRegister dest) {
    // Use vmovaps instead of vmovss to avoid dependencies.
    vmovaps(src, dest);
  }

  // Checks whether a double is representable as a 32-bit integer. If so, the
  // integer is written to the output register. Otherwise, a bailout is taken to
  // the given snapshot. This function overwrites the scratch float register.
  void convertDoubleToInt32(FloatRegister src, Register dest, Label* fail,
                            bool negativeZeroCheck = true) {
    // Check for -0.0
    if (negativeZeroCheck) branchNegativeZero(src, dest, fail);

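    // Truncate, convert the result back, and compare against the original:
    // the Parity jump catches the unordered (NaN) case and NotEqual catches
    // any value that does not round-trip exactly (out of range or
    // fractional). convertFloat32ToInt32 below follows the same pattern.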
    ScratchDoubleScope scratch(asMasm());
    vcvttsd2si(src, dest);
    convertInt32ToDouble(dest, scratch);
    vucomisd(scratch, src);
    j(Assembler::Parity, fail);
    j(Assembler::NotEqual, fail);
  }

  // Checks whether a float32 is representable as a 32-bit integer. If so, the
  // integer is written to the output register. Otherwise, a bailout is taken to
  // the given snapshot. This function overwrites the scratch float register.
  void convertFloat32ToInt32(FloatRegister src, Register dest, Label* fail,
                             bool negativeZeroCheck = true) {
    // Check for -0.0
    if (negativeZeroCheck) branchNegativeZeroFloat32(src, dest, fail);

    ScratchFloat32Scope scratch(asMasm());
    vcvttss2si(src, dest);
    convertInt32ToFloat32(dest, scratch);
    vucomiss(scratch, src);
    j(Assembler::Parity, fail);
    j(Assembler::NotEqual, fail);
  }

  inline void clampIntToUint8(Register reg);

  bool maybeInlineDouble(double d, FloatRegister dest) {
    // Loading zero with xor is specially optimized in hardware.
    if (mozilla::IsPositiveZero(d)) {
      zeroDouble(dest);
      return true;
    }

    // It is also possible to load several common constants using vpcmpeqw
    // to get all ones and then vpsllq and vpsrlq to get zeros at the ends,
    // as described in "13.4 Generating constants" of
    // "2. Optimizing subroutines in assembly language" by Agner Fog, and as
    // previously implemented here. However, with x86 and x64 both using
    // constant pool loads for double constants, this is probably only
    // worthwhile in cases where a load is likely to be delayed.
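    //
    // For example, 1.0 (bit pattern 0x3FF0000000000000) could be built as:
    //   vpcmpeqw xmm, xmm, xmm   ; all ones
    //   vpsrlq   xmm, 54         ; 0x00000000000003FF
    //   vpsllq   xmm, 52         ; 0x3FF0000000000000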

    return false;
  }

  bool maybeInlineFloat(float f, FloatRegister dest) {
    // See comment above
    if (mozilla::IsPositiveZero(f)) {
      zeroFloat32(dest);
      return true;
    }
    return false;
  }

  bool maybeInlineSimd128Int(const SimdConstant& v, const FloatRegister& dest) {
    static const SimdConstant zero = SimdConstant::SplatX4(0);
    static const SimdConstant minusOne = SimdConstant::SplatX4(-1);
    if (v == zero) {
      zeroSimd128Int(dest);
      return true;
    }
    if (v == minusOne) {
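      // Comparing the register with itself for equality sets every lane to
      // all ones, regardless of dest's previous contents.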
      vpcmpeqw(Operand(dest), dest, dest);
      return true;
    }
    return false;
  }
  bool maybeInlineSimd128Float(const SimdConstant& v,
                               const FloatRegister& dest) {
    static const SimdConstant zero = SimdConstant::SplatX4(0.f);
    if (v == zero) {
      // This won't get inlined if the SimdConstant v contains -0 in any
      // lane, as operator== here does a memcmp.
      zeroSimd128Float(dest);
      return true;
    }
    return false;
  }

  void convertBoolToInt32(Register source, Register dest) {
    // Note that C++ bool is only 1 byte, so zero extend it to clear the
    // higher-order bits.
    movzbl(source, dest);
  }

  void emitSet(Assembler::Condition cond, Register dest,
               Assembler::NaNCond ifNaN = Assembler::NaN_HandledByCond) {
    if (AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(dest)) {
      // If the register we're defining is a single byte register,
      // take advantage of the setCC instruction
      setCC(cond, dest);
      movzbl(dest, dest);

      if (ifNaN != Assembler::NaN_HandledByCond) {
        Label noNaN;
        j(Assembler::NoParity, &noNaN);
        mov(ImmWord(ifNaN == Assembler::NaN_IsTrue), dest);
        bind(&noNaN);
      }
    } else {
      Label end;
      Label ifFalse;

      if (ifNaN == Assembler::NaN_IsFalse) j(Assembler::Parity, &ifFalse);
      // Note a subtlety here: FLAGS is live at this point, and the
      // mov interface doesn't guarantee to preserve FLAGS. Use
      // movl instead of mov, because the movl instruction
      // preserves FLAGS.
      movl(Imm32(1), dest);
      j(cond, &end);
      if (ifNaN == Assembler::NaN_IsTrue) j(Assembler::Parity, &end);
      bind(&ifFalse);
      mov(ImmWord(0), dest);

      bind(&end);
    }
  }

  // Emit a JMP that can be toggled to a CMP. See ToggleToJmp(), ToggleToCmp().
  CodeOffset toggledJump(Label* label) {
    CodeOffset offset(size());
    jump(label);
    return offset;
  }

  template <typename T>
  void computeEffectiveAddress(const T& address, Register dest) {
    lea(Operand(address), dest);
  }

  void checkStackAlignment() {
    // Exists for ARM compatibility.
  }

  CodeOffset labelForPatch() { return CodeOffset(size()); }

  void abiret() { ret(); }

 protected:
  bool buildOOLFakeExitFrame(void* fakeReturnAddr);
};

// Specialize for float to use movaps. Use movdqa for everything else.
template <>
inline void MacroAssemblerX86Shared::loadAlignedVector<float>(
    const Address& src, FloatRegister dest) {
  loadAlignedSimd128Float(src, dest);
}

template <typename T>
inline void MacroAssemblerX86Shared::loadAlignedVector(const Address& src,
                                                       FloatRegister dest) {
  loadAlignedSimd128Int(src, dest);
}

// Specialize for float to use movaps. Use movdqa for everything else.
template <>
inline void MacroAssemblerX86Shared::storeAlignedVector<float>(
    FloatRegister src, const Address& dest) {
  storeAlignedSimd128Float(src, dest);
}

template <typename T>
inline void MacroAssemblerX86Shared::storeAlignedVector(FloatRegister src,
                                                        const Address& dest) {
  storeAlignedSimd128Int(src, dest);
}

template <>
inline void MacroAssemblerX86Shared::loadScalar<int8_t>(const Operand& src,
                                                        Register dest) {
  load8ZeroExtend(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::loadScalar<int16_t>(const Operand& src,
                                                         Register dest) {
  load16ZeroExtend(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::loadScalar<int32_t>(const Operand& src,
                                                         Register dest) {
  load32(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::loadScalar<float>(const Operand& src,
                                                       FloatRegister dest) {
  loadFloat32(src, dest);
}

template <>
inline void MacroAssemblerX86Shared::storeScalar<int8_t>(Register src,
                                                         const Address& dest) {
  store8(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::storeScalar<int16_t>(Register src,
                                                          const Address& dest) {
  store16(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::storeScalar<int32_t>(Register src,
                                                          const Address& dest) {
  store32(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::storeScalar<float>(FloatRegister src,
                                                        const Address& dest) {
  vmovss(src, dest);
}

}  // namespace jit
}  // namespace js

#endif /* jit_x86_shared_MacroAssembler_x86_shared_h */