/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef jit_x86_shared_MacroAssembler_x86_shared_h
#define jit_x86_shared_MacroAssembler_x86_shared_h

#include "mozilla/Casting.h"

#if defined(JS_CODEGEN_X86)
#  include "jit/x86/Assembler-x86.h"
#elif defined(JS_CODEGEN_X64)
#  include "jit/x64/Assembler-x64.h"
#endif

namespace js {
namespace jit {

class MacroAssembler;

class MacroAssemblerX86Shared : public Assembler {
 private:
  // Perform a downcast. Should be removed by Bug 996602.
  MacroAssembler& asMasm();
  const MacroAssembler& asMasm() const;

 public:
  typedef Vector<CodeOffset, 0, SystemAllocPolicy> UsesVector;

 protected:
  // For Double, Float and SimdData, make the move ctors explicit so that MSVC
  // knows what to use instead of copying these data structures.
  template <class T>
  struct Constant {
    using Pod = T;

    T value;
    UsesVector uses;

    explicit Constant(const T& value) : value(value) {}
    Constant(Constant<T>&& other)
        : value(other.value), uses(std::move(other.uses)) {}
    explicit Constant(const Constant<T>&) = delete;
  };

  // Containers use SystemAllocPolicy since wasm releases memory after each
  // function is compiled, and these need to live until after all functions
  // are compiled.
  using Double = Constant<double>;
  Vector<Double, 0, SystemAllocPolicy> doubles_;
  typedef HashMap<double, size_t, DefaultHasher<double>, SystemAllocPolicy>
      DoubleMap;
  DoubleMap doubleMap_;

  using Float = Constant<float>;
  Vector<Float, 0, SystemAllocPolicy> floats_;
  typedef HashMap<float, size_t, DefaultHasher<float>, SystemAllocPolicy>
      FloatMap;
  FloatMap floatMap_;

  struct SimdData : public Constant<SimdConstant> {
    explicit SimdData(SimdConstant d) : Constant<SimdConstant>(d) {}
    SimdData(SimdData&& d) : Constant<SimdConstant>(std::move(d)) {}
    explicit SimdData(const SimdData&) = delete;
    SimdConstant::Type type() const { return value.type(); }
  };

  Vector<SimdData, 0, SystemAllocPolicy> simds_;
  typedef HashMap<SimdConstant, size_t, SimdConstant, SystemAllocPolicy>
      SimdMap;
  SimdMap simdMap_;

  template <class T, class Map>
  T* getConstant(const typename T::Pod& value, Map& map,
                 Vector<T, 0, SystemAllocPolicy>& vec);

  Float* getFloat(float f);
  Double* getDouble(double d);
  SimdData* getSimdData(const SimdConstant& v);

 public:
  using Assembler::call;

  MacroAssemblerX86Shared() = default;

  bool appendRawCode(const uint8_t* code, size_t numBytes) {
    return masm.appendRawCode(code, numBytes);
  }

  void addToPCRel4(uint32_t offset, int32_t bias) {
    return masm.addToPCRel4(offset, bias);
  }

  // Evaluate srcDest = minmax<isMax>{Float32,Double}(srcDest, second).
  // Checks for NaN if canBeNaN is true.
  void minMaxDouble(FloatRegister srcDest, FloatRegister second, bool canBeNaN,
                    bool isMax);
  void minMaxFloat32(FloatRegister srcDest, FloatRegister second, bool canBeNaN,
                     bool isMax);

  void compareDouble(DoubleCondition cond, FloatRegister lhs,
                     FloatRegister rhs) {
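    // Conditions carrying DoubleConditionBitInvert are implemented by
    // swapping the operands of the unordered compare, hence the two cases
    // below.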
    if (cond & DoubleConditionBitInvert) {
      vucomisd(lhs, rhs);
    } else {
      vucomisd(rhs, lhs);
    }
  }

  void compareFloat(DoubleCondition cond, FloatRegister lhs,
                    FloatRegister rhs) {
    if (cond & DoubleConditionBitInvert) {
      vucomiss(lhs, rhs);
    } else {
      vucomiss(rhs, lhs);
    }
  }

  void branchNegativeZero(FloatRegister reg, Register scratch, Label* label,
                          bool maybeNonZero = true);
  void branchNegativeZeroFloat32(FloatRegister reg, Register scratch,
                                 Label* label);

  void move32(Imm32 imm, Register dest) {
    // Use the ImmWord version of mov to register, which has special
    // optimizations. Casting to uint32_t here ensures that the value
    // is zero-extended.
    mov(ImmWord(uint32_t(imm.value)), dest);
  }
  void move32(Imm32 imm, const Operand& dest) { movl(imm, dest); }
  void move32(Register src, Register dest) { movl(src, dest); }
  void move32(Register src, const Operand& dest) { movl(src, dest); }
  void test32(Register lhs, Register rhs) { testl(rhs, lhs); }
  void test32(const Address& addr, Imm32 imm) { testl(imm, Operand(addr)); }
  void test32(const Operand lhs, Imm32 imm) { testl(imm, lhs); }
  void test32(Register lhs, Imm32 rhs) { testl(rhs, lhs); }
  void cmp32(Register lhs, Imm32 rhs) { cmpl(rhs, lhs); }
  void cmp32(Register lhs, Register rhs) { cmpl(rhs, lhs); }
  void cmp32(const Address& lhs, Register rhs) { cmp32(Operand(lhs), rhs); }
  void cmp32(const Address& lhs, Imm32 rhs) { cmp32(Operand(lhs), rhs); }
  void cmp32(const Operand& lhs, Imm32 rhs) { cmpl(rhs, lhs); }
  void cmp32(const Operand& lhs, Register rhs) { cmpl(rhs, lhs); }
  void cmp32(Register lhs, const Operand& rhs) { cmpl(rhs, lhs); }

  void atomic_inc32(const Operand& addr) { lock_incl(addr); }
  void atomic_dec32(const Operand& addr) { lock_decl(addr); }

  void storeLoadFence() {
    // This implementation follows Linux.
    if (HasSSE2()) {
      masm.mfence();
    } else {
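      // Pre-SSE2 CPUs have no MFENCE; a locked add of zero to the top of the
      // stack acts as a full barrier (the same fallback Linux uses).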
      lock_addl(Imm32(0), Operand(Address(esp, 0)));
    }
  }

  void branch16(Condition cond, Register lhs, Register rhs, Label* label) {
    cmpw(rhs, lhs);
    j(cond, label);
  }
  void branchTest16(Condition cond, Register lhs, Register rhs, Label* label) {
    testw(rhs, lhs);
    j(cond, label);
  }

  void jump(Label* label) { jmp(label); }
  void jump(JitCode* code) { jmp(code); }
  void jump(TrampolinePtr code) { jmp(ImmPtr(code.value)); }
  void jump(ImmPtr ptr) { jmp(ptr); }
  void jump(Register reg) { jmp(Operand(reg)); }
  void jump(const Address& addr) { jmp(Operand(addr)); }

  void convertInt32ToDouble(Register src, FloatRegister dest) {
    // vcvtsi2sd and friends write only part of their output register, which
    // causes slowdowns on out-of-order processors. Explicitly break
    // dependencies with vxorpd (and vxorps elsewhere), which are handled
    // specially in modern CPUs, for this purpose. See sections 8.14, 9.8,
    // 10.8, 12.9, 13.16, 14.14, and 15.8 of Agner's Microarchitecture
    // document.
    zeroDouble(dest);
    vcvtsi2sd(src, dest, dest);
  }
  void convertInt32ToDouble(const Address& src, FloatRegister dest) {
    convertInt32ToDouble(Operand(src), dest);
  }
  void convertInt32ToDouble(const BaseIndex& src, FloatRegister dest) {
    convertInt32ToDouble(Operand(src), dest);
  }
  void convertInt32ToDouble(const Operand& src, FloatRegister dest) {
    // Clear the output register first to break dependencies; see above.
    zeroDouble(dest);
    vcvtsi2sd(Operand(src), dest, dest);
  }
  void convertInt32ToFloat32(Register src, FloatRegister dest) {
    // Clear the output register first to break dependencies; see above.
    zeroFloat32(dest);
    vcvtsi2ss(src, dest, dest);
  }
  void convertInt32ToFloat32(const Address& src, FloatRegister dest) {
    convertInt32ToFloat32(Operand(src), dest);
  }
  void convertInt32ToFloat32(const Operand& src, FloatRegister dest) {
    // Clear the output register first to break dependencies; see above.
    zeroFloat32(dest);
    vcvtsi2ss(src, dest, dest);
  }
  Condition testDoubleTruthy(bool truthy, FloatRegister reg) {
    ScratchDoubleScope scratch(asMasm());
    zeroDouble(scratch);
    vucomisd(reg, scratch);
    return truthy ? NonZero : Zero;
  }

  // Class which ensures that registers used in byte ops are compatible with
  // such instructions, even if the original register passed in wasn't. This
  // only applies to x86, as on x64 all registers are valid single byte regs.
  // This doesn't lead to great code but helps to simplify code generation.
  //
  // Note that this can currently only be used in cases where the register is
  // read from by the guarded instruction, not written to.
  class AutoEnsureByteRegister {
    MacroAssemblerX86Shared* masm;
    Register original_;
    Register substitute_;

   public:
    template <typename T>
    AutoEnsureByteRegister(MacroAssemblerX86Shared* masm, T address,
                           Register reg)
        : masm(masm), original_(reg) {
      AllocatableGeneralRegisterSet singleByteRegs(Registers::SingleByteRegs);
      if (singleByteRegs.has(reg)) {
        substitute_ = reg;
      } else {
        MOZ_ASSERT(address.base != StackPointer);
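        // Keep taking candidates until we find a byte register that the
        // address operand itself does not use, so the substitution is safe.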
        do {
          substitute_ = singleByteRegs.takeAny();
        } while (Operand(address).containsReg(substitute_));

        masm->push(substitute_);
        masm->mov(reg, substitute_);
      }
    }

    ~AutoEnsureByteRegister() {
      if (original_ != substitute_) {
        masm->pop(substitute_);
      }
    }

    Register reg() { return substitute_; }
  };

  void load8ZeroExtend(const Operand& src, Register dest) { movzbl(src, dest); }
  void load8ZeroExtend(const Address& src, Register dest) {
    movzbl(Operand(src), dest);
  }
  void load8ZeroExtend(const BaseIndex& src, Register dest) {
    movzbl(Operand(src), dest);
  }
  void load8SignExtend(const Operand& src, Register dest) { movsbl(src, dest); }
  void load8SignExtend(const Address& src, Register dest) {
    movsbl(Operand(src), dest);
  }
  void load8SignExtend(const BaseIndex& src, Register dest) {
    movsbl(Operand(src), dest);
  }
  template <typename T>
  void store8(Imm32 src, const T& dest) {
    movb(src, Operand(dest));
  }
  template <typename T>
  void store8(Register src, const T& dest) {
    AutoEnsureByteRegister ensure(this, dest, src);
    movb(ensure.reg(), Operand(dest));
  }
  void load16ZeroExtend(const Operand& src, Register dest) {
    movzwl(src, dest);
  }
  void load16ZeroExtend(const Address& src, Register dest) {
    movzwl(Operand(src), dest);
  }
  void load16ZeroExtend(const BaseIndex& src, Register dest) {
    movzwl(Operand(src), dest);
  }
  template <typename S>
  void load16UnalignedZeroExtend(const S& src, Register dest) {
    load16ZeroExtend(src, dest);
  }
  template <typename S, typename T>
  void store16(const S& src, const T& dest) {
    movw(src, Operand(dest));
  }
  template <typename S, typename T>
  void store16Unaligned(const S& src, const T& dest) {
    store16(src, dest);
  }
  void load16SignExtend(const Operand& src, Register dest) {
    movswl(src, dest);
  }
  void load16SignExtend(const Address& src, Register dest) {
    movswl(Operand(src), dest);
  }
  void load16SignExtend(const BaseIndex& src, Register dest) {
    movswl(Operand(src), dest);
  }
  template <typename S>
  void load16UnalignedSignExtend(const S& src, Register dest) {
    load16SignExtend(src, dest);
  }
  void load32(const Address& address, Register dest) {
    movl(Operand(address), dest);
  }
  void load32(const BaseIndex& src, Register dest) { movl(Operand(src), dest); }
  void load32(const Operand& src, Register dest) { movl(src, dest); }
  template <typename S>
  void load32Unaligned(const S& src, Register dest) {
    load32(src, dest);
  }
  template <typename S, typename T>
  void store32(const S& src, const T& dest) {
    movl(src, Operand(dest));
  }
  template <typename S, typename T>
  void store32_NoSecondScratch(const S& src, const T& dest) {
    store32(src, dest);
  }
  template <typename S, typename T>
  void store32Unaligned(const S& src, const T& dest) {
    store32(src, dest);
  }
  void loadDouble(const Address& src, FloatRegister dest) { vmovsd(src, dest); }
  void loadDouble(const BaseIndex& src, FloatRegister dest) {
    vmovsd(src, dest);
  }
  void loadDouble(const Operand& src, FloatRegister dest) {
    switch (src.kind()) {
      case Operand::MEM_REG_DISP:
        loadDouble(src.toAddress(), dest);
        break;
      case Operand::MEM_SCALE:
        loadDouble(src.toBaseIndex(), dest);
        break;
      default:
        MOZ_CRASH("unexpected operand kind");
    }
  }
  void moveDouble(FloatRegister src, FloatRegister dest) {
    // Use vmovapd instead of vmovsd to avoid dependencies.
    vmovapd(src, dest);
  }
  void zeroDouble(FloatRegister reg) { vxorpd(reg, reg, reg); }
  void zeroFloat32(FloatRegister reg) { vxorps(reg, reg, reg); }
  void convertFloat32ToDouble(FloatRegister src, FloatRegister dest) {
    vcvtss2sd(src, dest, dest);
  }
  void convertDoubleToFloat32(FloatRegister src, FloatRegister dest) {
    vcvtsd2ss(src, dest, dest);
  }

  void loadInt32x4(const Address& addr, FloatRegister dest) {
    vmovdqa(Operand(addr), dest);
  }
  void loadFloat32x4(const Address& addr, FloatRegister dest) {
    vmovaps(Operand(addr), dest);
  }
  void storeInt32x4(FloatRegister src, const Address& addr) {
    vmovdqa(src, Operand(addr));
  }
  void storeFloat32x4(FloatRegister src, const Address& addr) {
    vmovaps(src, Operand(addr));
  }

  void convertFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest) {
    // Note that if the conversion fails (because the converted result is
    // larger than the maximum signed int32, less than the least signed int32,
    // or NaN), this will return the undefined integer value (0x80000000).
    vcvttps2dq(src, dest);
  }
  void convertInt32x4ToFloat32x4(FloatRegister src, FloatRegister dest) {
    vcvtdq2ps(src, dest);
  }

  // SIMD methods, defined in MacroAssembler-x86-shared-SIMD.cpp.
  void checkedConvertFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest,
                                        Register temp, Label* oolCheck,
                                        Label* rejoin);
  void oolConvertFloat32x4ToInt32x4(FloatRegister src, Register temp,
                                    Label* rejoin, Label* onConversionError);
  void checkedConvertFloat32x4ToUint32x4(FloatRegister src, FloatRegister dest,
                                         Register temp, FloatRegister tempF,
                                         Label* failed);

  void unsignedConvertInt32x4ToFloat32x4(FloatRegister src, FloatRegister dest);

  void truncSatFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest);
  void unsignedTruncSatFloat32x4ToInt32x4(FloatRegister src, FloatRegister temp,
                                          FloatRegister dest);

  void createInt32x4(Register lane0, Register lane1, Register lane2,
                     Register lane3, FloatRegister dest);
  void createFloat32x4(FloatRegister lane0, FloatRegister lane1,
                       FloatRegister lane2, FloatRegister lane3,
                       FloatRegister temp, FloatRegister output);

  void splatX16(Register input, FloatRegister output);
  void splatX8(Register input, FloatRegister output);
  void splatX4(Register input, FloatRegister output);
  void splatX4(FloatRegister input, FloatRegister output);
  void splatX2(FloatRegister input, FloatRegister output);

  void reinterpretSimd(bool isIntegerLaneType, FloatRegister input,
                       FloatRegister output);

  void extractLaneInt32x4(FloatRegister input, Register output, unsigned lane);
  void extractLaneFloat32x4(FloatRegister input, FloatRegister output,
                            unsigned lane);
  void extractLaneFloat64x2(FloatRegister input, FloatRegister output,
                            unsigned lane);
  void extractLaneInt16x8(FloatRegister input, Register output, unsigned lane,
                          SimdSign sign);
  void extractLaneInt8x16(FloatRegister input, Register output, unsigned lane,
                          SimdSign sign);
  void extractLaneSimdBool(FloatRegister input, Register output,
                           unsigned numLanes, unsigned lane);

  void insertLaneSimdInt(FloatRegister input, Register value,
                         FloatRegister output, unsigned lane,
                         unsigned numLanes);
  void insertLaneFloat32x4(FloatRegister input, FloatRegister value,
                           FloatRegister output, unsigned lane);
  void insertLaneFloat64x2(FloatRegister input, FloatRegister value,
                           FloatRegister output, unsigned lane);

  void allTrueSimdBool(FloatRegister input, Register output);
  void anyTrueSimdBool(FloatRegister input, Register output);

  void swizzleInt32x4(FloatRegister input, FloatRegister output,
                      unsigned lanes[4]);
  void swizzleFloat32x4(FloatRegister input, FloatRegister output,
                        unsigned lanes[4]);
  void oldSwizzleInt8x16(FloatRegister input, FloatRegister output,
                         const mozilla::Maybe<Register>& temp,
                         int8_t lanes[16]);

  void shuffleX4(FloatRegister lhs, Operand rhs, FloatRegister out,
                 const mozilla::Maybe<FloatRegister>& maybeTemp,
                 unsigned lanes[4]);
  void shuffleInt8x16(FloatRegister lhs, FloatRegister rhs,
                      FloatRegister output,
                      const mozilla::Maybe<FloatRegister>& maybeFloatTemp,
                      const mozilla::Maybe<Register>& maybeTemp,
                      const uint8_t lanes[16]);
  void blendInt8x16(FloatRegister lhs, FloatRegister rhs, FloatRegister output,
                    FloatRegister temp, const uint8_t lanes[16]);
  void blendInt16x8(FloatRegister lhs, FloatRegister rhs, FloatRegister output,
                    const uint16_t lanes[8]);

  void compareInt8x16(FloatRegister lhs, Operand rhs, Assembler::Condition cond,
                      FloatRegister output);
  void unsignedCompareInt8x16(FloatRegister lhs, Operand rhs,
                              Assembler::Condition cond, FloatRegister output,
                              FloatRegister tmp1, FloatRegister tmp2);
  void compareInt16x8(FloatRegister lhs, Operand rhs, Assembler::Condition cond,
                      FloatRegister output);
  void unsignedCompareInt16x8(FloatRegister lhs, Operand rhs,
                              Assembler::Condition cond, FloatRegister output,
                              FloatRegister tmp1, FloatRegister tmp2);
  void compareInt32x4(FloatRegister lhs, Operand rhs, Assembler::Condition cond,
                      FloatRegister output);
  void unsignedCompareInt32x4(FloatRegister lhs, Operand rhs,
                              Assembler::Condition cond, FloatRegister output,
                              FloatRegister tmp1, FloatRegister tmp2);
  void compareFloat32x4(FloatRegister lhs, Operand rhs,
                        Assembler::Condition cond, FloatRegister output);
  void compareFloat64x2(FloatRegister lhs, Operand rhs,
                        Assembler::Condition cond, FloatRegister output);

  void mulInt32x4(FloatRegister lhs, Operand rhs,
                  const mozilla::Maybe<FloatRegister>& temp,
                  FloatRegister output);

  void negFloat32x4(Operand in, FloatRegister out);
  void negFloat64x2(Operand in, FloatRegister out);

  void notInt8x16(Operand in, FloatRegister out);
  void notInt16x8(Operand in, FloatRegister out);
  void notInt32x4(Operand in, FloatRegister out);
  void notFloat32x4(Operand in, FloatRegister out);

  void minFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister output);
  void maxFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister temp,
                    FloatRegister output);
  void minNumFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister temp,
                       FloatRegister output);
  void maxNumFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister temp,
                       FloatRegister output);

  void minFloat64x2(FloatRegister lhs, Operand rhs, FloatRegister output);
  void maxFloat64x2(FloatRegister lhs, Operand rhs, FloatRegister temp,
                    FloatRegister output);

  void absFloat32x4(Operand in, FloatRegister out);
  void absFloat64x2(Operand in, FloatRegister out);

  void bitwiseAndFloat32x4(FloatRegister lhs, const Operand& rhs,
                           FloatRegister dest) {
    vandps(rhs, lhs, dest);
  }
  void bitwiseAndSimdInt(FloatRegister lhs, const Operand& rhs,
                         FloatRegister dest) {
    vpand(rhs, lhs, dest);
  }

  void bitwiseOrFloat32x4(FloatRegister lhs, const Operand& rhs,
                          FloatRegister dest) {
    vorps(rhs, lhs, dest);
  }
  void bitwiseOrSimdInt(FloatRegister lhs, const Operand& rhs,
                        FloatRegister dest) {
    vpor(rhs, lhs, dest);
  }

  void bitwiseXorFloat32x4(FloatRegister lhs, const Operand& rhs,
                           FloatRegister dest) {
    vxorps(rhs, lhs, dest);
  }
  void bitwiseXorSimdInt(FloatRegister lhs, const Operand& rhs,
                         FloatRegister dest) {
    vpxor(rhs, lhs, dest);
  }

  void bitwiseAndNotFloat32x4(FloatRegister lhs, const Operand& rhs,
                              FloatRegister dest) {
    vandnps(rhs, lhs, dest);
  }
  void bitwiseAndNotSimdInt(FloatRegister lhs, const Operand& rhs,
                            FloatRegister dest) {
    vpandn(rhs, lhs, dest);
  }

  void zeroSimd128Float(FloatRegister dest) { vxorps(dest, dest, dest); }
  void zeroSimd128Int(FloatRegister dest) { vpxor(dest, dest, dest); }

  void selectSimd128(FloatRegister mask, FloatRegister onTrue,
                     FloatRegister onFalse, FloatRegister temp,
                     FloatRegister output);
  void selectX4(FloatRegister mask, FloatRegister onTrue, FloatRegister onFalse,
                FloatRegister temp, FloatRegister output) {
    if (AssemblerX86Shared::HasAVX()) {
      vblendvps(mask, onTrue, onFalse, output);
    } else {
      selectSimd128(mask, onTrue, onFalse, temp, output);
    }
  }

  template <class T, class Reg>
  inline void loadScalar(const Operand& src, Reg dest);
  template <class T, class Reg>
  inline void storeScalar(Reg src, const Address& dest);
  template <class T>
  inline void loadAlignedVector(const Address& src, FloatRegister dest);
  template <class T>
  inline void storeAlignedVector(FloatRegister src, const Address& dest);

  void loadAlignedSimd128Int(const Address& src, FloatRegister dest) {
    vmovdqa(Operand(src), dest);
  }
  void loadAlignedSimd128Int(const Operand& src, FloatRegister dest) {
    vmovdqa(src, dest);
  }
  void storeAlignedSimd128Int(FloatRegister src, const Address& dest) {
    vmovdqa(src, Operand(dest));
  }
  void moveSimd128Int(FloatRegister src, FloatRegister dest) {
    vmovdqa(src, dest);
  }
  FloatRegister reusedInputInt32x4(FloatRegister src, FloatRegister dest) {
    if (HasAVX()) {
      return src;
    }
    moveSimd128Int(src, dest);
    return dest;
  }
  FloatRegister reusedInputAlignedInt32x4(const Operand& src,
                                          FloatRegister dest) {
    if (HasAVX() && src.kind() == Operand::FPREG) {
      return FloatRegister::FromCode(src.fpu());
    }
    loadAlignedSimd128Int(src, dest);
    return dest;
  }
  void loadUnalignedSimd128Int(const Address& src, FloatRegister dest) {
    vmovdqu(Operand(src), dest);
  }
  void loadUnalignedSimd128Int(const BaseIndex& src, FloatRegister dest) {
    vmovdqu(Operand(src), dest);
  }
  void loadUnalignedSimd128Int(const Operand& src, FloatRegister dest) {
    vmovdqu(src, dest);
  }
  void storeUnalignedSimd128Int(FloatRegister src, const Address& dest) {
    vmovdqu(src, Operand(dest));
  }
  void storeUnalignedSimd128Int(FloatRegister src, const BaseIndex& dest) {
    vmovdqu(src, Operand(dest));
  }
  void storeUnalignedSimd128Int(FloatRegister src, const Operand& dest) {
    vmovdqu(src, dest);
  }
  void packedEqualInt32x4(const Operand& src, FloatRegister dest) {
    vpcmpeqd(src, dest, dest);
  }
  void packedGreaterThanInt32x4(const Operand& src, FloatRegister dest) {
    vpcmpgtd(src, dest, dest);
  }
  void packedAddInt8(const Operand& src, FloatRegister dest) {
    vpaddb(src, dest, dest);
  }
  void packedSubInt8(const Operand& src, FloatRegister dest) {
    vpsubb(src, dest, dest);
  }
  void packedAddInt16(const Operand& src, FloatRegister dest) {
    vpaddw(src, dest, dest);
  }
  void packedSubInt16(const Operand& src, FloatRegister dest) {
    vpsubw(src, dest, dest);
  }
  void packedAddInt32(const Operand& src, FloatRegister dest) {
    vpaddd(src, dest, dest);
  }
  void packedSubInt32(const Operand& src, FloatRegister dest) {
    vpsubd(src, dest, dest);
  }
  void packedRcpApproximationFloat32x4(const Operand& src, FloatRegister dest) {
    // This function computes an approximation of the result; it might need a
    // fix-up if the spec requires a given precision for this operation.
    // TODO See also bug 1068028.
    vrcpps(src, dest);
  }
  void packedRcpSqrtApproximationFloat32x4(const Operand& src,
                                           FloatRegister dest) {
    // TODO See comment above. See also bug 1068028.
    vrsqrtps(src, dest);
  }

 private:
  void packedShiftByScalarInt8x16(
      FloatRegister in, Register count, Register temp, FloatRegister xtmp,
      FloatRegister dest,
      void (MacroAssemblerX86Shared::*shift)(FloatRegister, FloatRegister,
                                             FloatRegister),
      void (MacroAssemblerX86Shared::*extend)(const Operand&, FloatRegister));

 public:
  void packedLeftShiftByScalarInt8x16(FloatRegister in, Register count,
                                      Register temp, FloatRegister xtmp,
                                      FloatRegister dest);
  void packedLeftShiftByScalarInt8x16(Imm32 count, FloatRegister src,
                                      FloatRegister dest);
  void packedRightShiftByScalarInt8x16(FloatRegister in, Register count,
                                       Register temp, FloatRegister xtmp,
                                       FloatRegister dest);
  void packedRightShiftByScalarInt8x16(Imm32 count, FloatRegister src,
                                       FloatRegister temp, FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt8x16(FloatRegister in, Register count,
                                               Register temp,
                                               FloatRegister xtmp,
                                               FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt8x16(Imm32 count, FloatRegister src,
                                               FloatRegister dest);

  void packedLeftShiftByScalarInt16x8(FloatRegister in, Register count,
                                      Register temp, FloatRegister dest);
  void packedRightShiftByScalarInt16x8(FloatRegister in, Register count,
                                       Register temp, FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt16x8(FloatRegister in, Register count,
                                               Register temp,
                                               FloatRegister dest);

  void packedLeftShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
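    // The immediate-count shifts here and below mask the count to the lane
    // width, presumably to give modulo-lane-width semantics rather than the
    // hardware's treatment of oversized counts.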
    count.value &= 15;
    vpsllw(count, dest, dest);
  }
  void packedRightShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
    count.value &= 15;
    vpsraw(count, dest, dest);
  }
  void packedUnsignedRightShiftByScalarInt16x8(Imm32 count,
                                               FloatRegister dest) {
    count.value &= 15;
    vpsrlw(count, dest, dest);
  }

  void packedLeftShiftByScalarInt32x4(FloatRegister in, Register count,
                                      Register temp, FloatRegister dest);
  void packedRightShiftByScalarInt32x4(FloatRegister in, Register count,
                                       Register temp, FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt32x4(FloatRegister in, Register count,
                                               Register temp,
                                               FloatRegister dest);
  void packedLeftShiftByScalarInt64x2(FloatRegister in, Register count,
                                      Register temp, FloatRegister dest);
  void packedRightShiftByScalarInt64x2(FloatRegister in, Register count,
                                       Register temp1, FloatRegister temp2,
                                       FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt64x2(FloatRegister in, Register count,
                                               Register temp,
                                               FloatRegister dest);

  void packedLeftShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
    count.value &= 31;
    vpslld(count, dest, dest);
  }
  void packedRightShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
    count.value &= 31;
    vpsrad(count, dest, dest);
  }
  void packedUnsignedRightShiftByScalarInt32x4(Imm32 count,
                                               FloatRegister dest) {
    count.value &= 31;
    vpsrld(count, dest, dest);
  }

  void loadAlignedSimd128Float(const Address& src, FloatRegister dest) {
    vmovaps(Operand(src), dest);
  }
  void loadAlignedSimd128Float(const Operand& src, FloatRegister dest) {
    vmovaps(src, dest);
  }

  void storeAlignedSimd128Float(FloatRegister src, const Address& dest) {
    vmovaps(src, Operand(dest));
  }
  void moveSimd128Float(FloatRegister src, FloatRegister dest) {
    vmovaps(src, dest);
  }
  FloatRegister reusedInputSimd128Float(FloatRegister src, FloatRegister dest) {
    if (HasAVX()) {
      return src;
    }
    moveSimd128Float(src, dest);
    return dest;
  }
  FloatRegister reusedInputAlignedSimd128Float(const Operand& src,
                                               FloatRegister dest) {
    if (HasAVX() && src.kind() == Operand::FPREG) {
      return FloatRegister::FromCode(src.fpu());
    }
    loadAlignedSimd128Float(src, dest);
    return dest;
  }
  void loadUnalignedSimd128(const Operand& src, FloatRegister dest) {
    vmovups(src, dest);
  }
  void storeUnalignedSimd128(FloatRegister src, const Operand& dest) {
    vmovups(src, dest);
  }
  void packedAddFloat32(const Operand& src, FloatRegister dest) {
    vaddps(src, dest, dest);
  }
  void packedSubFloat32(const Operand& src, FloatRegister dest) {
    vsubps(src, dest, dest);
  }
  void packedMulFloat32(const Operand& src, FloatRegister dest) {
    vmulps(src, dest, dest);
  }
  void packedDivFloat32(const Operand& src, FloatRegister dest) {
    vdivps(src, dest, dest);
  }

  static uint32_t ComputeShuffleMask(uint32_t x = 0, uint32_t y = 1,
                                     uint32_t z = 2, uint32_t w = 3) {
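    // For example, ComputeShuffleMask(3, 2, 1, 0) == 0b00011011 (0x1B), the
    // shuffle immediate that reverses the four 32-bit lanes.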
    MOZ_ASSERT(x < 4 && y < 4 && z < 4 && w < 4);
    uint32_t r = (w << 6) | (z << 4) | (y << 2) | (x << 0);
    MOZ_ASSERT(r < 256);
    return r;
  }

  void shuffleInt32(uint32_t mask, FloatRegister src, FloatRegister dest) {
    vpshufd(mask, src, dest);
  }
  void moveLowInt32(FloatRegister src, Register dest) { vmovd(src, dest); }

  void moveHighPairToLowPairFloat32(FloatRegister src, FloatRegister dest) {
    vmovhlps(src, dest, dest);
  }
  void shuffleFloat32(uint32_t mask, FloatRegister src, FloatRegister dest) {
    // The shuffle instruction on x86 is such that it moves 2 words from
    // the dest and 2 words from the src operands. To simplify things, just
    // clobber the output with the input and apply the instruction
    // afterwards.
    // Note: this is useAtStart-safe because src isn't read afterwards.
    FloatRegister srcCopy = reusedInputSimd128Float(src, dest);
    vshufps(mask, srcCopy, srcCopy, dest);
  }
  void shuffleMix(uint32_t mask, const Operand& src, FloatRegister dest) {
    // Note this uses vshufps, which incurs a cross-domain penalty on CPUs
    // where that applies, but that's the way clang and gcc do it.
    vshufps(mask, src, dest, dest);
  }

  void moveFloatAsDouble(Register src, FloatRegister dest) {
    vmovd(src, dest);
    vcvtss2sd(dest, dest, dest);
  }
  void loadFloatAsDouble(const Address& src, FloatRegister dest) {
    vmovss(src, dest);
    vcvtss2sd(dest, dest, dest);
  }
  void loadFloatAsDouble(const BaseIndex& src, FloatRegister dest) {
    vmovss(src, dest);
    vcvtss2sd(dest, dest, dest);
  }
  void loadFloatAsDouble(const Operand& src, FloatRegister dest) {
    loadFloat32(src, dest);
    vcvtss2sd(dest, dest, dest);
  }
  void loadFloat32(const Address& src, FloatRegister dest) {
    vmovss(src, dest);
  }
  void loadFloat32(const BaseIndex& src, FloatRegister dest) {
    vmovss(src, dest);
  }
  void loadFloat32(const Operand& src, FloatRegister dest) {
    switch (src.kind()) {
      case Operand::MEM_REG_DISP:
        loadFloat32(src.toAddress(), dest);
        break;
      case Operand::MEM_SCALE:
        loadFloat32(src.toBaseIndex(), dest);
        break;
      default:
        MOZ_CRASH("unexpected operand kind");
    }
  }
  void moveFloat32(FloatRegister src, FloatRegister dest) {
    // Use vmovaps instead of vmovss to avoid dependencies.
    vmovaps(src, dest);
  }

  // Checks whether a double is representable as a 32-bit integer. If so, the
  // integer is written to the output register. Otherwise, a bailout is taken to
  // the given snapshot. This function overwrites the scratch float register.
  void convertDoubleToInt32(FloatRegister src, Register dest, Label* fail,
                            bool negativeZeroCheck = true) {
    // Check for -0.0
    if (negativeZeroCheck) {
      branchNegativeZero(src, dest, fail);
    }

    ScratchDoubleScope scratch(asMasm());
    vcvttsd2si(src, dest);
    convertInt32ToDouble(dest, scratch);
    vucomisd(scratch, src);
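    // ucomisd sets the parity flag on an unordered compare, so Parity here
    // catches NaN inputs and NotEqual catches values that did not round-trip
    // exactly.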
    j(Assembler::Parity, fail);
    j(Assembler::NotEqual, fail);
  }

  // Checks whether a float32 is representable as a 32-bit integer. If so, the
  // integer is written to the output register. Otherwise, a bailout is taken to
  // the given snapshot. This function overwrites the scratch float register.
  void convertFloat32ToInt32(FloatRegister src, Register dest, Label* fail,
                             bool negativeZeroCheck = true) {
    // Check for -0.0
    if (negativeZeroCheck) {
      branchNegativeZeroFloat32(src, dest, fail);
    }

    ScratchFloat32Scope scratch(asMasm());
    vcvttss2si(src, dest);
    convertInt32ToFloat32(dest, scratch);
    vucomiss(scratch, src);
    j(Assembler::Parity, fail);
    j(Assembler::NotEqual, fail);
  }

  void truncateDoubleToInt32(FloatRegister src, Register dest, Label* fail) {
    // vcvttsd2si returns 0x80000000 on failure. Test for it by
    // subtracting 1 and testing overflow. The other possibility is to test
    // equality for INT_MIN after a comparison, but 1 costs fewer bytes to
    // materialize.
    vcvttsd2si(src, dest);
    cmp32(dest, Imm32(1));
    j(Assembler::Overflow, fail);
  }
  void truncateFloat32ToInt32(FloatRegister src, Register dest, Label* fail) {
    // Same trick as explained in the above comment.
    vcvttss2si(src, dest);
    cmp32(dest, Imm32(1));
    j(Assembler::Overflow, fail);
  }

  inline void clampIntToUint8(Register reg);

  bool maybeInlineDouble(double d, FloatRegister dest) {
    // Loading zero with xor is specially optimized in hardware.
    if (mozilla::IsPositiveZero(d)) {
      zeroDouble(dest);
      return true;
    }

    // It is also possible to load several common constants using vpcmpeqw
    // to get all ones and then vpsllq and vpsrlq to get zeros at the ends,
    // as described in "13.4 Generating constants" of
    // "2. Optimizing subroutines in assembly language" by Agner Fog, and as
    // previously implemented here. However, with x86 and x64 both using
    // constant pool loads for double constants, this is probably only
    // worthwhile in cases where a load is likely to be delayed.

    return false;
  }

  bool maybeInlineFloat(float f, FloatRegister dest) {
    // See comment above.
    if (mozilla::IsPositiveZero(f)) {
      zeroFloat32(dest);
      return true;
    }
    return false;
  }

  bool maybeInlineSimd128Int(const SimdConstant& v, const FloatRegister& dest) {
    static const SimdConstant zero = SimdConstant::SplatX4(0);
    static const SimdConstant minusOne = SimdConstant::SplatX4(-1);
    if (v == zero) {
      zeroSimd128Int(dest);
      return true;
    }
    if (v == minusOne) {
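      // Comparing the register with itself yields all-ones in every lane,
      // avoiding a constant-pool load for -1.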
      vpcmpeqw(Operand(dest), dest, dest);
      return true;
    }
    return false;
  }
  bool maybeInlineSimd128Float(const SimdConstant& v,
                               const FloatRegister& dest) {
    static const SimdConstant zero = SimdConstant::SplatX4(0.f);
    if (v == zero) {
      // This won't get inlined if the SimdConstant v contains -0 in any
      // lane, as operator== here does a memcmp.
      zeroSimd128Float(dest);
      return true;
    }
    return false;
  }

  void convertBoolToInt32(Register source, Register dest) {
    // Note that C++ bool is only 1 byte, so zero extend it to clear the
    // higher-order bits.
    movzbl(source, dest);
  }

  void emitSet(Assembler::Condition cond, Register dest,
               Assembler::NaNCond ifNaN = Assembler::NaN_HandledByCond) {
    if (AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(dest)) {
      // If the register we're defining is a single byte register,
      // take advantage of the setCC instruction.
      setCC(cond, dest);
      movzbl(dest, dest);

      if (ifNaN != Assembler::NaN_HandledByCond) {
        Label noNaN;
        j(Assembler::NoParity, &noNaN);
        mov(ImmWord(ifNaN == Assembler::NaN_IsTrue), dest);
        bind(&noNaN);
      }
    } else {
      Label end;
      Label ifFalse;

      if (ifNaN == Assembler::NaN_IsFalse) {
        j(Assembler::Parity, &ifFalse);
      }
      // Note a subtlety here: FLAGS is live at this point, and the
      // mov interface doesn't guarantee to preserve FLAGS. Use
      // movl instead of mov, because the movl instruction
      // preserves FLAGS.
      movl(Imm32(1), dest);
      j(cond, &end);
      if (ifNaN == Assembler::NaN_IsTrue) {
        j(Assembler::Parity, &end);
      }
      bind(&ifFalse);
      mov(ImmWord(0), dest);

      bind(&end);
    }
  }

  // Emit a JMP that can be toggled to a CMP. See ToggleToJmp(), ToggleToCmp().
  CodeOffset toggledJump(Label* label) {
    CodeOffset offset(size());
    jump(label);
    return offset;
  }

  template <typename T>
  void computeEffectiveAddress(const T& address, Register dest) {
    lea(Operand(address), dest);
  }

  void checkStackAlignment() {
    // Exists for ARM compatibility.
  }

  void abiret() { ret(); }

 protected:
  bool buildOOLFakeExitFrame(void* fakeReturnAddr);
};

// Specialize for float to use movaps. Use movdqa for everything else.
template <>
inline void MacroAssemblerX86Shared::loadAlignedVector<float>(
    const Address& src, FloatRegister dest) {
  loadAlignedSimd128Float(src, dest);
}

template <typename T>
inline void MacroAssemblerX86Shared::loadAlignedVector(const Address& src,
                                                       FloatRegister dest) {
  loadAlignedSimd128Int(src, dest);
}

// Specialize for float to use movaps. Use movdqa for everything else.
template <>
inline void MacroAssemblerX86Shared::storeAlignedVector<float>(
    FloatRegister src, const Address& dest) {
  storeAlignedSimd128Float(src, dest);
}

template <typename T>
inline void MacroAssemblerX86Shared::storeAlignedVector(FloatRegister src,
                                                        const Address& dest) {
  storeAlignedSimd128Int(src, dest);
}

template <>
inline void MacroAssemblerX86Shared::loadScalar<int8_t>(const Operand& src,
                                                        Register dest) {
  load8ZeroExtend(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::loadScalar<int16_t>(const Operand& src,
                                                         Register dest) {
  load16ZeroExtend(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::loadScalar<int32_t>(const Operand& src,
                                                         Register dest) {
  load32(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::loadScalar<float>(const Operand& src,
                                                       FloatRegister dest) {
  loadFloat32(src, dest);
}

template <>
inline void MacroAssemblerX86Shared::storeScalar<int8_t>(Register src,
                                                         const Address& dest) {
  store8(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::storeScalar<int16_t>(Register src,
                                                          const Address& dest) {
  store16(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::storeScalar<int32_t>(Register src,
                                                          const Address& dest) {
  store32(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::storeScalar<float>(FloatRegister src,
                                                        const Address& dest) {
  vmovss(src, dest);
}

}  // namespace jit
}  // namespace js

#endif /* jit_x86_shared_MacroAssembler_x86_shared_h */