/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef jit_x86_shared_MacroAssembler_x86_shared_h
#define jit_x86_shared_MacroAssembler_x86_shared_h

#include "mozilla/Casting.h"

#if defined(JS_CODEGEN_X86)
# include "jit/x86/Assembler-x86.h"
#elif defined(JS_CODEGEN_X64)
# include "jit/x64/Assembler-x64.h"
#endif

#ifdef DEBUG
  #define CHECK_BYTEREG(reg)                                               \
      JS_BEGIN_MACRO                                                       \
        AllocatableGeneralRegisterSet byteRegs(Registers::SingleByteRegs); \
        MOZ_ASSERT(byteRegs.has(reg));                                     \
      JS_END_MACRO
  #define CHECK_BYTEREGS(r1, r2)                                           \
      JS_BEGIN_MACRO                                                       \
        AllocatableGeneralRegisterSet byteRegs(Registers::SingleByteRegs); \
        MOZ_ASSERT(byteRegs.has(r1));                                      \
        MOZ_ASSERT(byteRegs.has(r2));                                      \
      JS_END_MACRO
#else
  #define CHECK_BYTEREG(reg) (void)0
  #define CHECK_BYTEREGS(r1, r2) (void)0
#endif

namespace js {
namespace jit {

class MacroAssembler;

class MacroAssemblerX86Shared : public Assembler
{
  private:
    // Perform a downcast. Should be removed by Bug 996602.
    MacroAssembler& asMasm();
    const MacroAssembler& asMasm() const;

  public:
    typedef Vector<CodeOffset, 0, SystemAllocPolicy> UsesVector;

  protected:

    // For Double, Float and SimdData, make the move ctors explicit so that MSVC
    // knows what to use instead of copying these data structures.
    template<class T>
    struct Constant {
        typedef T Pod;

        T value;
        UsesVector uses;

        explicit Constant(const T& value) : value(value) {}
        Constant(Constant<T>&& other) : value(other.value), uses(mozilla::Move(other.uses)) {}
        explicit Constant(const Constant<T>&) = delete;
    };

    // Containers use SystemAllocPolicy since wasm releases memory after each
    // function is compiled, and these need to live until after all functions
    // are compiled.
    using Double = Constant<uint64_t>;
    Vector<Double, 0, SystemAllocPolicy> doubles_;
    typedef HashMap<uint64_t, size_t, DefaultHasher<uint64_t>, SystemAllocPolicy> DoubleMap;
    DoubleMap doubleMap_;

    using Float = Constant<uint32_t>;
    Vector<Float, 0, SystemAllocPolicy> floats_;
    typedef HashMap<uint32_t, size_t, DefaultHasher<uint32_t>, SystemAllocPolicy> FloatMap;
    FloatMap floatMap_;

    struct SimdData : public Constant<SimdConstant> {
        explicit SimdData(SimdConstant d) : Constant<SimdConstant>(d) {}
        SimdData(SimdData&& d) : Constant<SimdConstant>(mozilla::Move(d)) {}
        explicit SimdData(const SimdData&) = delete;
        SimdConstant::Type type() const { return value.type(); }
    };

    Vector<SimdData, 0, SystemAllocPolicy> simds_;
    typedef HashMap<SimdConstant, size_t, SimdConstant, SystemAllocPolicy> SimdMap;
    SimdMap simdMap_;

    template<class T, class Map>
    T* getConstant(const typename T::Pod& value, Map& map, Vector<T, 0, SystemAllocPolicy>& vec);

    Float* getFloat(wasm::RawF32 f);
    Double* getDouble(wasm::RawF64 d);
    SimdData* getSimdData(const SimdConstant& v);

  public:
    using Assembler::call;

    MacroAssemblerX86Shared()
    { }

    bool asmMergeWith(const MacroAssemblerX86Shared& other);

    // Evaluate srcDest = minmax<isMax>{Float32,Double}(srcDest, second).
    // Checks for NaN if canBeNaN is true.
    void minMaxDouble(FloatRegister srcDest, FloatRegister second, bool canBeNaN, bool isMax);
    void minMaxFloat32(FloatRegister srcDest, FloatRegister second, bool canBeNaN, bool isMax);
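    // Note: the bare x86 minsd/maxsd instructions return their second operand
    // whenever either input is NaN (and when comparing +0 with -0), so they do
    // not implement the JS min/max semantics on their own.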

    void compareDouble(DoubleCondition cond, FloatRegister lhs, FloatRegister rhs) {
        if (cond & DoubleConditionBitInvert)
            vucomisd(lhs, rhs);
        else
            vucomisd(rhs, lhs);
    }

    void compareFloat(DoubleCondition cond, FloatRegister lhs, FloatRegister rhs) {
        if (cond & DoubleConditionBitInvert)
            vucomiss(lhs, rhs);
        else
            vucomiss(rhs, lhs);
    }

    void branchNegativeZero(FloatRegister reg, Register scratch, Label* label, bool maybeNonZero = true);
    void branchNegativeZeroFloat32(FloatRegister reg, Register scratch, Label* label);

    void move32(Imm32 imm, Register dest) {
        // Use the ImmWord version of mov to register, which has special
        // optimizations. Casting to uint32_t here ensures that the value
        // is zero-extended.
        mov(ImmWord(uint32_t(imm.value)), dest);
    }
    void move32(Imm32 imm, const Operand& dest) {
        movl(imm, dest);
    }
    void move32(Register src, Register dest) {
        movl(src, dest);
    }
    void move32(Register src, const Operand& dest) {
        movl(src, dest);
    }
    void test32(Register lhs, Register rhs) {
        testl(rhs, lhs);
    }
    void test32(const Address& addr, Imm32 imm) {
        testl(imm, Operand(addr));
    }
    void test32(const Operand lhs, Imm32 imm) {
        testl(imm, lhs);
    }
    void test32(Register lhs, Imm32 rhs) {
        testl(rhs, lhs);
    }
    void cmp32(Register lhs, Imm32 rhs) {
        cmpl(rhs, lhs);
    }
    void cmp32(Register lhs, Register rhs) {
        cmpl(rhs, lhs);
    }
    void cmp32(const Address& lhs, Register rhs) {
        cmp32(Operand(lhs), rhs);
    }
    void cmp32(const Address& lhs, Imm32 rhs) {
        cmp32(Operand(lhs), rhs);
    }
    void cmp32(const Operand& lhs, Imm32 rhs) {
        cmpl(rhs, lhs);
    }
    void cmp32(const Operand& lhs, Register rhs) {
        cmpl(rhs, lhs);
    }
    void cmp32(Register lhs, const Operand& rhs) {
        cmpl(rhs, lhs);
    }
    CodeOffset cmp32WithPatch(Register lhs, Imm32 rhs) {
        return cmplWithPatch(rhs, lhs);
    }
    void atomic_inc32(const Operand& addr) {
        lock_incl(addr);
    }
    void atomic_dec32(const Operand& addr) {
        lock_decl(addr);
    }

    template <typename T>
    void atomicFetchAdd8SignExtend(Register src, const T& mem, Register temp, Register output) {
        CHECK_BYTEREGS(src, output);
        if (src != output)
            movl(src, output);
        lock_xaddb(output, Operand(mem));
        movsbl(output, output);
    }

    template <typename T>
    void atomicFetchAdd8ZeroExtend(Register src, const T& mem, Register temp, Register output) {
        CHECK_BYTEREGS(src, output);
        MOZ_ASSERT(temp == InvalidReg);
        if (src != output)
            movl(src, output);
        lock_xaddb(output, Operand(mem));
        movzbl(output, output);
    }

    template <typename T>
    void atomicFetchAdd8SignExtend(Imm32 src, const T& mem, Register temp, Register output) {
        CHECK_BYTEREG(output);
        MOZ_ASSERT(temp == InvalidReg);
        movb(src, output);
        lock_xaddb(output, Operand(mem));
        movsbl(output, output);
    }

    template <typename T>
    void atomicFetchAdd8ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) {
        CHECK_BYTEREG(output);
        MOZ_ASSERT(temp == InvalidReg);
        movb(src, output);
        lock_xaddb(output, Operand(mem));
        movzbl(output, output);
    }

    template <typename T>
    void atomicFetchAdd16SignExtend(Register src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        if (src != output)
            movl(src, output);
        lock_xaddw(output, Operand(mem));
        movswl(output, output);
    }

    template <typename T>
    void atomicFetchAdd16ZeroExtend(Register src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        if (src != output)
            movl(src, output);
        lock_xaddw(output, Operand(mem));
        movzwl(output, output);
    }

    template <typename T>
    void atomicFetchAdd16SignExtend(Imm32 src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        movl(src, output);
        lock_xaddw(output, Operand(mem));
        movswl(output, output);
    }

    template <typename T>
    void atomicFetchAdd16ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        movl(src, output);
        lock_xaddw(output, Operand(mem));
        movzwl(output, output);
    }

    template <typename T>
    void atomicFetchAdd32(Register src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        if (src != output)
            movl(src, output);
        lock_xaddl(output, Operand(mem));
    }

    template <typename T>
    void atomicFetchAdd32(Imm32 src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        movl(src, output);
        lock_xaddl(output, Operand(mem));
    }

    template <typename T>
    void atomicFetchSub8SignExtend(Register src, const T& mem, Register temp, Register output) {
        CHECK_BYTEREGS(src, output);
        MOZ_ASSERT(temp == InvalidReg);
        if (src != output)
            movl(src, output);
        negl(output);
        lock_xaddb(output, Operand(mem));
        movsbl(output, output);
    }

    template <typename T>
    void atomicFetchSub8ZeroExtend(Register src, const T& mem, Register temp, Register output) {
        CHECK_BYTEREGS(src, output);
        MOZ_ASSERT(temp == InvalidReg);
        if (src != output)
            movl(src, output);
        negl(output);
        lock_xaddb(output, Operand(mem));
        movzbl(output, output);
    }

    template <typename T>
    void atomicFetchSub8SignExtend(Imm32 src, const T& mem, Register temp, Register output) {
        CHECK_BYTEREG(output);
        MOZ_ASSERT(temp == InvalidReg);
        movb(Imm32(-src.value), output);
        lock_xaddb(output, Operand(mem));
        movsbl(output, output);
    }

    template <typename T>
    void atomicFetchSub8ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) {
        CHECK_BYTEREG(output);
        MOZ_ASSERT(temp == InvalidReg);
        movb(Imm32(-src.value), output);
        lock_xaddb(output, Operand(mem));
        movzbl(output, output);
    }

    template <typename T>
    void atomicFetchSub16SignExtend(Register src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        if (src != output)
            movl(src, output);
        negl(output);
        lock_xaddw(output, Operand(mem));
        movswl(output, output);
    }

    template <typename T>
    void atomicFetchSub16ZeroExtend(Register src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        if (src != output)
            movl(src, output);
        negl(output);
        lock_xaddw(output, Operand(mem));
        movzwl(output, output);
    }

    template <typename T>
    void atomicFetchSub16SignExtend(Imm32 src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        movl(Imm32(-src.value), output);
        lock_xaddw(output, Operand(mem));
        movswl(output, output);
    }

    template <typename T>
    void atomicFetchSub16ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        movl(Imm32(-src.value), output);
        lock_xaddw(output, Operand(mem));
        movzwl(output, output);
    }

    template <typename T>
    void atomicFetchSub32(Register src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        if (src != output)
            movl(src, output);
        negl(output);
        lock_xaddl(output, Operand(mem));
    }

    template <typename T>
    void atomicFetchSub32(Imm32 src, const T& mem, Register temp, Register output) {
        movl(Imm32(-src.value), output);
        lock_xaddl(output, Operand(mem));
    }

    // requires output == eax
#define ATOMIC_BITOP_BODY(LOAD, OP, LOCK_CMPXCHG) \
        MOZ_ASSERT(output == eax);                \
        LOAD(Operand(mem), eax);                  \
        Label again;                              \
        bind(&again);                             \
        movl(eax, temp);                          \
        OP(src, temp);                            \
        LOCK_CMPXCHG(temp, Operand(mem));         \
        j(NonZero, &again);
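    // The body above is a standard compare-and-swap loop: the current value is
    // loaded into eax, the updated value is computed in temp, and lock cmpxchg
    // publishes it only if memory still holds eax's value; otherwise cmpxchg
    // reloads eax with the current value and the loop retries. On exit eax
    // holds the old value, i.e. the fetch result.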

    template <typename S, typename T>
    void atomicFetchAnd8SignExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movb, andl, lock_cmpxchgb)
        CHECK_BYTEREG(temp);
        movsbl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchAnd8ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movb, andl, lock_cmpxchgb)
        CHECK_BYTEREG(temp);
        movzbl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchAnd16SignExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movw, andl, lock_cmpxchgw)
        movswl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchAnd16ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movw, andl, lock_cmpxchgw)
        movzwl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchAnd32(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movl, andl, lock_cmpxchgl)
    }

    template <typename S, typename T>
    void atomicFetchOr8SignExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movb, orl, lock_cmpxchgb)
        CHECK_BYTEREG(temp);
        movsbl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchOr8ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movb, orl, lock_cmpxchgb)
        CHECK_BYTEREG(temp);
        movzbl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchOr16SignExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movw, orl, lock_cmpxchgw)
        movswl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchOr16ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movw, orl, lock_cmpxchgw)
        movzwl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchOr32(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movl, orl, lock_cmpxchgl)
    }

    template <typename S, typename T>
    void atomicFetchXor8SignExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movb, xorl, lock_cmpxchgb)
        CHECK_BYTEREG(temp);
        movsbl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchXor8ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movb, xorl, lock_cmpxchgb)
        CHECK_BYTEREG(temp);
        movzbl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchXor16SignExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movw, xorl, lock_cmpxchgw)
        movswl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchXor16ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movw, xorl, lock_cmpxchgw)
        movzwl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchXor32(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movl, xorl, lock_cmpxchgl)
    }

#undef ATOMIC_BITOP_BODY

    // S is Register or Imm32; T is Address or BaseIndex.

    template <typename S, typename T>
    void atomicAdd8(const S& src, const T& mem) {
        lock_addb(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicAdd16(const S& src, const T& mem) {
        lock_addw(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicAdd32(const S& src, const T& mem) {
        lock_addl(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicSub8(const S& src, const T& mem) {
        lock_subb(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicSub16(const S& src, const T& mem) {
        lock_subw(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicSub32(const S& src, const T& mem) {
        lock_subl(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicAnd8(const S& src, const T& mem) {
        lock_andb(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicAnd16(const S& src, const T& mem) {
        lock_andw(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicAnd32(const S& src, const T& mem) {
        lock_andl(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicOr8(const S& src, const T& mem) {
        lock_orb(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicOr16(const S& src, const T& mem) {
        lock_orw(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicOr32(const S& src, const T& mem) {
        lock_orl(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicXor8(const S& src, const T& mem) {
        lock_xorb(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicXor16(const S& src, const T& mem) {
        lock_xorw(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicXor32(const S& src, const T& mem) {
        lock_xorl(src, Operand(mem));
    }

    void storeLoadFence() {
        // This implementation follows Linux.
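        // mfence is an SSE2 instruction; without SSE2, any locked
        // read-modify-write (here a lock addl of 0 to the top of the stack)
        // also acts as a full barrier on x86.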
        if (HasSSE2())
            masm.mfence();
        else
            lock_addl(Imm32(0), Operand(Address(esp, 0)));
    }

    void branch16(Condition cond, Register lhs, Register rhs, Label* label) {
        cmpw(rhs, lhs);
        j(cond, label);
    }
    void branchTest16(Condition cond, Register lhs, Register rhs, Label* label) {
        testw(rhs, lhs);
        j(cond, label);
    }

    void jump(Label* label) {
        jmp(label);
    }
    void jump(JitCode* code) {
        jmp(code);
    }
    void jump(RepatchLabel* label) {
        jmp(label);
    }
    void jump(Register reg) {
        jmp(Operand(reg));
    }
    void jump(const Address& addr) {
        jmp(Operand(addr));
    }
    void jump(wasm::TrapDesc target) {
        jmp(target);
    }

    void convertInt32ToDouble(Register src, FloatRegister dest) {
        // vcvtsi2sd and friends write only part of their output register, which
        // causes slowdowns on out-of-order processors. Explicitly break
        // dependencies with vxorpd (and vxorps elsewhere), which modern CPUs
        // handle specially for this purpose. See sections 8.14, 9.8, 10.8, 12.9,
        // 13.16, 14.14, and 15.8 of Agner's Microarchitecture document.
        zeroDouble(dest);
        vcvtsi2sd(src, dest, dest);
    }
    void convertInt32ToDouble(const Address& src, FloatRegister dest) {
        convertInt32ToDouble(Operand(src), dest);
    }
    void convertInt32ToDouble(const BaseIndex& src, FloatRegister dest) {
        convertInt32ToDouble(Operand(src), dest);
    }
    void convertInt32ToDouble(const Operand& src, FloatRegister dest) {
        // Clear the output register first to break dependencies; see above.
        zeroDouble(dest);
        vcvtsi2sd(Operand(src), dest, dest);
    }
    void convertInt32ToFloat32(Register src, FloatRegister dest) {
        // Clear the output register first to break dependencies; see above.
        zeroFloat32(dest);
        vcvtsi2ss(src, dest, dest);
    }
    void convertInt32ToFloat32(const Address& src, FloatRegister dest) {
        convertInt32ToFloat32(Operand(src), dest);
    }
    void convertInt32ToFloat32(const Operand& src, FloatRegister dest) {
        // Clear the output register first to break dependencies; see above.
        zeroFloat32(dest);
        vcvtsi2ss(src, dest, dest);
    }
    Condition testDoubleTruthy(bool truthy, FloatRegister reg) {
        ScratchDoubleScope scratch(asMasm());
        zeroDouble(scratch);
        vucomisd(reg, scratch);
        return truthy ? NonZero : Zero;
    }


    // Class which ensures that registers used in byte ops are compatible with
    // such instructions, even if the original register passed in wasn't. This
    // only applies to x86, as on x64 all registers are valid single byte regs.
    // This doesn't lead to great code but helps to simplify code generation.
    //
    // Note that this can currently only be used in cases where the register is
    // read from by the guarded instruction, not written to.
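    //
    // Typical use (see store8 below):
    //   AutoEnsureByteRegister ensure(this, dest, src);
    //   movb(ensure.reg(), Operand(dest));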
    class AutoEnsureByteRegister {
        MacroAssemblerX86Shared* masm;
        Register original_;
        Register substitute_;

      public:
        template <typename T>
        AutoEnsureByteRegister(MacroAssemblerX86Shared* masm, T address, Register reg)
          : masm(masm), original_(reg)
        {
            AllocatableGeneralRegisterSet singleByteRegs(Registers::SingleByteRegs);
            if (singleByteRegs.has(reg)) {
                substitute_ = reg;
            } else {
                MOZ_ASSERT(address.base != StackPointer);
                do {
                    substitute_ = singleByteRegs.takeAny();
                } while (Operand(address).containsReg(substitute_));

                masm->push(substitute_);
                masm->mov(reg, substitute_);
            }
        }

        ~AutoEnsureByteRegister() {
            if (original_ != substitute_)
                masm->pop(substitute_);
        }

        Register reg() {
            return substitute_;
        }
    };

    void load8ZeroExtend(const Operand& src, Register dest) {
        movzbl(src, dest);
    }
    void load8ZeroExtend(const Address& src, Register dest) {
        movzbl(Operand(src), dest);
    }
    void load8ZeroExtend(const BaseIndex& src, Register dest) {
        movzbl(Operand(src), dest);
    }
    void load8SignExtend(const Operand& src, Register dest) {
        movsbl(src, dest);
    }
    void load8SignExtend(const Address& src, Register dest) {
        movsbl(Operand(src), dest);
    }
    void load8SignExtend(const BaseIndex& src, Register dest) {
        movsbl(Operand(src), dest);
    }
    template <typename T>
    void store8(Imm32 src, const T& dest) {
        movb(src, Operand(dest));
    }
    template <typename T>
    void store8(Register src, const T& dest) {
        AutoEnsureByteRegister ensure(this, dest, src);
        movb(ensure.reg(), Operand(dest));
    }
    template <typename T>
    void compareExchange8ZeroExtend(const T& mem, Register oldval, Register newval, Register output) {
        MOZ_ASSERT(output == eax);
        CHECK_BYTEREG(newval);
        if (oldval != output)
            movl(oldval, output);
        lock_cmpxchgb(newval, Operand(mem));
        movzbl(output, output);
    }
    template <typename T>
    void compareExchange8SignExtend(const T& mem, Register oldval, Register newval, Register output) {
        MOZ_ASSERT(output == eax);
        CHECK_BYTEREG(newval);
        if (oldval != output)
            movl(oldval, output);
        lock_cmpxchgb(newval, Operand(mem));
        movsbl(output, output);
    }
    template <typename T>
    void atomicExchange8ZeroExtend(const T& mem, Register value, Register output) {
        if (value != output)
            movl(value, output);
        xchgb(output, Operand(mem));
        movzbl(output, output);
    }
    template <typename T>
    void atomicExchange8SignExtend(const T& mem, Register value, Register output) {
        if (value != output)
            movl(value, output);
        xchgb(output, Operand(mem));
        movsbl(output, output);
    }
    void load16ZeroExtend(const Operand& src, Register dest) {
        movzwl(src, dest);
    }
    void load16ZeroExtend(const Address& src, Register dest) {
        movzwl(Operand(src), dest);
    }
    void load16ZeroExtend(const BaseIndex& src, Register dest) {
        movzwl(Operand(src), dest);
    }
    template <typename S, typename T>
    void store16(const S& src, const T& dest) {
        movw(src, Operand(dest));
    }
    template <typename T>
    void compareExchange16ZeroExtend(const T& mem, Register oldval, Register newval, Register output) {
        MOZ_ASSERT(output == eax);
        if (oldval != output)
            movl(oldval, output);
        lock_cmpxchgw(newval, Operand(mem));
        movzwl(output, output);
    }
    template <typename T>
    void compareExchange16SignExtend(const T& mem, Register oldval, Register newval, Register output) {
        MOZ_ASSERT(output == eax);
        if (oldval != output)
            movl(oldval, output);
        lock_cmpxchgw(newval, Operand(mem));
        movswl(output, output);
    }
    template <typename T>
    void atomicExchange16ZeroExtend(const T& mem, Register value, Register output) {
        if (value != output)
            movl(value, output);
        xchgw(output, Operand(mem));
        movzwl(output, output);
    }
    template <typename T>
    void atomicExchange16SignExtend(const T& mem, Register value, Register output) {
        if (value != output)
            movl(value, output);
        xchgw(output, Operand(mem));
        movswl(output, output);
    }
    void load16SignExtend(const Operand& src, Register dest) {
        movswl(src, dest);
    }
    void load16SignExtend(const Address& src, Register dest) {
        movswl(Operand(src), dest);
    }
    void load16SignExtend(const BaseIndex& src, Register dest) {
        movswl(Operand(src), dest);
    }
    void load32(const Address& address, Register dest) {
        movl(Operand(address), dest);
    }
    void load32(const BaseIndex& src, Register dest) {
        movl(Operand(src), dest);
    }
    void load32(const Operand& src, Register dest) {
        movl(src, dest);
    }
    template <typename S, typename T>
    void store32(const S& src, const T& dest) {
        movl(src, Operand(dest));
    }
    template <typename T>
    void compareExchange32(const T& mem, Register oldval, Register newval, Register output) {
        MOZ_ASSERT(output == eax);
        if (oldval != output)
            movl(oldval, output);
        lock_cmpxchgl(newval, Operand(mem));
    }
    template <typename T>
    void atomicExchange32(const T& mem, Register value, Register output) {
        if (value != output)
            movl(value, output);
        xchgl(output, Operand(mem));
    }
    template <typename S, typename T>
    void store32_NoSecondScratch(const S& src, const T& dest) {
        store32(src, dest);
    }
    void loadDouble(const Address& src, FloatRegister dest) {
        vmovsd(src, dest);
    }
    void loadDouble(const BaseIndex& src, FloatRegister dest) {
        vmovsd(src, dest);
    }
    void loadDouble(const Operand& src, FloatRegister dest) {
        switch (src.kind()) {
          case Operand::MEM_REG_DISP:
            loadDouble(src.toAddress(), dest);
            break;
          case Operand::MEM_SCALE:
            loadDouble(src.toBaseIndex(), dest);
            break;
          default:
            MOZ_CRASH("unexpected operand kind");
        }
    }
    void moveDouble(FloatRegister src, FloatRegister dest) {
        // Use vmovapd instead of vmovsd to avoid dependencies.
        vmovapd(src, dest);
    }
    void zeroDouble(FloatRegister reg) {
        vxorpd(reg, reg, reg);
    }
    void zeroFloat32(FloatRegister reg) {
        vxorps(reg, reg, reg);
    }
    void convertFloat32ToDouble(FloatRegister src, FloatRegister dest) {
        vcvtss2sd(src, dest, dest);
    }
    void convertDoubleToFloat32(FloatRegister src, FloatRegister dest) {
        vcvtsd2ss(src, dest, dest);
    }

    void convertFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest) {
        // Note that if the conversion fails (because the converted
        // result is larger than the maximum signed int32, or less than the
        // least signed int32, or NaN), this will return the undefined integer
        // value (0x80000000).
        vcvttps2dq(src, dest);
    }
    void convertInt32x4ToFloat32x4(FloatRegister src, FloatRegister dest) {
        vcvtdq2ps(src, dest);
    }

    void bitwiseAndSimd128(const Operand& src, FloatRegister dest) {
        // TODO Using the "ps" variant for all types incurs a domain crossing
        // penalty for integer types and double.
        vandps(src, dest, dest);
    }
    void bitwiseAndNotSimd128(const Operand& src, FloatRegister dest) {
        vandnps(src, dest, dest);
    }
    void bitwiseOrSimd128(const Operand& src, FloatRegister dest) {
        vorps(src, dest, dest);
    }
    void bitwiseXorSimd128(const Operand& src, FloatRegister dest) {
        vxorps(src, dest, dest);
    }
    void zeroSimd128Float(FloatRegister dest) {
        vxorps(dest, dest, dest);
    }
    void zeroSimd128Int(FloatRegister dest) {
        vpxor(dest, dest, dest);
    }

    template <class T, class Reg> inline void loadScalar(const Operand& src, Reg dest);
    template <class T, class Reg> inline void storeScalar(Reg src, const Address& dest);
    template <class T> inline void loadAlignedVector(const Address& src, FloatRegister dest);
    template <class T> inline void storeAlignedVector(FloatRegister src, const Address& dest);

    void loadInt32x1(const Address& src, FloatRegister dest) {
        vmovd(Operand(src), dest);
    }
    void loadInt32x1(const BaseIndex& src, FloatRegister dest) {
        vmovd(Operand(src), dest);
    }
    void loadInt32x2(const Address& src, FloatRegister dest) {
        vmovq(Operand(src), dest);
    }
    void loadInt32x2(const BaseIndex& src, FloatRegister dest) {
        vmovq(Operand(src), dest);
    }
    void loadInt32x3(const BaseIndex& src, FloatRegister dest) {
        BaseIndex srcZ(src);
        srcZ.offset += 2 * sizeof(int32_t);

        ScratchSimd128Scope scratch(asMasm());
        vmovq(Operand(src), dest);
        vmovd(Operand(srcZ), scratch);
        vmovlhps(scratch, dest, dest);
    }
    void loadInt32x3(const Address& src, FloatRegister dest) {
        Address srcZ(src);
        srcZ.offset += 2 * sizeof(int32_t);

        ScratchSimd128Scope scratch(asMasm());
        vmovq(Operand(src), dest);
        vmovd(Operand(srcZ), scratch);
        vmovlhps(scratch, dest, dest);
    }

    void loadAlignedSimd128Int(const Address& src, FloatRegister dest) {
        vmovdqa(Operand(src), dest);
    }
    void loadAlignedSimd128Int(const Operand& src, FloatRegister dest) {
        vmovdqa(src, dest);
    }
    void storeAlignedSimd128Int(FloatRegister src, const Address& dest) {
        vmovdqa(src, Operand(dest));
    }
    void moveSimd128Int(FloatRegister src, FloatRegister dest) {
        vmovdqa(src, dest);
    }
    FloatRegister reusedInputInt32x4(FloatRegister src, FloatRegister dest) {
        if (HasAVX())
            return src;
        moveSimd128Int(src, dest);
        return dest;
    }
    FloatRegister reusedInputAlignedInt32x4(const Operand& src, FloatRegister dest) {
        if (HasAVX() && src.kind() == Operand::FPREG)
            return FloatRegister::FromCode(src.fpu());
        loadAlignedSimd128Int(src, dest);
        return dest;
    }
    void loadUnalignedSimd128Int(const Address& src, FloatRegister dest) {
        vmovdqu(Operand(src), dest);
    }
    void loadUnalignedSimd128Int(const BaseIndex& src, FloatRegister dest) {
        vmovdqu(Operand(src), dest);
    }
    void loadUnalignedSimd128Int(const Operand& src, FloatRegister dest) {
        vmovdqu(src, dest);
    }

    void storeInt32x1(FloatRegister src, const Address& dest) {
        vmovd(src, Operand(dest));
    }
    void storeInt32x1(FloatRegister src, const BaseIndex& dest) {
        vmovd(src, Operand(dest));
    }
    void storeInt32x2(FloatRegister src, const Address& dest) {
        vmovq(src, Operand(dest));
    }
    void storeInt32x2(FloatRegister src, const BaseIndex& dest) {
        vmovq(src, Operand(dest));
    }
    void storeInt32x3(FloatRegister src, const Address& dest) {
        Address destZ(dest);
        destZ.offset += 2 * sizeof(int32_t);
        vmovq(src, Operand(dest));
        ScratchSimd128Scope scratch(asMasm());
        vmovhlps(src, scratch, scratch);
        vmovd(scratch, Operand(destZ));
    }
    void storeInt32x3(FloatRegister src, const BaseIndex& dest) {
        BaseIndex destZ(dest);
        destZ.offset += 2 * sizeof(int32_t);
        vmovq(src, Operand(dest));
        ScratchSimd128Scope scratch(asMasm());
        vmovhlps(src, scratch, scratch);
        vmovd(scratch, Operand(destZ));
    }

    void storeUnalignedSimd128Int(FloatRegister src, const Address& dest) {
        vmovdqu(src, Operand(dest));
    }
    void storeUnalignedSimd128Int(FloatRegister src, const BaseIndex& dest) {
        vmovdqu(src, Operand(dest));
    }
    void storeUnalignedSimd128Int(FloatRegister src, const Operand& dest) {
        vmovdqu(src, dest);
    }
    void packedEqualInt32x4(const Operand& src, FloatRegister dest) {
        vpcmpeqd(src, dest, dest);
    }
    void packedGreaterThanInt32x4(const Operand& src, FloatRegister dest) {
        vpcmpgtd(src, dest, dest);
    }
    void packedAddInt8(const Operand& src, FloatRegister dest) {
        vpaddb(src, dest, dest);
    }
    void packedSubInt8(const Operand& src, FloatRegister dest) {
        vpsubb(src, dest, dest);
    }
    void packedAddInt16(const Operand& src, FloatRegister dest) {
        vpaddw(src, dest, dest);
    }
    void packedSubInt16(const Operand& src, FloatRegister dest) {
        vpsubw(src, dest, dest);
    }
    void packedAddInt32(const Operand& src, FloatRegister dest) {
        vpaddd(src, dest, dest);
    }
    void packedSubInt32(const Operand& src, FloatRegister dest) {
        vpsubd(src, dest, dest);
    }
    void packedRcpApproximationFloat32x4(const Operand& src, FloatRegister dest) {
        // This function computes only an approximation of the result; it might
        // need fixing up if the spec requires a given precision for this
        // operation. TODO See also bug 1068028.
        vrcpps(src, dest);
    }
    void packedRcpSqrtApproximationFloat32x4(const Operand& src, FloatRegister dest) {
        // TODO See comment above. See also bug 1068028.
        vrsqrtps(src, dest);
    }
    void packedSqrtFloat32x4(const Operand& src, FloatRegister dest) {
        vsqrtps(src, dest);
    }

    void packedLeftShiftByScalarInt16x8(FloatRegister src, FloatRegister dest) {
        vpsllw(src, dest, dest);
    }
    void packedLeftShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
        vpsllw(count, dest, dest);
    }
    void packedRightShiftByScalarInt16x8(FloatRegister src, FloatRegister dest) {
        vpsraw(src, dest, dest);
    }
    void packedRightShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
        vpsraw(count, dest, dest);
    }
    void packedUnsignedRightShiftByScalarInt16x8(FloatRegister src, FloatRegister dest) {
        vpsrlw(src, dest, dest);
    }
    void packedUnsignedRightShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
        vpsrlw(count, dest, dest);
    }

    void packedLeftShiftByScalarInt32x4(FloatRegister src, FloatRegister dest) {
        vpslld(src, dest, dest);
    }
    void packedLeftShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
        vpslld(count, dest, dest);
    }
    void packedRightShiftByScalarInt32x4(FloatRegister src, FloatRegister dest) {
        vpsrad(src, dest, dest);
    }
    void packedRightShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
        vpsrad(count, dest, dest);
    }
    void packedUnsignedRightShiftByScalarInt32x4(FloatRegister src, FloatRegister dest) {
        vpsrld(src, dest, dest);
    }
    void packedUnsignedRightShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
        vpsrld(count, dest, dest);
    }

    void loadFloat32x3(const Address& src, FloatRegister dest) {
        Address srcZ(src);
        srcZ.offset += 2 * sizeof(float);
        vmovsd(src, dest);
        ScratchSimd128Scope scratch(asMasm());
        vmovss(srcZ, scratch);
        vmovlhps(scratch, dest, dest);
    }
    void loadFloat32x3(const BaseIndex& src, FloatRegister dest) {
        BaseIndex srcZ(src);
        srcZ.offset += 2 * sizeof(float);
        vmovsd(src, dest);
        ScratchSimd128Scope scratch(asMasm());
        vmovss(srcZ, scratch);
        vmovlhps(scratch, dest, dest);
    }

    void loadAlignedSimd128Float(const Address& src, FloatRegister dest) {
        vmovaps(Operand(src), dest);
    }
    void loadAlignedSimd128Float(const Operand& src, FloatRegister dest) {
        vmovaps(src, dest);
    }

    void storeAlignedSimd128Float(FloatRegister src, const Address& dest) {
        vmovaps(src, Operand(dest));
    }
    void moveSimd128Float(FloatRegister src, FloatRegister dest) {
        vmovaps(src, dest);
    }
    FloatRegister reusedInputFloat32x4(FloatRegister src, FloatRegister dest) {
        if (HasAVX())
            return src;
        moveSimd128Float(src, dest);
        return dest;
    }
    FloatRegister reusedInputAlignedFloat32x4(const Operand& src, FloatRegister dest) {
        if (HasAVX() && src.kind() == Operand::FPREG)
            return FloatRegister::FromCode(src.fpu());
        loadAlignedSimd128Float(src, dest);
        return dest;
    }
    void loadUnalignedSimd128Float(const Address& src, FloatRegister dest) {
        vmovups(Operand(src), dest);
    }
    void loadUnalignedSimd128Float(const BaseIndex& src, FloatRegister dest) {
        vmovdqu(Operand(src), dest);
    }
    void loadUnalignedSimd128Float(const Operand& src, FloatRegister dest) {
        vmovups(src, dest);
    }
    void storeUnalignedSimd128Float(FloatRegister src, const Address& dest) {
        vmovups(src, Operand(dest));
    }
    void storeUnalignedSimd128Float(FloatRegister src, const BaseIndex& dest) {
        vmovups(src, Operand(dest));
    }
    void storeUnalignedSimd128Float(FloatRegister src, const Operand& dest) {
        vmovups(src, dest);
    }
    void packedAddFloat32(const Operand& src, FloatRegister dest) {
        vaddps(src, dest, dest);
    }
    void packedSubFloat32(const Operand& src, FloatRegister dest) {
        vsubps(src, dest, dest);
    }
    void packedMulFloat32(const Operand& src, FloatRegister dest) {
        vmulps(src, dest, dest);
    }
    void packedDivFloat32(const Operand& src, FloatRegister dest) {
        vdivps(src, dest, dest);
    }

    static uint32_t ComputeShuffleMask(uint32_t x = 0, uint32_t y = 1,
                                       uint32_t z = 2, uint32_t w = 3)
    {
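        // The returned immediate follows the pshufd/shufps encoding: bits [1:0]
        // select the source lane for output lane 0, bits [3:2] for lane 1, and
        // so on. For example, ComputeShuffleMask(2, 3, 0, 1) == 0x4E.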
        MOZ_ASSERT(x < 4 && y < 4 && z < 4 && w < 4);
        uint32_t r = (w << 6) | (z << 4) | (y << 2) | (x << 0);
        MOZ_ASSERT(r < 256);
        return r;
    }

    void shuffleInt32(uint32_t mask, FloatRegister src, FloatRegister dest) {
        vpshufd(mask, src, dest);
    }
    void moveLowInt32(FloatRegister src, Register dest) {
        vmovd(src, dest);
    }

    void moveHighPairToLowPairFloat32(FloatRegister src, FloatRegister dest) {
        vmovhlps(src, dest, dest);
    }
    void shuffleFloat32(uint32_t mask, FloatRegister src, FloatRegister dest) {
        // The x86 shuffle instruction takes two 32-bit lanes from the dest
        // operand and two from the src operand. To simplify things, just
        // clobber the output with the input and apply the instruction
        // afterwards.
        // Note: this is useAtStart-safe because src isn't read afterwards.
        FloatRegister srcCopy = reusedInputFloat32x4(src, dest);
        vshufps(mask, srcCopy, srcCopy, dest);
    }
    void shuffleMix(uint32_t mask, const Operand& src, FloatRegister dest) {
        // Note this uses vshufps, which incurs a cross-domain penalty on CPUs
        // where that applies, but that's the way clang and gcc do it.
        vshufps(mask, src, dest, dest);
    }

    void moveFloatAsDouble(Register src, FloatRegister dest) {
        vmovd(src, dest);
        vcvtss2sd(dest, dest, dest);
    }
    void loadFloatAsDouble(const Address& src, FloatRegister dest) {
        vmovss(src, dest);
        vcvtss2sd(dest, dest, dest);
    }
    void loadFloatAsDouble(const BaseIndex& src, FloatRegister dest) {
        vmovss(src, dest);
        vcvtss2sd(dest, dest, dest);
    }
    void loadFloatAsDouble(const Operand& src, FloatRegister dest) {
        loadFloat32(src, dest);
        vcvtss2sd(dest, dest, dest);
    }
    void loadFloat32(const Address& src, FloatRegister dest) {
        vmovss(src, dest);
    }
    void loadFloat32(const BaseIndex& src, FloatRegister dest) {
        vmovss(src, dest);
    }
    void loadFloat32(const Operand& src, FloatRegister dest) {
        switch (src.kind()) {
          case Operand::MEM_REG_DISP:
            loadFloat32(src.toAddress(), dest);
            break;
          case Operand::MEM_SCALE:
            loadFloat32(src.toBaseIndex(), dest);
            break;
          default:
            MOZ_CRASH("unexpected operand kind");
        }
    }
    void moveFloat32(FloatRegister src, FloatRegister dest) {
        // Use vmovaps instead of vmovss to avoid a false dependency on the
        // previous contents of dest (register-to-register vmovss only merges
        // the low lane).
        vmovaps(src, dest);
    }

    // Checks whether a double is representable as a 32-bit integer. If so, the
    // integer is written to the output register. Otherwise, control jumps to the
    // given failure label. This function overwrites the scratch double register.
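    //
    // For example: vcvttsd2si truncates, so for src == 3.5 the tentative result
    // is 3; converting 3 back to double yields 3.0 != 3.5 and the NotEqual jump
    // below takes the failure path. A NaN input makes vucomisd report an
    // unordered result, which sets PF, so the Parity jump catches it.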
    void convertDoubleToInt32(FloatRegister src, Register dest, Label* fail,
                              bool negativeZeroCheck = true)
    {
        // Check for -0.0
        if (negativeZeroCheck)
            branchNegativeZero(src, dest, fail);

        ScratchDoubleScope scratch(asMasm());
        vcvttsd2si(src, dest);
        convertInt32ToDouble(dest, scratch);
        vucomisd(scratch, src);
        j(Assembler::Parity, fail);
        j(Assembler::NotEqual, fail);
    }

    // Checks whether a float32 is representable as a 32-bit integer. If so, the
    // integer is written to the output register. Otherwise, control jumps to the
    // given failure label. This function overwrites the scratch float32 register.
    void convertFloat32ToInt32(FloatRegister src, Register dest, Label* fail,
                               bool negativeZeroCheck = true)
    {
        // Check for -0.0
        if (negativeZeroCheck)
            branchNegativeZeroFloat32(src, dest, fail);

        ScratchFloat32Scope scratch(asMasm());
        vcvttss2si(src, dest);
        convertInt32ToFloat32(dest, scratch);
        vucomiss(scratch, src);
        j(Assembler::Parity, fail);
        j(Assembler::NotEqual, fail);
    }

    inline void clampIntToUint8(Register reg);

    bool maybeInlineDouble(wasm::RawF64 d, FloatRegister dest) {
        // Loading zero with xor is specially optimized in hardware.
        if (d.bits() == 0) {
            zeroDouble(dest);
            return true;
        }

        // It is also possible to load several common constants using vpcmpeqw
        // to get all ones and then vpsllq and vpsrlq to get zeros at the ends,
        // as described in "13.4 Generating constants" of
        // "2. Optimizing subroutines in assembly language" by Agner Fog, and as
        // previously implemented here. However, with x86 and x64 both using
        // constant pool loads for double constants, this is probably only
        // worthwhile in cases where a load is likely to be delayed.
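        //
        // As an illustration of that technique (not emitted here): vpcmpeqw of
        // a register with itself yields all ones; vpsllq by 54 followed by
        // vpsrlq by 2 then leaves 0x3FF0000000000000 in each 64-bit lane,
        // which is the bit pattern of the double 1.0.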

        return false;
    }

    bool maybeInlineFloat(wasm::RawF32 f, FloatRegister dest) {
        // See comment above
        if (f.bits() == 0) {
            zeroFloat32(dest);
            return true;
        }
        return false;
    }

    bool maybeInlineSimd128Int(const SimdConstant& v, const FloatRegister& dest) {
        static const SimdConstant zero = SimdConstant::SplatX4(0);
        static const SimdConstant minusOne = SimdConstant::SplatX4(-1);
        if (v == zero) {
            zeroSimd128Int(dest);
            return true;
        }
        if (v == minusOne) {
            vpcmpeqw(Operand(dest), dest, dest);
            return true;
        }
        return false;
    }
    bool maybeInlineSimd128Float(const SimdConstant& v, const FloatRegister& dest) {
        static const SimdConstant zero = SimdConstant::SplatX4(0.f);
        if (v == zero) {
            // This won't get inlined if the SimdConstant v contains -0 in any
            // lane, as operator== here does a memcmp.
            zeroSimd128Float(dest);
            return true;
        }
        return false;
    }

    void convertBoolToInt32(Register source, Register dest) {
        // Note that C++ bool is only 1 byte, so zero extend it to clear the
        // higher-order bits.
        movzbl(source, dest);
    }

    void emitSet(Assembler::Condition cond, Register dest,
                 Assembler::NaNCond ifNaN = Assembler::NaN_HandledByCond) {
        if (AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(dest)) {
            // If the register we're defining is a single byte register,
            // take advantage of the setCC instruction
            setCC(cond, dest);
            movzbl(dest, dest);

            if (ifNaN != Assembler::NaN_HandledByCond) {
                Label noNaN;
                j(Assembler::NoParity, &noNaN);
                mov(ImmWord(ifNaN == Assembler::NaN_IsTrue), dest);
                bind(&noNaN);
            }
        } else {
            Label end;
            Label ifFalse;

            if (ifNaN == Assembler::NaN_IsFalse)
                j(Assembler::Parity, &ifFalse);
            // Note a subtlety here: FLAGS is live at this point, and the
            // mov interface doesn't guarantee to preserve FLAGS. Use
            // movl instead of mov, because the movl instruction
            // preserves FLAGS.
            movl(Imm32(1), dest);
            j(cond, &end);
            if (ifNaN == Assembler::NaN_IsTrue)
                j(Assembler::Parity, &end);
            bind(&ifFalse);
            mov(ImmWord(0), dest);

            bind(&end);
        }
    }

    // Emit a JMP that can be toggled to a CMP. See ToggleToJmp(), ToggleToCmp().
    CodeOffset toggledJump(Label* label) {
        CodeOffset offset(size());
        jump(label);
        return offset;
    }

    template <typename T>
    void computeEffectiveAddress(const T& address, Register dest) {
        lea(Operand(address), dest);
    }

    void checkStackAlignment() {
        // Exists for ARM compatibility.
    }

    CodeOffset labelForPatch() {
        return CodeOffset(size());
    }

    void abiret() {
        ret();
    }

    template<typename T>
    void compareExchangeToTypedIntArray(Scalar::Type arrayType, const T& mem, Register oldval, Register newval,
                                        Register temp, AnyRegister output);

    template<typename T>
    void atomicExchangeToTypedIntArray(Scalar::Type arrayType, const T& mem, Register value,
                                       Register temp, AnyRegister output);

  protected:
    bool buildOOLFakeExitFrame(void* fakeReturnAddr);
};

// Specialize for float to use movaps. Use movdqa for everything else.
template <>
inline void
MacroAssemblerX86Shared::loadAlignedVector<float>(const Address& src, FloatRegister dest)
{
    loadAlignedSimd128Float(src, dest);
}

template <typename T>
inline void
MacroAssemblerX86Shared::loadAlignedVector(const Address& src, FloatRegister dest)
{
    loadAlignedSimd128Int(src, dest);
}

// Specialize for float to use movaps. Use movdqa for everything else.
template <>
inline void
MacroAssemblerX86Shared::storeAlignedVector<float>(FloatRegister src, const Address& dest)
{
    storeAlignedSimd128Float(src, dest);
}

template <typename T>
inline void
MacroAssemblerX86Shared::storeAlignedVector(FloatRegister src, const Address& dest)
{
    storeAlignedSimd128Int(src, dest);
}

template <> inline void
MacroAssemblerX86Shared::loadScalar<int8_t>(const Operand& src, Register dest) {
    load8ZeroExtend(src, dest);
}
template <> inline void
MacroAssemblerX86Shared::loadScalar<int16_t>(const Operand& src, Register dest) {
    load16ZeroExtend(src, dest);
}
template <> inline void
MacroAssemblerX86Shared::loadScalar<int32_t>(const Operand& src, Register dest) {
    load32(src, dest);
}
template <> inline void
MacroAssemblerX86Shared::loadScalar<float>(const Operand& src, FloatRegister dest) {
    loadFloat32(src, dest);
}

template <> inline void
MacroAssemblerX86Shared::storeScalar<int8_t>(Register src, const Address& dest) {
    store8(src, dest);
}
template <> inline void
MacroAssemblerX86Shared::storeScalar<int16_t>(Register src, const Address& dest) {
    store16(src, dest);
}
template <> inline void
MacroAssemblerX86Shared::storeScalar<int32_t>(Register src, const Address& dest) {
    store32(src, dest);
}
template <> inline void
MacroAssemblerX86Shared::storeScalar<float>(FloatRegister src, const Address& dest) {
    vmovss(src, dest);
}

} // namespace jit
} // namespace js

#undef CHECK_BYTEREG
#undef CHECK_BYTEREGS

#endif /* jit_x86_shared_MacroAssembler_x86_shared_h */