/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef jit_x86_shared_MacroAssembler_x86_shared_h
#define jit_x86_shared_MacroAssembler_x86_shared_h

#include "mozilla/Casting.h"

#if defined(JS_CODEGEN_X86)
# include "jit/x86/Assembler-x86.h"
#elif defined(JS_CODEGEN_X64)
# include "jit/x64/Assembler-x64.h"
#endif

#ifdef DEBUG
#define CHECK_BYTEREG(reg)                                                 \
  JS_BEGIN_MACRO                                                           \
    AllocatableGeneralRegisterSet byteRegs(Registers::SingleByteRegs);     \
    MOZ_ASSERT(byteRegs.has(reg));                                         \
  JS_END_MACRO
#define CHECK_BYTEREGS(r1, r2)                                             \
  JS_BEGIN_MACRO                                                           \
    AllocatableGeneralRegisterSet byteRegs(Registers::SingleByteRegs);     \
    MOZ_ASSERT(byteRegs.has(r1));                                          \
    MOZ_ASSERT(byteRegs.has(r2));                                          \
  JS_END_MACRO
#else
#define CHECK_BYTEREG(reg) (void)0
#define CHECK_BYTEREGS(r1, r2) (void)0
#endif
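
// On x86-32 only eax, ebx, ecx and edx expose a low-byte subregister
// (al/bl/cl/dl), so byte-sized operations must be pinned to one of those
// registers; on x64 the REX prefix makes every GPR byte-addressable and
// these checks always pass.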

namespace js {
namespace jit {

class MacroAssembler;

class MacroAssemblerX86Shared : public Assembler
{
  private:
    // Perform a downcast. Should be removed by Bug 996602.
    MacroAssembler& asMasm();
    const MacroAssembler& asMasm() const;

  public:
    typedef Vector<CodeOffset, 0, SystemAllocPolicy> UsesVector;

  protected:

    // For Double, Float and SimdData, make the move ctors explicit so that MSVC
    // knows what to use instead of copying these data structures.
    template<class T>
    struct Constant {
        typedef T Pod;

        T value;
        UsesVector uses;

        explicit Constant(const T& value) : value(value) {}
        Constant(Constant<T>&& other) : value(other.value), uses(mozilla::Move(other.uses)) {}
        explicit Constant(const Constant<T>&) = delete;
    };

    // Containers use SystemAllocPolicy since wasm releases memory after each
    // function is compiled, and these need to live until after all functions
    // are compiled.
    using Double = Constant<uint64_t>;
    Vector<Double, 0, SystemAllocPolicy> doubles_;
    typedef HashMap<uint64_t, size_t, DefaultHasher<uint64_t>, SystemAllocPolicy> DoubleMap;
    DoubleMap doubleMap_;

    using Float = Constant<uint32_t>;
    Vector<Float, 0, SystemAllocPolicy> floats_;
    typedef HashMap<uint32_t, size_t, DefaultHasher<uint32_t>, SystemAllocPolicy> FloatMap;
    FloatMap floatMap_;

    struct SimdData : public Constant<SimdConstant> {
        explicit SimdData(SimdConstant d) : Constant<SimdConstant>(d) {}
        SimdData(SimdData&& d) : Constant<SimdConstant>(mozilla::Move(d)) {}
        explicit SimdData(const SimdData&) = delete;
        SimdConstant::Type type() const { return value.type(); }
    };

    Vector<SimdData, 0, SystemAllocPolicy> simds_;
    typedef HashMap<SimdConstant, size_t, SimdConstant, SystemAllocPolicy> SimdMap;
    SimdMap simdMap_;

    template<class T, class Map>
    T* getConstant(const typename T::Pod& value, Map& map, Vector<T, 0, SystemAllocPolicy>& vec);

    Float* getFloat(wasm::RawF32 f);
    Double* getDouble(wasm::RawF64 d);
    SimdData* getSimdData(const SimdConstant& v);

  public:
    using Assembler::call;

    MacroAssemblerX86Shared()
    { }

    bool asmMergeWith(const MacroAssemblerX86Shared& other);

    // Evaluate srcDest = minmax<isMax>{Float32,Double}(srcDest, second).
    // Checks for NaN if canBeNaN is true.
    void minMaxDouble(FloatRegister srcDest, FloatRegister second, bool canBeNaN, bool isMax);
    void minMaxFloat32(FloatRegister srcDest, FloatRegister second, bool canBeNaN, bool isMax);

    void compareDouble(DoubleCondition cond, FloatRegister lhs, FloatRegister rhs) {
        if (cond & DoubleConditionBitInvert)
            vucomisd(lhs, rhs);
        else
            vucomisd(rhs, lhs);
    }

    void compareFloat(DoubleCondition cond, FloatRegister lhs, FloatRegister rhs) {
        if (cond & DoubleConditionBitInvert)
            vucomiss(lhs, rhs);
        else
            vucomiss(rhs, lhs);
    }

    void branchNegativeZero(FloatRegister reg, Register scratch, Label* label, bool maybeNonZero = true);
    void branchNegativeZeroFloat32(FloatRegister reg, Register scratch, Label* label);

    void move32(Imm32 imm, Register dest) {
        // Use the ImmWord version of mov to register, which has special
        // optimizations. Casting to uint32_t here ensures that the value
        // is zero-extended.
        mov(ImmWord(uint32_t(imm.value)), dest);
    }
    void move32(Imm32 imm, const Operand& dest) {
        movl(imm, dest);
    }
    void move32(Register src, Register dest) {
        movl(src, dest);
    }
    void move32(Register src, const Operand& dest) {
        movl(src, dest);
    }
    void test32(Register lhs, Register rhs) {
        testl(rhs, lhs);
    }
    void test32(const Address& addr, Imm32 imm) {
        testl(imm, Operand(addr));
    }
    void test32(const Operand& lhs, Imm32 imm) {
        testl(imm, lhs);
    }
    void test32(Register lhs, Imm32 rhs) {
        testl(rhs, lhs);
    }
    void cmp32(Register lhs, Imm32 rhs) {
        cmpl(rhs, lhs);
    }
    void cmp32(Register lhs, Register rhs) {
        cmpl(rhs, lhs);
    }
    void cmp32(const Address& lhs, Register rhs) {
        cmp32(Operand(lhs), rhs);
    }
    void cmp32(const Address& lhs, Imm32 rhs) {
        cmp32(Operand(lhs), rhs);
    }
    void cmp32(const Operand& lhs, Imm32 rhs) {
        cmpl(rhs, lhs);
    }
    void cmp32(const Operand& lhs, Register rhs) {
        cmpl(rhs, lhs);
    }
    void cmp32(Register lhs, const Operand& rhs) {
        cmpl(rhs, lhs);
    }
    CodeOffset cmp32WithPatch(Register lhs, Imm32 rhs) {
        return cmplWithPatch(rhs, lhs);
    }
    void atomic_inc32(const Operand& addr) {
        lock_incl(addr);
    }
    void atomic_dec32(const Operand& addr) {
        lock_decl(addr);
    }

    template <typename T>
    void atomicFetchAdd8SignExtend(Register src, const T& mem, Register temp, Register output) {
        CHECK_BYTEREGS(src, output);
        MOZ_ASSERT(temp == InvalidReg);
        if (src != output)
            movl(src, output);
        lock_xaddb(output, Operand(mem));
        movsbl(output, output);
    }

    template <typename T>
    void atomicFetchAdd8ZeroExtend(Register src, const T& mem, Register temp, Register output) {
        CHECK_BYTEREGS(src, output);
        MOZ_ASSERT(temp == InvalidReg);
        if (src != output)
            movl(src, output);
        lock_xaddb(output, Operand(mem));
        movzbl(output, output);
    }

    template <typename T>
    void atomicFetchAdd8SignExtend(Imm32 src, const T& mem, Register temp, Register output) {
        CHECK_BYTEREG(output);
        MOZ_ASSERT(temp == InvalidReg);
        movb(src, output);
        lock_xaddb(output, Operand(mem));
        movsbl(output, output);
    }

    template <typename T>
    void atomicFetchAdd8ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) {
        CHECK_BYTEREG(output);
        MOZ_ASSERT(temp == InvalidReg);
        movb(src, output);
        lock_xaddb(output, Operand(mem));
        movzbl(output, output);
    }

    template <typename T>
    void atomicFetchAdd16SignExtend(Register src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        if (src != output)
            movl(src, output);
        lock_xaddw(output, Operand(mem));
        movswl(output, output);
    }

    template <typename T>
    void atomicFetchAdd16ZeroExtend(Register src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        if (src != output)
            movl(src, output);
        lock_xaddw(output, Operand(mem));
        movzwl(output, output);
    }

    template <typename T>
    void atomicFetchAdd16SignExtend(Imm32 src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        movl(src, output);
        lock_xaddw(output, Operand(mem));
        movswl(output, output);
    }

    template <typename T>
    void atomicFetchAdd16ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        movl(src, output);
        lock_xaddw(output, Operand(mem));
        movzwl(output, output);
    }

    template <typename T>
    void atomicFetchAdd32(Register src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        if (src != output)
            movl(src, output);
        lock_xaddl(output, Operand(mem));
    }

    template <typename T>
    void atomicFetchAdd32(Imm32 src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        movl(src, output);
        lock_xaddl(output, Operand(mem));
    }

    template <typename T>
    void atomicFetchSub8SignExtend(Register src, const T& mem, Register temp, Register output) {
        CHECK_BYTEREGS(src, output);
        MOZ_ASSERT(temp == InvalidReg);
        if (src != output)
            movl(src, output);
        negl(output);
        lock_xaddb(output, Operand(mem));
        movsbl(output, output);
    }

    template <typename T>
    void atomicFetchSub8ZeroExtend(Register src, const T& mem, Register temp, Register output) {
        CHECK_BYTEREGS(src, output);
        MOZ_ASSERT(temp == InvalidReg);
        if (src != output)
            movl(src, output);
        negl(output);
        lock_xaddb(output, Operand(mem));
        movzbl(output, output);
    }

    template <typename T>
    void atomicFetchSub8SignExtend(Imm32 src, const T& mem, Register temp, Register output) {
        CHECK_BYTEREG(output);
        MOZ_ASSERT(temp == InvalidReg);
        movb(Imm32(-src.value), output);
        lock_xaddb(output, Operand(mem));
        movsbl(output, output);
    }

    template <typename T>
    void atomicFetchSub8ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) {
        CHECK_BYTEREG(output);
        MOZ_ASSERT(temp == InvalidReg);
        movb(Imm32(-src.value), output);
        lock_xaddb(output, Operand(mem));
        movzbl(output, output);
    }

    template <typename T>
    void atomicFetchSub16SignExtend(Register src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        if (src != output)
            movl(src, output);
        negl(output);
        lock_xaddw(output, Operand(mem));
        movswl(output, output);
    }

    template <typename T>
    void atomicFetchSub16ZeroExtend(Register src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        if (src != output)
            movl(src, output);
        negl(output);
        lock_xaddw(output, Operand(mem));
        movzwl(output, output);
    }

    template <typename T>
    void atomicFetchSub16SignExtend(Imm32 src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        movl(Imm32(-src.value), output);
        lock_xaddw(output, Operand(mem));
        movswl(output, output);
    }

    template <typename T>
    void atomicFetchSub16ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        movl(Imm32(-src.value), output);
        lock_xaddw(output, Operand(mem));
        movzwl(output, output);
    }

    template <typename T>
    void atomicFetchSub32(Register src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        if (src != output)
            movl(src, output);
        negl(output);
        lock_xaddl(output, Operand(mem));
    }

    template <typename T>
    void atomicFetchSub32(Imm32 src, const T& mem, Register temp, Register output) {
        MOZ_ASSERT(temp == InvalidReg);
        movl(Imm32(-src.value), output);
        lock_xaddl(output, Operand(mem));
    }

    // requires output == eax
#define ATOMIC_BITOP_BODY(LOAD, OP, LOCK_CMPXCHG) \
        MOZ_ASSERT(output == eax); \
        LOAD(Operand(mem), eax); \
        Label again; \
        bind(&again); \
        movl(eax, temp); \
        OP(src, temp); \
        LOCK_CMPXCHG(temp, Operand(mem)); \
        j(NonZero, &again);
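
    // For example, ATOMIC_BITOP_BODY(movb, andl, lock_cmpxchgb) expands to a
    // standard compare-and-swap loop: the old value is loaded into eax, the
    // new value (old OP src) is computed in temp, and lock cmpxchg stores
    // temp only if memory still equals eax, looping (ZF clear) until no
    // other thread has raced us.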

    template <typename S, typename T>
    void atomicFetchAnd8SignExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movb, andl, lock_cmpxchgb)
        CHECK_BYTEREG(temp);
        movsbl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchAnd8ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movb, andl, lock_cmpxchgb)
        CHECK_BYTEREG(temp);
        movzbl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchAnd16SignExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movw, andl, lock_cmpxchgw)
        movswl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchAnd16ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movw, andl, lock_cmpxchgw)
        movzwl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchAnd32(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movl, andl, lock_cmpxchgl)
    }

    template <typename S, typename T>
    void atomicFetchOr8SignExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movb, orl, lock_cmpxchgb)
        CHECK_BYTEREG(temp);
        movsbl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchOr8ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movb, orl, lock_cmpxchgb)
        CHECK_BYTEREG(temp);
        movzbl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchOr16SignExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movw, orl, lock_cmpxchgw)
        movswl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchOr16ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movw, orl, lock_cmpxchgw)
        movzwl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchOr32(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movl, orl, lock_cmpxchgl)
    }

    template <typename S, typename T>
    void atomicFetchXor8SignExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movb, xorl, lock_cmpxchgb)
        CHECK_BYTEREG(temp);
        movsbl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchXor8ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movb, xorl, lock_cmpxchgb)
        CHECK_BYTEREG(temp);
        movzbl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchXor16SignExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movw, xorl, lock_cmpxchgw)
        movswl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchXor16ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movw, xorl, lock_cmpxchgw)
        movzwl(eax, eax);
    }
    template <typename S, typename T>
    void atomicFetchXor32(const S& src, const T& mem, Register temp, Register output) {
        ATOMIC_BITOP_BODY(movl, xorl, lock_cmpxchgl)
    }

#undef ATOMIC_BITOP_BODY

    // S is Register or Imm32; T is Address or BaseIndex.
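    //
    // For example, atomicAdd32(Imm32(1), mem) boils down to a single
    // "lock addl $1" against the addressed word; the old value is not
    // returned, which is why these take no output register.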

    template <typename S, typename T>
    void atomicAdd8(const S& src, const T& mem) {
        lock_addb(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicAdd16(const S& src, const T& mem) {
        lock_addw(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicAdd32(const S& src, const T& mem) {
        lock_addl(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicSub8(const S& src, const T& mem) {
        lock_subb(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicSub16(const S& src, const T& mem) {
        lock_subw(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicSub32(const S& src, const T& mem) {
        lock_subl(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicAnd8(const S& src, const T& mem) {
        lock_andb(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicAnd16(const S& src, const T& mem) {
        lock_andw(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicAnd32(const S& src, const T& mem) {
        lock_andl(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicOr8(const S& src, const T& mem) {
        lock_orb(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicOr16(const S& src, const T& mem) {
        lock_orw(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicOr32(const S& src, const T& mem) {
        lock_orl(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicXor8(const S& src, const T& mem) {
        lock_xorb(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicXor16(const S& src, const T& mem) {
        lock_xorw(src, Operand(mem));
    }
    template <typename S, typename T>
    void atomicXor32(const S& src, const T& mem) {
        lock_xorl(src, Operand(mem));
    }

    void storeLoadFence() {
        // This implementation follows Linux: mfence when SSE2 is available,
        // otherwise a lock-prefixed no-op add to the top of the stack. Any
        // locked read-modify-write acts as a full barrier, and the stack top
        // is almost certainly in L1.
        if (HasSSE2())
            masm.mfence();
        else
            lock_addl(Imm32(0), Operand(Address(esp, 0)));
    }

    void branch16(Condition cond, Register lhs, Register rhs, Label* label) {
        cmpw(rhs, lhs);
        j(cond, label);
    }
    void branchTest16(Condition cond, Register lhs, Register rhs, Label* label) {
        testw(rhs, lhs);
        j(cond, label);
    }

    void jump(Label* label) {
        jmp(label);
    }
    void jump(JitCode* code) {
        jmp(code);
    }
    void jump(RepatchLabel* label) {
        jmp(label);
    }
    void jump(Register reg) {
        jmp(Operand(reg));
    }
    void jump(const Address& addr) {
        jmp(Operand(addr));
    }
    void jump(wasm::TrapDesc target) {
        jmp(target);
    }

    void convertInt32ToDouble(Register src, FloatRegister dest) {
        // vcvtsi2sd and friends write only part of their output register, which
        // causes slowdowns on out-of-order processors. Explicitly break
        // dependencies with vxorpd (and vxorps elsewhere), which are handled
        // specially in modern CPUs, for this purpose. See sections 8.14, 9.8,
        // 10.8, 12.9, 13.16, 14.14, and 15.8 of Agner's Microarchitecture
        // document.
        zeroDouble(dest);
        vcvtsi2sd(src, dest, dest);
    }
    void convertInt32ToDouble(const Address& src, FloatRegister dest) {
        convertInt32ToDouble(Operand(src), dest);
    }
    void convertInt32ToDouble(const BaseIndex& src, FloatRegister dest) {
        convertInt32ToDouble(Operand(src), dest);
    }
    void convertInt32ToDouble(const Operand& src, FloatRegister dest) {
        // Clear the output register first to break dependencies; see above.
        zeroDouble(dest);
        vcvtsi2sd(Operand(src), dest, dest);
    }
    void convertInt32ToFloat32(Register src, FloatRegister dest) {
        // Clear the output register first to break dependencies; see above.
        zeroFloat32(dest);
        vcvtsi2ss(src, dest, dest);
    }
    void convertInt32ToFloat32(const Address& src, FloatRegister dest) {
        convertInt32ToFloat32(Operand(src), dest);
    }
    void convertInt32ToFloat32(const Operand& src, FloatRegister dest) {
        // Clear the output register first to break dependencies; see above.
        zeroFloat32(dest);
        vcvtsi2ss(src, dest, dest);
    }
    Condition testDoubleTruthy(bool truthy, FloatRegister reg) {
        ScratchDoubleScope scratch(asMasm());
        zeroDouble(scratch);
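        // Comparing against +0.0 sets ZF for ±0.0 and (with PF) for NaN, so
        // NonZero selects exactly the truthy doubles: non-zero and non-NaN.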
        vucomisd(reg, scratch);
        return truthy ? NonZero : Zero;
    }

    // Class which ensures that registers used in byte ops are compatible with
    // such instructions, even if the original register passed in wasn't. This
    // only applies to x86, as on x64 all registers are valid single byte regs.
    // This doesn't lead to great code but helps to simplify code generation.
    //
    // Note that this can currently only be used in cases where the register is
    // read from by the guarded instruction, not written to.
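    //
    // A typical use is the byte store below, which reads through the
    // substitute register:
    //
    //     AutoEnsureByteRegister ensure(this, dest, src);
    //     movb(ensure.reg(), Operand(dest));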
    class AutoEnsureByteRegister {
        MacroAssemblerX86Shared* masm;
        Register original_;
        Register substitute_;

      public:
        template <typename T>
        AutoEnsureByteRegister(MacroAssemblerX86Shared* masm, T address, Register reg)
          : masm(masm), original_(reg)
        {
            AllocatableGeneralRegisterSet singleByteRegs(Registers::SingleByteRegs);
            if (singleByteRegs.has(reg)) {
                substitute_ = reg;
            } else {
                MOZ_ASSERT(address.base != StackPointer);
                do {
                    substitute_ = singleByteRegs.takeAny();
                } while (Operand(address).containsReg(substitute_));

                masm->push(substitute_);
                masm->mov(reg, substitute_);
            }
        }

        ~AutoEnsureByteRegister() {
            if (original_ != substitute_)
                masm->pop(substitute_);
        }

        Register reg() {
            return substitute_;
        }
    };

    void load8ZeroExtend(const Operand& src, Register dest) {
        movzbl(src, dest);
    }
    void load8ZeroExtend(const Address& src, Register dest) {
        movzbl(Operand(src), dest);
    }
    void load8ZeroExtend(const BaseIndex& src, Register dest) {
        movzbl(Operand(src), dest);
    }
    void load8SignExtend(const Operand& src, Register dest) {
        movsbl(src, dest);
    }
    void load8SignExtend(const Address& src, Register dest) {
        movsbl(Operand(src), dest);
    }
    void load8SignExtend(const BaseIndex& src, Register dest) {
        movsbl(Operand(src), dest);
    }
    template <typename T>
    void store8(Imm32 src, const T& dest) {
        movb(src, Operand(dest));
    }
    template <typename T>
    void store8(Register src, const T& dest) {
        AutoEnsureByteRegister ensure(this, dest, src);
        movb(ensure.reg(), Operand(dest));
    }
    template <typename T>
    void compareExchange8ZeroExtend(const T& mem, Register oldval, Register newval, Register output) {
        MOZ_ASSERT(output == eax);
        CHECK_BYTEREG(newval);
        if (oldval != output)
            movl(oldval, output);
        lock_cmpxchgb(newval, Operand(mem));
        movzbl(output, output);
    }
    template <typename T>
    void compareExchange8SignExtend(const T& mem, Register oldval, Register newval, Register output) {
        MOZ_ASSERT(output == eax);
        CHECK_BYTEREG(newval);
        if (oldval != output)
            movl(oldval, output);
        lock_cmpxchgb(newval, Operand(mem));
        movsbl(output, output);
    }
    template <typename T>
    void atomicExchange8ZeroExtend(const T& mem, Register value, Register output) {
        if (value != output)
            movl(value, output);
        xchgb(output, Operand(mem));
        movzbl(output, output);
    }
    template <typename T>
    void atomicExchange8SignExtend(const T& mem, Register value, Register output) {
        if (value != output)
            movl(value, output);
        xchgb(output, Operand(mem));
        movsbl(output, output);
    }
    void load16ZeroExtend(const Operand& src, Register dest) {
        movzwl(src, dest);
    }
    void load16ZeroExtend(const Address& src, Register dest) {
        movzwl(Operand(src), dest);
    }
    void load16ZeroExtend(const BaseIndex& src, Register dest) {
        movzwl(Operand(src), dest);
    }
    template <typename S, typename T>
    void store16(const S& src, const T& dest) {
        movw(src, Operand(dest));
    }
    template <typename T>
    void compareExchange16ZeroExtend(const T& mem, Register oldval, Register newval, Register output) {
        MOZ_ASSERT(output == eax);
        if (oldval != output)
            movl(oldval, output);
        lock_cmpxchgw(newval, Operand(mem));
        movzwl(output, output);
    }
    template <typename T>
    void compareExchange16SignExtend(const T& mem, Register oldval, Register newval, Register output) {
        MOZ_ASSERT(output == eax);
        if (oldval != output)
            movl(oldval, output);
        lock_cmpxchgw(newval, Operand(mem));
        movswl(output, output);
    }
    template <typename T>
    void atomicExchange16ZeroExtend(const T& mem, Register value, Register output) {
        if (value != output)
            movl(value, output);
        xchgw(output, Operand(mem));
        movzwl(output, output);
    }
    template <typename T>
    void atomicExchange16SignExtend(const T& mem, Register value, Register output) {
        if (value != output)
            movl(value, output);
        xchgw(output, Operand(mem));
        movswl(output, output);
    }
    void load16SignExtend(const Operand& src, Register dest) {
        movswl(src, dest);
    }
    void load16SignExtend(const Address& src, Register dest) {
        movswl(Operand(src), dest);
    }
    void load16SignExtend(const BaseIndex& src, Register dest) {
        movswl(Operand(src), dest);
    }
    void load32(const Address& address, Register dest) {
        movl(Operand(address), dest);
    }
    void load32(const BaseIndex& src, Register dest) {
        movl(Operand(src), dest);
    }
    void load32(const Operand& src, Register dest) {
        movl(src, dest);
    }
    template <typename S, typename T>
    void store32(const S& src, const T& dest) {
        movl(src, Operand(dest));
    }
    template <typename T>
    void compareExchange32(const T& mem, Register oldval, Register newval, Register output) {
        MOZ_ASSERT(output == eax);
        if (oldval != output)
            movl(oldval, output);
        lock_cmpxchgl(newval, Operand(mem));
    }
    template <typename T>
    void atomicExchange32(const T& mem, Register value, Register output) {
        if (value != output)
            movl(value, output);
        xchgl(output, Operand(mem));
    }
    template <typename S, typename T>
    void store32_NoSecondScratch(const S& src, const T& dest) {
        store32(src, dest);
    }
    void loadDouble(const Address& src, FloatRegister dest) {
        vmovsd(src, dest);
    }
    void loadDouble(const BaseIndex& src, FloatRegister dest) {
        vmovsd(src, dest);
    }
    void loadDouble(const Operand& src, FloatRegister dest) {
        switch (src.kind()) {
          case Operand::MEM_REG_DISP:
            loadDouble(src.toAddress(), dest);
            break;
          case Operand::MEM_SCALE:
            loadDouble(src.toBaseIndex(), dest);
            break;
          default:
            MOZ_CRASH("unexpected operand kind");
        }
    }
    void moveDouble(FloatRegister src, FloatRegister dest) {
        // Use vmovapd instead of vmovsd to avoid dependencies.
        vmovapd(src, dest);
    }
    void zeroDouble(FloatRegister reg) {
        vxorpd(reg, reg, reg);
    }
    void zeroFloat32(FloatRegister reg) {
        vxorps(reg, reg, reg);
    }
    void convertFloat32ToDouble(FloatRegister src, FloatRegister dest) {
        vcvtss2sd(src, dest, dest);
    }
    void convertDoubleToFloat32(FloatRegister src, FloatRegister dest) {
        vcvtsd2ss(src, dest, dest);
    }

    void convertFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest) {
        // Note that if the conversion failed (because the converted
        // result is larger than the maximum signed int32, or less than the
        // least signed int32, or NaN), this will return the undefined integer
        // value (0x80000000).
        vcvttps2dq(src, dest);
    }
    void convertInt32x4ToFloat32x4(FloatRegister src, FloatRegister dest) {
        vcvtdq2ps(src, dest);
    }

    void bitwiseAndSimd128(const Operand& src, FloatRegister dest) {
        // TODO Using the "ps" variant for all types incurs a domain crossing
        // penalty for integer types and double.
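        // (The integer-domain equivalent would be vpand; on microarchitectures
        // that track SIMD domains, forwarding a result between the float and
        // integer domains costs an extra bypass-delay cycle or two.)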
        vandps(src, dest, dest);
    }
    void bitwiseAndNotSimd128(const Operand& src, FloatRegister dest) {
        vandnps(src, dest, dest);
    }
    void bitwiseOrSimd128(const Operand& src, FloatRegister dest) {
        vorps(src, dest, dest);
    }
    void bitwiseXorSimd128(const Operand& src, FloatRegister dest) {
        vxorps(src, dest, dest);
    }
    void zeroSimd128Float(FloatRegister dest) {
        vxorps(dest, dest, dest);
    }
    void zeroSimd128Int(FloatRegister dest) {
        vpxor(dest, dest, dest);
    }

    template <class T, class Reg> inline void loadScalar(const Operand& src, Reg dest);
    template <class T, class Reg> inline void storeScalar(Reg src, const Address& dest);
    template <class T> inline void loadAlignedVector(const Address& src, FloatRegister dest);
    template <class T> inline void storeAlignedVector(FloatRegister src, const Address& dest);

    void loadInt32x1(const Address& src, FloatRegister dest) {
        vmovd(Operand(src), dest);
    }
    void loadInt32x1(const BaseIndex& src, FloatRegister dest) {
        vmovd(Operand(src), dest);
    }
    void loadInt32x2(const Address& src, FloatRegister dest) {
        vmovq(Operand(src), dest);
    }
    void loadInt32x2(const BaseIndex& src, FloatRegister dest) {
        vmovq(Operand(src), dest);
    }
    void loadInt32x3(const BaseIndex& src, FloatRegister dest) {
        BaseIndex srcZ(src);
        srcZ.offset += 2 * sizeof(int32_t);

        ScratchSimd128Scope scratch(asMasm());
        vmovq(Operand(src), dest);
        vmovd(Operand(srcZ), scratch);
        vmovlhps(scratch, dest, dest);
    }
    void loadInt32x3(const Address& src, FloatRegister dest) {
        Address srcZ(src);
        srcZ.offset += 2 * sizeof(int32_t);

        ScratchSimd128Scope scratch(asMasm());
        vmovq(Operand(src), dest);
        vmovd(Operand(srcZ), scratch);
        vmovlhps(scratch, dest, dest);
    }

    void loadAlignedSimd128Int(const Address& src, FloatRegister dest) {
        vmovdqa(Operand(src), dest);
    }
    void loadAlignedSimd128Int(const Operand& src, FloatRegister dest) {
        vmovdqa(src, dest);
    }
    void storeAlignedSimd128Int(FloatRegister src, const Address& dest) {
        vmovdqa(src, Operand(dest));
    }
    void moveSimd128Int(FloatRegister src, FloatRegister dest) {
        vmovdqa(src, dest);
    }
    FloatRegister reusedInputInt32x4(FloatRegister src, FloatRegister dest) {
        if (HasAVX())
            return src;
        moveSimd128Int(src, dest);
        return dest;
    }
    FloatRegister reusedInputAlignedInt32x4(const Operand& src, FloatRegister dest) {
        if (HasAVX() && src.kind() == Operand::FPREG)
            return FloatRegister::FromCode(src.fpu());
        loadAlignedSimd128Int(src, dest);
        return dest;
    }
    void loadUnalignedSimd128Int(const Address& src, FloatRegister dest) {
        vmovdqu(Operand(src), dest);
    }
    void loadUnalignedSimd128Int(const BaseIndex& src, FloatRegister dest) {
        vmovdqu(Operand(src), dest);
    }
    void loadUnalignedSimd128Int(const Operand& src, FloatRegister dest) {
        vmovdqu(src, dest);
    }

    void storeInt32x1(FloatRegister src, const Address& dest) {
        vmovd(src, Operand(dest));
    }
    void storeInt32x1(FloatRegister src, const BaseIndex& dest) {
        vmovd(src, Operand(dest));
    }
    void storeInt32x2(FloatRegister src, const Address& dest) {
        vmovq(src, Operand(dest));
    }
    void storeInt32x2(FloatRegister src, const BaseIndex& dest) {
        vmovq(src, Operand(dest));
    }
    void storeInt32x3(FloatRegister src, const Address& dest) {
        Address destZ(dest);
        destZ.offset += 2 * sizeof(int32_t);
        vmovq(src, Operand(dest));
        ScratchSimd128Scope scratch(asMasm());
        vmovhlps(src, scratch, scratch);
        vmovd(scratch, Operand(destZ));
    }
    void storeInt32x3(FloatRegister src, const BaseIndex& dest) {
        BaseIndex destZ(dest);
        destZ.offset += 2 * sizeof(int32_t);
        vmovq(src, Operand(dest));
        ScratchSimd128Scope scratch(asMasm());
        vmovhlps(src, scratch, scratch);
        vmovd(scratch, Operand(destZ));
    }

    void storeUnalignedSimd128Int(FloatRegister src, const Address& dest) {
        vmovdqu(src, Operand(dest));
    }
    void storeUnalignedSimd128Int(FloatRegister src, const BaseIndex& dest) {
        vmovdqu(src, Operand(dest));
    }
    void storeUnalignedSimd128Int(FloatRegister src, const Operand& dest) {
        vmovdqu(src, dest);
    }
    void packedEqualInt32x4(const Operand& src, FloatRegister dest) {
        vpcmpeqd(src, dest, dest);
    }
    void packedGreaterThanInt32x4(const Operand& src, FloatRegister dest) {
        vpcmpgtd(src, dest, dest);
    }
    void packedAddInt8(const Operand& src, FloatRegister dest) {
        vpaddb(src, dest, dest);
    }
    void packedSubInt8(const Operand& src, FloatRegister dest) {
        vpsubb(src, dest, dest);
    }
    void packedAddInt16(const Operand& src, FloatRegister dest) {
        vpaddw(src, dest, dest);
    }
    void packedSubInt16(const Operand& src, FloatRegister dest) {
        vpsubw(src, dest, dest);
    }
    void packedAddInt32(const Operand& src, FloatRegister dest) {
        vpaddd(src, dest, dest);
    }
    void packedSubInt32(const Operand& src, FloatRegister dest) {
        vpsubd(src, dest, dest);
    }
    void packedRcpApproximationFloat32x4(const Operand& src, FloatRegister dest) {
        // This function computes only an approximation of the result; it
        // might need fixing up if the spec requires a given precision for
        // this operation. TODO See also bug 1068028.
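        // (vrcpps guarantees only about 12 bits of relative precision; a
        // Newton-Raphson step would be needed to refine it to full float32
        // accuracy.)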
        vrcpps(src, dest);
    }
    void packedRcpSqrtApproximationFloat32x4(const Operand& src, FloatRegister dest) {
        // TODO See comment above. See also bug 1068028.
        vrsqrtps(src, dest);
    }
    void packedSqrtFloat32x4(const Operand& src, FloatRegister dest) {
        vsqrtps(src, dest);
    }

    void packedLeftShiftByScalarInt16x8(FloatRegister src, FloatRegister dest) {
        vpsllw(src, dest, dest);
    }
    void packedLeftShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
        vpsllw(count, dest, dest);
    }
    void packedRightShiftByScalarInt16x8(FloatRegister src, FloatRegister dest) {
        vpsraw(src, dest, dest);
    }
    void packedRightShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
        vpsraw(count, dest, dest);
    }
    void packedUnsignedRightShiftByScalarInt16x8(FloatRegister src, FloatRegister dest) {
        vpsrlw(src, dest, dest);
    }
    void packedUnsignedRightShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
        vpsrlw(count, dest, dest);
    }

    void packedLeftShiftByScalarInt32x4(FloatRegister src, FloatRegister dest) {
        vpslld(src, dest, dest);
    }
    void packedLeftShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
        vpslld(count, dest, dest);
    }
    void packedRightShiftByScalarInt32x4(FloatRegister src, FloatRegister dest) {
        vpsrad(src, dest, dest);
    }
    void packedRightShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
        vpsrad(count, dest, dest);
    }
    void packedUnsignedRightShiftByScalarInt32x4(FloatRegister src, FloatRegister dest) {
        vpsrld(src, dest, dest);
    }
    void packedUnsignedRightShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
        vpsrld(count, dest, dest);
    }

    void loadFloat32x3(const Address& src, FloatRegister dest) {
        Address srcZ(src);
        srcZ.offset += 2 * sizeof(float);
        vmovsd(src, dest);
        ScratchSimd128Scope scratch(asMasm());
        vmovss(srcZ, scratch);
        vmovlhps(scratch, dest, dest);
    }
    void loadFloat32x3(const BaseIndex& src, FloatRegister dest) {
        BaseIndex srcZ(src);
        srcZ.offset += 2 * sizeof(float);
        vmovsd(src, dest);
        ScratchSimd128Scope scratch(asMasm());
        vmovss(srcZ, scratch);
        vmovlhps(scratch, dest, dest);
    }

    void loadAlignedSimd128Float(const Address& src, FloatRegister dest) {
        vmovaps(Operand(src), dest);
    }
    void loadAlignedSimd128Float(const Operand& src, FloatRegister dest) {
        vmovaps(src, dest);
    }

    void storeAlignedSimd128Float(FloatRegister src, const Address& dest) {
        vmovaps(src, Operand(dest));
    }
    void moveSimd128Float(FloatRegister src, FloatRegister dest) {
        vmovaps(src, dest);
    }
    FloatRegister reusedInputFloat32x4(FloatRegister src, FloatRegister dest) {
        if (HasAVX())
            return src;
        moveSimd128Float(src, dest);
        return dest;
    }
    FloatRegister reusedInputAlignedFloat32x4(const Operand& src, FloatRegister dest) {
        if (HasAVX() && src.kind() == Operand::FPREG)
            return FloatRegister::FromCode(src.fpu());
        loadAlignedSimd128Float(src, dest);
        return dest;
    }
    void loadUnalignedSimd128Float(const Address& src, FloatRegister dest) {
        vmovups(Operand(src), dest);
    }
    void loadUnalignedSimd128Float(const BaseIndex& src, FloatRegister dest) {
        vmovups(Operand(src), dest);
    }
    void loadUnalignedSimd128Float(const Operand& src, FloatRegister dest) {
        vmovups(src, dest);
    }
    void storeUnalignedSimd128Float(FloatRegister src, const Address& dest) {
        vmovups(src, Operand(dest));
    }
    void storeUnalignedSimd128Float(FloatRegister src, const BaseIndex& dest) {
        vmovups(src, Operand(dest));
    }
    void storeUnalignedSimd128Float(FloatRegister src, const Operand& dest) {
        vmovups(src, dest);
    }
    void packedAddFloat32(const Operand& src, FloatRegister dest) {
        vaddps(src, dest, dest);
    }
    void packedSubFloat32(const Operand& src, FloatRegister dest) {
        vsubps(src, dest, dest);
    }
    void packedMulFloat32(const Operand& src, FloatRegister dest) {
        vmulps(src, dest, dest);
    }
    void packedDivFloat32(const Operand& src, FloatRegister dest) {
        vdivps(src, dest, dest);
    }

    static uint32_t ComputeShuffleMask(uint32_t x = 0, uint32_t y = 1,
                                       uint32_t z = 2, uint32_t w = 3)
    {
        MOZ_ASSERT(x < 4 && y < 4 && z < 4 && w < 4);
        uint32_t r = (w << 6) | (z << 4) | (y << 2) | (x << 0);
        MOZ_ASSERT(r < 256);
        return r;
    }
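
    // Each of x, y, z, w selects the source lane for the corresponding output
    // lane. For example, ComputeShuffleMask(2, 3, 0, 1) yields 0x4E
    // (0b01001110), the immediate that swaps the low and high 64-bit halves
    // of a vector.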

    void shuffleInt32(uint32_t mask, FloatRegister src, FloatRegister dest) {
        vpshufd(mask, src, dest);
    }
    void moveLowInt32(FloatRegister src, Register dest) {
        vmovd(src, dest);
    }

    void moveHighPairToLowPairFloat32(FloatRegister src, FloatRegister dest) {
        vmovhlps(src, dest, dest);
    }
    void shuffleFloat32(uint32_t mask, FloatRegister src, FloatRegister dest) {
        // The shuffle instruction on x86 is such that it moves 2 words from
        // the dest and 2 words from the src operands. To simplify things, just
        // clobber the output with the input and apply the instruction
        // afterwards.
        // Note: this is useAtStart-safe because src isn't read afterwards.
        FloatRegister srcCopy = reusedInputFloat32x4(src, dest);
        vshufps(mask, srcCopy, srcCopy, dest);
    }
    void shuffleMix(uint32_t mask, const Operand& src, FloatRegister dest) {
        // Note this uses vshufps, which incurs a cross-domain penalty on CPUs
        // where that applies, but that's the way clang and gcc do it.
        vshufps(mask, src, dest, dest);
    }

    void moveFloatAsDouble(Register src, FloatRegister dest) {
        vmovd(src, dest);
        vcvtss2sd(dest, dest, dest);
    }
    void loadFloatAsDouble(const Address& src, FloatRegister dest) {
        vmovss(src, dest);
        vcvtss2sd(dest, dest, dest);
    }
    void loadFloatAsDouble(const BaseIndex& src, FloatRegister dest) {
        vmovss(src, dest);
        vcvtss2sd(dest, dest, dest);
    }
    void loadFloatAsDouble(const Operand& src, FloatRegister dest) {
        loadFloat32(src, dest);
        vcvtss2sd(dest, dest, dest);
    }
    void loadFloat32(const Address& src, FloatRegister dest) {
        vmovss(src, dest);
    }
    void loadFloat32(const BaseIndex& src, FloatRegister dest) {
        vmovss(src, dest);
    }
    void loadFloat32(const Operand& src, FloatRegister dest) {
        switch (src.kind()) {
          case Operand::MEM_REG_DISP:
            loadFloat32(src.toAddress(), dest);
            break;
          case Operand::MEM_SCALE:
            loadFloat32(src.toBaseIndex(), dest);
            break;
          default:
            MOZ_CRASH("unexpected operand kind");
        }
    }
    void moveFloat32(FloatRegister src, FloatRegister dest) {
        // Use vmovaps instead of vmovss to avoid dependencies.
        vmovaps(src, dest);
    }

    // Checks whether a double is representable as a 32-bit integer. If so, the
    // integer is written to the output register. Otherwise, a bailout is taken to
    // the given snapshot. This function overwrites the scratch float register.
    void convertDoubleToInt32(FloatRegister src, Register dest, Label* fail,
                              bool negativeZeroCheck = true)
    {
        // Check for -0.0
        if (negativeZeroCheck)
            branchNegativeZero(src, dest, fail);

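        // Truncate to an int32, convert that integer back, and compare it
        // with the original: any difference means the double was not exactly
        // representable. vucomisd sets PF on an unordered (NaN) comparison,
        // so the Parity branch catches NaN inputs.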
        ScratchDoubleScope scratch(asMasm());
        vcvttsd2si(src, dest);
        convertInt32ToDouble(dest, scratch);
        vucomisd(scratch, src);
        j(Assembler::Parity, fail);
        j(Assembler::NotEqual, fail);
    }

    // Checks whether a float32 is representable as a 32-bit integer. If so, the
    // integer is written to the output register. Otherwise, a bailout is taken to
    // the given snapshot. This function overwrites the scratch float register.
    void convertFloat32ToInt32(FloatRegister src, Register dest, Label* fail,
                               bool negativeZeroCheck = true)
    {
        // Check for -0.0
        if (negativeZeroCheck)
            branchNegativeZeroFloat32(src, dest, fail);

        ScratchFloat32Scope scratch(asMasm());
        vcvttss2si(src, dest);
        convertInt32ToFloat32(dest, scratch);
        vucomiss(scratch, src);
        j(Assembler::Parity, fail);
        j(Assembler::NotEqual, fail);
    }

    inline void clampIntToUint8(Register reg);

    bool maybeInlineDouble(wasm::RawF64 d, FloatRegister dest) {
        // Loading zero with xor is specially optimized in hardware.
        if (d.bits() == 0) {
            zeroDouble(dest);
            return true;
        }

        // It is also possible to load several common constants using vpcmpeqw
        // to get all ones and then vpsllq and vpsrlq to get zeros at the ends,
        // as described in "13.4 Generating constants" of
        // "2. Optimizing subroutines in assembly language" by Agner Fog, and as
        // previously implemented here. However, with x86 and x64 both using
        // constant pool loads for double constants, this is probably only
        // worthwhile in cases where a load is likely to be delayed.
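        // For example, vpcmpeqw (all ones) followed by vpsllq(54) and
        // vpsrlq(2) leaves 0x3FF0000000000000 in the register, which is the
        // bit pattern of double 1.0.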

        return false;
    }

    bool maybeInlineFloat(wasm::RawF32 f, FloatRegister dest) {
        // See comment above.
        if (f.bits() == 0) {
            zeroFloat32(dest);
            return true;
        }
        return false;
    }

    bool maybeInlineSimd128Int(const SimdConstant& v, const FloatRegister& dest) {
        static const SimdConstant zero = SimdConstant::SplatX4(0);
        static const SimdConstant minusOne = SimdConstant::SplatX4(-1);
        if (v == zero) {
            zeroSimd128Int(dest);
            return true;
        }
        if (v == minusOne) {
            vpcmpeqw(Operand(dest), dest, dest);
            return true;
        }
        return false;
    }
    bool maybeInlineSimd128Float(const SimdConstant& v, const FloatRegister& dest) {
        static const SimdConstant zero = SimdConstant::SplatX4(0.f);
        if (v == zero) {
            // This won't get inlined if the SimdConstant v contains -0 in any
            // lane, as operator== here does a memcmp.
            zeroSimd128Float(dest);
            return true;
        }
        return false;
    }

    void convertBoolToInt32(Register source, Register dest) {
        // Note that C++ bool is only 1 byte, so zero extend it to clear the
        // higher-order bits.
        movzbl(source, dest);
    }

    void emitSet(Assembler::Condition cond, Register dest,
                 Assembler::NaNCond ifNaN = Assembler::NaN_HandledByCond) {
        if (AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(dest)) {
            // If the register we're defining is a single byte register,
            // take advantage of the setCC instruction.
            setCC(cond, dest);
            movzbl(dest, dest);

            if (ifNaN != Assembler::NaN_HandledByCond) {
                Label noNaN;
                j(Assembler::NoParity, &noNaN);
                mov(ImmWord(ifNaN == Assembler::NaN_IsTrue), dest);
                bind(&noNaN);
            }
        } else {
            Label end;
            Label ifFalse;

            if (ifNaN == Assembler::NaN_IsFalse)
                j(Assembler::Parity, &ifFalse);
            // Note a subtlety here: FLAGS is live at this point, and the
            // mov interface doesn't guarantee to preserve FLAGS. Use
            // movl instead of mov, because the movl instruction
            // preserves FLAGS.
            movl(Imm32(1), dest);
            j(cond, &end);
            if (ifNaN == Assembler::NaN_IsTrue)
                j(Assembler::Parity, &end);
            bind(&ifFalse);
            mov(ImmWord(0), dest);

            bind(&end);
        }
    }

    // Emit a JMP that can be toggled to a CMP. See ToggleToJmp(), ToggleToCmp().
    CodeOffset toggledJump(Label* label) {
        CodeOffset offset(size());
        jump(label);
        return offset;
    }

    template <typename T>
    void computeEffectiveAddress(const T& address, Register dest) {
        lea(Operand(address), dest);
    }

    void checkStackAlignment() {
        // Exists for ARM compatibility.
    }

    CodeOffset labelForPatch() {
        return CodeOffset(size());
    }

    void abiret() {
        ret();
    }

    template<typename T>
    void compareExchangeToTypedIntArray(Scalar::Type arrayType, const T& mem, Register oldval, Register newval,
                                        Register temp, AnyRegister output);

    template<typename T>
    void atomicExchangeToTypedIntArray(Scalar::Type arrayType, const T& mem, Register value,
                                       Register temp, AnyRegister output);

  protected:
    bool buildOOLFakeExitFrame(void* fakeReturnAddr);
};

// Specialize for float to use movaps. Use movdqa for everything else.
template <>
inline void
MacroAssemblerX86Shared::loadAlignedVector<float>(const Address& src, FloatRegister dest)
{
    loadAlignedSimd128Float(src, dest);
}

template <typename T>
inline void
MacroAssemblerX86Shared::loadAlignedVector(const Address& src, FloatRegister dest)
{
    loadAlignedSimd128Int(src, dest);
}

// Specialize for float to use movaps. Use movdqa for everything else.
template <>
inline void
MacroAssemblerX86Shared::storeAlignedVector<float>(FloatRegister src, const Address& dest)
{
    storeAlignedSimd128Float(src, dest);
}

template <typename T>
inline void
MacroAssemblerX86Shared::storeAlignedVector(FloatRegister src, const Address& dest)
{
    storeAlignedSimd128Int(src, dest);
}

template <> inline void
MacroAssemblerX86Shared::loadScalar<int8_t>(const Operand& src, Register dest) {
    load8ZeroExtend(src, dest);
}
template <> inline void
MacroAssemblerX86Shared::loadScalar<int16_t>(const Operand& src, Register dest) {
    load16ZeroExtend(src, dest);
}
template <> inline void
MacroAssemblerX86Shared::loadScalar<int32_t>(const Operand& src, Register dest) {
    load32(src, dest);
}
template <> inline void
MacroAssemblerX86Shared::loadScalar<float>(const Operand& src, FloatRegister dest) {
    loadFloat32(src, dest);
}

template <> inline void
MacroAssemblerX86Shared::storeScalar<int8_t>(Register src, const Address& dest) {
    store8(src, dest);
}
template <> inline void
MacroAssemblerX86Shared::storeScalar<int16_t>(Register src, const Address& dest) {
    store16(src, dest);
}
template <> inline void
MacroAssemblerX86Shared::storeScalar<int32_t>(Register src, const Address& dest) {
    store32(src, dest);
}
template <> inline void
MacroAssemblerX86Shared::storeScalar<float>(FloatRegister src, const Address& dest) {
    vmovss(src, dest);
}

} // namespace jit
} // namespace js

#undef CHECK_BYTEREG
#undef CHECK_BYTEREGS

#endif /* jit_x86_shared_MacroAssembler_x86_shared_h */