/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef jit_x86_shared_MacroAssembler_x86_shared_h
#define jit_x86_shared_MacroAssembler_x86_shared_h

#include "mozilla/Casting.h"

#if defined(JS_CODEGEN_X86)
#include "jit/x86/Assembler-x86.h"
#elif defined(JS_CODEGEN_X64)
#include "jit/x64/Assembler-x64.h"
#endif

namespace js {
namespace jit {

class MacroAssembler;

class MacroAssemblerX86Shared : public Assembler {
 private:
  // Perform a downcast. Should be removed by Bug 996602.
  MacroAssembler& asMasm();
  const MacroAssembler& asMasm() const;

 public:
  typedef Vector<CodeOffset, 0, SystemAllocPolicy> UsesVector;

 protected:
  // For Double, Float and SimdData, make the move ctors explicit so that MSVC
  // knows what to use instead of copying these data structures.
  template <class T>
  struct Constant {
    typedef T Pod;

    T value;
    UsesVector uses;

    explicit Constant(const T& value) : value(value) {}
    Constant(Constant<T>&& other)
        : value(other.value), uses(mozilla::Move(other.uses)) {}
    explicit Constant(const Constant<T>&) = delete;
  };

  // Containers use SystemAllocPolicy since wasm releases memory after each
  // function is compiled, and these need to live until after all functions
  // are compiled.
  using Double = Constant<double>;
  Vector<Double, 0, SystemAllocPolicy> doubles_;
  typedef HashMap<double, size_t, DefaultHasher<double>, SystemAllocPolicy>
      DoubleMap;
  DoubleMap doubleMap_;

  using Float = Constant<float>;
  Vector<Float, 0, SystemAllocPolicy> floats_;
  typedef HashMap<float, size_t, DefaultHasher<float>, SystemAllocPolicy>
      FloatMap;
  FloatMap floatMap_;

  struct SimdData : public Constant<SimdConstant> {
    explicit SimdData(SimdConstant d) : Constant<SimdConstant>(d) {}
    SimdData(SimdData&& d) : Constant<SimdConstant>(mozilla::Move(d)) {}
    explicit SimdData(const SimdData&) = delete;
    SimdConstant::Type type() const { return value.type(); }
  };

  Vector<SimdData, 0, SystemAllocPolicy> simds_;
  typedef HashMap<SimdConstant, size_t, SimdConstant, SystemAllocPolicy>
      SimdMap;
  SimdMap simdMap_;

  template <class T, class Map>
  T* getConstant(const typename T::Pod& value, Map& map,
                 Vector<T, 0, SystemAllocPolicy>& vec);

  Float* getFloat(float f);
  Double* getDouble(double d);
  SimdData* getSimdData(const SimdConstant& v);

 public:
  using Assembler::call;

  MacroAssemblerX86Shared() {}

  bool appendRawCode(const uint8_t* code, size_t numBytes) {
    return masm.appendRawCode(code, numBytes);
  }

  // Evaluate srcDest = minmax<isMax>{Float32,Double}(srcDest, second).
  // Checks for NaN if canBeNaN is true.
  void minMaxDouble(FloatRegister srcDest, FloatRegister second, bool canBeNaN,
                    bool isMax);
  void minMaxFloat32(FloatRegister srcDest, FloatRegister second,
                     bool canBeNaN, bool isMax);

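  // Conditions carrying DoubleConditionBitInvert are implemented by swapping
  // the operand order of vucomisd/vucomiss (e.g. less-than is emitted as
  // greater-than with swapped operands), so the same jcc encoding works for
  // both orderings.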
  void compareDouble(DoubleCondition cond, FloatRegister lhs,
                     FloatRegister rhs) {
    if (cond & DoubleConditionBitInvert)
      vucomisd(lhs, rhs);
    else
      vucomisd(rhs, lhs);
  }

  void compareFloat(DoubleCondition cond, FloatRegister lhs,
                    FloatRegister rhs) {
    if (cond & DoubleConditionBitInvert)
      vucomiss(lhs, rhs);
    else
      vucomiss(rhs, lhs);
  }

  void branchNegativeZero(FloatRegister reg, Register scratch, Label* label,
                          bool maybeNonZero = true);
  void branchNegativeZeroFloat32(FloatRegister reg, Register scratch,
                                 Label* label);

  void move32(Imm32 imm, Register dest) {
    // Use the ImmWord version of mov to register, which has special
    // optimizations. Casting to uint32_t here ensures that the value
    // is zero-extended.
    mov(ImmWord(uint32_t(imm.value)), dest);
  }
  void move32(Imm32 imm, const Operand& dest) { movl(imm, dest); }
  void move32(Register src, Register dest) { movl(src, dest); }
  void move32(Register src, const Operand& dest) { movl(src, dest); }
  void test32(Register lhs, Register rhs) { testl(rhs, lhs); }
  void test32(const Address& addr, Imm32 imm) { testl(imm, Operand(addr)); }
  void test32(const Operand& lhs, Imm32 imm) { testl(imm, lhs); }
  void test32(Register lhs, Imm32 rhs) { testl(rhs, lhs); }
  void cmp32(Register lhs, Imm32 rhs) { cmpl(rhs, lhs); }
  void cmp32(Register lhs, Register rhs) { cmpl(rhs, lhs); }
  void cmp32(const Address& lhs, Register rhs) { cmp32(Operand(lhs), rhs); }
  void cmp32(const Address& lhs, Imm32 rhs) { cmp32(Operand(lhs), rhs); }
  void cmp32(const Operand& lhs, Imm32 rhs) { cmpl(rhs, lhs); }
  void cmp32(const Operand& lhs, Register rhs) { cmpl(rhs, lhs); }
  void cmp32(Register lhs, const Operand& rhs) { cmpl(rhs, lhs); }
  CodeOffset cmp32WithPatch(Register lhs, Imm32 rhs) {
    return cmplWithPatch(rhs, lhs);
  }
  void atomic_inc32(const Operand& addr) { lock_incl(addr); }
  void atomic_dec32(const Operand& addr) { lock_decl(addr); }

  void storeLoadFence() {
    // This implementation follows Linux: use mfence when SSE2 is available;
    // otherwise fall back to a locked add on the stack, which is also a full
    // barrier.
    if (HasSSE2())
      masm.mfence();
    else
      lock_addl(Imm32(0), Operand(Address(esp, 0)));
  }

  void branch16(Condition cond, Register lhs, Register rhs, Label* label) {
    cmpw(rhs, lhs);
    j(cond, label);
  }
  void branchTest16(Condition cond, Register lhs, Register rhs, Label* label) {
    testw(rhs, lhs);
    j(cond, label);
  }

  void jump(Label* label) { jmp(label); }
  void jump(JitCode* code) { jmp(code); }
  void jump(TrampolinePtr code) { jmp(ImmPtr(code.value)); }
  void jump(RepatchLabel* label) { jmp(label); }
  void jump(Register reg) { jmp(Operand(reg)); }
  void jump(const Address& addr) { jmp(Operand(addr)); }
  void jump(wasm::OldTrapDesc target) { jmp(target); }

  void convertInt32ToDouble(Register src, FloatRegister dest) {
    // vcvtsi2sd and friends write only part of their output register, which
    // causes slowdowns on out-of-order processors. Explicitly break
    // dependencies with vxorpd (and vxorps elsewhere), which are handled
    // specially in modern CPUs, for this purpose. See sections 8.14, 9.8,
    // 10.8, 12.9, 13.16, 14.14, and 15.8 of Agner's Microarchitecture
    // document.
    zeroDouble(dest);
    vcvtsi2sd(src, dest, dest);
  }
  void convertInt32ToDouble(const Address& src, FloatRegister dest) {
    convertInt32ToDouble(Operand(src), dest);
  }
  void convertInt32ToDouble(const BaseIndex& src, FloatRegister dest) {
    convertInt32ToDouble(Operand(src), dest);
  }
  void convertInt32ToDouble(const Operand& src, FloatRegister dest) {
    // Clear the output register first to break dependencies; see above.
    zeroDouble(dest);
    vcvtsi2sd(Operand(src), dest, dest);
  }
  void convertInt32ToFloat32(Register src, FloatRegister dest) {
    // Clear the output register first to break dependencies; see above.
    zeroFloat32(dest);
    vcvtsi2ss(src, dest, dest);
  }
  void convertInt32ToFloat32(const Address& src, FloatRegister dest) {
    convertInt32ToFloat32(Operand(src), dest);
  }
  void convertInt32ToFloat32(const Operand& src, FloatRegister dest) {
    // Clear the output register first to break dependencies; see above.
    zeroFloat32(dest);
    vcvtsi2ss(src, dest, dest);
  }
  Condition testDoubleTruthy(bool truthy, FloatRegister reg) {
    ScratchDoubleScope scratch(asMasm());
    zeroDouble(scratch);
    vucomisd(reg, scratch);
    return truthy ? NonZero : Zero;
  }

  // Class which ensures that registers used in byte ops are compatible with
  // such instructions, even if the original register passed in wasn't. This
  // only applies to x86, as on x64 all registers are valid single byte regs.
  // This doesn't lead to great code but helps to simplify code generation.
  //
  // Note that this can currently only be used in cases where the register is
  // read from by the guarded instruction, not written to.
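  //
  // Illustrative use (mirroring store8 below): wrap the value register, then
  // emit the byte op through ensure.reg():
  //
  //   AutoEnsureByteRegister ensure(this, dest, src);
  //   movb(ensure.reg(), Operand(dest));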
  class AutoEnsureByteRegister {
    MacroAssemblerX86Shared* masm;
    Register original_;
    Register substitute_;

   public:
    template <typename T>
    AutoEnsureByteRegister(MacroAssemblerX86Shared* masm, T address,
                           Register reg)
        : masm(masm), original_(reg) {
      AllocatableGeneralRegisterSet singleByteRegs(Registers::SingleByteRegs);
      if (singleByteRegs.has(reg)) {
        substitute_ = reg;
      } else {
        MOZ_ASSERT(address.base != StackPointer);
        do {
          substitute_ = singleByteRegs.takeAny();
        } while (Operand(address).containsReg(substitute_));

        masm->push(substitute_);
        masm->mov(reg, substitute_);
      }
    }

    ~AutoEnsureByteRegister() {
      if (original_ != substitute_) masm->pop(substitute_);
    }

    Register reg() { return substitute_; }
  };

  void load8ZeroExtend(const Operand& src, Register dest) {
    movzbl(src, dest);
  }
  void load8ZeroExtend(const Address& src, Register dest) {
    movzbl(Operand(src), dest);
  }
  void load8ZeroExtend(const BaseIndex& src, Register dest) {
    movzbl(Operand(src), dest);
  }
  void load8SignExtend(const Operand& src, Register dest) {
    movsbl(src, dest);
  }
  void load8SignExtend(const Address& src, Register dest) {
    movsbl(Operand(src), dest);
  }
  void load8SignExtend(const BaseIndex& src, Register dest) {
    movsbl(Operand(src), dest);
  }
  template <typename T>
  void store8(Imm32 src, const T& dest) {
    movb(src, Operand(dest));
  }
  template <typename T>
  void store8(Register src, const T& dest) {
    AutoEnsureByteRegister ensure(this, dest, src);
    movb(ensure.reg(), Operand(dest));
  }
  void load16ZeroExtend(const Operand& src, Register dest) {
    movzwl(src, dest);
  }
  void load16ZeroExtend(const Address& src, Register dest) {
    movzwl(Operand(src), dest);
  }
  void load16ZeroExtend(const BaseIndex& src, Register dest) {
    movzwl(Operand(src), dest);
  }
  template <typename S, typename T>
  void store16(const S& src, const T& dest) {
    movw(src, Operand(dest));
  }
  void load16SignExtend(const Operand& src, Register dest) {
    movswl(src, dest);
  }
  void load16SignExtend(const Address& src, Register dest) {
    movswl(Operand(src), dest);
  }
  void load16SignExtend(const BaseIndex& src, Register dest) {
    movswl(Operand(src), dest);
  }
  void load32(const Address& address, Register dest) {
    movl(Operand(address), dest);
  }
  void load32(const BaseIndex& src, Register dest) {
    movl(Operand(src), dest);
  }
  void load32(const Operand& src, Register dest) { movl(src, dest); }
  template <typename S, typename T>
  void store32(const S& src, const T& dest) {
    movl(src, Operand(dest));
  }
  template <typename S, typename T>
  void store32_NoSecondScratch(const S& src, const T& dest) {
    store32(src, dest);
  }
  void loadDouble(const Address& src, FloatRegister dest) { vmovsd(src, dest); }
  void loadDouble(const BaseIndex& src, FloatRegister dest) {
    vmovsd(src, dest);
  }
  void loadDouble(const Operand& src, FloatRegister dest) {
    switch (src.kind()) {
      case Operand::MEM_REG_DISP:
        loadDouble(src.toAddress(), dest);
        break;
      case Operand::MEM_SCALE:
        loadDouble(src.toBaseIndex(), dest);
        break;
      default:
        MOZ_CRASH("unexpected operand kind");
    }
  }
  void moveDouble(FloatRegister src, FloatRegister dest) {
    // Use vmovapd instead of vmovsd to avoid dependencies.
    vmovapd(src, dest);
  }
  void zeroDouble(FloatRegister reg) { vxorpd(reg, reg, reg); }
  void zeroFloat32(FloatRegister reg) { vxorps(reg, reg, reg); }
  void convertFloat32ToDouble(FloatRegister src, FloatRegister dest) {
    vcvtss2sd(src, dest, dest);
  }
  void convertDoubleToFloat32(FloatRegister src, FloatRegister dest) {
    vcvtsd2ss(src, dest, dest);
  }

  void loadInt32x4(const Address& addr, FloatRegister dest) {
    vmovdqa(Operand(addr), dest);
  }
  void loadFloat32x4(const Address& addr, FloatRegister dest) {
    vmovaps(Operand(addr), dest);
  }
  void storeInt32x4(FloatRegister src, const Address& addr) {
    vmovdqa(src, Operand(addr));
  }
  void storeFloat32x4(FloatRegister src, const Address& addr) {
    vmovaps(src, Operand(addr));
  }

  void convertFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest) {
    // Note that if the conversion fails (because the converted result is
    // larger than the maximum signed int32, is less than the least signed
    // int32, or is NaN), this will return the undefined integer value
    // (0x80000000).
    vcvttps2dq(src, dest);
  }
  void convertInt32x4ToFloat32x4(FloatRegister src, FloatRegister dest) {
    vcvtdq2ps(src, dest);
  }

  void bitwiseAndSimd128(const Operand& src, FloatRegister dest) {
    // TODO Using the "ps" variant for all types incurs a domain crossing
    // penalty for integer types and double.
    vandps(src, dest, dest);
  }
  void bitwiseAndNotSimd128(const Operand& src, FloatRegister dest) {
    vandnps(src, dest, dest);
  }
  void bitwiseOrSimd128(const Operand& src, FloatRegister dest) {
    vorps(src, dest, dest);
  }
  void bitwiseXorSimd128(const Operand& src, FloatRegister dest) {
    vxorps(src, dest, dest);
  }
  void zeroSimd128Float(FloatRegister dest) { vxorps(dest, dest, dest); }
  void zeroSimd128Int(FloatRegister dest) { vpxor(dest, dest, dest); }

  template <class T, class Reg>
  inline void loadScalar(const Operand& src, Reg dest);
  template <class T, class Reg>
  inline void storeScalar(Reg src, const Address& dest);
  template <class T>
  inline void loadAlignedVector(const Address& src, FloatRegister dest);
  template <class T>
  inline void storeAlignedVector(FloatRegister src, const Address& dest);

  void loadInt32x1(const Address& src, FloatRegister dest) {
    vmovd(Operand(src), dest);
  }
  void loadInt32x1(const BaseIndex& src, FloatRegister dest) {
    vmovd(Operand(src), dest);
  }
  void loadInt32x2(const Address& src, FloatRegister dest) {
    vmovq(Operand(src), dest);
  }
  void loadInt32x2(const BaseIndex& src, FloatRegister dest) {
    vmovq(Operand(src), dest);
  }
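  // The Int32x3 loads assemble an XYZ0 vector: a 64-bit move brings in the
  // X and Y lanes, a 32-bit move loads Z into the scratch register (zeroing
  // its other lanes), and vmovlhps copies scratch's low pair into the high
  // lanes of dest.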
  void loadInt32x3(const BaseIndex& src, FloatRegister dest) {
    BaseIndex srcZ(src);
    srcZ.offset += 2 * sizeof(int32_t);

    ScratchSimd128Scope scratch(asMasm());
    vmovq(Operand(src), dest);
    vmovd(Operand(srcZ), scratch);
    vmovlhps(scratch, dest, dest);
  }
  void loadInt32x3(const Address& src, FloatRegister dest) {
    Address srcZ(src);
    srcZ.offset += 2 * sizeof(int32_t);

    ScratchSimd128Scope scratch(asMasm());
    vmovq(Operand(src), dest);
    vmovd(Operand(srcZ), scratch);
    vmovlhps(scratch, dest, dest);
  }

  void loadAlignedSimd128Int(const Address& src, FloatRegister dest) {
    vmovdqa(Operand(src), dest);
  }
  void loadAlignedSimd128Int(const Operand& src, FloatRegister dest) {
    vmovdqa(src, dest);
  }
  void storeAlignedSimd128Int(FloatRegister src, const Address& dest) {
    vmovdqa(src, Operand(dest));
  }
  void moveSimd128Int(FloatRegister src, FloatRegister dest) {
    vmovdqa(src, dest);
  }
  FloatRegister reusedInputInt32x4(FloatRegister src, FloatRegister dest) {
    if (HasAVX()) return src;
    moveSimd128Int(src, dest);
    return dest;
  }
  FloatRegister reusedInputAlignedInt32x4(const Operand& src,
                                          FloatRegister dest) {
    if (HasAVX() && src.kind() == Operand::FPREG)
      return FloatRegister::FromCode(src.fpu());
    loadAlignedSimd128Int(src, dest);
    return dest;
  }
  void loadUnalignedSimd128Int(const Address& src, FloatRegister dest) {
    vmovdqu(Operand(src), dest);
  }
  void loadUnalignedSimd128Int(const BaseIndex& src, FloatRegister dest) {
    vmovdqu(Operand(src), dest);
  }
  void loadUnalignedSimd128Int(const Operand& src, FloatRegister dest) {
    vmovdqu(src, dest);
  }

  void storeInt32x1(FloatRegister src, const Address& dest) {
    vmovd(src, Operand(dest));
  }
  void storeInt32x1(FloatRegister src, const BaseIndex& dest) {
    vmovd(src, Operand(dest));
  }
  void storeInt32x2(FloatRegister src, const Address& dest) {
    vmovq(src, Operand(dest));
  }
  void storeInt32x2(FloatRegister src, const BaseIndex& dest) {
    vmovq(src, Operand(dest));
  }
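  // The Int32x3 stores mirror the loads: a 64-bit move writes the X and Y
  // lanes, then vmovhlps copies the high pair (Z, W) of src into scratch's
  // low lanes so a 32-bit move can store Z.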
  void storeInt32x3(FloatRegister src, const Address& dest) {
    Address destZ(dest);
    destZ.offset += 2 * sizeof(int32_t);
    vmovq(src, Operand(dest));
    ScratchSimd128Scope scratch(asMasm());
    vmovhlps(src, scratch, scratch);
    vmovd(scratch, Operand(destZ));
  }
  void storeInt32x3(FloatRegister src, const BaseIndex& dest) {
    BaseIndex destZ(dest);
    destZ.offset += 2 * sizeof(int32_t);
    vmovq(src, Operand(dest));
    ScratchSimd128Scope scratch(asMasm());
    vmovhlps(src, scratch, scratch);
    vmovd(scratch, Operand(destZ));
  }

  void storeUnalignedSimd128Int(FloatRegister src, const Address& dest) {
    vmovdqu(src, Operand(dest));
  }
  void storeUnalignedSimd128Int(FloatRegister src, const BaseIndex& dest) {
    vmovdqu(src, Operand(dest));
  }
  void storeUnalignedSimd128Int(FloatRegister src, const Operand& dest) {
    vmovdqu(src, dest);
  }
  void packedEqualInt32x4(const Operand& src, FloatRegister dest) {
    vpcmpeqd(src, dest, dest);
  }
  void packedGreaterThanInt32x4(const Operand& src, FloatRegister dest) {
    vpcmpgtd(src, dest, dest);
  }
  void packedAddInt8(const Operand& src, FloatRegister dest) {
    vpaddb(src, dest, dest);
  }
  void packedSubInt8(const Operand& src, FloatRegister dest) {
    vpsubb(src, dest, dest);
  }
  void packedAddInt16(const Operand& src, FloatRegister dest) {
    vpaddw(src, dest, dest);
  }
  void packedSubInt16(const Operand& src, FloatRegister dest) {
    vpsubw(src, dest, dest);
  }
  void packedAddInt32(const Operand& src, FloatRegister dest) {
    vpaddd(src, dest, dest);
  }
  void packedSubInt32(const Operand& src, FloatRegister dest) {
    vpsubd(src, dest, dest);
  }
  void packedRcpApproximationFloat32x4(const Operand& src,
                                       FloatRegister dest) {
    // This function computes only an approximation of the result; it might
    // need fixing up if the spec requires a given precision for this
    // operation. TODO See also bug 1068028.
    vrcpps(src, dest);
  }
  void packedRcpSqrtApproximationFloat32x4(const Operand& src,
                                           FloatRegister dest) {
    // TODO See comment above. See also bug 1068028.
    vrsqrtps(src, dest);
  }
  void packedSqrtFloat32x4(const Operand& src, FloatRegister dest) {
    vsqrtps(src, dest);
  }

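  // For the register-count shift forms below, the hardware takes the count
  // from the low 64 bits of src; counts at or above the element width produce
  // zero (or all sign bits, for the arithmetic right shifts).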
  void packedLeftShiftByScalarInt16x8(FloatRegister src, FloatRegister dest) {
    vpsllw(src, dest, dest);
  }
  void packedLeftShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
    vpsllw(count, dest, dest);
  }
  void packedRightShiftByScalarInt16x8(FloatRegister src, FloatRegister dest) {
    vpsraw(src, dest, dest);
  }
  void packedRightShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
    vpsraw(count, dest, dest);
  }
  void packedUnsignedRightShiftByScalarInt16x8(FloatRegister src,
                                               FloatRegister dest) {
    vpsrlw(src, dest, dest);
  }
  void packedUnsignedRightShiftByScalarInt16x8(Imm32 count,
                                               FloatRegister dest) {
    vpsrlw(count, dest, dest);
  }

  void packedLeftShiftByScalarInt32x4(FloatRegister src, FloatRegister dest) {
    vpslld(src, dest, dest);
  }
  void packedLeftShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
    vpslld(count, dest, dest);
  }
  void packedRightShiftByScalarInt32x4(FloatRegister src, FloatRegister dest) {
    vpsrad(src, dest, dest);
  }
  void packedRightShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
    vpsrad(count, dest, dest);
  }
  void packedUnsignedRightShiftByScalarInt32x4(FloatRegister src,
                                               FloatRegister dest) {
    vpsrld(src, dest, dest);
  }
  void packedUnsignedRightShiftByScalarInt32x4(Imm32 count,
                                               FloatRegister dest) {
    vpsrld(count, dest, dest);
  }

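  // As with the Int32x3 loads above, the Float32x3 loads build an XYZ0 vector
  // from a 64-bit load of the XY pair plus a scalar load of Z merged in with
  // vmovlhps.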
  void loadFloat32x3(const Address& src, FloatRegister dest) {
    Address srcZ(src);
    srcZ.offset += 2 * sizeof(float);
    vmovsd(src, dest);
    ScratchSimd128Scope scratch(asMasm());
    vmovss(srcZ, scratch);
    vmovlhps(scratch, dest, dest);
  }
  void loadFloat32x3(const BaseIndex& src, FloatRegister dest) {
    BaseIndex srcZ(src);
    srcZ.offset += 2 * sizeof(float);
    vmovsd(src, dest);
    ScratchSimd128Scope scratch(asMasm());
    vmovss(srcZ, scratch);
    vmovlhps(scratch, dest, dest);
  }

  void loadAlignedSimd128Float(const Address& src, FloatRegister dest) {
    vmovaps(Operand(src), dest);
  }
  void loadAlignedSimd128Float(const Operand& src, FloatRegister dest) {
    vmovaps(src, dest);
  }

  void storeAlignedSimd128Float(FloatRegister src, const Address& dest) {
    vmovaps(src, Operand(dest));
  }
  void moveSimd128Float(FloatRegister src, FloatRegister dest) {
    vmovaps(src, dest);
  }
  FloatRegister reusedInputFloat32x4(FloatRegister src, FloatRegister dest) {
    if (HasAVX()) return src;
    moveSimd128Float(src, dest);
    return dest;
  }
  FloatRegister reusedInputAlignedFloat32x4(const Operand& src,
                                            FloatRegister dest) {
    if (HasAVX() && src.kind() == Operand::FPREG)
      return FloatRegister::FromCode(src.fpu());
    loadAlignedSimd128Float(src, dest);
    return dest;
  }
  void loadUnalignedSimd128Float(const Address& src, FloatRegister dest) {
    vmovups(Operand(src), dest);
  }
  void loadUnalignedSimd128Float(const BaseIndex& src, FloatRegister dest) {
    // Use vmovups, matching the other overloads, to stay in the float domain.
    vmovups(Operand(src), dest);
  }
  void loadUnalignedSimd128Float(const Operand& src, FloatRegister dest) {
    vmovups(src, dest);
  }
  void storeUnalignedSimd128Float(FloatRegister src, const Address& dest) {
    vmovups(src, Operand(dest));
  }
  void storeUnalignedSimd128Float(FloatRegister src, const BaseIndex& dest) {
    vmovups(src, Operand(dest));
  }
  void storeUnalignedSimd128Float(FloatRegister src, const Operand& dest) {
    vmovups(src, dest);
  }
  void packedAddFloat32(const Operand& src, FloatRegister dest) {
    vaddps(src, dest, dest);
  }
  void packedSubFloat32(const Operand& src, FloatRegister dest) {
    vsubps(src, dest, dest);
  }
  void packedMulFloat32(const Operand& src, FloatRegister dest) {
    vmulps(src, dest, dest);
  }
  void packedDivFloat32(const Operand& src, FloatRegister dest) {
    vdivps(src, dest, dest);
  }

  static uint32_t ComputeShuffleMask(uint32_t x = 0, uint32_t y = 1,
                                     uint32_t z = 2, uint32_t w = 3) {
    MOZ_ASSERT(x < 4 && y < 4 && z < 4 && w < 4);
    uint32_t r = (w << 6) | (z << 4) | (y << 2) | (x << 0);
    MOZ_ASSERT(r < 256);
    return r;
  }
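  // For example, ComputeShuffleMask() yields 0xE4 (the identity shuffle),
  // ComputeShuffleMask(0, 0, 0, 0) yields 0x00 (splat lane 0), and
  // ComputeShuffleMask(3, 2, 1, 0) yields 0x1B (reverse the lanes).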

  void shuffleInt32(uint32_t mask, FloatRegister src, FloatRegister dest) {
    vpshufd(mask, src, dest);
  }
  void moveLowInt32(FloatRegister src, Register dest) { vmovd(src, dest); }

  void moveHighPairToLowPairFloat32(FloatRegister src, FloatRegister dest) {
    vmovhlps(src, dest, dest);
  }
  void shuffleFloat32(uint32_t mask, FloatRegister src, FloatRegister dest) {
    // The shuffle instruction on x86 is such that it moves 2 words from
    // the dest and 2 words from the src operands. To simplify things, just
    // clobber the output with the input and apply the instruction
    // afterwards.
    // Note: this is useAtStart-safe because src isn't read afterwards.
    FloatRegister srcCopy = reusedInputFloat32x4(src, dest);
    vshufps(mask, srcCopy, srcCopy, dest);
  }
  void shuffleMix(uint32_t mask, const Operand& src, FloatRegister dest) {
    // Note this uses vshufps, which incurs a cross-domain penalty on CPUs
    // where that applies, but that's the way clang and gcc do it.
    vshufps(mask, src, dest, dest);
  }

  void moveFloatAsDouble(Register src, FloatRegister dest) {
    vmovd(src, dest);
    vcvtss2sd(dest, dest, dest);
  }
  void loadFloatAsDouble(const Address& src, FloatRegister dest) {
    vmovss(src, dest);
    vcvtss2sd(dest, dest, dest);
  }
  void loadFloatAsDouble(const BaseIndex& src, FloatRegister dest) {
    vmovss(src, dest);
    vcvtss2sd(dest, dest, dest);
  }
  void loadFloatAsDouble(const Operand& src, FloatRegister dest) {
    loadFloat32(src, dest);
    vcvtss2sd(dest, dest, dest);
  }
  void loadFloat32(const Address& src, FloatRegister dest) {
    vmovss(src, dest);
  }
  void loadFloat32(const BaseIndex& src, FloatRegister dest) {
    vmovss(src, dest);
  }
  void loadFloat32(const Operand& src, FloatRegister dest) {
    switch (src.kind()) {
      case Operand::MEM_REG_DISP:
        loadFloat32(src.toAddress(), dest);
        break;
      case Operand::MEM_SCALE:
        loadFloat32(src.toBaseIndex(), dest);
        break;
      default:
        MOZ_CRASH("unexpected operand kind");
    }
  }
  void moveFloat32(FloatRegister src, FloatRegister dest) {
    // Use vmovaps instead of vmovss to avoid dependencies.
    vmovaps(src, dest);
  }

  // Checks whether a double is representable as a 32-bit integer. If so, the
  // integer is written to the output register. Otherwise, a bailout is taken
  // to the given snapshot. This function overwrites the scratch float
  // register.
  void convertDoubleToInt32(FloatRegister src, Register dest, Label* fail,
                            bool negativeZeroCheck = true) {
    // Check for -0.0
    if (negativeZeroCheck) branchNegativeZero(src, dest, fail);

    ScratchDoubleScope scratch(asMasm());
    vcvttsd2si(src, dest);
    convertInt32ToDouble(dest, scratch);
    vucomisd(scratch, src);
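    // An unordered comparison (NaN) sets PF; any value that didn't round-trip
    // exactly (fractional or out of int32 range) compares not-equal.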
    j(Assembler::Parity, fail);
    j(Assembler::NotEqual, fail);
  }

  // Checks whether a float32 is representable as a 32-bit integer. If so, the
  // integer is written to the output register. Otherwise, a bailout is taken
  // to the given snapshot. This function overwrites the scratch float
  // register.
  void convertFloat32ToInt32(FloatRegister src, Register dest, Label* fail,
                             bool negativeZeroCheck = true) {
    // Check for -0.0
    if (negativeZeroCheck) branchNegativeZeroFloat32(src, dest, fail);

    ScratchFloat32Scope scratch(asMasm());
    vcvttss2si(src, dest);
    convertInt32ToFloat32(dest, scratch);
    vucomiss(scratch, src);
    j(Assembler::Parity, fail);
    j(Assembler::NotEqual, fail);
  }

  inline void clampIntToUint8(Register reg);

  bool maybeInlineDouble(double d, FloatRegister dest) {
    // Loading zero with xor is specially optimized in hardware.
    if (mozilla::IsPositiveZero(d)) {
      zeroDouble(dest);
      return true;
    }

    // It is also possible to load several common constants using vpcmpeqw
    // to get all ones and then vpsllq and vpsrlq to get zeros at the ends,
    // as described in "13.4 Generating constants" of
    // "2. Optimizing subroutines in assembly language" by Agner Fog, and as
    // previously implemented here. However, with x86 and x64 both using
    // constant pool loads for double constants, this is probably only
    // worthwhile in cases where a load is likely to be delayed.

    return false;
  }

  bool maybeInlineFloat(float f, FloatRegister dest) {
    // See comment above.
    if (mozilla::IsPositiveZero(f)) {
      zeroFloat32(dest);
      return true;
    }
    return false;
  }

  bool maybeInlineSimd128Int(const SimdConstant& v, const FloatRegister& dest) {
    static const SimdConstant zero = SimdConstant::SplatX4(0);
    static const SimdConstant minusOne = SimdConstant::SplatX4(-1);
    if (v == zero) {
      zeroSimd128Int(dest);
      return true;
    }
    if (v == minusOne) {
      vpcmpeqw(Operand(dest), dest, dest);
      return true;
    }
    return false;
  }
  bool maybeInlineSimd128Float(const SimdConstant& v,
                               const FloatRegister& dest) {
    static const SimdConstant zero = SimdConstant::SplatX4(0.f);
    if (v == zero) {
      // This won't get inlined if the SimdConstant v contains -0 in any
      // lane, as operator== here does a memcmp.
      zeroSimd128Float(dest);
      return true;
    }
    return false;
  }

  void convertBoolToInt32(Register source, Register dest) {
    // Note that C++ bool is only 1 byte, so zero extend it to clear the
    // higher-order bits.
    movzbl(source, dest);
  }

  void emitSet(Assembler::Condition cond, Register dest,
               Assembler::NaNCond ifNaN = Assembler::NaN_HandledByCond) {
    if (AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(dest)) {
      // If the register we're defining is a single byte register,
      // take advantage of the setCC instruction
      setCC(cond, dest);
      movzbl(dest, dest);

      if (ifNaN != Assembler::NaN_HandledByCond) {
        Label noNaN;
        j(Assembler::NoParity, &noNaN);
        mov(ImmWord(ifNaN == Assembler::NaN_IsTrue), dest);
        bind(&noNaN);
      }
    } else {
      Label end;
      Label ifFalse;

      if (ifNaN == Assembler::NaN_IsFalse) j(Assembler::Parity, &ifFalse);
      // Note a subtlety here: FLAGS is live at this point, and the mov
      // interface doesn't guarantee to preserve FLAGS (for instance, a zero
      // ImmWord may be lowered to a flag-clobbering xor). Use movl instead
      // of mov, because the movl instruction preserves FLAGS.
      movl(Imm32(1), dest);
      j(cond, &end);
      if (ifNaN == Assembler::NaN_IsTrue) j(Assembler::Parity, &end);
      bind(&ifFalse);
      mov(ImmWord(0), dest);

      bind(&end);
    }
  }

  // Emit a JMP that can be toggled to a CMP. See ToggleToJmp(), ToggleToCmp().
  CodeOffset toggledJump(Label* label) {
    CodeOffset offset(size());
    jump(label);
    return offset;
  }

  template <typename T>
  void computeEffectiveAddress(const T& address, Register dest) {
    lea(Operand(address), dest);
  }

  void checkStackAlignment() {
    // Exists for ARM compatibility.
  }

  CodeOffset labelForPatch() { return CodeOffset(size()); }

  void abiret() { ret(); }

 protected:
  bool buildOOLFakeExitFrame(void* fakeReturnAddr);
};

// Specialize for float to use movaps. Use movdqa for everything else.
template <>
inline void MacroAssemblerX86Shared::loadAlignedVector<float>(
    const Address& src, FloatRegister dest) {
  loadAlignedSimd128Float(src, dest);
}

template <typename T>
inline void MacroAssemblerX86Shared::loadAlignedVector(const Address& src,
                                                       FloatRegister dest) {
  loadAlignedSimd128Int(src, dest);
}

// Specialize for float to use movaps. Use movdqa for everything else.
template <>
inline void MacroAssemblerX86Shared::storeAlignedVector<float>(
    FloatRegister src, const Address& dest) {
  storeAlignedSimd128Float(src, dest);
}

template <typename T>
inline void MacroAssemblerX86Shared::storeAlignedVector(FloatRegister src,
                                                        const Address& dest) {
  storeAlignedSimd128Int(src, dest);
}

template <>
inline void MacroAssemblerX86Shared::loadScalar<int8_t>(const Operand& src,
                                                        Register dest) {
  load8ZeroExtend(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::loadScalar<int16_t>(const Operand& src,
                                                         Register dest) {
  load16ZeroExtend(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::loadScalar<int32_t>(const Operand& src,
                                                         Register dest) {
  load32(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::loadScalar<float>(const Operand& src,
                                                       FloatRegister dest) {
  loadFloat32(src, dest);
}

template <>
inline void MacroAssemblerX86Shared::storeScalar<int8_t>(Register src,
                                                         const Address& dest) {
  store8(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::storeScalar<int16_t>(
    Register src, const Address& dest) {
  store16(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::storeScalar<int32_t>(
    Register src, const Address& dest) {
  store32(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::storeScalar<float>(FloatRegister src,
                                                        const Address& dest) {
  vmovss(src, dest);
}

}  // namespace jit
}  // namespace js

#endif /* jit_x86_shared_MacroAssembler_x86_shared_h */