/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef jit_x86_shared_MacroAssembler_x86_shared_h
#define jit_x86_shared_MacroAssembler_x86_shared_h

#include "mozilla/Casting.h"

#if defined(JS_CODEGEN_X86)
#  include "jit/x86/Assembler-x86.h"
#elif defined(JS_CODEGEN_X64)
#  include "jit/x64/Assembler-x64.h"
#endif

namespace js {
namespace jit {

class MacroAssembler;

class MacroAssemblerX86Shared : public Assembler {
 private:
  // Perform a downcast. Should be removed by Bug 996602.
  MacroAssembler& asMasm();
  const MacroAssembler& asMasm() const;

 public:
  typedef Vector<CodeOffset, 0, SystemAllocPolicy> UsesVector;

 protected:
  // For Double, Float and SimdData, make the move ctors explicit so that MSVC
  // knows what to use instead of copying these data structures.
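  // The copy constructors are deleted so that a constant's use-list is never
  // duplicated behind our backs; Vector growth relies on the move ctor.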
  template <class T>
  struct Constant {
    using Pod = T;

    T value;
    UsesVector uses;

    explicit Constant(const T& value) : value(value) {}
    Constant(Constant<T>&& other)
        : value(other.value), uses(std::move(other.uses)) {}
    explicit Constant(const Constant<T>&) = delete;
  };

  // Containers use SystemAllocPolicy since wasm releases memory after each
  // function is compiled, and these need to live until after all functions
  // are compiled.
  using Double = Constant<double>;
  Vector<Double, 0, SystemAllocPolicy> doubles_;
  typedef HashMap<double, size_t, DefaultHasher<double>, SystemAllocPolicy>
      DoubleMap;
  DoubleMap doubleMap_;

  using Float = Constant<float>;
  Vector<Float, 0, SystemAllocPolicy> floats_;
  typedef HashMap<float, size_t, DefaultHasher<float>, SystemAllocPolicy>
      FloatMap;
  FloatMap floatMap_;

  struct SimdData : public Constant<SimdConstant> {
    explicit SimdData(SimdConstant d) : Constant<SimdConstant>(d) {}
    SimdData(SimdData&& d) : Constant<SimdConstant>(std::move(d)) {}
    explicit SimdData(const SimdData&) = delete;
    SimdConstant::Type type() const { return value.type(); }
  };

  Vector<SimdData, 0, SystemAllocPolicy> simds_;
  typedef HashMap<SimdConstant, size_t, SimdConstant, SystemAllocPolicy>
      SimdMap;
  SimdMap simdMap_;

  template <class T, class Map>
  T* getConstant(const typename T::Pod& value, Map& map,
                 Vector<T, 0, SystemAllocPolicy>& vec);

  Float* getFloat(float f);
  Double* getDouble(double d);
  SimdData* getSimdData(const SimdConstant& v);

 public:
  using Assembler::call;

  MacroAssemblerX86Shared() = default;

  bool appendRawCode(const uint8_t* code, size_t numBytes) {
    return masm.appendRawCode(code, numBytes);
  }

  void addToPCRel4(uint32_t offset, int32_t bias) {
    return masm.addToPCRel4(offset, bias);
  }

  // Evaluate srcDest = minmax<isMax>{Float32,Double}(srcDest, second).
  // Checks for NaN if canBeNaN is true.
  void minMaxDouble(FloatRegister srcDest, FloatRegister second, bool canBeNaN,
                    bool isMax);
  void minMaxFloat32(FloatRegister srcDest, FloatRegister second, bool canBeNaN,
                     bool isMax);

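  // Conditions that carry DoubleConditionBitInvert (e.g. DoubleLessThan) are
  // realized as the mirrored "above" condition with the operands swapped,
  // since ucomisd/ucomiss set the flags like an unsigned integer compare.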
  void compareDouble(DoubleCondition cond, FloatRegister lhs,
                     FloatRegister rhs) {
    if (cond & DoubleConditionBitInvert) {
      vucomisd(lhs, rhs);
    } else {
      vucomisd(rhs, lhs);
    }
  }

  void compareFloat(DoubleCondition cond, FloatRegister lhs,
                    FloatRegister rhs) {
    if (cond & DoubleConditionBitInvert) {
      vucomiss(lhs, rhs);
    } else {
      vucomiss(rhs, lhs);
    }
  }

  void branchNegativeZero(FloatRegister reg, Register scratch, Label* label,
                          bool maybeNonZero = true);
  void branchNegativeZeroFloat32(FloatRegister reg, Register scratch,
                                 Label* label);

  void move32(Imm32 imm, Register dest) {
    // Use the ImmWord version of mov to register, which has special
    // optimizations. Casting to uint32_t here ensures that the value
    // is zero-extended.
    mov(ImmWord(uint32_t(imm.value)), dest);
  }
  void move32(Imm32 imm, const Operand& dest) { movl(imm, dest); }
  void move32(Register src, Register dest) { movl(src, dest); }
  void move32(Register src, const Operand& dest) { movl(src, dest); }
  void test32(Register lhs, Register rhs) { testl(rhs, lhs); }
  void test32(const Address& addr, Imm32 imm) { testl(imm, Operand(addr)); }
  void test32(const Operand& lhs, Imm32 imm) { testl(imm, lhs); }
  void test32(Register lhs, Imm32 rhs) { testl(rhs, lhs); }
  void cmp32(Register lhs, Imm32 rhs) { cmpl(rhs, lhs); }
  void cmp32(Register lhs, Register rhs) { cmpl(rhs, lhs); }
  void cmp32(const Address& lhs, Register rhs) { cmp32(Operand(lhs), rhs); }
  void cmp32(const Address& lhs, Imm32 rhs) { cmp32(Operand(lhs), rhs); }
  void cmp32(const Operand& lhs, Imm32 rhs) { cmpl(rhs, lhs); }
  void cmp32(const Operand& lhs, Register rhs) { cmpl(rhs, lhs); }
  void cmp32(Register lhs, const Operand& rhs) { cmpl(rhs, lhs); }

  void atomic_inc32(const Operand& addr) { lock_incl(addr); }
  void atomic_dec32(const Operand& addr) { lock_decl(addr); }

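  // x86-TSO only reorders stores ahead of later loads, so a StoreLoad
  // barrier is the only fence needed; a lock-prefixed add of zero to the
  // top of the stack fences like mfence on pre-SSE2 CPUs.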
  void storeLoadFence() {
    // This implementation follows Linux.
    if (HasSSE2()) {
      masm.mfence();
    } else {
      lock_addl(Imm32(0), Operand(Address(esp, 0)));
    }
  }

  void branch16(Condition cond, Register lhs, Register rhs, Label* label) {
    cmpw(rhs, lhs);
    j(cond, label);
  }
  void branchTest16(Condition cond, Register lhs, Register rhs, Label* label) {
    testw(rhs, lhs);
    j(cond, label);
  }

  void jump(Label* label) { jmp(label); }
  void jump(JitCode* code) { jmp(code); }
  void jump(TrampolinePtr code) { jmp(ImmPtr(code.value)); }
  void jump(ImmPtr ptr) { jmp(ptr); }
  void jump(Register reg) { jmp(Operand(reg)); }
  void jump(const Address& addr) { jmp(Operand(addr)); }

  void convertInt32ToDouble(Register src, FloatRegister dest) {
    // vcvtsi2sd and friends write only part of their output register, which
    // causes slowdowns on out-of-order processors. Explicitly break
    // dependencies with vxorpd (and vxorps elsewhere), which are handled
    // specially in modern CPUs, for this purpose. See sections 8.14, 9.8,
    // 10.8, 12.9, 13.16, 14.14, and 15.8 of Agner's Microarchitecture
    // document.
    zeroDouble(dest);
    vcvtsi2sd(src, dest, dest);
  }
  void convertInt32ToDouble(const Address& src, FloatRegister dest) {
    convertInt32ToDouble(Operand(src), dest);
  }
  void convertInt32ToDouble(const BaseIndex& src, FloatRegister dest) {
    convertInt32ToDouble(Operand(src), dest);
  }
  void convertInt32ToDouble(const Operand& src, FloatRegister dest) {
    // Clear the output register first to break dependencies; see above.
    zeroDouble(dest);
    vcvtsi2sd(Operand(src), dest, dest);
  }
  void convertInt32ToFloat32(Register src, FloatRegister dest) {
    // Clear the output register first to break dependencies; see above.
    zeroFloat32(dest);
    vcvtsi2ss(src, dest, dest);
  }
  void convertInt32ToFloat32(const Address& src, FloatRegister dest) {
    convertInt32ToFloat32(Operand(src), dest);
  }
  void convertInt32ToFloat32(const Operand& src, FloatRegister dest) {
    // Clear the output register first to break dependencies; see above.
    zeroFloat32(dest);
    vcvtsi2ss(src, dest, dest);
  }
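  // ucomisd sets ZF both when the operands are equal and when the compare
  // is unordered (NaN), so NaN correctly lands on the falsy side here.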
  Condition testDoubleTruthy(bool truthy, FloatRegister reg) {
    ScratchDoubleScope scratch(asMasm());
    zeroDouble(scratch);
    vucomisd(reg, scratch);
    return truthy ? NonZero : Zero;
  }

  // Class which ensures that registers used in byte ops are compatible with
  // such instructions, even if the original register passed in wasn't. This
  // only applies to x86, as on x64 all registers are valid single byte regs.
  // This doesn't lead to great code but helps to simplify code generation.
  //
  // Note that this can currently only be used in cases where the register is
  // read from by the guarded instruction, not written to.
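  // See store8(Register, T) below for a typical use.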
  class AutoEnsureByteRegister {
    MacroAssemblerX86Shared* masm;
    Register original_;
    Register substitute_;

   public:
    template <typename T>
    AutoEnsureByteRegister(MacroAssemblerX86Shared* masm, T address,
                           Register reg)
        : masm(masm), original_(reg) {
      AllocatableGeneralRegisterSet singleByteRegs(Registers::SingleByteRegs);
      if (singleByteRegs.has(reg)) {
        substitute_ = reg;
      } else {
        MOZ_ASSERT(address.base != StackPointer);
        do {
          substitute_ = singleByteRegs.takeAny();
        } while (Operand(address).containsReg(substitute_));

        masm->push(substitute_);
        masm->mov(reg, substitute_);
      }
    }

    ~AutoEnsureByteRegister() {
      if (original_ != substitute_) {
        masm->pop(substitute_);
      }
    }

    Register reg() { return substitute_; }
  };

  void load8ZeroExtend(const Operand& src, Register dest) { movzbl(src, dest); }
  void load8ZeroExtend(const Address& src, Register dest) {
    movzbl(Operand(src), dest);
  }
  void load8ZeroExtend(const BaseIndex& src, Register dest) {
    movzbl(Operand(src), dest);
  }
  void load8SignExtend(const Operand& src, Register dest) { movsbl(src, dest); }
  void load8SignExtend(const Address& src, Register dest) {
    movsbl(Operand(src), dest);
  }
  void load8SignExtend(const BaseIndex& src, Register dest) {
    movsbl(Operand(src), dest);
  }
  template <typename T>
  void store8(Imm32 src, const T& dest) {
    movb(src, Operand(dest));
  }
  template <typename T>
  void store8(Register src, const T& dest) {
    AutoEnsureByteRegister ensure(this, dest, src);
    movb(ensure.reg(), Operand(dest));
  }
  void load16ZeroExtend(const Operand& src, Register dest) {
    movzwl(src, dest);
  }
  void load16ZeroExtend(const Address& src, Register dest) {
    movzwl(Operand(src), dest);
  }
  void load16ZeroExtend(const BaseIndex& src, Register dest) {
    movzwl(Operand(src), dest);
  }
  template <typename S>
  void load16UnalignedZeroExtend(const S& src, Register dest) {
    load16ZeroExtend(src, dest);
  }
  template <typename S, typename T>
  void store16(const S& src, const T& dest) {
    movw(src, Operand(dest));
  }
  template <typename S, typename T>
  void store16Unaligned(const S& src, const T& dest) {
    store16(src, dest);
  }
  void load16SignExtend(const Operand& src, Register dest) {
    movswl(src, dest);
  }
  void load16SignExtend(const Address& src, Register dest) {
    movswl(Operand(src), dest);
  }
  void load16SignExtend(const BaseIndex& src, Register dest) {
    movswl(Operand(src), dest);
  }
  template <typename S>
  void load16UnalignedSignExtend(const S& src, Register dest) {
    load16SignExtend(src, dest);
  }
  void load32(const Address& address, Register dest) {
    movl(Operand(address), dest);
  }
  void load32(const BaseIndex& src, Register dest) { movl(Operand(src), dest); }
  void load32(const Operand& src, Register dest) { movl(src, dest); }
  template <typename S>
  void load32Unaligned(const S& src, Register dest) {
    load32(src, dest);
  }
  template <typename S, typename T>
  void store32(const S& src, const T& dest) {
    movl(src, Operand(dest));
  }
  template <typename S, typename T>
  void store32_NoSecondScratch(const S& src, const T& dest) {
    store32(src, dest);
  }
  template <typename S, typename T>
  void store32Unaligned(const S& src, const T& dest) {
    store32(src, dest);
  }
  void loadDouble(const Address& src, FloatRegister dest) { vmovsd(src, dest); }
  void loadDouble(const BaseIndex& src, FloatRegister dest) {
    vmovsd(src, dest);
  }
  void loadDouble(const Operand& src, FloatRegister dest) {
    switch (src.kind()) {
      case Operand::MEM_REG_DISP:
        loadDouble(src.toAddress(), dest);
        break;
      case Operand::MEM_SCALE:
        loadDouble(src.toBaseIndex(), dest);
        break;
      default:
        MOZ_CRASH("unexpected operand kind");
    }
  }
  void moveDouble(FloatRegister src, FloatRegister dest) {
    // Use vmovapd instead of vmovsd to avoid dependencies.
    vmovapd(src, dest);
  }
  void zeroDouble(FloatRegister reg) { vxorpd(reg, reg, reg); }
  void zeroFloat32(FloatRegister reg) { vxorps(reg, reg, reg); }
  void convertFloat32ToDouble(FloatRegister src, FloatRegister dest) {
    vcvtss2sd(src, dest, dest);
  }
  void convertDoubleToFloat32(FloatRegister src, FloatRegister dest) {
    vcvtsd2ss(src, dest, dest);
  }

  void loadInt32x4(const Address& addr, FloatRegister dest) {
    vmovdqa(Operand(addr), dest);
  }
  void loadFloat32x4(const Address& addr, FloatRegister dest) {
    vmovaps(Operand(addr), dest);
  }
  void storeInt32x4(FloatRegister src, const Address& addr) {
    vmovdqa(src, Operand(addr));
  }
  void storeFloat32x4(FloatRegister src, const Address& addr) {
    vmovaps(src, Operand(addr));
  }

  void convertFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest) {
    // Note that if the conversion failed (because the converted
    // result is larger than the maximum signed int32, or less than the
    // least signed int32, or NaN), this will return the undefined integer
    // value (0x80000000).
    vcvttps2dq(src, dest);
  }
  void convertInt32x4ToFloat32x4(FloatRegister src, FloatRegister dest) {
    vcvtdq2ps(src, dest);
  }

  // SIMD methods, defined in MacroAssembler-x86-shared-SIMD.cpp.
  void checkedConvertFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest,
                                        Register temp, Label* oolCheck,
                                        Label* rejoin);
  void oolConvertFloat32x4ToInt32x4(FloatRegister src, Register temp,
                                    Label* rejoin, Label* onConversionError);
  void checkedConvertFloat32x4ToUint32x4(FloatRegister src, FloatRegister dest,
                                         Register temp, FloatRegister tempF,
                                         Label* failed);

  void unsignedConvertInt32x4ToFloat32x4(FloatRegister src, FloatRegister dest);

  void truncSatFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest);
  void unsignedTruncSatFloat32x4ToInt32x4(FloatRegister src, FloatRegister temp,
                                          FloatRegister dest);

  void createInt32x4(Register lane0, Register lane1, Register lane2,
                     Register lane3, FloatRegister dest);
  void createFloat32x4(FloatRegister lane0, FloatRegister lane1,
                       FloatRegister lane2, FloatRegister lane3,
                       FloatRegister temp, FloatRegister output);

  void splatX16(Register input, FloatRegister output);
  void splatX8(Register input, FloatRegister output);
  void splatX4(Register input, FloatRegister output);
  void splatX4(FloatRegister input, FloatRegister output);
  void splatX2(FloatRegister input, FloatRegister output);

  void reinterpretSimd(bool isIntegerLaneType, FloatRegister input,
                       FloatRegister output);

  void extractLaneInt32x4(FloatRegister input, Register output, unsigned lane);
  void extractLaneFloat32x4(FloatRegister input, FloatRegister output,
                            unsigned lane);
  void extractLaneFloat64x2(FloatRegister input, FloatRegister output,
                            unsigned lane);
  void extractLaneInt16x8(FloatRegister input, Register output, unsigned lane,
                          SimdSign sign);
  void extractLaneInt8x16(FloatRegister input, Register output, unsigned lane,
                          SimdSign sign);
  void extractLaneSimdBool(FloatRegister input, Register output,
                           unsigned numLanes, unsigned lane);

  void insertLaneSimdInt(FloatRegister input, Register value,
                         FloatRegister output, unsigned lane,
                         unsigned numLanes);
  void insertLaneFloat32x4(FloatRegister input, FloatRegister value,
                           FloatRegister output, unsigned lane);
  void insertLaneFloat64x2(FloatRegister input, FloatRegister value,
                           FloatRegister output, unsigned lane);

  void allTrueSimdBool(FloatRegister input, Register output);
  void anyTrueSimdBool(FloatRegister input, Register output);

  void swizzleInt32x4(FloatRegister input, FloatRegister output,
                      unsigned lanes[4]);
  void swizzleFloat32x4(FloatRegister input, FloatRegister output,
                        unsigned lanes[4]);
  void oldSwizzleInt8x16(FloatRegister input, FloatRegister output,
                         const mozilla::Maybe<Register>& temp,
                         int8_t lanes[16]);

  void shuffleX4(FloatRegister lhs, Operand rhs, FloatRegister out,
                 const mozilla::Maybe<FloatRegister>& maybeTemp,
                 unsigned lanes[4]);
  void shuffleInt8x16(FloatRegister lhs, FloatRegister rhs,
                      FloatRegister output,
                      const mozilla::Maybe<FloatRegister>& maybeFloatTemp,
                      const mozilla::Maybe<Register>& maybeTemp,
                      const uint8_t lanes[16]);
  void blendInt8x16(FloatRegister lhs, FloatRegister rhs, FloatRegister output,
                    FloatRegister temp, const uint8_t lanes[16]);
  void blendInt16x8(FloatRegister lhs, FloatRegister rhs, FloatRegister output,
                    const uint16_t lanes[8]);

  void compareInt8x16(FloatRegister lhs, Operand rhs, Assembler::Condition cond,
                      FloatRegister output);
  void unsignedCompareInt8x16(FloatRegister lhs, Operand rhs,
                              Assembler::Condition cond, FloatRegister output,
                              FloatRegister tmp1, FloatRegister tmp2);
  void compareInt16x8(FloatRegister lhs, Operand rhs, Assembler::Condition cond,
                      FloatRegister output);
  void unsignedCompareInt16x8(FloatRegister lhs, Operand rhs,
                              Assembler::Condition cond, FloatRegister output,
                              FloatRegister tmp1, FloatRegister tmp2);
  void compareInt32x4(FloatRegister lhs, Operand rhs, Assembler::Condition cond,
                      FloatRegister output);
  void unsignedCompareInt32x4(FloatRegister lhs, Operand rhs,
                              Assembler::Condition cond, FloatRegister output,
                              FloatRegister tmp1, FloatRegister tmp2);
  void compareFloat32x4(FloatRegister lhs, Operand rhs,
                        Assembler::Condition cond, FloatRegister output);
  void compareFloat64x2(FloatRegister lhs, Operand rhs,
                        Assembler::Condition cond, FloatRegister output);

  void mulInt32x4(FloatRegister lhs, Operand rhs,
                  const mozilla::Maybe<FloatRegister>& temp,
                  FloatRegister output);

  void negFloat32x4(Operand in, FloatRegister out);
  void negFloat64x2(Operand in, FloatRegister out);

  void notInt8x16(Operand in, FloatRegister out);
  void notInt16x8(Operand in, FloatRegister out);
  void notInt32x4(Operand in, FloatRegister out);
  void notFloat32x4(Operand in, FloatRegister out);

  void minFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister output);
  void maxFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister temp,
                    FloatRegister output);
  void minNumFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister temp,
                       FloatRegister output);
  void maxNumFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister temp,
                       FloatRegister output);

  void minFloat64x2(FloatRegister lhs, Operand rhs, FloatRegister output);
  void maxFloat64x2(FloatRegister lhs, Operand rhs, FloatRegister temp,
                    FloatRegister output);

  void absFloat32x4(Operand in, FloatRegister out);
  void absFloat64x2(Operand in, FloatRegister out);

  void bitwiseAndFloat32x4(FloatRegister lhs, const Operand& rhs,
                           FloatRegister dest) {
    vandps(rhs, lhs, dest);
  }
  void bitwiseAndSimdInt(FloatRegister lhs, const Operand& rhs,
                         FloatRegister dest) {
    vpand(rhs, lhs, dest);
  }

  void bitwiseOrFloat32x4(FloatRegister lhs, const Operand& rhs,
                          FloatRegister dest) {
    vorps(rhs, lhs, dest);
  }
  void bitwiseOrSimdInt(FloatRegister lhs, const Operand& rhs,
                        FloatRegister dest) {
    vpor(rhs, lhs, dest);
  }

  void bitwiseXorFloat32x4(FloatRegister lhs, const Operand& rhs,
                           FloatRegister dest) {
    vxorps(rhs, lhs, dest);
  }
  void bitwiseXorSimdInt(FloatRegister lhs, const Operand& rhs,
                         FloatRegister dest) {
    vpxor(rhs, lhs, dest);
  }

  void bitwiseAndNotFloat32x4(FloatRegister lhs, const Operand& rhs,
                              FloatRegister dest) {
    vandnps(rhs, lhs, dest);
  }
  void bitwiseAndNotSimdInt(FloatRegister lhs, const Operand& rhs,
                            FloatRegister dest) {
    vpandn(rhs, lhs, dest);
  }

  void zeroSimd128Float(FloatRegister dest) { vxorps(dest, dest, dest); }
  void zeroSimd128Int(FloatRegister dest) { vpxor(dest, dest, dest); }

  void selectSimd128(FloatRegister mask, FloatRegister onTrue,
                     FloatRegister onFalse, FloatRegister temp,
                     FloatRegister output);
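  // With AVX, vblendvps selects each lane from onTrue or onFalse based on
  // the sign bit of the corresponding mask lane, so no temp is needed.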
  void selectX4(FloatRegister mask, FloatRegister onTrue, FloatRegister onFalse,
                FloatRegister temp, FloatRegister output) {
    if (AssemblerX86Shared::HasAVX()) {
      vblendvps(mask, onTrue, onFalse, output);
    } else {
      selectSimd128(mask, onTrue, onFalse, temp, output);
    }
  }

  template <class T, class Reg>
  inline void loadScalar(const Operand& src, Reg dest);
  template <class T, class Reg>
  inline void storeScalar(Reg src, const Address& dest);
  template <class T>
  inline void loadAlignedVector(const Address& src, FloatRegister dest);
  template <class T>
  inline void storeAlignedVector(FloatRegister src, const Address& dest);

  void loadAlignedSimd128Int(const Address& src, FloatRegister dest) {
    vmovdqa(Operand(src), dest);
  }
  void loadAlignedSimd128Int(const Operand& src, FloatRegister dest) {
    vmovdqa(src, dest);
  }
  void storeAlignedSimd128Int(FloatRegister src, const Address& dest) {
    vmovdqa(src, Operand(dest));
  }
  void moveSimd128Int(FloatRegister src, FloatRegister dest) {
    vmovdqa(src, dest);
  }
  FloatRegister reusedInputInt32x4(FloatRegister src, FloatRegister dest) {
    if (HasAVX()) {
      return src;
    }
    moveSimd128Int(src, dest);
    return dest;
  }
  FloatRegister reusedInputAlignedInt32x4(const Operand& src,
                                          FloatRegister dest) {
    if (HasAVX() && src.kind() == Operand::FPREG) {
      return FloatRegister::FromCode(src.fpu());
    }
    loadAlignedSimd128Int(src, dest);
    return dest;
  }
  void loadUnalignedSimd128Int(const Address& src, FloatRegister dest) {
    vmovdqu(Operand(src), dest);
  }
  void loadUnalignedSimd128Int(const BaseIndex& src, FloatRegister dest) {
    vmovdqu(Operand(src), dest);
  }
  void loadUnalignedSimd128Int(const Operand& src, FloatRegister dest) {
    vmovdqu(src, dest);
  }
  void storeUnalignedSimd128Int(FloatRegister src, const Address& dest) {
    vmovdqu(src, Operand(dest));
  }
  void storeUnalignedSimd128Int(FloatRegister src, const BaseIndex& dest) {
    vmovdqu(src, Operand(dest));
  }
  void storeUnalignedSimd128Int(FloatRegister src, const Operand& dest) {
    vmovdqu(src, dest);
  }
  void packedEqualInt32x4(const Operand& src, FloatRegister dest) {
    vpcmpeqd(src, dest, dest);
  }
  void packedGreaterThanInt32x4(const Operand& src, FloatRegister dest) {
    vpcmpgtd(src, dest, dest);
  }
  void packedAddInt8(const Operand& src, FloatRegister dest) {
    vpaddb(src, dest, dest);
  }
  void packedSubInt8(const Operand& src, FloatRegister dest) {
    vpsubb(src, dest, dest);
  }
  void packedAddInt16(const Operand& src, FloatRegister dest) {
    vpaddw(src, dest, dest);
  }
  void packedSubInt16(const Operand& src, FloatRegister dest) {
    vpsubw(src, dest, dest);
  }
  void packedAddInt32(const Operand& src, FloatRegister dest) {
    vpaddd(src, dest, dest);
  }
  void packedSubInt32(const Operand& src, FloatRegister dest) {
    vpsubd(src, dest, dest);
  }
  void packedRcpApproximationFloat32x4(const Operand& src, FloatRegister dest) {
    // This function computes an approximation of the result; it might need
    // fixing up if the spec requires a given precision for this operation.
    // TODO See also bug 1068028.
    vrcpps(src, dest);
  }
  void packedRcpSqrtApproximationFloat32x4(const Operand& src,
                                           FloatRegister dest) {
    // TODO See comment above. See also bug 1068028.
    vrsqrtps(src, dest);
  }

 private:
  void packedShiftByScalarInt8x16(
      FloatRegister in, Register count, Register temp, FloatRegister xtmp,
      FloatRegister dest,
      void (MacroAssemblerX86Shared::*shift)(FloatRegister, FloatRegister,
                                             FloatRegister),
      void (MacroAssemblerX86Shared::*extend)(const Operand&, FloatRegister));

 public:
  void packedLeftShiftByScalarInt8x16(FloatRegister in, Register count,
                                      Register temp, FloatRegister xtmp,
                                      FloatRegister dest);
  void packedLeftShiftByScalarInt8x16(Imm32 count, FloatRegister src,
                                      FloatRegister dest);
  void packedRightShiftByScalarInt8x16(FloatRegister in, Register count,
                                       Register temp, FloatRegister xtmp,
                                       FloatRegister dest);
  void packedRightShiftByScalarInt8x16(Imm32 count, FloatRegister src,
                                       FloatRegister temp, FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt8x16(FloatRegister in, Register count,
                                               Register temp,
                                               FloatRegister xtmp,
                                               FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt8x16(Imm32 count, FloatRegister src,
                                               FloatRegister dest);

  void packedLeftShiftByScalarInt16x8(FloatRegister in, Register count,
                                      Register temp, FloatRegister dest);
  void packedRightShiftByScalarInt16x8(FloatRegister in, Register count,
                                       Register temp, FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt16x8(FloatRegister in, Register count,
                                               Register temp,
                                               FloatRegister dest);

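  // The immediate forms below mask the shift count to the lane width,
  // matching wasm's modulo-lane-width semantics for out-of-range counts.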
  void packedLeftShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
    count.value &= 15;
    vpsllw(count, dest, dest);
  }
  void packedRightShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
    count.value &= 15;
    vpsraw(count, dest, dest);
  }
  void packedUnsignedRightShiftByScalarInt16x8(Imm32 count,
                                               FloatRegister dest) {
    count.value &= 15;
    vpsrlw(count, dest, dest);
  }

  void packedLeftShiftByScalarInt32x4(FloatRegister in, Register count,
                                      Register temp, FloatRegister dest);
  void packedRightShiftByScalarInt32x4(FloatRegister in, Register count,
                                       Register temp, FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt32x4(FloatRegister in, Register count,
                                               Register temp,
                                               FloatRegister dest);
  void packedLeftShiftByScalarInt64x2(FloatRegister in, Register count,
                                      Register temp, FloatRegister dest);
  void packedRightShiftByScalarInt64x2(FloatRegister in, Register count,
                                       Register temp1, FloatRegister temp2,
                                       FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt64x2(FloatRegister in, Register count,
                                               Register temp,
                                               FloatRegister dest);

  void packedLeftShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
    count.value &= 31;
    vpslld(count, dest, dest);
  }
  void packedRightShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
    count.value &= 31;
    vpsrad(count, dest, dest);
  }
  void packedUnsignedRightShiftByScalarInt32x4(Imm32 count,
                                               FloatRegister dest) {
    count.value &= 31;
    vpsrld(count, dest, dest);
  }

  void loadAlignedSimd128Float(const Address& src, FloatRegister dest) {
    vmovaps(Operand(src), dest);
  }
  void loadAlignedSimd128Float(const Operand& src, FloatRegister dest) {
    vmovaps(src, dest);
  }

  void storeAlignedSimd128Float(FloatRegister src, const Address& dest) {
    vmovaps(src, Operand(dest));
  }
  void moveSimd128Float(FloatRegister src, FloatRegister dest) {
    vmovaps(src, dest);
  }
  FloatRegister reusedInputSimd128Float(FloatRegister src, FloatRegister dest) {
    if (HasAVX()) {
      return src;
    }
    moveSimd128Float(src, dest);
    return dest;
  }
  FloatRegister reusedInputAlignedSimd128Float(const Operand& src,
                                               FloatRegister dest) {
    if (HasAVX() && src.kind() == Operand::FPREG) {
      return FloatRegister::FromCode(src.fpu());
    }
    loadAlignedSimd128Float(src, dest);
    return dest;
  }
  void loadUnalignedSimd128(const Operand& src, FloatRegister dest) {
    vmovups(src, dest);
  }
  void storeUnalignedSimd128(FloatRegister src, const Operand& dest) {
    vmovups(src, dest);
  }
  void packedAddFloat32(const Operand& src, FloatRegister dest) {
    vaddps(src, dest, dest);
  }
  void packedSubFloat32(const Operand& src, FloatRegister dest) {
    vsubps(src, dest, dest);
  }
  void packedMulFloat32(const Operand& src, FloatRegister dest) {
    vmulps(src, dest, dest);
  }
  void packedDivFloat32(const Operand& src, FloatRegister dest) {
    vdivps(src, dest, dest);
  }

  static uint32_t ComputeShuffleMask(uint32_t x = 0, uint32_t y = 1,
                                     uint32_t z = 2, uint32_t w = 3) {
    MOZ_ASSERT(x < 4 && y < 4 && z < 4 && w < 4);
    uint32_t r = (w << 6) | (z << 4) | (y << 2) | (x << 0);
    MOZ_ASSERT(r < 256);
    return r;
  }
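  // For example, ComputeShuffleMask(2, 3, 0, 1) yields 0x4E, the immediate
  // that swaps the low and high 64-bit halves of a 128-bit vector.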

  void shuffleInt32(uint32_t mask, FloatRegister src, FloatRegister dest) {
    vpshufd(mask, src, dest);
  }
  void moveLowInt32(FloatRegister src, Register dest) { vmovd(src, dest); }

  void moveHighPairToLowPairFloat32(FloatRegister src, FloatRegister dest) {
    vmovhlps(src, dest, dest);
  }
  void shuffleFloat32(uint32_t mask, FloatRegister src, FloatRegister dest) {
    // The shuffle instruction on x86 is such that it moves 2 words from
    // the dest and 2 words from the src operands. To simplify things, just
    // clobber the output with the input and apply the instruction
    // afterwards.
    // Note: this is useAtStart-safe because src isn't read afterwards.
    FloatRegister srcCopy = reusedInputSimd128Float(src, dest);
    vshufps(mask, srcCopy, srcCopy, dest);
  }
  void shuffleMix(uint32_t mask, const Operand& src, FloatRegister dest) {
    // Note this uses vshufps, which incurs a cross-domain penalty on CPUs
    // where that applies, but that's the way clang and gcc do it.
    vshufps(mask, src, dest, dest);
  }

  void moveFloatAsDouble(Register src, FloatRegister dest) {
    vmovd(src, dest);
    vcvtss2sd(dest, dest, dest);
  }
  void loadFloatAsDouble(const Address& src, FloatRegister dest) {
    vmovss(src, dest);
    vcvtss2sd(dest, dest, dest);
  }
  void loadFloatAsDouble(const BaseIndex& src, FloatRegister dest) {
    vmovss(src, dest);
    vcvtss2sd(dest, dest, dest);
  }
  void loadFloatAsDouble(const Operand& src, FloatRegister dest) {
    loadFloat32(src, dest);
    vcvtss2sd(dest, dest, dest);
  }
  void loadFloat32(const Address& src, FloatRegister dest) {
    vmovss(src, dest);
  }
  void loadFloat32(const BaseIndex& src, FloatRegister dest) {
    vmovss(src, dest);
  }
  void loadFloat32(const Operand& src, FloatRegister dest) {
    switch (src.kind()) {
      case Operand::MEM_REG_DISP:
        loadFloat32(src.toAddress(), dest);
        break;
      case Operand::MEM_SCALE:
        loadFloat32(src.toBaseIndex(), dest);
        break;
      default:
        MOZ_CRASH("unexpected operand kind");
    }
  }
  void moveFloat32(FloatRegister src, FloatRegister dest) {
    // Use vmovaps instead of vmovss to avoid dependencies.
    vmovaps(src, dest);
  }

  // Checks whether a double is representable as a 32-bit integer. If so, the
  // integer is written to the output register. Otherwise, a bailout is taken to
  // the given snapshot. This function overwrites the scratch float register.
  void convertDoubleToInt32(FloatRegister src, Register dest, Label* fail,
                            bool negativeZeroCheck = true) {
    // Check for -0.0
    if (negativeZeroCheck) {
      branchNegativeZero(src, dest, fail);
    }

    ScratchDoubleScope scratch(asMasm());
    vcvttsd2si(src, dest);
    convertInt32ToDouble(dest, scratch);
    vucomisd(scratch, src);
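    // An unordered result (PF set) means src was NaN.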
    j(Assembler::Parity, fail);
    j(Assembler::NotEqual, fail);
  }

  // Checks whether a float32 is representable as a 32-bit integer. If so, the
  // integer is written to the output register. Otherwise, a bailout is taken to
  // the given snapshot. This function overwrites the scratch float register.
  void convertFloat32ToInt32(FloatRegister src, Register dest, Label* fail,
                             bool negativeZeroCheck = true) {
    // Check for -0.0
    if (negativeZeroCheck) {
      branchNegativeZeroFloat32(src, dest, fail);
    }

    ScratchFloat32Scope scratch(asMasm());
    vcvttss2si(src, dest);
    convertInt32ToFloat32(dest, scratch);
    vucomiss(scratch, src);
    j(Assembler::Parity, fail);
    j(Assembler::NotEqual, fail);
  }

  void truncateDoubleToInt32(FloatRegister src, Register dest, Label* fail) {
    // vcvttsd2si returns 0x80000000 on failure. Test for it by
    // subtracting 1 and testing overflow. The other possibility is to test
    // equality for INT_MIN after a comparison, but 1 costs fewer bytes to
    // materialize.
    vcvttsd2si(src, dest);
    cmp32(dest, Imm32(1));
    j(Assembler::Overflow, fail);
  }
  void truncateFloat32ToInt32(FloatRegister src, Register dest, Label* fail) {
    // Same trick as explained in the above comment.
    vcvttss2si(src, dest);
    cmp32(dest, Imm32(1));
    j(Assembler::Overflow, fail);
  }

  inline void clampIntToUint8(Register reg);

  bool maybeInlineDouble(double d, FloatRegister dest) {
    // Loading zero with xor is specially optimized in hardware.
    if (mozilla::IsPositiveZero(d)) {
      zeroDouble(dest);
      return true;
    }

    // It is also possible to load several common constants using vpcmpeqw
    // to get all ones and then vpsllq and vpsrlq to get zeros at the ends,
    // as described in "13.4 Generating constants" of
    // "2. Optimizing subroutines in assembly language" by Agner Fog, and as
    // previously implemented here. However, with x86 and x64 both using
    // constant pool loads for double constants, this is probably only
    // worthwhile in cases where a load is likely to be delayed.

    return false;
  }

  bool maybeInlineFloat(float f, FloatRegister dest) {
    // See comment above.
    if (mozilla::IsPositiveZero(f)) {
      zeroFloat32(dest);
      return true;
    }
    return false;
  }

  bool maybeInlineSimd128Int(const SimdConstant& v, const FloatRegister& dest) {
    static const SimdConstant zero = SimdConstant::SplatX4(0);
    static const SimdConstant minusOne = SimdConstant::SplatX4(-1);
    if (v == zero) {
      zeroSimd128Int(dest);
      return true;
    }
    if (v == minusOne) {
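      // Comparing a register with itself sets every lane to all ones.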
      vpcmpeqw(Operand(dest), dest, dest);
      return true;
    }
    return false;
  }
  bool maybeInlineSimd128Float(const SimdConstant& v,
                               const FloatRegister& dest) {
    static const SimdConstant zero = SimdConstant::SplatX4(0.f);
    if (v == zero) {
      // This won't get inlined if the SimdConstant v contains -0 in any
      // lane, as operator== here does a memcmp.
      zeroSimd128Float(dest);
      return true;
    }
    return false;
  }

  void convertBoolToInt32(Register source, Register dest) {
    // Note that C++ bool is only 1 byte, so zero extend it to clear the
    // higher-order bits.
    movzbl(source, dest);
  }

  void emitSet(Assembler::Condition cond, Register dest,
               Assembler::NaNCond ifNaN = Assembler::NaN_HandledByCond) {
    if (AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(dest)) {
      // If the register we're defining is a single byte register,
      // take advantage of the setCC instruction.
      setCC(cond, dest);
      movzbl(dest, dest);

      if (ifNaN != Assembler::NaN_HandledByCond) {
        Label noNaN;
        j(Assembler::NoParity, &noNaN);
        mov(ImmWord(ifNaN == Assembler::NaN_IsTrue), dest);
        bind(&noNaN);
      }
    } else {
      Label end;
      Label ifFalse;

      if (ifNaN == Assembler::NaN_IsFalse) {
        j(Assembler::Parity, &ifFalse);
      }
      // Note a subtlety here: FLAGS is live at this point, and the
      // mov interface doesn't guarantee to preserve FLAGS. Use
      // movl instead of mov, because the movl instruction
      // preserves FLAGS.
      movl(Imm32(1), dest);
      j(cond, &end);
      if (ifNaN == Assembler::NaN_IsTrue) {
        j(Assembler::Parity, &end);
      }
      bind(&ifFalse);
      mov(ImmWord(0), dest);

      bind(&end);
    }
  }

  // Emit a JMP that can be toggled to a CMP. See ToggleToJmp(), ToggleToCmp().
  CodeOffset toggledJump(Label* label) {
    CodeOffset offset(size());
    jump(label);
    return offset;
  }

  template <typename T>
  void computeEffectiveAddress(const T& address, Register dest) {
    lea(Operand(address), dest);
  }

  void checkStackAlignment() {
    // Exists for ARM compatibility.
  }

  void abiret() { ret(); }

 protected:
  bool buildOOLFakeExitFrame(void* fakeReturnAddr);
};

// Specialize for float to use movaps. Use movdqa for everything else.
template <>
inline void MacroAssemblerX86Shared::loadAlignedVector<float>(
    const Address& src, FloatRegister dest) {
  loadAlignedSimd128Float(src, dest);
}

template <typename T>
inline void MacroAssemblerX86Shared::loadAlignedVector(const Address& src,
                                                       FloatRegister dest) {
  loadAlignedSimd128Int(src, dest);
}

// Specialize for float to use movaps. Use movdqa for everything else.
template <>
inline void MacroAssemblerX86Shared::storeAlignedVector<float>(
    FloatRegister src, const Address& dest) {
  storeAlignedSimd128Float(src, dest);
}

template <typename T>
inline void MacroAssemblerX86Shared::storeAlignedVector(FloatRegister src,
                                                        const Address& dest) {
  storeAlignedSimd128Int(src, dest);
}

template <>
inline void MacroAssemblerX86Shared::loadScalar<int8_t>(const Operand& src,
                                                        Register dest) {
  load8ZeroExtend(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::loadScalar<int16_t>(const Operand& src,
                                                         Register dest) {
  load16ZeroExtend(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::loadScalar<int32_t>(const Operand& src,
                                                         Register dest) {
  load32(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::loadScalar<float>(const Operand& src,
                                                       FloatRegister dest) {
  loadFloat32(src, dest);
}

template <>
inline void MacroAssemblerX86Shared::storeScalar<int8_t>(Register src,
                                                         const Address& dest) {
  store8(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::storeScalar<int16_t>(Register src,
                                                          const Address& dest) {
  store16(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::storeScalar<int32_t>(Register src,
                                                          const Address& dest) {
  store32(src, dest);
}
template <>
inline void MacroAssemblerX86Shared::storeScalar<float>(FloatRegister src,
                                                        const Address& dest) {
  vmovss(src, dest);
}

}  // namespace jit
}  // namespace js

#endif /* jit_x86_shared_MacroAssembler_x86_shared_h */