1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2  * vim: set ts=8 sts=2 et sw=2 tw=80:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "jit/arm64/MacroAssembler-arm64.h"
8 
9 #include "mozilla/MathAlgorithms.h"
10 #include "mozilla/Maybe.h"
11 
12 #include "jsmath.h"
13 
14 #include "jit/arm64/MoveEmitter-arm64.h"
15 #include "jit/arm64/SharedICRegisters-arm64.h"
16 #include "jit/Bailouts.h"
17 #include "jit/BaselineFrame.h"
18 #include "jit/JitRuntime.h"
19 #include "jit/MacroAssembler.h"
20 #include "util/Memory.h"
21 #include "vm/JitActivation.h"  // js::jit::JitActivation
22 #include "vm/JSContext.h"
23 
24 #include "jit/MacroAssembler-inl.h"
25 
26 namespace js {
27 namespace jit {
28 
29 enum class Width { _32 = 32, _64 = 64 };
30 
31 static inline ARMRegister X(Register r) { return ARMRegister(r, 64); }
32 
33 static inline ARMRegister X(MacroAssembler& masm, RegisterOrSP r) {
34   return masm.toARMRegister(r, 64);
35 }
36 
37 static inline ARMRegister W(Register r) { return ARMRegister(r, 32); }
38 
39 static inline ARMRegister R(Register r, Width w) {
40   return ARMRegister(r, unsigned(w));
41 }
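// A rough usage sketch for the helpers above (illustrative only; the chosen
// register is hypothetical):
//
//   Register reg = r0;
//   ARMRegister x = X(reg);              // 64-bit view of r0
//   ARMRegister w = W(reg);              // 32-bit view of r0
//   ARMRegister v = R(reg, Width::_32);  // width selected via the Width enum
//
// They exist so call sites can pick an operand width without spelling out
// ARMRegister(reg, 32/64) at every use.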
42 
43 void MacroAssemblerCompat::boxValue(JSValueType type, Register src,
44                                     Register dest) {
45 #ifdef DEBUG
46   if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) {
47     Label upper32BitsZeroed;
48     movePtr(ImmWord(UINT32_MAX), dest);
49     asMasm().branchPtr(Assembler::BelowOrEqual, src, dest, &upper32BitsZeroed);
50     breakpoint();
51     bind(&upper32BitsZeroed);
52   }
53 #endif
54   Orr(ARMRegister(dest, 64), ARMRegister(src, 64),
55       Operand(ImmShiftedTag(type).value));
56 }
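// Boxing sketch (illustrative; the exact tag constants are defined by the
// JS::Value implementation, not here): for a 32-bit payload type such as
// INT32 the shifted tag occupies the high bits and the payload the low
// 32 bits, roughly
//
//   boxed = (uint64_t(tagFor(type)) << JSVAL_TAG_SHIFT) | uint32_t(payload);
//
// which is why the DEBUG block insists the upper 32 bits of `src` are already
// zero before Orr merges the tag in.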
57 
58 void MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output) {
59   ARMRegister dest(output, 32);
60   Fcvtns(dest, ARMFPRegister(input, 64));
61 
62   {
63     vixl::UseScratchRegisterScope temps(this);
64     const ARMRegister scratch32 = temps.AcquireW();
65 
66     Mov(scratch32, Operand(0xff));
67     Cmp(dest, scratch32);
68     Csel(dest, dest, scratch32, LessThan);
69   }
70 
71   Cmp(dest, Operand(0));
72   Csel(dest, dest, wzr, GreaterThan);
73 }
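// Scalar sketch of the clamp above (helper name hypothetical): Fcvtns
// converts with round-to-nearest, ties-to-even, and the two compare/select
// pairs clamp the result into [0, 255]:
//
//   int32_t n = convert_nearest_ties_to_even(input);
//   output = n > 255 ? 255 : (n < 0 ? 0 : n);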
74 
75 js::jit::MacroAssembler& MacroAssemblerCompat::asMasm() {
76   return *static_cast<js::jit::MacroAssembler*>(this);
77 }
78 
79 const js::jit::MacroAssembler& MacroAssemblerCompat::asMasm() const {
80   return *static_cast<const js::jit::MacroAssembler*>(this);
81 }
82 
83 vixl::MacroAssembler& MacroAssemblerCompat::asVIXL() {
84   return *static_cast<vixl::MacroAssembler*>(this);
85 }
86 
87 const vixl::MacroAssembler& MacroAssemblerCompat::asVIXL() const {
88   return *static_cast<const vixl::MacroAssembler*>(this);
89 }
90 
91 void MacroAssemblerCompat::mov(CodeLabel* label, Register dest) {
92   BufferOffset bo = movePatchablePtr(ImmWord(/* placeholder */ 0), dest);
93   label->patchAt()->bind(bo.getOffset());
94   label->setLinkMode(CodeLabel::MoveImmediate);
95 }
96 
97 BufferOffset MacroAssemblerCompat::movePatchablePtr(ImmPtr ptr, Register dest) {
98   const size_t numInst = 1;           // Inserting one load instruction.
99   const unsigned numPoolEntries = 2;  // Every pool entry is 4 bytes.
100   uint8_t* literalAddr = (uint8_t*)(&ptr.value);  // TODO: Should be const.
101 
102   // Scratch space for generating the load instruction.
103   //
104   // allocLiteralLoadEntry() will use InsertIndexIntoTag() to store a temporary
105   // index to the corresponding PoolEntry in the instruction itself.
106   //
107   // That index will be fixed up later when finishPool()
108   // walks over all marked loads and calls PatchConstantPoolLoad().
109   uint32_t instructionScratch = 0;
110 
111   // Emit the instruction mask in the scratch space.
112   // The offset doesn't matter: it will be fixed up later.
113   vixl::Assembler::ldr((Instruction*)&instructionScratch, ARMRegister(dest, 64),
114                        0);
115 
116   // Add the entry to the pool, fix up the LDR imm19 offset,
117   // and add the completed instruction to the buffer.
118   return allocLiteralLoadEntry(numInst, numPoolEntries,
119                                (uint8_t*)&instructionScratch, literalAddr);
120 }
121 
122 BufferOffset MacroAssemblerCompat::movePatchablePtr(ImmWord ptr,
123                                                     Register dest) {
124   const size_t numInst = 1;           // Inserting one load instruction.
125   const unsigned numPoolEntries = 2;  // Every pool entry is 4 bytes.
126   uint8_t* literalAddr = (uint8_t*)(&ptr.value);
127 
128   // Scratch space for generating the load instruction.
129   //
130   // allocLiteralLoadEntry() will use InsertIndexIntoTag() to store a temporary
131   // index to the corresponding PoolEntry in the instruction itself.
132   //
133   // That index will be fixed up later when finishPool()
134   // walks over all marked loads and calls PatchConstantPoolLoad().
135   uint32_t instructionScratch = 0;
136 
137   // Emit the instruction mask in the scratch space.
138   // The offset doesn't matter: it will be fixed up later.
139   vixl::Assembler::ldr((Instruction*)&instructionScratch, ARMRegister(dest, 64),
140                        0);
141 
142   // Add the entry to the pool, fix up the LDR imm19 offset,
143   // and add the completed instruction to the buffer.
144   return allocLiteralLoadEntry(numInst, numPoolEntries,
145                                (uint8_t*)&instructionScratch, literalAddr);
146 }
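// Both movePatchablePtr() overloads ultimately emit one PC-relative literal
// load plus an 8-byte constant-pool entry, conceptually:
//
//   ldr xD, #imm19        ; imm19 fixed up by finishPool()/PatchConstantPoolLoad()
//   ...
//   .pool: .quad <ptr>    ; the two 4-byte pool entries holding the pointer
//
// The returned BufferOffset identifies the LDR so callers such as
// mov(CodeLabel*, Register) can later patch the pointer value itself.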
147 
148 void MacroAssemblerCompat::loadPrivate(const Address& src, Register dest) {
149   loadPtr(src, dest);
150 }
151 
152 void MacroAssemblerCompat::handleFailureWithHandlerTail(
153     Label* profilerExitTail) {
154   // Fail rather than silently create wrong code.
155   MOZ_RELEASE_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));
156 
157   // Reserve space for exception information.
158   int64_t size = (sizeof(ResumeFromException) + 7) & ~7;
159   Sub(PseudoStackPointer64, PseudoStackPointer64, Operand(size));
160   syncStackPtr();
161 
162   MOZ_ASSERT(!x0.Is(PseudoStackPointer64));
163   Mov(x0, PseudoStackPointer64);
164 
165   // Call the handler.
166   using Fn = void (*)(ResumeFromException* rfe);
167   asMasm().setupUnalignedABICall(r1);
168   asMasm().passABIArg(r0);
169   asMasm().callWithABI<Fn, HandleException>(
170       MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckHasExitFrame);
171 
172   Label entryFrame;
173   Label catch_;
174   Label finally;
175   Label return_;
176   Label bailout;
177   Label wasm;
178   Label wasmCatch;
179 
180   // Check the `asMasm` calls above didn't mess with the StackPointer identity.
181   MOZ_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));
182 
183   loadPtr(Address(PseudoStackPointer, offsetof(ResumeFromException, kind)), r0);
184   asMasm().branch32(Assembler::Equal, r0,
185                     Imm32(ResumeFromException::RESUME_ENTRY_FRAME),
186                     &entryFrame);
187   asMasm().branch32(Assembler::Equal, r0,
188                     Imm32(ResumeFromException::RESUME_CATCH), &catch_);
189   asMasm().branch32(Assembler::Equal, r0,
190                     Imm32(ResumeFromException::RESUME_FINALLY), &finally);
191   asMasm().branch32(Assembler::Equal, r0,
192                     Imm32(ResumeFromException::RESUME_FORCED_RETURN), &return_);
193   asMasm().branch32(Assembler::Equal, r0,
194                     Imm32(ResumeFromException::RESUME_BAILOUT), &bailout);
195   asMasm().branch32(Assembler::Equal, r0,
196                     Imm32(ResumeFromException::RESUME_WASM), &wasm);
197   asMasm().branch32(Assembler::Equal, r0,
198                     Imm32(ResumeFromException::RESUME_WASM_CATCH), &wasmCatch);
199 
200   breakpoint();  // Invalid kind.
201 
202   // No exception handler. Load the error value, load the new stack pointer,
203   // and return from the entry frame.
204   bind(&entryFrame);
205   moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand);
206   loadPtr(
207       Address(PseudoStackPointer, offsetof(ResumeFromException, stackPointer)),
208       PseudoStackPointer);
209 
210   // `retn` does indeed sync the stack pointer, but before doing that it reads
211   // from the stack.  Consequently, if we remove this call to syncStackPointer
212   // then we take on the requirement to prove that the immediately preceding
213   // loadPtr produces a value for PSP which maintains the SP <= PSP invariant.
214   // That's a proof burden we don't want to take on.  In general it would be
215   // good to move (at some time in the future, not now) to a world where
216   // *every* assignment to PSP or SP is followed immediately by a copy into
217   // the other register.  That would make all required correctness proofs
218   // trivial in the sense that it requires only local inspection of code
219   // immediately following (dominated by) any such assignment.
220   syncStackPtr();
221   retn(Imm32(1 * sizeof(void*)));  // Pop from stack and return.
222 
223   // If we found a catch handler, this must be a baseline frame. Restore state
224   // and jump to the catch block.
225   bind(&catch_);
226   loadPtr(Address(PseudoStackPointer, offsetof(ResumeFromException, target)),
227           r0);
228   loadPtr(
229       Address(PseudoStackPointer, offsetof(ResumeFromException, framePointer)),
230       BaselineFrameReg);
231   loadPtr(
232       Address(PseudoStackPointer, offsetof(ResumeFromException, stackPointer)),
233       PseudoStackPointer);
234   syncStackPtr();
235   Br(x0);
236 
237   // If we found a finally block, this must be a baseline frame.
238   // Push two values expected by JSOp::Retsub: BooleanValue(true)
239   // and the exception.
240   bind(&finally);
241   ARMRegister exception = x1;
242   Ldr(exception, MemOperand(PseudoStackPointer64,
243                             offsetof(ResumeFromException, exception)));
244   Ldr(x0,
245       MemOperand(PseudoStackPointer64, offsetof(ResumeFromException, target)));
246   Ldr(ARMRegister(BaselineFrameReg, 64),
247       MemOperand(PseudoStackPointer64,
248                  offsetof(ResumeFromException, framePointer)));
249   Ldr(PseudoStackPointer64,
250       MemOperand(PseudoStackPointer64,
251                  offsetof(ResumeFromException, stackPointer)));
252   syncStackPtr();
253   pushValue(BooleanValue(true));
254   push(exception);
255   Br(x0);
256 
257   // Only used in debug mode. Return BaselineFrame->returnValue() to the caller.
258   bind(&return_);
259   loadPtr(
260       Address(PseudoStackPointer, offsetof(ResumeFromException, framePointer)),
261       BaselineFrameReg);
262   loadPtr(
263       Address(PseudoStackPointer, offsetof(ResumeFromException, stackPointer)),
264       PseudoStackPointer);
265   // See comment further up beginning "`retn` does indeed sync the stack
266   // pointer".  That comment applies here too.
267   syncStackPtr();
268   loadValue(
269       Address(BaselineFrameReg, BaselineFrame::reverseOffsetOfReturnValue()),
270       JSReturnOperand);
271   movePtr(BaselineFrameReg, PseudoStackPointer);
272   syncStackPtr();
273   vixl::MacroAssembler::Pop(ARMRegister(BaselineFrameReg, 64));
274 
275   // If profiling is enabled, then update the lastProfilingFrame to refer to
276   // caller frame before returning.
277   {
278     Label skipProfilingInstrumentation;
279     AbsoluteAddress addressOfEnabled(
280         GetJitContext()->runtime->geckoProfiler().addressOfEnabled());
281     asMasm().branch32(Assembler::Equal, addressOfEnabled, Imm32(0),
282                       &skipProfilingInstrumentation);
283     jump(profilerExitTail);
284     bind(&skipProfilingInstrumentation);
285   }
286 
287   vixl::MacroAssembler::Pop(vixl::lr);
288   syncStackPtr();
289   vixl::MacroAssembler::Ret(vixl::lr);
290 
291   // If we are bailing out to baseline to handle an exception, jump to the
292   // bailout tail stub. Load 1 (true) in x0 (ReturnReg) to indicate success.
293   bind(&bailout);
294   Ldr(x2, MemOperand(PseudoStackPointer64,
295                      offsetof(ResumeFromException, bailoutInfo)));
296   Ldr(x1,
297       MemOperand(PseudoStackPointer64, offsetof(ResumeFromException, target)));
298   Mov(x0, 1);
299   Br(x1);
300 
301   // If we are throwing and the innermost frame was a wasm frame, reset SP and
302   // FP; SP is pointing to the unwound return address to the wasm entry, so
303   // we can just ret().
304   bind(&wasm);
305   Ldr(x29, MemOperand(PseudoStackPointer64,
306                       offsetof(ResumeFromException, framePointer)));
307   Ldr(PseudoStackPointer64,
308       MemOperand(PseudoStackPointer64,
309                  offsetof(ResumeFromException, stackPointer)));
310   syncStackPtr();
311   ret();
312 
313   // Found a wasm catch handler, restore state and jump to it.
314   bind(&wasmCatch);
315   loadPtr(Address(PseudoStackPointer, offsetof(ResumeFromException, target)),
316           r0);
317   loadPtr(
318       Address(PseudoStackPointer, offsetof(ResumeFromException, framePointer)),
319       r29);
320   loadPtr(
321       Address(PseudoStackPointer, offsetof(ResumeFromException, stackPointer)),
322       PseudoStackPointer);
323   syncStackPtr();
324   Br(x0);
325 
326   MOZ_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));
327 }
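// For reference, the ResumeFromException fields consumed above (all via
// offsetof) are: kind, target, framePointer, stackPointer, exception and
// bailoutInfo.  HandleException() fills the struct on the C++ side; each
// branch above then restores FP/SP from it, re-syncs SP with PSP, and either
// returns (entry frame, wasm) or jumps to `target`.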
328 
329 void MacroAssemblerCompat::profilerEnterFrame(Register framePtr,
330                                               Register scratch) {
331   profilerEnterFrame(RegisterOrSP(framePtr), scratch);
332 }
333 
334 void MacroAssemblerCompat::profilerEnterFrame(RegisterOrSP framePtr,
335                                               Register scratch) {
336   asMasm().loadJSContext(scratch);
337   loadPtr(Address(scratch, offsetof(JSContext, profilingActivation_)), scratch);
338   if (IsHiddenSP(framePtr)) {
339     storeStackPtr(
340         Address(scratch, JitActivation::offsetOfLastProfilingFrame()));
341   } else {
342     storePtr(AsRegister(framePtr),
343              Address(scratch, JitActivation::offsetOfLastProfilingFrame()));
344   }
345   storePtr(ImmPtr(nullptr),
346            Address(scratch, JitActivation::offsetOfLastProfilingCallSite()));
347 }
348 
349 void MacroAssemblerCompat::profilerExitFrame() {
350   jump(GetJitContext()->runtime->jitRuntime()->getProfilerExitFrameTail());
351 }
352 
353 void MacroAssemblerCompat::breakpoint() {
354   // Note, other payloads are possible, but GDB is known to misinterpret them
355   // sometimes and iloop on the breakpoint instead of stopping properly.
356   Brk(0);
357 }
358 
359 // Either `any` is valid or `sixtyfour` is valid.  Return a 32-bit ARMRegister
360 // in the first case and an ARMRegister of the desired size in the latter case.
361 
362 static inline ARMRegister SelectGPReg(AnyRegister any, Register64 sixtyfour,
363                                       unsigned size = 64) {
364   MOZ_ASSERT(any.isValid() != (sixtyfour != Register64::Invalid()));
365 
366   if (sixtyfour == Register64::Invalid()) {
367     return ARMRegister(any.gpr(), 32);
368   }
369 
370   return ARMRegister(sixtyfour.reg, size);
371 }
372 
373 // Assert that `sixtyfour` is invalid and then return an FP register from `any`
374 // of the desired size.
375 
376 static inline ARMFPRegister SelectFPReg(AnyRegister any, Register64 sixtyfour,
377                                         unsigned size) {
378   MOZ_ASSERT(sixtyfour == Register64::Invalid());
379   return ARMFPRegister(any.fpu(), size);
380 }
381 
382 void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
383                                         Register memoryBase_, Register ptr_,
384                                         AnyRegister outany, Register64 out64) {
385   uint32_t offset = access.offset();
386   MOZ_ASSERT(offset < asMasm().wasmMaxOffsetGuardLimit());
387 
388   ARMRegister memoryBase(memoryBase_, 64);
389   ARMRegister ptr(ptr_, 64);
390   if (offset) {
391     vixl::UseScratchRegisterScope temps(this);
392     ARMRegister scratch = temps.AcquireX();
393     Add(scratch, ptr, Operand(offset));
394     MemOperand srcAddr(memoryBase, scratch);
395     wasmLoadImpl(access, srcAddr, outany, out64);
396   } else {
397     MemOperand srcAddr(memoryBase, ptr);
398     wasmLoadImpl(access, srcAddr, outany, out64);
399   }
400 }
401 
402 void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
403                                         MemOperand srcAddr, AnyRegister outany,
404                                         Register64 out64) {
405   // Reg+Reg and Reg+SmallImm addressing is directly encodable in one Load
406   // instruction, hence we expect exactly one instruction to be emitted in the
407   // window.
408   int32_t instructionsExpected = 1;
409 
410   // Splat and widen however require an additional instruction to be emitted
411   // after the load, so allow one more instruction in the window.
412   if (access.isSplatSimd128Load() || access.isWidenSimd128Load()) {
413     MOZ_ASSERT(access.type() == Scalar::Float64);
414     instructionsExpected++;
415   }
416 
417   asMasm().memoryBarrierBefore(access.sync());
418 
419   {
420     // The AutoForbidPoolsAndNops asserts if we emit more than the expected
421     // number of instructions and thus ensures that the access metadata is
422     // emitted at the address of the Load.
423     AutoForbidPoolsAndNops afp(this, instructionsExpected);
424 
425     append(access, asMasm().currentOffset());
426     switch (access.type()) {
427       case Scalar::Int8:
428         Ldrsb(SelectGPReg(outany, out64), srcAddr);
429         break;
430       case Scalar::Uint8:
431         Ldrb(SelectGPReg(outany, out64), srcAddr);
432         break;
433       case Scalar::Int16:
434         Ldrsh(SelectGPReg(outany, out64), srcAddr);
435         break;
436       case Scalar::Uint16:
437         Ldrh(SelectGPReg(outany, out64), srcAddr);
438         break;
439       case Scalar::Int32:
440         if (out64 != Register64::Invalid()) {
441           Ldrsw(SelectGPReg(outany, out64), srcAddr);
442         } else {
443           Ldr(SelectGPReg(outany, out64, 32), srcAddr);
444         }
445         break;
446       case Scalar::Uint32:
447         Ldr(SelectGPReg(outany, out64, 32), srcAddr);
448         break;
449       case Scalar::Int64:
450         Ldr(SelectGPReg(outany, out64), srcAddr);
451         break;
452       case Scalar::Float32:
453         // LDR does the right thing also for access.isZeroExtendSimd128Load()
454         Ldr(SelectFPReg(outany, out64, 32), srcAddr);
455         break;
456       case Scalar::Float64:
457         if (access.isSplatSimd128Load() || access.isWidenSimd128Load()) {
458           ScratchSimd128Scope scratch_(asMasm());
459           ARMFPRegister scratch = Simd1D(scratch_);
460           Ldr(scratch, srcAddr);
461           if (access.isSplatSimd128Load()) {
462             Dup(SelectFPReg(outany, out64, 128).V2D(), scratch, 0);
463           } else {
464             MOZ_ASSERT(access.isWidenSimd128Load());
465             switch (access.widenSimdOp()) {
466               case wasm::SimdOp::I16x8LoadS8x8:
467                 Sshll(SelectFPReg(outany, out64, 128).V8H(), scratch.V8B(), 0);
468                 break;
469               case wasm::SimdOp::I16x8LoadU8x8:
470                 Ushll(SelectFPReg(outany, out64, 128).V8H(), scratch.V8B(), 0);
471                 break;
472               case wasm::SimdOp::I32x4LoadS16x4:
473                 Sshll(SelectFPReg(outany, out64, 128).V4S(), scratch.V4H(), 0);
474                 break;
475               case wasm::SimdOp::I32x4LoadU16x4:
476                 Ushll(SelectFPReg(outany, out64, 128).V4S(), scratch.V4H(), 0);
477                 break;
478               case wasm::SimdOp::I64x2LoadS32x2:
479                 Sshll(SelectFPReg(outany, out64, 128).V2D(), scratch.V2S(), 0);
480                 break;
481               case wasm::SimdOp::I64x2LoadU32x2:
482                 Ushll(SelectFPReg(outany, out64, 128).V2D(), scratch.V2S(), 0);
483                 break;
484               default:
485                 MOZ_CRASH("Unexpected widening op for wasmLoad");
486             }
487           }
488         } else {
489           // LDR does the right thing also for access.isZeroExtendSimd128Load()
490           Ldr(SelectFPReg(outany, out64, 64), srcAddr);
491         }
492         break;
493       case Scalar::Simd128:
494         Ldr(SelectFPReg(outany, out64, 128), srcAddr);
495         break;
496       case Scalar::Uint8Clamped:
497       case Scalar::BigInt64:
498       case Scalar::BigUint64:
499       case Scalar::MaxTypedArrayViewType:
500         MOZ_CRASH("unexpected array type");
501     }
502   }
503 
504   asMasm().memoryBarrierAfter(access.sync());
505 }
506 
507 // Return true if `address` can be represented as an immediate (possibly scaled
508 // by the access size) in an LDR/STR type instruction.
509 //
510 // For more about the logic here, see vixl::MacroAssembler::LoadStoreMacro().
511 static bool IsLSImmediateOffset(uint64_t address, size_t accessByteSize) {
512   // The predicates below operate on signed values only.
513   if (address > INT64_MAX) {
514     return false;
515   }
516 
517   // The access size is always a power of 2, so computing the log amounts to
518   // counting trailing zeroes.
519   unsigned logAccessSize = mozilla::CountTrailingZeroes32(accessByteSize);
520   return (MacroAssemblerCompat::IsImmLSUnscaled(int64_t(address)) ||
521           MacroAssemblerCompat::IsImmLSScaled(int64_t(address), logAccessSize));
522 }
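// Worked example (standard AArch64 encoding limits): an unscaled LDUR/STUR
// offset must fit in a signed 9-bit field, i.e. [-256, 255]; a scaled LDR/STR
// offset must be a non-negative multiple of the access size that fits in an
// unsigned 12-bit field after scaling.  So, for a 4-byte access:
//
//   IsLSImmediateOffset(252, 4)   -> true   (fits both forms)
//   IsLSImmediateOffset(16380, 4) -> true   (4095 * 4, scaled form)
//   IsLSImmediateOffset(16384, 4) -> false  (caller must use a scratch register)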
523 
524 void MacroAssemblerCompat::wasmLoadAbsolute(
525     const wasm::MemoryAccessDesc& access, Register memoryBase, uint64_t address,
526     AnyRegister output, Register64 out64) {
527   if (!IsLSImmediateOffset(address, access.byteSize())) {
528     // The access will require the constant to be loaded into a temp register.
529     // Do so here, to keep the logic in wasmLoadImpl() tractable wrt emitting
530     // trap information.
531     //
532     // Almost all constant addresses will in practice be handled by a single MOV
533     // so do not worry about additional optimizations here.
534     vixl::UseScratchRegisterScope temps(this);
535     ARMRegister scratch = temps.AcquireX();
536     Mov(scratch, address);
537     MemOperand srcAddr(X(memoryBase), scratch);
538     wasmLoadImpl(access, srcAddr, output, out64);
539   } else {
540     MemOperand srcAddr(X(memoryBase), address);
541     wasmLoadImpl(access, srcAddr, output, out64);
542   }
543 }
544 
545 void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access,
546                                          AnyRegister valany, Register64 val64,
547                                          Register memoryBase_, Register ptr_) {
548   uint32_t offset = access.offset();
549   MOZ_ASSERT(offset < asMasm().wasmMaxOffsetGuardLimit());
550 
551   ARMRegister memoryBase(memoryBase_, 64);
552   ARMRegister ptr(ptr_, 64);
553   if (offset) {
554     vixl::UseScratchRegisterScope temps(this);
555     ARMRegister scratch = temps.AcquireX();
556     Add(scratch, ptr, Operand(offset));
557     MemOperand destAddr(memoryBase, scratch);
558     wasmStoreImpl(access, destAddr, valany, val64);
559   } else {
560     MemOperand destAddr(memoryBase, ptr);
561     wasmStoreImpl(access, destAddr, valany, val64);
562   }
563 }
564 
565 void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access,
566                                          MemOperand dstAddr, AnyRegister valany,
567                                          Register64 val64) {
568   asMasm().memoryBarrierBefore(access.sync());
569 
570   {
571     // Reg+Reg addressing is directly encodable in one Store instruction, hence
572     // the AutoForbidPoolsAndNops will ensure that the access metadata is
573     // emitted at the address of the Store.  The AutoForbidPoolsAndNops will
574     // assert if we emit more than one instruction.
575 
576     AutoForbidPoolsAndNops afp(this,
577                                /* max number of instructions in scope = */ 1);
578 
579     append(access, asMasm().currentOffset());
580     switch (access.type()) {
581       case Scalar::Int8:
582       case Scalar::Uint8:
583         Strb(SelectGPReg(valany, val64), dstAddr);
584         break;
585       case Scalar::Int16:
586       case Scalar::Uint16:
587         Strh(SelectGPReg(valany, val64), dstAddr);
588         break;
589       case Scalar::Int32:
590       case Scalar::Uint32:
591         Str(SelectGPReg(valany, val64), dstAddr);
592         break;
593       case Scalar::Int64:
594         Str(SelectGPReg(valany, val64), dstAddr);
595         break;
596       case Scalar::Float32:
597         Str(SelectFPReg(valany, val64, 32), dstAddr);
598         break;
599       case Scalar::Float64:
600         Str(SelectFPReg(valany, val64, 64), dstAddr);
601         break;
602       case Scalar::Simd128:
603         Str(SelectFPReg(valany, val64, 128), dstAddr);
604         break;
605       case Scalar::Uint8Clamped:
606       case Scalar::BigInt64:
607       case Scalar::BigUint64:
608       case Scalar::MaxTypedArrayViewType:
609         MOZ_CRASH("unexpected array type");
610     }
611   }
612 
613   asMasm().memoryBarrierAfter(access.sync());
614 }
615 
616 void MacroAssemblerCompat::wasmStoreAbsolute(
617     const wasm::MemoryAccessDesc& access, AnyRegister value, Register64 value64,
618     Register memoryBase, uint64_t address) {
619   // See comments in wasmLoadAbsolute.
620   unsigned logAccessSize = mozilla::CountTrailingZeroes32(access.byteSize());
621   if (address > INT64_MAX || !(IsImmLSScaled(int64_t(address), logAccessSize) ||
622                                IsImmLSUnscaled(int64_t(address)))) {
623     vixl::UseScratchRegisterScope temps(this);
624     ARMRegister scratch = temps.AcquireX();
625     Mov(scratch, address);
626     MemOperand destAddr(X(memoryBase), scratch);
627     wasmStoreImpl(access, destAddr, value, value64);
628   } else {
629     MemOperand destAddr(X(memoryBase), address);
630     wasmStoreImpl(access, destAddr, value, value64);
631   }
632 }
633 
634 void MacroAssemblerCompat::compareSimd128Int(Assembler::Condition cond,
635                                              ARMFPRegister dest,
636                                              ARMFPRegister lhs,
637                                              ARMFPRegister rhs) {
638   switch (cond) {
639     case Assembler::Equal:
640       Cmeq(dest, lhs, rhs);
641       break;
642     case Assembler::NotEqual:
643       Cmeq(dest, lhs, rhs);
644       Mvn(dest, dest);
645       break;
646     case Assembler::GreaterThan:
647       Cmgt(dest, lhs, rhs);
648       break;
649     case Assembler::GreaterThanOrEqual:
650       Cmge(dest, lhs, rhs);
651       break;
652     case Assembler::LessThan:
653       Cmgt(dest, rhs, lhs);
654       break;
655     case Assembler::LessThanOrEqual:
656       Cmge(dest, rhs, lhs);
657       break;
658     case Assembler::Above:
659       Cmhi(dest, lhs, rhs);
660       break;
661     case Assembler::AboveOrEqual:
662       Cmhs(dest, lhs, rhs);
663       break;
664     case Assembler::Below:
665       Cmhi(dest, rhs, lhs);
666       break;
667     case Assembler::BelowOrEqual:
668       Cmhs(dest, rhs, lhs);
669       break;
670     default:
671       MOZ_CRASH("Unexpected SIMD integer condition");
672   }
673 }
674 
675 void MacroAssemblerCompat::compareSimd128Float(Assembler::Condition cond,
676                                                ARMFPRegister dest,
677                                                ARMFPRegister lhs,
678                                                ARMFPRegister rhs) {
679   switch (cond) {
680     case Assembler::Equal:
681       Fcmeq(dest, lhs, rhs);
682       break;
683     case Assembler::NotEqual:
684       Fcmeq(dest, lhs, rhs);
685       Mvn(dest, dest);
686       break;
687     case Assembler::GreaterThan:
688       Fcmgt(dest, lhs, rhs);
689       break;
690     case Assembler::GreaterThanOrEqual:
691       Fcmge(dest, lhs, rhs);
692       break;
693     case Assembler::LessThan:
694       Fcmgt(dest, rhs, lhs);
695       break;
696     case Assembler::LessThanOrEqual:
697       Fcmge(dest, rhs, lhs);
698       break;
699     default:
700       MOZ_CRASH("Unexpected SIMD integer condition");
701   }
702 }
703 
704 void MacroAssemblerCompat::rightShiftInt8x16(FloatRegister lhs, Register rhs,
705                                              FloatRegister dest,
706                                              bool isUnsigned) {
707   ScratchSimd128Scope scratch_(asMasm());
708   ARMFPRegister shift = Simd16B(scratch_);
709 
710   // Compute -(shift & 7) in all 8-bit lanes
711   {
712     vixl::UseScratchRegisterScope temps(this);
713     ARMRegister scratch = temps.AcquireW();
714     And(scratch, ARMRegister(rhs, 32), 7);
715     Neg(scratch, scratch);
716     Dup(shift, scratch);
717   }
718 
719   if (isUnsigned) {
720     Ushl(Simd16B(dest), Simd16B(lhs), shift);
721   } else {
722     Sshl(Simd16B(dest), Simd16B(lhs), shift);
723   }
724 }
725 
726 void MacroAssemblerCompat::rightShiftInt16x8(FloatRegister lhs, Register rhs,
727                                              FloatRegister dest,
728                                              bool isUnsigned) {
729   ScratchSimd128Scope scratch_(asMasm());
730   ARMFPRegister shift = Simd8H(scratch_);
731 
732   // Compute -(shift & 15) in all 16-bit lanes
733   {
734     vixl::UseScratchRegisterScope temps(this);
735     ARMRegister scratch = temps.AcquireW();
736     And(scratch, ARMRegister(rhs, 32), 15);
737     Neg(scratch, scratch);
738     Dup(shift, scratch);
739   }
740 
741   if (isUnsigned) {
742     Ushl(Simd8H(dest), Simd8H(lhs), shift);
743   } else {
744     Sshl(Simd8H(dest), Simd8H(lhs), shift);
745   }
746 }
747 
748 void MacroAssemblerCompat::rightShiftInt32x4(FloatRegister lhs, Register rhs,
749                                              FloatRegister dest,
750                                              bool isUnsigned) {
751   ScratchSimd128Scope scratch_(asMasm());
752   ARMFPRegister shift = Simd4S(scratch_);
753 
754   // Compute -(shift & 31) in all 32-bit lanes
755   {
756     vixl::UseScratchRegisterScope temps(this);
757     ARMRegister scratch = temps.AcquireW();
758     And(scratch, ARMRegister(rhs, 32), 31);
759     Neg(scratch, scratch);
760     Dup(shift, scratch);
761   }
762 
763   if (isUnsigned) {
764     Ushl(Simd4S(dest), Simd4S(lhs), shift);
765   } else {
766     Sshl(Simd4S(dest), Simd4S(lhs), shift);
767   }
768 }
769 
770 void MacroAssemblerCompat::rightShiftInt64x2(FloatRegister lhs, Register rhs,
771                                              FloatRegister dest,
772                                              bool isUnsigned) {
773   ScratchSimd128Scope scratch_(asMasm());
774   ARMFPRegister shift = Simd2D(scratch_);
775 
776   // Compute -(shift & 63)
777   {
778     vixl::UseScratchRegisterScope temps(this);
779     ARMRegister scratch = temps.AcquireX();
780     And(scratch, ARMRegister(rhs, 64), 63);
781     Neg(scratch, scratch);
782     Dup(shift, scratch);
783   }
784 
785   if (isUnsigned) {
786     Ushl(Simd2D(dest), Simd2D(lhs), shift);
787   } else {
788     Sshl(Simd2D(dest), Simd2D(lhs), shift);
789   }
790 }
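// Note on the four right-shift helpers above: NEON has no variable right-shift
// instruction, only SSHL/USHL, which shift left by a per-lane signed amount
// and shift right when that amount is negative.  Hence the mask / negate /
// Dup pattern.  Per lane this computes, roughly:
//
//   s    = rhs & (laneBits - 1);
//   lane = isUnsigned ? (lane >> s) : (signedLane >> s);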
791 
792 void MacroAssembler::reserveStack(uint32_t amount) {
793   // TODO: This bumps |sp| every time we reserve using a second register.
794   // It would save some instructions if we had a fixed frame size.
795   vixl::MacroAssembler::Claim(Operand(amount));
796   adjustFrame(amount);
797 }
798 
799 void MacroAssembler::Push(RegisterOrSP reg) {
800   if (IsHiddenSP(reg)) {
801     push(sp);
802   } else {
803     push(AsRegister(reg));
804   }
805   adjustFrame(sizeof(intptr_t));
806 }
807 
808 //{{{ check_macroassembler_style
809 // ===============================================================
810 // MacroAssembler high-level usage.
811 
812 void MacroAssembler::flush() { Assembler::flush(); }
813 
814 // ===============================================================
815 // Stack manipulation functions.
816 
817 // Routines for saving/restoring registers on the stack.  The format is:
818 //
819 //   (highest address)
820 //
821 //   integer (X) regs in any order      size: 8 * # int regs
822 //
823 //   if # int regs is odd,
824 //     then an 8 byte alignment hole    size: 0 or 8
825 //
826 //   double (D) regs in any order       size: 8 * # double regs
827 //
828 //   if # double regs is odd,
829 //     then an 8 byte alignment hole    size: 0 or 8
830 //
831 //   vector (Q) regs in any order       size: 16 * # vector regs
832 //
833 //   (lowest address)
834 //
835 // Hence the size of the save area is 0 % 16.  And, provided that the base
836 // (highest) address is 16-aligned, then the vector reg save/restore accesses
837 // will also be 16-aligned, as will pairwise operations for the double regs.
838 //
839 // Implied by this is that the format of the double and vector dump area
840 // corresponds with what FloatRegister::GetPushSizeInBytes computes.
841 // See block comment in MacroAssembler.h for more details.
842 
843 size_t MacroAssembler::PushRegsInMaskSizeInBytes(LiveRegisterSet set) {
844   size_t numIntRegs = set.gprs().size();
845   return ((numIntRegs + 1) & ~1) * sizeof(intptr_t) +
846          FloatRegister::GetPushSizeInBytes(set.fpus());
847 }
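// Worked example of the size computation (register counts are illustrative):
// for a set with 3 integer registers, 3 doubles and 2 SIMD registers, the
// layout described above gives
//
//   ints:    ((3 + 1) & ~1) * 8 = 32   (one 8-byte alignment hole)
//   doubles: rounded up to 4 * 8 = 32  (via GetPushSizeInBytes)
//   vectors: 2 * 16             = 32
//
// for a total of 96 bytes, which is a multiple of 16 as required.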
848 
849 // Generate code to dump the values in `set`, either on the stack if `dest` is
850 // `Nothing` or working backwards from the address denoted by `dest` if it is
851 // `Some`.  These two cases are combined so as to minimise the chance of
852 // mistakenly generating different formats for the same `set`, given that the
853 // `Some` `dest` case is used extremely rarely.
854 static void PushOrStoreRegsInMask(MacroAssembler* masm, LiveRegisterSet set,
855                                   mozilla::Maybe<Address> dest) {
856   static_assert(sizeof(FloatRegisters::RegisterContent) == 16);
857 
858   // If we're saving to arbitrary memory, check the destination is big enough.
859   if (dest) {
860     mozilla::DebugOnly<size_t> bytesRequired =
861         masm->PushRegsInMaskSizeInBytes(set);
862     MOZ_ASSERT(dest->offset >= 0);
863     MOZ_ASSERT(((size_t)dest->offset) >= bytesRequired);
864   }
865 
866   // Note the high limit point; we'll check it again later.
867   mozilla::DebugOnly<size_t> maxExtentInitial =
868       dest ? dest->offset : masm->framePushed();
869 
870   // Gather up the integer registers in groups of four, and either push each
871   // group as a single transfer so as to minimise the number of stack pointer
872   // changes, or write them individually to memory.  Take care to ensure the
873   // space used remains 16-aligned.
874   for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();) {
875     vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg, vixl::NoCPUReg,
876                                 vixl::NoCPUReg};
877     size_t i;
878     for (i = 0; i < 4 && iter.more(); i++) {
879       src[i] = ARMRegister(*iter, 64);
880       ++iter;
881     }
882     MOZ_ASSERT(i > 0);
883 
884     if (i == 1 || i == 3) {
885       // Ensure the stack remains 16-aligned
886       MOZ_ASSERT(!iter.more());
887       src[i] = vixl::xzr;
888       i++;
889     }
890     MOZ_ASSERT(i == 2 || i == 4);
891 
892     if (dest) {
893       for (size_t j = 0; j < i; j++) {
894         Register ireg = Register::FromCode(src[j].IsZero() ? Registers::xzr
895                                                            : src[j].code());
896         dest->offset -= sizeof(intptr_t);
897         masm->storePtr(ireg, *dest);
898       }
899     } else {
900       masm->adjustFrame(i * 8);
901       masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
902     }
903   }
904 
905   // Now the same for the FP double registers.  Note that because of how
906   // ReduceSetForPush works, an underlying AArch64 SIMD/FP register can either
907   // be present as a double register, or as a V128 register, but not both.
908   // Firstly, round up the registers to be pushed.
909 
910   FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
911   vixl::CPURegister allSrcs[FloatRegisters::TotalPhys];
912   size_t numAllSrcs = 0;
913 
914   for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) {
915     FloatRegister reg = *iter;
916     if (reg.isDouble()) {
917       MOZ_RELEASE_ASSERT(numAllSrcs < FloatRegisters::TotalPhys);
918       allSrcs[numAllSrcs] = ARMFPRegister(reg, 64);
919       numAllSrcs++;
920     } else {
921       MOZ_ASSERT(reg.isSimd128());
922     }
923   }
924   MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);
925 
926   if ((numAllSrcs & 1) == 1) {
927     // We've got an odd number of doubles.  In order to maintain 16-alignment,
928     // push the last register twice.  We'll skip over the duplicate in
929     // PopRegsInMaskIgnore.
930     allSrcs[numAllSrcs] = allSrcs[numAllSrcs - 1];
931     numAllSrcs++;
932   }
933   MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);
934   MOZ_RELEASE_ASSERT((numAllSrcs & 1) == 0);
935 
936   // And now generate the transfers.
937   size_t i;
938   if (dest) {
939     for (i = 0; i < numAllSrcs; i++) {
940       FloatRegister freg =
941           FloatRegister(FloatRegisters::FPRegisterID(allSrcs[i].code()),
942                         FloatRegisters::Kind::Double);
943       dest->offset -= sizeof(double);
944       masm->storeDouble(freg, *dest);
945     }
946   } else {
947     i = 0;
948     while (i < numAllSrcs) {
949       vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg,
950                                   vixl::NoCPUReg, vixl::NoCPUReg};
951       size_t j;
952       for (j = 0; j < 4 && j + i < numAllSrcs; j++) {
953         src[j] = allSrcs[j + i];
954       }
955       masm->adjustFrame(8 * j);
956       masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
957       i += j;
958     }
959   }
960   MOZ_ASSERT(i == numAllSrcs);
961 
962   // Finally, deal with the SIMD (V128) registers.  This is a bit simpler
963   // as there's no need for special-casing to maintain 16-alignment.
964 
965   numAllSrcs = 0;
966   for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) {
967     FloatRegister reg = *iter;
968     if (reg.isSimd128()) {
969       MOZ_RELEASE_ASSERT(numAllSrcs < FloatRegisters::TotalPhys);
970       allSrcs[numAllSrcs] = ARMFPRegister(reg, 128);
971       numAllSrcs++;
972     }
973   }
974   MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);
975 
976   // Generate the transfers.
977   if (dest) {
978     for (i = 0; i < numAllSrcs; i++) {
979       FloatRegister freg =
980           FloatRegister(FloatRegisters::FPRegisterID(allSrcs[i].code()),
981                         FloatRegisters::Kind::Simd128);
982       dest->offset -= FloatRegister::SizeOfSimd128;
983       masm->storeUnalignedSimd128(freg, *dest);
984     }
985   } else {
986     i = 0;
987     while (i < numAllSrcs) {
988       vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg,
989                                   vixl::NoCPUReg, vixl::NoCPUReg};
990       size_t j;
991       for (j = 0; j < 4 && j + i < numAllSrcs; j++) {
992         src[j] = allSrcs[j + i];
993       }
994       masm->adjustFrame(16 * j);
995       masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
996       i += j;
997     }
998   }
999   MOZ_ASSERT(i == numAllSrcs);
1000 
1001   // Final overrun check.
1002   if (dest) {
1003     MOZ_ASSERT(maxExtentInitial - dest->offset ==
1004                masm->PushRegsInMaskSizeInBytes(set));
1005   } else {
1006     MOZ_ASSERT(masm->framePushed() - maxExtentInitial ==
1007                masm->PushRegsInMaskSizeInBytes(set));
1008   }
1009 }
1010 
1011 void MacroAssembler::PushRegsInMask(LiveRegisterSet set) {
1012   PushOrStoreRegsInMask(this, set, mozilla::Nothing());
1013 }
1014 
1015 void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest,
1016                                      Register scratch) {
1017   PushOrStoreRegsInMask(this, set, mozilla::Some(dest));
1018 }
1019 
1020 // This is a helper function for PopRegsInMaskIgnore below.  It emits the
1021 // loads described by dests[0] and [1] and offsets[0] and [1], generating a
1022 // load-pair if it can.
1023 static void GeneratePendingLoadsThenFlush(MacroAssembler* masm,
1024                                           vixl::CPURegister* dests,
1025                                           uint32_t* offsets,
1026                                           uint32_t transactionSize) {
1027   // Generate the loads ..
1028   if (!dests[0].IsNone()) {
1029     if (!dests[1].IsNone()) {
1030       // [0] and [1] both present.
1031       if (offsets[0] + transactionSize == offsets[1]) {
1032         masm->Ldp(dests[0], dests[1],
1033                   MemOperand(masm->GetStackPointer64(), offsets[0]));
1034       } else {
1035         // Theoretically we could check for a load-pair with the destinations
1036         // switched, but our callers will never generate that.  Hence there's
1037         // no loss in giving up at this point and generating two loads.
1038         masm->Ldr(dests[0], MemOperand(masm->GetStackPointer64(), offsets[0]));
1039         masm->Ldr(dests[1], MemOperand(masm->GetStackPointer64(), offsets[1]));
1040       }
1041     } else {
1042       // [0] only.
1043       masm->Ldr(dests[0], MemOperand(masm->GetStackPointer64(), offsets[0]));
1044     }
1045   } else {
1046     if (!dests[1].IsNone()) {
1047       // [1] only.  Can't happen because callers always fill [0] before [1].
1048       MOZ_CRASH("GenerateLoadsThenFlush");
1049     } else {
1050       // Neither entry valid.  This can happen.
1051     }
1052   }
1053 
1054   // .. and flush.
1055   dests[0] = dests[1] = vixl::NoCPUReg;
1056   offsets[0] = offsets[1] = 0;
1057 }
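// Usage sketch: PopRegsInMaskIgnore below keeps a two-entry queue of pending
// (destination, offset) loads and calls this helper whenever the queue fills
// or a register class ends.  When the queued offsets are exactly
// transactionSize apart, the pair collapses into a single load-pair, e.g.
// offsets 0 and 16 with 128-bit destinations become
//
//   ldp q0, q1, [sp, #0]     ; register names illustrative
//
// instead of two separate LDRs.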
1058 
1059 void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set,
1060                                          LiveRegisterSet ignore) {
1061   mozilla::DebugOnly<size_t> framePushedInitial = framePushed();
1062 
1063   // The offset of the data from the stack pointer.
1064   uint32_t offset = 0;
1065 
1066   // The set of FP/SIMD registers we need to restore.
1067   FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
1068 
1069   // The set of registers to ignore.  BroadcastToAllSizes() is used to avoid
1070   // any ambiguities arising from (eg) `fpuSet` containing q17 but `ignore`
1071   // containing d17.
1072   FloatRegisterSet ignoreFpusBroadcasted(
1073       FloatRegister::BroadcastToAllSizes(ignore.fpus()));
1074 
1075   // First recover the SIMD (V128) registers.  This is straightforward in that
1076   // we don't need to think about alignment holes.
1077 
1078   // These three form a two-entry queue that holds loads that we know we
1079   // need, but which we haven't yet emitted.
1080   vixl::CPURegister pendingDests[2] = {vixl::NoCPUReg, vixl::NoCPUReg};
1081   uint32_t pendingOffsets[2] = {0, 0};
1082   size_t nPending = 0;
1083 
1084   for (FloatRegisterIterator iter(fpuSet); iter.more(); ++iter) {
1085     FloatRegister reg = *iter;
1086     if (reg.isDouble()) {
1087       continue;
1088     }
1089     MOZ_RELEASE_ASSERT(reg.isSimd128());
1090 
1091     uint32_t offsetForReg = offset;
1092     offset += FloatRegister::SizeOfSimd128;
1093 
1094     if (ignoreFpusBroadcasted.hasRegisterIndex(reg)) {
1095       continue;
1096     }
1097 
1098     MOZ_ASSERT(nPending <= 2);
1099     if (nPending == 2) {
1100       GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 16);
1101       nPending = 0;
1102     }
1103     pendingDests[nPending] = ARMFPRegister(reg, 128);
1104     pendingOffsets[nPending] = offsetForReg;
1105     nPending++;
1106   }
1107   GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 16);
1108   nPending = 0;
1109 
1110   MOZ_ASSERT((offset % 16) == 0);
1111 
1112   // Now recover the FP double registers.  This is more tricky in that we need
1113   // to skip over the lowest-addressed of them if the number of them was odd.
1114 
1115   if ((((fpuSet.bits() & FloatRegisters::AllDoubleMask).size()) & 1) == 1) {
1116     offset += sizeof(double);
1117   }
1118 
1119   for (FloatRegisterIterator iter(fpuSet); iter.more(); ++iter) {
1120     FloatRegister reg = *iter;
1121     if (reg.isSimd128()) {
1122       continue;
1123     }
1124     /* true but redundant, per loop above: MOZ_RELEASE_ASSERT(reg.isDouble()) */
1125 
1126     uint32_t offsetForReg = offset;
1127     offset += sizeof(double);
1128 
1129     if (ignoreFpusBroadcasted.hasRegisterIndex(reg)) {
1130       continue;
1131     }
1132 
1133     MOZ_ASSERT(nPending <= 2);
1134     if (nPending == 2) {
1135       GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
1136       nPending = 0;
1137     }
1138     pendingDests[nPending] = ARMFPRegister(reg, 64);
1139     pendingOffsets[nPending] = offsetForReg;
1140     nPending++;
1141   }
1142   GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
1143   nPending = 0;
1144 
1145   MOZ_ASSERT((offset % 16) == 0);
1146   MOZ_ASSERT(offset == set.fpus().getPushSizeInBytes());
1147 
1148   // And finally recover the integer registers, again skipping an alignment
1149   // hole if it exists.
1150 
1151   if ((set.gprs().size() & 1) == 1) {
1152     offset += sizeof(uint64_t);
1153   }
1154 
1155   for (GeneralRegisterIterator iter(set.gprs()); iter.more(); ++iter) {
1156     Register reg = *iter;
1157 
1158     uint32_t offsetForReg = offset;
1159     offset += sizeof(uint64_t);
1160 
1161     if (ignore.has(reg)) {
1162       continue;
1163     }
1164 
1165     MOZ_ASSERT(nPending <= 2);
1166     if (nPending == 2) {
1167       GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
1168       nPending = 0;
1169     }
1170     pendingDests[nPending] = ARMRegister(reg, 64);
1171     pendingOffsets[nPending] = offsetForReg;
1172     nPending++;
1173   }
1174   GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
1175 
1176   MOZ_ASSERT((offset % 16) == 0);
1177 
1178   size_t bytesPushed = PushRegsInMaskSizeInBytes(set);
1179   MOZ_ASSERT(offset == bytesPushed);
1180   freeStack(bytesPushed);
1181 }
1182 
1183 void MacroAssembler::Push(Register reg) {
1184   push(reg);
1185   adjustFrame(sizeof(intptr_t));
1186 }
1187 
1188 void MacroAssembler::Push(Register reg1, Register reg2, Register reg3,
1189                           Register reg4) {
1190   push(reg1, reg2, reg3, reg4);
1191   adjustFrame(4 * sizeof(intptr_t));
1192 }
1193 
1194 void MacroAssembler::Push(const Imm32 imm) {
1195   push(imm);
1196   adjustFrame(sizeof(intptr_t));
1197 }
1198 
1199 void MacroAssembler::Push(const ImmWord imm) {
1200   push(imm);
1201   adjustFrame(sizeof(intptr_t));
1202 }
1203 
1204 void MacroAssembler::Push(const ImmPtr imm) {
1205   push(imm);
1206   adjustFrame(sizeof(intptr_t));
1207 }
1208 
1209 void MacroAssembler::Push(const ImmGCPtr ptr) {
1210   push(ptr);
1211   adjustFrame(sizeof(intptr_t));
1212 }
1213 
1214 void MacroAssembler::Push(FloatRegister f) {
1215   push(f);
1216   adjustFrame(sizeof(double));
1217 }
1218 
1219 void MacroAssembler::PushBoxed(FloatRegister reg) {
1220   subFromStackPtr(Imm32(sizeof(double)));
1221   boxDouble(reg, Address(getStackPointer(), 0));
1222   adjustFrame(sizeof(double));
1223 }
1224 
1225 void MacroAssembler::Pop(Register reg) {
1226   pop(reg);
1227   adjustFrame(-1 * int64_t(sizeof(int64_t)));
1228 }
1229 
1230 void MacroAssembler::Pop(FloatRegister f) {
1231   loadDouble(Address(getStackPointer(), 0), f);
1232   freeStack(sizeof(double));
1233 }
1234 
1235 void MacroAssembler::Pop(const ValueOperand& val) {
1236   pop(val);
1237   adjustFrame(-1 * int64_t(sizeof(int64_t)));
1238 }
1239 
1240 // ===============================================================
1241 // Simple call functions.
1242 
1243 CodeOffset MacroAssembler::call(Register reg) {
1244   // This sync has been observed (and is expected) to be necessary.
1245   // eg testcase: tests/debug/bug1107525.js
1246   syncStackPtr();
1247   Blr(ARMRegister(reg, 64));
1248   return CodeOffset(currentOffset());
1249 }
1250 
1251 CodeOffset MacroAssembler::call(Label* label) {
1252   // This sync has been observed (and is expected) to be necessary.
1253   // eg testcase: tests/basic/testBug504520Harder.js
1254   syncStackPtr();
1255   Bl(label);
1256   return CodeOffset(currentOffset());
1257 }
1258 
1259 void MacroAssembler::call(ImmPtr imm) {
1260   // This sync has been observed (and is expected) to be necessary.
1261   // eg testcase: asm.js/testTimeout5.js
1262   syncStackPtr();
1263   vixl::UseScratchRegisterScope temps(this);
1264   MOZ_ASSERT(temps.IsAvailable(ScratchReg64));  // ip0
1265   temps.Exclude(ScratchReg64);
1266   movePtr(imm, ScratchReg64.asUnsized());
1267   Blr(ScratchReg64);
1268 }
1269 
1270 void MacroAssembler::call(ImmWord imm) { call(ImmPtr((void*)imm.value)); }
1271 
1272 CodeOffset MacroAssembler::call(wasm::SymbolicAddress imm) {
1273   vixl::UseScratchRegisterScope temps(this);
1274   const Register scratch = temps.AcquireX().asUnsized();
1275   // This sync is believed to be necessary, although no case in jit-test/tests
1276   // has been observed to cause SP != PSP here.
1277   syncStackPtr();
1278   movePtr(imm, scratch);
1279   Blr(ARMRegister(scratch, 64));
1280   return CodeOffset(currentOffset());
1281 }
1282 
1283 void MacroAssembler::call(const Address& addr) {
1284   vixl::UseScratchRegisterScope temps(this);
1285   const Register scratch = temps.AcquireX().asUnsized();
1286   // This sync has been observed (and is expected) to be necessary.
1287   // eg testcase: tests/backup-point-bug1315634.js
1288   syncStackPtr();
1289   loadPtr(addr, scratch);
1290   Blr(ARMRegister(scratch, 64));
1291 }
1292 
1293 void MacroAssembler::call(JitCode* c) {
1294   vixl::UseScratchRegisterScope temps(this);
1295   const ARMRegister scratch64 = temps.AcquireX();
1296   // This sync has been observed (and is expected) to be necessary.
1297   // eg testcase: arrays/new-array-undefined-undefined-more-args-2.js
1298   syncStackPtr();
1299   BufferOffset off = immPool64(scratch64, uint64_t(c->raw()));
1300   addPendingJump(off, ImmPtr(c->raw()), RelocationKind::JITCODE);
1301   blr(scratch64);
1302 }
1303 
1304 CodeOffset MacroAssembler::callWithPatch() {
1305   // This needs to sync.  Wasm goes through this one for intramodule calls.
1306   //
1307   // In other cases, wasm goes through masm.wasmCallImport(),
1308   // masm.wasmCallBuiltinInstanceMethod, masm.wasmCallIndirect, all of which
1309   // sync.
1310   //
1311   // This sync is believed to be necessary, although no case in jit-test/tests
1312   // has been observed to cause SP != PSP here.
1313   syncStackPtr();
1314   bl(0, LabelDoc());
1315   return CodeOffset(currentOffset());
1316 }
1317 void MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset) {
1318   Instruction* inst = getInstructionAt(BufferOffset(callerOffset - 4));
1319   MOZ_ASSERT(inst->IsBL());
1320   ptrdiff_t relTarget = (int)calleeOffset - ((int)callerOffset - 4);
1321   ptrdiff_t relTarget00 = relTarget >> 2;
1322   MOZ_RELEASE_ASSERT((relTarget & 0x3) == 0);
1323   MOZ_RELEASE_ASSERT(vixl::IsInt26(relTarget00));
1324   bl(inst, relTarget00);
1325 }
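// Worked example for the patch above: BL encodes a signed 26-bit word offset,
// so the byte displacement must be 4-aligned and within roughly +/-128 MB.
// If the BL sits at buffer offset 0x1000 and the callee at 0x2000, then
// relTarget = 0x1000 and the imm26 field is rewritten to 0x1000 >> 2 = 0x400.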
1326 
1327 CodeOffset MacroAssembler::farJumpWithPatch() {
1328   vixl::UseScratchRegisterScope temps(this);
1329   const ARMRegister scratch = temps.AcquireX();
1330   const ARMRegister scratch2 = temps.AcquireX();
1331 
1332   AutoForbidPoolsAndNops afp(this,
1333                              /* max number of instructions in scope = */ 7);
1334 
1335   mozilla::DebugOnly<uint32_t> before = currentOffset();
1336 
1337   align(8);  // At most one nop
1338 
1339   Label branch;
1340   adr(scratch2, &branch);
1341   ldr(scratch, vixl::MemOperand(scratch2, 4));
1342   add(scratch2, scratch2, scratch);
1343   CodeOffset offs(currentOffset());
1344   bind(&branch);
1345   br(scratch2);
1346   Emit(UINT32_MAX);
1347   Emit(UINT32_MAX);
1348 
1349   mozilla::DebugOnly<uint32_t> after = currentOffset();
1350 
1351   MOZ_ASSERT(after - before == 24 || after - before == 28);
1352 
1353   return offs;
1354 }
1355 
1356 void MacroAssembler::patchFarJump(CodeOffset farJump, uint32_t targetOffset) {
1357   Instruction* inst1 = getInstructionAt(BufferOffset(farJump.offset() + 4));
1358   Instruction* inst2 = getInstructionAt(BufferOffset(farJump.offset() + 8));
1359 
1360   int64_t distance = (int64_t)targetOffset - (int64_t)farJump.offset();
1361 
1362   MOZ_ASSERT(inst1->InstructionBits() == UINT32_MAX);
1363   MOZ_ASSERT(inst2->InstructionBits() == UINT32_MAX);
1364 
1365   inst1->SetInstructionBits((uint32_t)distance);
1366   inst2->SetInstructionBits((uint32_t)(distance >> 32));
1367 }
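// Taken together, farJumpWithPatch()/patchFarJump() implement a jump with a
// full 64-bit displacement.  Schematically (register names illustrative; the
// real code uses whichever scratch registers are free):
//
//   adr  x17, branch        ; address of the br below
//   ldr  x16, [x17, #4]     ; the 64-bit displacement stored after the br
//   add  x17, x17, x16
//   branch: br x17
//   .word lo32 ; .word hi32 ; initially UINT32_MAX, overwritten by patchFarJump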
1368 
1369 CodeOffset MacroAssembler::nopPatchableToCall() {
1370   AutoForbidPoolsAndNops afp(this,
1371                              /* max number of instructions in scope = */ 1);
1372   Nop();
1373   return CodeOffset(currentOffset());
1374 }
1375 
1376 void MacroAssembler::patchNopToCall(uint8_t* call, uint8_t* target) {
1377   uint8_t* inst = call - 4;
1378   Instruction* instr = reinterpret_cast<Instruction*>(inst);
1379   MOZ_ASSERT(instr->IsBL() || instr->IsNOP());
1380   bl(instr, (target - inst) >> 2);
1381 }
1382 
1383 void MacroAssembler::patchCallToNop(uint8_t* call) {
1384   uint8_t* inst = call - 4;
1385   Instruction* instr = reinterpret_cast<Instruction*>(inst);
1386   MOZ_ASSERT(instr->IsBL() || instr->IsNOP());
1387   nop(instr);
1388 }
1389 
1390 void MacroAssembler::pushReturnAddress() {
1391   MOZ_RELEASE_ASSERT(!sp.Is(GetStackPointer64()), "Not valid");
1392   push(lr);
1393 }
1394 
1395 void MacroAssembler::popReturnAddress() {
1396   MOZ_RELEASE_ASSERT(!sp.Is(GetStackPointer64()), "Not valid");
1397   pop(lr);
1398 }
1399 
1400 // ===============================================================
1401 // ABI function calls.
1402 
1403 void MacroAssembler::setupUnalignedABICall(Register scratch) {
1404   // Wasm does not require dynamic alignment of SP, so this routine must never
1405   // be called when generating wasm code.
1406   MOZ_ASSERT(!IsCompilingWasm());
1407 
1408   // The following won't work for SP -- needs slightly different logic.
1409   MOZ_RELEASE_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));
1410 
1411   setupNativeABICall();
1412   dynamicAlignment_ = true;
1413 
1414   int64_t alignment = ~(int64_t(ABIStackAlignment) - 1);
1415   ARMRegister scratch64(scratch, 64);
1416   MOZ_ASSERT(!scratch64.Is(PseudoStackPointer64));
1417 
1418   // Always save LR -- Baseline ICs assume that LR isn't modified.
1419   push(lr);
1420 
1421   // Remember the stack address on entry.  This is reloaded in callWithABIPost
1422   // below.
1423   Mov(scratch64, PseudoStackPointer64);
1424 
1425   // Perform the alignment, including the effective push of the previous SP.
1426   Sub(PseudoStackPointer64, PseudoStackPointer64, Operand(8));
1427   And(PseudoStackPointer64, PseudoStackPointer64, Operand(alignment));
1428   syncStackPtr();
1429 
1430   // Store previous sp to the top of the stack, aligned.  This is also
1431   // reloaded in callWithABIPost.
1432   Str(scratch64, MemOperand(PseudoStackPointer64, 0));
1433 }
1434 
1435 void MacroAssembler::callWithABIPre(uint32_t* stackAdjust, bool callFromWasm) {
1436   // wasm operates without the need for dynamic alignment of SP.
1437   MOZ_ASSERT(!(dynamicAlignment_ && callFromWasm));
1438 
1439   MOZ_ASSERT(inCall_);
1440   uint32_t stackForCall = abiArgs_.stackBytesConsumedSoFar();
1441 
1442   // ARM64 *really* wants SP to always be 16-aligned, so ensure this now.
1443   if (dynamicAlignment_) {
1444     stackForCall += ComputeByteAlignment(stackForCall, StackAlignment);
1445   } else {
1446     // This can happen when we attach out-of-line stubs for rare cases.  For
1447     // example CodeGenerator::visitWasmTruncateToInt32 adds an out-of-line
1448     // chunk.
1449     uint32_t alignmentAtPrologue = callFromWasm ? sizeof(wasm::Frame) : 0;
1450     stackForCall += ComputeByteAlignment(
1451         stackForCall + framePushed() + alignmentAtPrologue, ABIStackAlignment);
1452   }
1453 
1454   *stackAdjust = stackForCall;
1455   reserveStack(*stackAdjust);
1456   {
1457     enoughMemory_ &= moveResolver_.resolve();
1458     if (!enoughMemory_) {
1459       return;
1460     }
1461     MoveEmitter emitter(*this);
1462     emitter.emit(moveResolver_);
1463     emitter.finish();
1464   }
1465 
1466   // Call boundaries communicate stack via SP.
1467   // (jseward, 2021Mar03) This sync may well be redundant, given that all of
1468   // the MacroAssembler::call methods generate a sync before the call.
1469   // Removing it does not cause any failures in any of the jit-tests.
1470   syncStackPtr();
1471 }
1472 
1473 void MacroAssembler::callWithABIPost(uint32_t stackAdjust, MoveOp::Type result,
1474                                      bool callFromWasm) {
1475   // wasm operates without the need for dynamic alignment of SP.
1476   MOZ_ASSERT(!(dynamicAlignment_ && callFromWasm));
1477 
1478   // Call boundaries communicate stack via SP, so we must resync PSP now.
1479   initPseudoStackPtr();
1480 
1481   freeStack(stackAdjust);
1482 
1483   if (dynamicAlignment_) {
1484     // This then-clause makes more sense if you first read
1485     // setupUnalignedABICall above.
1486     //
1487     // Restore the stack pointer from entry.  The stack pointer will have been
1488     // saved by setupUnalignedABICall.  This is fragile in that it assumes
1489     // that uses of this routine (callWithABIPost) with `dynamicAlignment_ ==
1490     // true` are preceded by matching calls to setupUnalignedABICall.  But
1491   // there's nothing that enforces that mechanically.  If we really want to
1492     // enforce this, we could add a debug-only CallWithABIState enum to the
1493     // MacroAssembler and assert that setupUnalignedABICall updates it before
1494     // we get here, then reset it to its initial state.
1495     Ldr(GetStackPointer64(), MemOperand(GetStackPointer64(), 0));
1496     syncStackPtr();
1497 
1498     // Restore LR.  This restores LR to the value stored by
1499     // setupUnalignedABICall, which should have been called just before
1500     // callWithABIPre.  This is, per the above comment, also fragile.
1501     pop(lr);
1502 
1503     // SP may be < PSP now.  That is expected from the behaviour of `pop`.  It
1504     // is not clear why the following `syncStackPtr` is necessary, but it is:
1505     // without it, the following test segfaults:
1506     // tests/backup-point-bug1315634.js
1507     syncStackPtr();
1508   }
1509 
1510   // If the ABI's return regs are where ION is expecting them, then
1511   // no other work needs to be done.
1512 
1513 #ifdef DEBUG
1514   MOZ_ASSERT(inCall_);
1515   inCall_ = false;
1516 #endif
1517 }
1518 
1519 void MacroAssembler::callWithABINoProfiler(Register fun, MoveOp::Type result) {
1520   vixl::UseScratchRegisterScope temps(this);
1521   const Register scratch = temps.AcquireX().asUnsized();
1522   movePtr(fun, scratch);
1523 
1524   uint32_t stackAdjust;
1525   callWithABIPre(&stackAdjust);
1526   call(scratch);
1527   callWithABIPost(stackAdjust, result);
1528 }
1529 
1530 void MacroAssembler::callWithABINoProfiler(const Address& fun,
1531                                            MoveOp::Type result) {
1532   vixl::UseScratchRegisterScope temps(this);
1533   const Register scratch = temps.AcquireX().asUnsized();
1534   loadPtr(fun, scratch);
1535 
1536   uint32_t stackAdjust;
1537   callWithABIPre(&stackAdjust);
1538   call(scratch);
1539   callWithABIPost(stackAdjust, result);
1540 }
1541 
1542 // ===============================================================
1543 // Jit Frames.
1544 
1545 uint32_t MacroAssembler::pushFakeReturnAddress(Register scratch) {
1546   enterNoPool(3);
1547   Label fakeCallsite;
1548 
1549   Adr(ARMRegister(scratch, 64), &fakeCallsite);
1550   Push(scratch);
1551   bind(&fakeCallsite);
1552   uint32_t pseudoReturnOffset = currentOffset();
1553 
1554   leaveNoPool();
1555   return pseudoReturnOffset;
1556 }
1557 
1558 bool MacroAssemblerCompat::buildOOLFakeExitFrame(void* fakeReturnAddr) {
1559   uint32_t descriptor = MakeFrameDescriptor(
1560       asMasm().framePushed(), FrameType::IonJS, ExitFrameLayout::Size());
1561   asMasm().Push(Imm32(descriptor));
1562   asMasm().Push(ImmPtr(fakeReturnAddr));
1563   return true;
1564 }
1565 
1566 // ===============================================================
1567 // Move instructions
1568 
1569 void MacroAssembler::moveValue(const TypedOrValueRegister& src,
1570                                const ValueOperand& dest) {
1571   if (src.hasValue()) {
1572     moveValue(src.valueReg(), dest);
1573     return;
1574   }
1575 
1576   MIRType type = src.type();
1577   AnyRegister reg = src.typedReg();
1578 
1579   if (!IsFloatingPointType(type)) {
1580     boxNonDouble(ValueTypeFromMIRType(type), reg.gpr(), dest);
1581     return;
1582   }
1583 
1584   ScratchDoubleScope scratch(*this);
1585   FloatRegister freg = reg.fpu();
1586   if (type == MIRType::Float32) {
1587     convertFloat32ToDouble(freg, scratch);
1588     freg = scratch;
1589   }
1590   boxDouble(freg, dest, scratch);
1591 }
1592 
1593 void MacroAssembler::moveValue(const ValueOperand& src,
1594                                const ValueOperand& dest) {
1595   if (src == dest) {
1596     return;
1597   }
1598   movePtr(src.valueReg(), dest.valueReg());
1599 }
1600 
1601 void MacroAssembler::moveValue(const Value& src, const ValueOperand& dest) {
1602   if (!src.isGCThing()) {
1603     movePtr(ImmWord(src.asRawBits()), dest.valueReg());
1604     return;
1605   }
1606 
1607   BufferOffset load =
1608       movePatchablePtr(ImmPtr(src.bitsAsPunboxPointer()), dest.valueReg());
1609   writeDataRelocation(src, load);
1610 }
1611 
1612 // ===============================================================
1613 // Branch functions
1614 
1615 void MacroAssembler::loadStoreBuffer(Register ptr, Register buffer) {
1616   if (ptr != buffer) {
1617     movePtr(ptr, buffer);
1618   }
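  // OR-ing with ChunkMask yields the address of the last byte of the chunk
  // containing |ptr|; the chunk's store buffer pointer is then loaded from a
  // fixed offset relative to that last byte.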
1619   orPtr(Imm32(gc::ChunkMask), buffer);
1620   loadPtr(Address(buffer, gc::ChunkStoreBufferOffsetFromLastByte), buffer);
1621 }
1622 
1623 void MacroAssembler::branchPtrInNurseryChunk(Condition cond, Register ptr,
1624                                              Register temp, Label* label) {
1625   MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
1626   MOZ_ASSERT(ptr != temp);
1627   MOZ_ASSERT(ptr != ScratchReg &&
1628              ptr != ScratchReg2);  // Both may be used internally.
1629   MOZ_ASSERT(temp != ScratchReg && temp != ScratchReg2);
1630 
1631   movePtr(ptr, temp);
1632   orPtr(Imm32(gc::ChunkMask), temp);
1633   branchPtr(InvertCondition(cond),
1634             Address(temp, gc::ChunkStoreBufferOffsetFromLastByte), ImmWord(0),
1635             label);
1636 }
1637 
1638 void MacroAssembler::branchValueIsNurseryCell(Condition cond,
1639                                               const Address& address,
1640                                               Register temp, Label* label) {
1641   branchValueIsNurseryCellImpl(cond, address, temp, label);
1642 }
1643 
1644 void MacroAssembler::branchValueIsNurseryCell(Condition cond,
1645                                               ValueOperand value, Register temp,
1646                                               Label* label) {
1647   branchValueIsNurseryCellImpl(cond, value, temp, label);
1648 }
1649 
1650 template <typename T>
1651 void MacroAssembler::branchValueIsNurseryCellImpl(Condition cond,
1652                                                   const T& value, Register temp,
1653                                                   Label* label) {
1654   MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
1655   MOZ_ASSERT(temp != ScratchReg &&
1656              temp != ScratchReg2);  // Both may be used internally.
1657 
1658   Label done;
1659   branchTestGCThing(Assembler::NotEqual, value,
1660                     cond == Assembler::Equal ? &done : label);
1661 
1662   unboxGCThingForGCBarrier(value, temp);
1663   orPtr(Imm32(gc::ChunkMask), temp);
1664   branchPtr(InvertCondition(cond),
1665             Address(temp, gc::ChunkStoreBufferOffsetFromLastByte), ImmWord(0),
1666             label);
1667 
1668   bind(&done);
1669 }
1670 
1671 void MacroAssembler::branchTestValue(Condition cond, const ValueOperand& lhs,
1672                                      const Value& rhs, Label* label) {
1673   MOZ_ASSERT(cond == Equal || cond == NotEqual);
1674   vixl::UseScratchRegisterScope temps(this);
1675   const ARMRegister scratch64 = temps.AcquireX();
1676   MOZ_ASSERT(scratch64.asUnsized() != lhs.valueReg());
1677   moveValue(rhs, ValueOperand(scratch64.asUnsized()));
1678   Cmp(ARMRegister(lhs.valueReg(), 64), scratch64);
1679   B(label, cond);
1680 }
1681 
1682 // ========================================================================
1683 // Memory access primitives.
1684 template <typename T>
1685 void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
1686                                        MIRType valueType, const T& dest,
1687                                        MIRType slotType) {
1688   if (valueType == MIRType::Double) {
1689     boxDouble(value.reg().typedReg().fpu(), dest);
1690     return;
1691   }
1692 
1693   // For known integers and booleans, we can just store the unboxed value if
1694   // the slot has the same type.
1695   if ((valueType == MIRType::Int32 || valueType == MIRType::Boolean) &&
1696       slotType == valueType) {
1697     if (value.constant()) {
1698       Value val = value.value();
1699       if (valueType == MIRType::Int32) {
1700         store32(Imm32(val.toInt32()), dest);
1701       } else {
1702         store32(Imm32(val.toBoolean() ? 1 : 0), dest);
1703       }
1704     } else {
1705       store32(value.reg().typedReg().gpr(), dest);
1706     }
1707     return;
1708   }
1709 
1710   if (value.constant()) {
1711     storeValue(value.value(), dest);
1712   } else {
1713     storeValue(ValueTypeFromMIRType(valueType), value.reg().typedReg().gpr(),
1714                dest);
1715   }
1716 }
1717 
1718 template void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
1719                                                 MIRType valueType,
1720                                                 const Address& dest,
1721                                                 MIRType slotType);
1722 template void MacroAssembler::storeUnboxedValue(
1723     const ConstantOrRegister& value, MIRType valueType,
1724     const BaseObjectElementIndex& dest, MIRType slotType);
1725 
1726 void MacroAssembler::comment(const char* msg) { Assembler::comment(msg); }
1727 
1728 // ========================================================================
1729 // wasm support
1730 
1731 CodeOffset MacroAssembler::wasmTrapInstruction() {
1732   AutoForbidPoolsAndNops afp(this,
1733                              /* max number of instructions in scope = */ 1);
1734   CodeOffset offs(currentOffset());
1735   Unreachable();
1736   return offs;
1737 }
1738 
1739 void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index,
1740                                        Register boundsCheckLimit,
1741                                        Label* label) {
1742   branch32(cond, index, boundsCheckLimit, label);
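  // With Spectre index masking enabled, also zero the index whenever the
  // bounds-check condition holds, so that a speculatively-executed access
  // cannot use an out-of-bounds index.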
1743   if (JitOptions.spectreIndexMasking) {
1744     csel(ARMRegister(index, 32), vixl::wzr, ARMRegister(index, 32), cond);
1745   }
1746 }
1747 
1748 void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index,
1749                                        Address boundsCheckLimit, Label* label) {
1750   branch32(cond, index, boundsCheckLimit, label);
1751   if (JitOptions.spectreIndexMasking) {
1752     csel(ARMRegister(index, 32), vixl::wzr, ARMRegister(index, 32), cond);
1753   }
1754 }
1755 
1756 void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index,
1757                                        Register64 boundsCheckLimit,
1758                                        Label* label) {
1759   branchPtr(cond, index.reg, boundsCheckLimit.reg, label);
1760   if (JitOptions.spectreIndexMasking) {
1761     csel(ARMRegister(index.reg, 64), vixl::xzr, ARMRegister(index.reg, 64),
1762          cond);
1763   }
1764 }
1765 
1766 void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index,
1767                                        Address boundsCheckLimit, Label* label) {
1768   branchPtr(InvertCondition(cond), boundsCheckLimit, index.reg, label);
1769   if (JitOptions.spectreIndexMasking) {
1770     csel(ARMRegister(index.reg, 64), vixl::xzr, ARMRegister(index.reg, 64),
1771          cond);
1772   }
1773 }
1774 
1775 // FCVTZU behaves as follows:
1776 //
1777 // on NaN it produces zero
1778 // on too large it produces UINT_MAX (for appropriate type)
1779 // on too small it produces zero
1780 //
1781 // FCVTZS behaves as follows:
1782 //
1783 // on NaN it produces zero
1784 // on too large it produces INT_MAX (for appropriate type)
1785 // on too small it produces INT_MIN (ditto)
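//
// The non-saturating truncations below exploit this: after the conversion the
// result is compared against the sentinel values (0 and UINT_MAX for the
// unsigned cases; 0, INT_MAX and INT_MIN for the signed cases).  If it matches
// any of them, the input may have been NaN or out of range, so control
// branches to the out-of-line entry to sort that out.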
1786 
1787 void MacroAssembler::wasmTruncateDoubleToUInt32(FloatRegister input_,
1788                                                 Register output_,
1789                                                 bool isSaturating,
1790                                                 Label* oolEntry) {
1791   ARMRegister output(output_, 32);
1792   ARMFPRegister input(input_, 64);
1793   Fcvtzu(output, input);
1794   if (!isSaturating) {
1795     Cmp(output, 0);
1796     Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
1797     B(oolEntry, Assembler::Equal);
1798   }
1799 }
1800 
1801 void MacroAssembler::wasmTruncateFloat32ToUInt32(FloatRegister input_,
1802                                                  Register output_,
1803                                                  bool isSaturating,
1804                                                  Label* oolEntry) {
1805   ARMRegister output(output_, 32);
1806   ARMFPRegister input(input_, 32);
1807   Fcvtzu(output, input);
1808   if (!isSaturating) {
1809     Cmp(output, 0);
1810     Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
1811     B(oolEntry, Assembler::Equal);
1812   }
1813 }
1814 
1815 void MacroAssembler::wasmTruncateDoubleToInt32(FloatRegister input_,
1816                                                Register output_,
1817                                                bool isSaturating,
1818                                                Label* oolEntry) {
1819   ARMRegister output(output_, 32);
1820   ARMFPRegister input(input_, 64);
1821   Fcvtzs(output, input);
1822   if (!isSaturating) {
1823     Cmp(output, 0);
1824     Ccmp(output, INT32_MAX, vixl::ZFlag, Assembler::NotEqual);
1825     Ccmp(output, INT32_MIN, vixl::ZFlag, Assembler::NotEqual);
1826     B(oolEntry, Assembler::Equal);
1827   }
1828 }
1829 
1830 void MacroAssembler::wasmTruncateFloat32ToInt32(FloatRegister input_,
1831                                                 Register output_,
1832                                                 bool isSaturating,
1833                                                 Label* oolEntry) {
1834   ARMRegister output(output_, 32);
1835   ARMFPRegister input(input_, 32);
1836   Fcvtzs(output, input);
1837   if (!isSaturating) {
1838     Cmp(output, 0);
1839     Ccmp(output, INT32_MAX, vixl::ZFlag, Assembler::NotEqual);
1840     Ccmp(output, INT32_MIN, vixl::ZFlag, Assembler::NotEqual);
1841     B(oolEntry, Assembler::Equal);
1842   }
1843 }
1844 
1845 void MacroAssembler::wasmTruncateDoubleToUInt64(
1846     FloatRegister input_, Register64 output_, bool isSaturating,
1847     Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
1848   MOZ_ASSERT(tempDouble.isInvalid());
1849 
1850   ARMRegister output(output_.reg, 64);
1851   ARMFPRegister input(input_, 64);
1852   Fcvtzu(output, input);
1853   if (!isSaturating) {
1854     Cmp(output, 0);
1855     Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
1856     B(oolEntry, Assembler::Equal);
1857     bind(oolRejoin);
1858   }
1859 }
1860 
1861 void MacroAssembler::wasmTruncateFloat32ToUInt64(
1862     FloatRegister input_, Register64 output_, bool isSaturating,
1863     Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
1864   MOZ_ASSERT(tempDouble.isInvalid());
1865 
1866   ARMRegister output(output_.reg, 64);
1867   ARMFPRegister input(input_, 32);
1868   Fcvtzu(output, input);
1869   if (!isSaturating) {
1870     Cmp(output, 0);
1871     Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
1872     B(oolEntry, Assembler::Equal);
1873     bind(oolRejoin);
1874   }
1875 }
1876 
1877 void MacroAssembler::wasmTruncateDoubleToInt64(
1878     FloatRegister input_, Register64 output_, bool isSaturating,
1879     Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
1880   MOZ_ASSERT(tempDouble.isInvalid());
1881 
1882   ARMRegister output(output_.reg, 64);
1883   ARMFPRegister input(input_, 64);
1884   Fcvtzs(output, input);
1885   if (!isSaturating) {
1886     Cmp(output, 0);
1887     Ccmp(output, INT64_MAX, vixl::ZFlag, Assembler::NotEqual);
1888     Ccmp(output, INT64_MIN, vixl::ZFlag, Assembler::NotEqual);
1889     B(oolEntry, Assembler::Equal);
1890     bind(oolRejoin);
1891   }
1892 }
1893 
1894 void MacroAssembler::wasmTruncateFloat32ToInt64(
1895     FloatRegister input_, Register64 output_, bool isSaturating,
1896     Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
1897   ARMRegister output(output_.reg, 64);
1898   ARMFPRegister input(input_, 32);
1899   Fcvtzs(output, input);
1900   if (!isSaturating) {
1901     Cmp(output, 0);
1902     Ccmp(output, INT64_MAX, vixl::ZFlag, Assembler::NotEqual);
1903     Ccmp(output, INT64_MIN, vixl::ZFlag, Assembler::NotEqual);
1904     B(oolEntry, Assembler::Equal);
1905     bind(oolRejoin);
1906   }
1907 }
1908 
1909 void MacroAssembler::oolWasmTruncateCheckF32ToI32(FloatRegister input,
1910                                                   Register output,
1911                                                   TruncFlags flags,
1912                                                   wasm::BytecodeOffset off,
1913                                                   Label* rejoin) {
1914   Label notNaN;
1915   branchFloat(Assembler::DoubleOrdered, input, input, &notNaN);
1916   wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
1917   bind(&notNaN);
1918 
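  // The input is known not to be NaN here.  Classify it: for the unsigned
  // case the convertible range is (-1, 2^32), for the signed case it is
  // [-2^31, 2^31).  In-range inputs (which merely happened to produce a
  // sentinel result) branch back to |rejoin|; out-of-range inputs trap with
  // an integer overflow.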
1919   Label isOverflow;
1920   const float two_31 = -float(INT32_MIN);
1921   ScratchFloat32Scope fpscratch(*this);
1922   if (flags & TRUNC_UNSIGNED) {
1923     loadConstantFloat32(two_31 * 2, fpscratch);
1924     branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
1925                 &isOverflow);
1926     loadConstantFloat32(-1.0f, fpscratch);
1927     branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
1928   } else {
1929     loadConstantFloat32(two_31, fpscratch);
1930     branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
1931                 &isOverflow);
1932     loadConstantFloat32(-two_31, fpscratch);
1933     branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
1934   }
1935   bind(&isOverflow);
1936   wasmTrap(wasm::Trap::IntegerOverflow, off);
1937 }
1938 
1939 void MacroAssembler::oolWasmTruncateCheckF64ToI32(FloatRegister input,
1940                                                   Register output,
1941                                                   TruncFlags flags,
1942                                                   wasm::BytecodeOffset off,
1943                                                   Label* rejoin) {
1944   Label notNaN;
1945   branchDouble(Assembler::DoubleOrdered, input, input, &notNaN);
1946   wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
1947   bind(&notNaN);
1948 
1949   Label isOverflow;
1950   const double two_31 = -double(INT32_MIN);
1951   ScratchDoubleScope fpscratch(*this);
1952   if (flags & TRUNC_UNSIGNED) {
1953     loadConstantDouble(two_31 * 2, fpscratch);
1954     branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
1955                  &isOverflow);
1956     loadConstantDouble(-1.0, fpscratch);
1957     branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
1958   } else {
1959     loadConstantDouble(two_31, fpscratch);
1960     branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
1961                  &isOverflow);
1962     loadConstantDouble(-two_31 - 1, fpscratch);
1963     branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
1964   }
1965   bind(&isOverflow);
1966   wasmTrap(wasm::Trap::IntegerOverflow, off);
1967 }
1968 
1969 void MacroAssembler::oolWasmTruncateCheckF32ToI64(FloatRegister input,
1970                                                   Register64 output,
1971                                                   TruncFlags flags,
1972                                                   wasm::BytecodeOffset off,
1973                                                   Label* rejoin) {
1974   Label notNaN;
1975   branchFloat(Assembler::DoubleOrdered, input, input, &notNaN);
1976   wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
1977   bind(&notNaN);
1978 
1979   Label isOverflow;
1980   const float two_63 = -float(INT64_MIN);
1981   ScratchFloat32Scope fpscratch(*this);
1982   if (flags & TRUNC_UNSIGNED) {
1983     loadConstantFloat32(two_63 * 2, fpscratch);
1984     branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
1985                 &isOverflow);
1986     loadConstantFloat32(-1.0f, fpscratch);
1987     branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
1988   } else {
1989     loadConstantFloat32(two_63, fpscratch);
1990     branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
1991                 &isOverflow);
1992     loadConstantFloat32(-two_63, fpscratch);
1993     branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
1994   }
1995   bind(&isOverflow);
1996   wasmTrap(wasm::Trap::IntegerOverflow, off);
1997 }
1998 
1999 void MacroAssembler::oolWasmTruncateCheckF64ToI64(FloatRegister input,
2000                                                   Register64 output,
2001                                                   TruncFlags flags,
2002                                                   wasm::BytecodeOffset off,
2003                                                   Label* rejoin) {
2004   Label notNaN;
2005   branchDouble(Assembler::DoubleOrdered, input, input, &notNaN);
2006   wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
2007   bind(&notNaN);
2008 
2009   Label isOverflow;
2010   const double two_63 = -double(INT64_MIN);
2011   ScratchDoubleScope fpscratch(*this);
2012   if (flags & TRUNC_UNSIGNED) {
2013     loadConstantDouble(two_63 * 2, fpscratch);
2014     branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
2015                  &isOverflow);
2016     loadConstantDouble(-1.0, fpscratch);
2017     branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
2018   } else {
2019     loadConstantDouble(two_63, fpscratch);
2020     branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
2021                  &isOverflow);
2022     loadConstantDouble(-two_63, fpscratch);
2023     branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
2024   }
2025   bind(&isOverflow);
2026   wasmTrap(wasm::Trap::IntegerOverflow, off);
2027 }
2028 
2029 void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access,
2030                               Register memoryBase, Register ptr,
2031                               AnyRegister output) {
2032   wasmLoadImpl(access, memoryBase, ptr, output, Register64::Invalid());
2033 }
2034 
2035 void MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access,
2036                                  Register memoryBase, Register ptr,
2037                                  Register64 output) {
2038   wasmLoadImpl(access, memoryBase, ptr, AnyRegister(), output);
2039 }
2040 
2041 void MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access,
2042                                AnyRegister value, Register memoryBase,
2043                                Register ptr) {
2044   wasmStoreImpl(access, value, Register64::Invalid(), memoryBase, ptr);
2045 }
2046 
2047 void MacroAssembler::wasmStoreI64(const wasm::MemoryAccessDesc& access,
2048                                   Register64 value, Register memoryBase,
2049                                   Register ptr) {
2050   wasmStoreImpl(access, AnyRegister(), value, memoryBase, ptr);
2051 }
2052 
2053 void MacroAssembler::enterFakeExitFrameForWasm(Register cxreg, Register scratch,
2054                                                ExitFrameType type) {
2055   // Wasm stubs use the native SP, not the PSP.  Setting up the fake exit
2056   // frame leaves the SP mis-aligned, which is how we want it, but we must do
2057   // that carefully.
2058 
2059   linkExitFrame(cxreg, scratch);
2060 
2061   MOZ_RELEASE_ASSERT(sp.Is(GetStackPointer64()));
2062 
2063   const ARMRegister tmp(scratch, 64);
2064 
2065   vixl::UseScratchRegisterScope temps(this);
2066   const ARMRegister tmp2 = temps.AcquireX();
2067 
2068   Sub(sp, sp, 8);
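  // The 8 bytes reserved just above hold the ExitFrameType word stored below;
  // this is the SP mis-alignment referred to at the top of this function.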
2069 
2070   // Despite the above assertion, it is possible for control to flow from here
2071   // to the code generated by
2072   // MacroAssemblerCompat::handleFailureWithHandlerTail without any
2073   // intervening assignment to PSP.  But handleFailureWithHandlerTail assumes
2074   // that PSP is the active stack pointer.  Hence the following is necessary
2075   // for safety.  Note we can't use initPseudoStackPtr here as that would
2076   // generate no instructions.
2077   Mov(PseudoStackPointer64, sp);
2078 
2079   Mov(tmp, sp);  // SP may be unaligned, can't use it for memory op
2080   Mov(tmp2, int32_t(type));
2081   Str(tmp2, vixl::MemOperand(tmp, 0));
2082 }
2083 
2084 // ========================================================================
2085 // Convert floating point.
2086 
2087 bool MacroAssembler::convertUInt64ToDoubleNeedsTemp() { return false; }
2088 
2089 void MacroAssembler::convertUInt64ToDouble(Register64 src, FloatRegister dest,
2090                                            Register temp) {
2091   MOZ_ASSERT(temp == Register::Invalid());
2092   Ucvtf(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64));
2093 }
2094 
2095 void MacroAssembler::convertInt64ToDouble(Register64 src, FloatRegister dest) {
2096   Scvtf(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64));
2097 }
2098 
2099 void MacroAssembler::convertUInt64ToFloat32(Register64 src, FloatRegister dest,
2100                                             Register temp) {
2101   MOZ_ASSERT(temp == Register::Invalid());
2102   Ucvtf(ARMFPRegister(dest, 32), ARMRegister(src.reg, 64));
2103 }
2104 
2105 void MacroAssembler::convertInt64ToFloat32(Register64 src, FloatRegister dest) {
2106   Scvtf(ARMFPRegister(dest, 32), ARMRegister(src.reg, 64));
2107 }
2108 
2109 void MacroAssembler::convertIntPtrToDouble(Register src, FloatRegister dest) {
2110   convertInt64ToDouble(Register64(src), dest);
2111 }
2112 
2113 // ========================================================================
2114 // Primitive atomic operations.
2115 
2116 // The computed MemOperand must be Reg+0 because the load/store exclusive
2117 // instructions only take a single pointer register.
2118 
2119 static MemOperand ComputePointerForAtomic(MacroAssembler& masm,
2120                                           const Address& address,
2121                                           Register scratch) {
2122   if (address.offset == 0) {
2123     return MemOperand(X(masm, address.base), 0);
2124   }
2125 
2126   masm.Add(X(scratch), X(masm, address.base), address.offset);
2127   return MemOperand(X(scratch), 0);
2128 }
2129 
2130 static MemOperand ComputePointerForAtomic(MacroAssembler& masm,
2131                                           const BaseIndex& address,
2132                                           Register scratch) {
2133   masm.Add(X(scratch), X(masm, address.base),
2134            Operand(X(address.index), vixl::LSL, address.scale));
2135   if (address.offset) {
2136     masm.Add(X(scratch), X(scratch), address.offset);
2137   }
2138   return MemOperand(X(scratch), 0);
2139 }
2140 
2141 // This sign- or zero-extends to targetWidth and leaves any higher bits zero.
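// E.g. Sbfm(dest, src, 0, 7) is the SXTB alias (sign-extend the low byte) and
// Ubfm(dest, src, 0, 15) is the UXTH alias (zero-extend the low halfword).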
2142 
2143 static void SignOrZeroExtend(MacroAssembler& masm, Scalar::Type srcType,
2144                              Width targetWidth, Register src, Register dest) {
2145   bool signExtend = Scalar::isSignedIntType(srcType);
2146 
2147   switch (Scalar::byteSize(srcType)) {
2148     case 1:
2149       if (signExtend) {
2150         masm.Sbfm(R(dest, targetWidth), R(src, targetWidth), 0, 7);
2151       } else {
2152         masm.Ubfm(R(dest, targetWidth), R(src, targetWidth), 0, 7);
2153       }
2154       break;
2155     case 2:
2156       if (signExtend) {
2157         masm.Sbfm(R(dest, targetWidth), R(src, targetWidth), 0, 15);
2158       } else {
2159         masm.Ubfm(R(dest, targetWidth), R(src, targetWidth), 0, 15);
2160       }
2161       break;
2162     case 4:
2163       if (targetWidth == Width::_64) {
2164         if (signExtend) {
2165           masm.Sbfm(X(dest), X(src), 0, 31);
2166         } else {
2167           masm.Ubfm(X(dest), X(src), 0, 31);
2168         }
2169       } else if (src != dest) {
2170         masm.Mov(R(dest, targetWidth), R(src, targetWidth));
2171       }
2172       break;
2173     case 8:
2174       if (src != dest) {
2175         masm.Mov(R(dest, targetWidth), R(src, targetWidth));
2176       }
2177       break;
2178     default:
2179       MOZ_CRASH();
2180   }
2181 }
2182 
2183 // Exclusive-loads zero-extend their values to the full width of the X register.
2184 //
2185 // Note, we've promised to leave the high bits of the 64-bit register clear if
2186 // the targetWidth is 32.
2187 
2188 static void LoadExclusive(MacroAssembler& masm,
2189                           const wasm::MemoryAccessDesc* access,
2190                           Scalar::Type srcType, Width targetWidth,
2191                           MemOperand ptr, Register dest) {
2192   bool signExtend = Scalar::isSignedIntType(srcType);
2193 
2194   // With this address form, a single native ldxr* will be emitted, and the
2195   // AutoForbidPoolsAndNops ensures that the metadata is emitted at the address
2196   // of the ldxr*.
2197   MOZ_ASSERT(ptr.IsImmediateOffset() && ptr.offset() == 0);
2198 
2199   switch (Scalar::byteSize(srcType)) {
2200     case 1: {
2201       {
2202         AutoForbidPoolsAndNops afp(
2203             &masm,
2204             /* max number of instructions in scope = */ 1);
2205         if (access) {
2206           masm.append(*access, masm.currentOffset());
2207         }
2208         masm.Ldxrb(W(dest), ptr);
2209       }
2210       if (signExtend) {
2211         masm.Sbfm(R(dest, targetWidth), R(dest, targetWidth), 0, 7);
2212       }
2213       break;
2214     }
2215     case 2: {
2216       {
2217         AutoForbidPoolsAndNops afp(
2218             &masm,
2219             /* max number of instructions in scope = */ 1);
2220         if (access) {
2221           masm.append(*access, masm.currentOffset());
2222         }
2223         masm.Ldxrh(W(dest), ptr);
2224       }
2225       if (signExtend) {
2226         masm.Sbfm(R(dest, targetWidth), R(dest, targetWidth), 0, 15);
2227       }
2228       break;
2229     }
2230     case 4: {
2231       {
2232         AutoForbidPoolsAndNops afp(
2233             &masm,
2234             /* max number of instructions in scope = */ 1);
2235         if (access) {
2236           masm.append(*access, masm.currentOffset());
2237         }
2238         masm.Ldxr(W(dest), ptr);
2239       }
2240       if (targetWidth == Width::_64 && signExtend) {
2241         masm.Sbfm(X(dest), X(dest), 0, 31);
2242       }
2243       break;
2244     }
2245     case 8: {
2246       {
2247         AutoForbidPoolsAndNops afp(
2248             &masm,
2249             /* max number of instructions in scope = */ 1);
2250         if (access) {
2251           masm.append(*access, masm.currentOffset());
2252         }
2253         masm.Ldxr(X(dest), ptr);
2254       }
2255       break;
2256     }
2257     default: {
2258       MOZ_CRASH();
2259     }
2260   }
2261 }
2262 
2263 static void StoreExclusive(MacroAssembler& masm, Scalar::Type type,
2264                            Register status, Register src, MemOperand ptr) {
2265   switch (Scalar::byteSize(type)) {
2266     case 1:
2267       masm.Stxrb(W(status), W(src), ptr);
2268       break;
2269     case 2:
2270       masm.Stxrh(W(status), W(src), ptr);
2271       break;
2272     case 4:
2273       masm.Stxr(W(status), W(src), ptr);
2274       break;
2275     case 8:
2276       masm.Stxr(W(status), X(src), ptr);
2277       break;
2278   }
2279 }
2280 
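// The compare-exchange, exchange and fetch-op helpers below are all built on
// the same load-exclusive / store-exclusive retry loop: load-exclusive the
// current value, compute (or compare against) the new value, attempt the
// store-exclusive, and loop back if the store-exclusive status register
// reports failure.  The barriers required by |sync| bracket the loop.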
2281 template <typename T>
2282 static void CompareExchange(MacroAssembler& masm,
2283                             const wasm::MemoryAccessDesc* access,
2284                             Scalar::Type type, Width targetWidth,
2285                             const Synchronization& sync, const T& mem,
2286                             Register oldval, Register newval, Register output) {
2287   MOZ_ASSERT(oldval != output && newval != output);
2288 
2289   Label again;
2290   Label done;
2291 
2292   vixl::UseScratchRegisterScope temps(&masm);
2293 
2294   Register scratch2 = temps.AcquireX().asUnsized();
2295   MemOperand ptr = ComputePointerForAtomic(masm, mem, scratch2);
2296 
2297   MOZ_ASSERT(ptr.base().asUnsized() != output);
2298 
2299   masm.memoryBarrierBefore(sync);
2300 
2301   Register scratch = temps.AcquireX().asUnsized();
2302 
2303   masm.bind(&again);
2304   SignOrZeroExtend(masm, type, targetWidth, oldval, scratch);
2305   LoadExclusive(masm, access, type, targetWidth, ptr, output);
2306   masm.Cmp(R(output, targetWidth), R(scratch, targetWidth));
2307   masm.B(&done, MacroAssembler::NotEqual);
2308   StoreExclusive(masm, type, scratch, newval, ptr);
2309   masm.Cbnz(W(scratch), &again);
2310   masm.bind(&done);
2311 
2312   masm.memoryBarrierAfter(sync);
2313 }
2314 
2315 template <typename T>
2316 static void AtomicExchange(MacroAssembler& masm,
2317                            const wasm::MemoryAccessDesc* access,
2318                            Scalar::Type type, Width targetWidth,
2319                            const Synchronization& sync, const T& mem,
2320                            Register value, Register output) {
2321   MOZ_ASSERT(value != output);
2322 
2323   Label again;
2324 
2325   vixl::UseScratchRegisterScope temps(&masm);
2326 
2327   Register scratch2 = temps.AcquireX().asUnsized();
2328   MemOperand ptr = ComputePointerForAtomic(masm, mem, scratch2);
2329 
2330   masm.memoryBarrierBefore(sync);
2331 
2332   Register scratch = temps.AcquireX().asUnsized();
2333 
2334   masm.bind(&again);
2335   LoadExclusive(masm, access, type, targetWidth, ptr, output);
2336   StoreExclusive(masm, type, scratch, value, ptr);
2337   masm.Cbnz(W(scratch), &again);
2338 
2339   masm.memoryBarrierAfter(sync);
2340 }
2341 
2342 template <bool wantResult, typename T>
2343 static void AtomicFetchOp(MacroAssembler& masm,
2344                           const wasm::MemoryAccessDesc* access,
2345                           Scalar::Type type, Width targetWidth,
2346                           const Synchronization& sync, AtomicOp op,
2347                           const T& mem, Register value, Register temp,
2348                           Register output) {
2349   MOZ_ASSERT(value != output);
2350   MOZ_ASSERT(value != temp);
2351   MOZ_ASSERT_IF(wantResult, output != temp);
2352 
2353   Label again;
2354 
2355   vixl::UseScratchRegisterScope temps(&masm);
2356 
2357   Register scratch2 = temps.AcquireX().asUnsized();
2358   MemOperand ptr = ComputePointerForAtomic(masm, mem, scratch2);
2359 
2360   masm.memoryBarrierBefore(sync);
2361 
2362   Register scratch = temps.AcquireX().asUnsized();
2363 
2364   masm.bind(&again);
2365   LoadExclusive(masm, access, type, targetWidth, ptr, output);
2366   switch (op) {
2367     case AtomicFetchAddOp:
2368       masm.Add(X(temp), X(output), X(value));
2369       break;
2370     case AtomicFetchSubOp:
2371       masm.Sub(X(temp), X(output), X(value));
2372       break;
2373     case AtomicFetchAndOp:
2374       masm.And(X(temp), X(output), X(value));
2375       break;
2376     case AtomicFetchOrOp:
2377       masm.Orr(X(temp), X(output), X(value));
2378       break;
2379     case AtomicFetchXorOp:
2380       masm.Eor(X(temp), X(output), X(value));
2381       break;
2382   }
2383   StoreExclusive(masm, type, scratch, temp, ptr);
2384   masm.Cbnz(W(scratch), &again);
2385   if (wantResult) {
2386     SignOrZeroExtend(masm, type, targetWidth, output, output);
2387   }
2388 
2389   masm.memoryBarrierAfter(sync);
2390 }
2391 
2392 void MacroAssembler::compareExchange(Scalar::Type type,
2393                                      const Synchronization& sync,
2394                                      const Address& mem, Register oldval,
2395                                      Register newval, Register output) {
2396   CompareExchange(*this, nullptr, type, Width::_32, sync, mem, oldval, newval,
2397                   output);
2398 }
2399 
2400 void MacroAssembler::compareExchange(Scalar::Type type,
2401                                      const Synchronization& sync,
2402                                      const BaseIndex& mem, Register oldval,
2403                                      Register newval, Register output) {
2404   CompareExchange(*this, nullptr, type, Width::_32, sync, mem, oldval, newval,
2405                   output);
2406 }
2407 
2408 void MacroAssembler::compareExchange64(const Synchronization& sync,
2409                                        const Address& mem, Register64 expect,
2410                                        Register64 replace, Register64 output) {
2411   CompareExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
2412                   expect.reg, replace.reg, output.reg);
2413 }
2414 
2415 void MacroAssembler::compareExchange64(const Synchronization& sync,
2416                                        const BaseIndex& mem, Register64 expect,
2417                                        Register64 replace, Register64 output) {
2418   CompareExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
2419                   expect.reg, replace.reg, output.reg);
2420 }
2421 
2422 void MacroAssembler::atomicExchange64(const Synchronization& sync,
2423                                       const Address& mem, Register64 value,
2424                                       Register64 output) {
2425   AtomicExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
2426                  value.reg, output.reg);
2427 }
2428 
2429 void MacroAssembler::atomicExchange64(const Synchronization& sync,
2430                                       const BaseIndex& mem, Register64 value,
2431                                       Register64 output) {
2432   AtomicExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
2433                  value.reg, output.reg);
2434 }
2435 
2436 void MacroAssembler::atomicFetchOp64(const Synchronization& sync, AtomicOp op,
2437                                      Register64 value, const Address& mem,
2438                                      Register64 temp, Register64 output) {
2439   AtomicFetchOp<true>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
2440                       value.reg, temp.reg, output.reg);
2441 }
2442 
2443 void MacroAssembler::atomicFetchOp64(const Synchronization& sync, AtomicOp op,
2444                                      Register64 value, const BaseIndex& mem,
2445                                      Register64 temp, Register64 output) {
2446   AtomicFetchOp<true>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
2447                       value.reg, temp.reg, output.reg);
2448 }
2449 
2450 void MacroAssembler::atomicEffectOp64(const Synchronization& sync, AtomicOp op,
2451                                       Register64 value, const Address& mem,
2452                                       Register64 temp) {
2453   AtomicFetchOp<false>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
2454                        value.reg, temp.reg, temp.reg);
2455 }
2456 
2457 void MacroAssembler::atomicEffectOp64(const Synchronization& sync, AtomicOp op,
2458                                       Register64 value, const BaseIndex& mem,
2459                                       Register64 temp) {
2460   AtomicFetchOp<false>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
2461                        value.reg, temp.reg, temp.reg);
2462 }
2463 
2464 void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access,
2465                                          const Address& mem, Register oldval,
2466                                          Register newval, Register output) {
2467   CompareExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
2468                   oldval, newval, output);
2469 }
2470 
2471 void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access,
2472                                          const BaseIndex& mem, Register oldval,
2473                                          Register newval, Register output) {
2474   CompareExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
2475                   oldval, newval, output);
2476 }
2477 
2478 void MacroAssembler::atomicExchange(Scalar::Type type,
2479                                     const Synchronization& sync,
2480                                     const Address& mem, Register value,
2481                                     Register output) {
2482   AtomicExchange(*this, nullptr, type, Width::_32, sync, mem, value, output);
2483 }
2484 
2485 void MacroAssembler::atomicExchange(Scalar::Type type,
2486                                     const Synchronization& sync,
2487                                     const BaseIndex& mem, Register value,
2488                                     Register output) {
2489   AtomicExchange(*this, nullptr, type, Width::_32, sync, mem, value, output);
2490 }
2491 
2492 void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access,
2493                                         const Address& mem, Register value,
2494                                         Register output) {
2495   AtomicExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
2496                  value, output);
2497 }
2498 
2499 void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access,
2500                                         const BaseIndex& mem, Register value,
2501                                         Register output) {
2502   AtomicExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
2503                  value, output);
2504 }
2505 
2506 void MacroAssembler::atomicFetchOp(Scalar::Type type,
2507                                    const Synchronization& sync, AtomicOp op,
2508                                    Register value, const Address& mem,
2509                                    Register temp, Register output) {
2510   AtomicFetchOp<true>(*this, nullptr, type, Width::_32, sync, op, mem, value,
2511                       temp, output);
2512 }
2513 
2514 void MacroAssembler::atomicFetchOp(Scalar::Type type,
2515                                    const Synchronization& sync, AtomicOp op,
2516                                    Register value, const BaseIndex& mem,
2517                                    Register temp, Register output) {
2518   AtomicFetchOp<true>(*this, nullptr, type, Width::_32, sync, op, mem, value,
2519                       temp, output);
2520 }
2521 
2522 void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
2523                                        AtomicOp op, Register value,
2524                                        const Address& mem, Register temp,
2525                                        Register output) {
2526   AtomicFetchOp<true>(*this, &access, access.type(), Width::_32, access.sync(),
2527                       op, mem, value, temp, output);
2528 }
2529 
2530 void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
2531                                        AtomicOp op, Register value,
2532                                        const BaseIndex& mem, Register temp,
2533                                        Register output) {
2534   AtomicFetchOp<true>(*this, &access, access.type(), Width::_32, access.sync(),
2535                       op, mem, value, temp, output);
2536 }
2537 
2538 void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
2539                                         AtomicOp op, Register value,
2540                                         const Address& mem, Register temp) {
2541   AtomicFetchOp<false>(*this, &access, access.type(), Width::_32, access.sync(),
2542                        op, mem, value, temp, temp);
2543 }
2544 
2545 void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
2546                                         AtomicOp op, Register value,
2547                                         const BaseIndex& mem, Register temp) {
2548   AtomicFetchOp<false>(*this, &access, access.type(), Width::_32, access.sync(),
2549                        op, mem, value, temp, temp);
2550 }
2551 
2552 void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
2553                                            const Address& mem,
2554                                            Register64 expect,
2555                                            Register64 replace,
2556                                            Register64 output) {
2557   CompareExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
2558                   expect.reg, replace.reg, output.reg);
2559 }
2560 
wasmCompareExchange64(const wasm::MemoryAccessDesc & access,const BaseIndex & mem,Register64 expect,Register64 replace,Register64 output)2561 void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
2562                                            const BaseIndex& mem,
2563                                            Register64 expect,
2564                                            Register64 replace,
2565                                            Register64 output) {
2566   CompareExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
2567                   expect.reg, replace.reg, output.reg);
2568 }
2569 
wasmAtomicExchange64(const wasm::MemoryAccessDesc & access,const Address & mem,Register64 value,Register64 output)2570 void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
2571                                           const Address& mem, Register64 value,
2572                                           Register64 output) {
2573   AtomicExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
2574                  value.reg, output.reg);
2575 }
2576 
wasmAtomicExchange64(const wasm::MemoryAccessDesc & access,const BaseIndex & mem,Register64 value,Register64 output)2577 void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
2578                                           const BaseIndex& mem,
2579                                           Register64 value, Register64 output) {
2580   AtomicExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
2581                  value.reg, output.reg);
2582 }
2583 
wasmAtomicFetchOp64(const wasm::MemoryAccessDesc & access,AtomicOp op,Register64 value,const Address & mem,Register64 temp,Register64 output)2584 void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
2585                                          AtomicOp op, Register64 value,
2586                                          const Address& mem, Register64 temp,
2587                                          Register64 output) {
2588   AtomicFetchOp<true>(*this, &access, Scalar::Int64, Width::_64, access.sync(),
2589                       op, mem, value.reg, temp.reg, output.reg);
2590 }
2591 
wasmAtomicFetchOp64(const wasm::MemoryAccessDesc & access,AtomicOp op,Register64 value,const BaseIndex & mem,Register64 temp,Register64 output)2592 void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
2593                                          AtomicOp op, Register64 value,
2594                                          const BaseIndex& mem, Register64 temp,
2595                                          Register64 output) {
2596   AtomicFetchOp<true>(*this, &access, Scalar::Int64, Width::_64, access.sync(),
2597                       op, mem, value.reg, temp.reg, output.reg);
2598 }
2599 
wasmAtomicEffectOp64(const wasm::MemoryAccessDesc & access,AtomicOp op,Register64 value,const BaseIndex & mem,Register64 temp)2600 void MacroAssembler::wasmAtomicEffectOp64(const wasm::MemoryAccessDesc& access,
2601                                           AtomicOp op, Register64 value,
2602                                           const BaseIndex& mem,
2603                                           Register64 temp) {
2604   AtomicFetchOp<false>(*this, &access, Scalar::Int64, Width::_64, access.sync(),
2605                        op, mem, value.reg, temp.reg, temp.reg);
2606 }
2607 
2608 // ========================================================================
2609 // JS atomic operations.
2610 
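// The JS helpers below return the old value of the element. For a Uint32
// array that value may not fit in an int32, so it is produced in an integer
// temp and then converted to a double in the FP output register; every other
// element type fits in an int32 and is returned directly in the GPR output.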
template <typename T>
static void CompareExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
                              const Synchronization& sync, const T& mem,
                              Register oldval, Register newval, Register temp,
                              AnyRegister output) {
  if (arrayType == Scalar::Uint32) {
    masm.compareExchange(arrayType, sync, mem, oldval, newval, temp);
    masm.convertUInt32ToDouble(temp, output.fpu());
  } else {
    masm.compareExchange(arrayType, sync, mem, oldval, newval, output.gpr());
  }
}

void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
                                       const Synchronization& sync,
                                       const Address& mem, Register oldval,
                                       Register newval, Register temp,
                                       AnyRegister output) {
  CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output);
}

void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
                                       const Synchronization& sync,
                                       const BaseIndex& mem, Register oldval,
                                       Register newval, Register temp,
                                       AnyRegister output) {
  CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output);
}

template <typename T>
static void AtomicExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
                             const Synchronization& sync, const T& mem,
                             Register value, Register temp,
                             AnyRegister output) {
  if (arrayType == Scalar::Uint32) {
    masm.atomicExchange(arrayType, sync, mem, value, temp);
    masm.convertUInt32ToDouble(temp, output.fpu());
  } else {
    masm.atomicExchange(arrayType, sync, mem, value, output.gpr());
  }
}

void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
                                      const Synchronization& sync,
                                      const Address& mem, Register value,
                                      Register temp, AnyRegister output) {
  AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output);
}

void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
                                      const Synchronization& sync,
                                      const BaseIndex& mem, Register value,
                                      Register temp, AnyRegister output) {
  AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output);
}

template <typename T>
static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType,
                            const Synchronization& sync, AtomicOp op,
                            Register value, const T& mem, Register temp1,
                            Register temp2, AnyRegister output) {
  if (arrayType == Scalar::Uint32) {
    masm.atomicFetchOp(arrayType, sync, op, value, mem, temp2, temp1);
    masm.convertUInt32ToDouble(temp1, output.fpu());
  } else {
    masm.atomicFetchOp(arrayType, sync, op, value, mem, temp1, output.gpr());
  }
}

void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
                                     const Synchronization& sync, AtomicOp op,
                                     Register value, const Address& mem,
                                     Register temp1, Register temp2,
                                     AnyRegister output) {
  AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
}

void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
                                     const Synchronization& sync, AtomicOp op,
                                     Register value, const BaseIndex& mem,
                                     Register temp1, Register temp2,
                                     AnyRegister output) {
  AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
}

void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType,
                                      const Synchronization& sync, AtomicOp op,
                                      Register value, const BaseIndex& mem,
                                      Register temp) {
  AtomicFetchOp<false>(*this, nullptr, arrayType, Width::_32, sync, op, mem,
                       value, temp, temp);
}

void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType,
                                      const Synchronization& sync, AtomicOp op,
                                      Register value, const Address& mem,
                                      Register temp) {
  AtomicFetchOp<false>(*this, nullptr, arrayType, Width::_32, sync, op, mem,
                       value, temp, temp);
}

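// Note: the LiveRegisterSet parameter of the "flexible" division helpers
// exists for platforms where division clobbers fixed registers and may need
// spills around it. AArch64's udiv/sdiv can write any general-purpose
// register, so the set is unused here and these simply forward to the plain
// quotient/remainder operations.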
void MacroAssembler::flexibleQuotient32(Register rhs, Register srcDest,
                                        bool isUnsigned,
                                        const LiveRegisterSet&) {
  quotient32(rhs, srcDest, isUnsigned);
}

void MacroAssembler::flexibleRemainder32(Register rhs, Register srcDest,
                                         bool isUnsigned,
                                         const LiveRegisterSet&) {
  remainder32(rhs, srcDest, isUnsigned);
}

void MacroAssembler::flexibleDivMod32(Register rhs, Register srcDest,
                                      Register remOutput, bool isUnsigned,
                                      const LiveRegisterSet&) {
  vixl::UseScratchRegisterScope temps(this);
  ARMRegister scratch = temps.AcquireW();
  ARMRegister src = temps.AcquireW();

  // Preserve src for the remainder computation below.
  Mov(src, ARMRegister(srcDest, 32));

  if (isUnsigned) {
    Udiv(ARMRegister(srcDest, 32), src, ARMRegister(rhs, 32));
  } else {
    Sdiv(ARMRegister(srcDest, 32), src, ARMRegister(rhs, 32));
  }
  // Compute the remainder: remOutput = src - (src / rhs) * rhs.
  Mul(scratch, ARMRegister(srcDest, 32), ARMRegister(rhs, 32));
  Sub(ARMRegister(remOutput, 32), src, scratch);
}

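// ADR forms a PC-relative address from a signed 21-bit immediate, i.e. a
// range of roughly +/-1 MiB around the instruction; patchNearAddressMove
// below asserts (IsInt21) that the patched target stays within that range.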
CodeOffset MacroAssembler::moveNearAddressWithPatch(Register dest) {
  AutoForbidPoolsAndNops afp(this,
                             /* max number of instructions in scope = */ 1);
  CodeOffset offset(currentOffset());
  adr(ARMRegister(dest, 64), 0, LabelDoc());
  return offset;
}

void MacroAssembler::patchNearAddressMove(CodeLocationLabel loc,
                                          CodeLocationLabel target) {
  ptrdiff_t off = target - loc;
  MOZ_RELEASE_ASSERT(vixl::IsInt21(off));

  Instruction* cur = reinterpret_cast<Instruction*>(loc.raw());
  MOZ_ASSERT(cur->IsADR());

  vixl::Register rd = vixl::Register::XRegFromCode(cur->Rd());
  adr(cur, rd, off);
}

// ========================================================================
// Spectre Mitigations.

void MacroAssembler::speculationBarrier() {
  // Conditional speculation barrier.
  csdb();
}

void MacroAssembler::floorFloat32ToInt32(FloatRegister src, Register dest,
                                         Label* fail) {
  floorf(src, dest, fail);
}

void MacroAssembler::floorDoubleToInt32(FloatRegister src, Register dest,
                                        Label* fail) {
  floor(src, dest, fail);
}

void MacroAssembler::ceilFloat32ToInt32(FloatRegister src, Register dest,
                                        Label* fail) {
  ceilf(src, dest, fail);
}

void MacroAssembler::ceilDoubleToInt32(FloatRegister src, Register dest,
                                       Label* fail) {
  ceil(src, dest, fail);
}

void MacroAssembler::truncFloat32ToInt32(FloatRegister src, Register dest,
                                         Label* fail) {
  const ARMFPRegister src32(src, 32);

  Label done, zeroCase;

  // Convert scalar to signed 32-bit fixed-point, rounding toward zero.
  // In the case of overflow, the output is saturated.
  // In the case of NaN and -0, the output is zero.
  Fcvtzs(ARMRegister(dest, 32), src32);

  // If the output was zero, worry about special cases.
  branch32(Assembler::Equal, dest, Imm32(0), &zeroCase);

  // Fail on overflow cases.
  branch32(Assembler::Equal, dest, Imm32(INT_MAX), fail);
  branch32(Assembler::Equal, dest, Imm32(INT_MIN), fail);

  // If the output was non-zero and wasn't saturated, just return it.
  jump(&done);

  // Handle the case of a zero output:
  // 1. The input may have been NaN, requiring a failure.
  // 2. The input may have been in (-1,-0], requiring a failure.
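  // (For example, an input of -0.5 truncates to -0, which has no int32
  // representation, so we must bail out.)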
  {
    bind(&zeroCase);

    // If input is a negative number that truncated to zero, the real
    // output should be the non-integer -0.
    // The use of "lt" instead of "lo" also catches unordered NaN input.
    Fcmp(src32, 0.0f);
    B(fail, vixl::lt);

    // Check explicitly for -0, bitwise.
    Fmov(ARMRegister(dest, 32), src32);
    branchTest32(Assembler::Signed, dest, dest, fail);
    move32(Imm32(0), dest);
  }

  bind(&done);
}

void MacroAssembler::truncDoubleToInt32(FloatRegister src, Register dest,
                                        Label* fail) {
  const ARMFPRegister src64(src, 64);

  Label done, zeroCase;

  // Convert scalar to signed 32-bit fixed-point, rounding toward zero.
  // In the case of overflow, the output is saturated.
  // In the case of NaN and -0, the output is zero.
  Fcvtzs(ARMRegister(dest, 32), src64);

  // If the output was zero, worry about special cases.
  branch32(Assembler::Equal, dest, Imm32(0), &zeroCase);

  // Fail on overflow cases.
  branch32(Assembler::Equal, dest, Imm32(INT_MAX), fail);
  branch32(Assembler::Equal, dest, Imm32(INT_MIN), fail);

  // If the output was non-zero and wasn't saturated, just return it.
  jump(&done);

  // Handle the case of a zero output:
  // 1. The input may have been NaN, requiring a failure.
  // 2. The input may have been in (-1,-0], requiring a failure.
  {
    bind(&zeroCase);

    // If input is a negative number that truncated to zero, the real
    // output should be the non-integer -0.
    // The use of "lt" instead of "lo" also catches unordered NaN input.
    Fcmp(src64, 0.0);
    B(fail, vixl::lt);

    // Check explicitly for -0, bitwise.
    Fmov(ARMRegister(dest, 64), src64);
    branchTestPtr(Assembler::Signed, dest, dest, fail);
    movePtr(ImmPtr(0), dest);
  }

  bind(&done);
}

void MacroAssembler::roundFloat32ToInt32(FloatRegister src, Register dest,
                                         FloatRegister temp, Label* fail) {
  const ARMFPRegister src32(src, 32);
  ScratchFloat32Scope scratch(*this);

  Label negative, done;

  // Branch to a slow path if input < 0.0 due to complicated rounding rules.
  // Note that Fcmp with NaN unsets the negative flag.
  Fcmp(src32, 0.0);
  B(&negative, Assembler::Condition::lo);

  // Handle the simple case of a positive input, and also -0 and NaN.
  // Rounding proceeds with consideration of the fractional part of the input:
  // 1. If > 0.5, round to integer with higher absolute value (so, up).
  // 2. If < 0.5, round to integer with lower absolute value (so, down).
  // 3. If = 0.5, round to +Infinity (so, up).
  {
    // Convert to signed 32-bit integer, rounding halfway cases away from zero.
    // In the case of overflow, the output is saturated.
    // In the case of NaN and -0, the output is zero.
    Fcvtas(ARMRegister(dest, 32), src32);
    // If the output potentially saturated, fail.
    branch32(Assembler::Equal, dest, Imm32(INT_MAX), fail);

    // If the result of the rounding was non-zero, return the output.
    // In the case of zero, the input may have been NaN or -0, which must bail.
    branch32(Assembler::NotEqual, dest, Imm32(0), &done);
    {
      // If input is NaN, comparisons set the C and V bits of the NZCV flags.
      Fcmp(src32, 0.0f);
      B(fail, Assembler::Overflow);

      // Move all 32 bits of the input into a scratch register to check for -0.
      vixl::UseScratchRegisterScope temps(this);
      const ARMRegister scratchGPR32 = temps.AcquireW();
      Fmov(scratchGPR32, src32);
      Cmp(scratchGPR32, vixl::Operand(uint32_t(0x80000000)));
      B(fail, Assembler::Equal);
    }

    jump(&done);
  }

  // Handle the complicated case of a negative input.
  // Rounding proceeds with consideration of the fractional part of the input:
  // 1. If > 0.5, round to integer with higher absolute value (so, down).
  // 2. If < 0.5, round to integer with lower absolute value (so, up).
  // 3. If = 0.5, round to +Infinity (so, up).
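  // For example, under these rules -1.6 rounds to -2, -1.5 rounds to -1, and
  // -0.2 rounds to -0, which cannot be represented as an int32 and must bail.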
  bind(&negative);
  {
    // Inputs in [-0.5, 0) need 0.5 added; other negative inputs need
    // the biggest float32 less than 0.5.
    Label join;
    loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp);
    loadConstantFloat32(-0.5f, scratch);
    branchFloat(Assembler::DoubleLessThan, src, scratch, &join);
    loadConstantFloat32(0.5f, temp);
    bind(&join);

    addFloat32(src, temp);
    // Round all values toward -Infinity.
    // In the case of overflow, the output is saturated.
    // NaN and -0 are already handled by the "positive number" path above.
    Fcvtms(ARMRegister(dest, 32), temp);
    // If the output potentially saturated, fail.
    branch32(Assembler::Equal, dest, Imm32(INT_MIN), fail);

    // If output is zero, then the actual result is -0. Fail.
    branchTest32(Assembler::Zero, dest, dest, fail);
  }

  bind(&done);
}

void MacroAssembler::roundDoubleToInt32(FloatRegister src, Register dest,
                                        FloatRegister temp, Label* fail) {
  const ARMFPRegister src64(src, 64);
  ScratchDoubleScope scratch(*this);

  Label negative, done;

  // Branch to a slow path if input < 0.0 due to complicated rounding rules.
  // Note that Fcmp with NaN unsets the negative flag.
  Fcmp(src64, 0.0);
  B(&negative, Assembler::Condition::lo);

  // Handle the simple case of a positive input, and also -0 and NaN.
  // Rounding proceeds with consideration of the fractional part of the input:
  // 1. If > 0.5, round to integer with higher absolute value (so, up).
  // 2. If < 0.5, round to integer with lower absolute value (so, down).
  // 3. If = 0.5, round to +Infinity (so, up).
  {
    // Convert to signed 32-bit integer, rounding halfway cases away from zero.
    // In the case of overflow, the output is saturated.
    // In the case of NaN and -0, the output is zero.
    Fcvtas(ARMRegister(dest, 32), src64);
    // If the output potentially saturated, fail.
    branch32(Assembler::Equal, dest, Imm32(INT_MAX), fail);

    // If the result of the rounding was non-zero, return the output.
    // In the case of zero, the input may have been NaN or -0, which must bail.
    branch32(Assembler::NotEqual, dest, Imm32(0), &done);
    {
      // If input is NaN, comparisons set the C and V bits of the NZCV flags.
      Fcmp(src64, 0.0);
      B(fail, Assembler::Overflow);

      // Move all 64 bits of the input into a scratch register to check for -0.
      vixl::UseScratchRegisterScope temps(this);
      const ARMRegister scratchGPR64 = temps.AcquireX();
      Fmov(scratchGPR64, src64);
      Cmp(scratchGPR64, vixl::Operand(uint64_t(0x8000000000000000)));
      B(fail, Assembler::Equal);
    }

    jump(&done);
  }

  // Handle the complicated case of a negative input.
  // Rounding proceeds with consideration of the fractional part of the input:
  // 1. If > 0.5, round to integer with higher absolute value (so, down).
  // 2. If < 0.5, round to integer with lower absolute value (so, up).
  // 3. If = 0.5, round to +Infinity (so, up).
  bind(&negative);
  {
    // Inputs in [-0.5, 0) need 0.5 added; other negative inputs need
    // the biggest double less than 0.5.
    Label join;
    loadConstantDouble(GetBiggestNumberLessThan(0.5), temp);
    loadConstantDouble(-0.5, scratch);
    branchDouble(Assembler::DoubleLessThan, src, scratch, &join);
    loadConstantDouble(0.5, temp);
    bind(&join);

    addDouble(src, temp);
    // Round all values toward -Infinity.
    // In the case of overflow, the output is saturated.
    // NaN and -0 are already handled by the "positive number" path above.
    Fcvtms(ARMRegister(dest, 32), temp);
    // If the output potentially saturated, fail.
    branch32(Assembler::Equal, dest, Imm32(INT_MIN), fail);

    // If output is zero, then the actual result is -0. Fail.
    branchTest32(Assembler::Zero, dest, dest, fail);
  }

  bind(&done);
}

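// The FRINT family maps directly onto these rounding modes: FRINTP rounds
// toward +Infinity, FRINTM toward -Infinity, FRINTN to nearest with ties to
// even, and FRINTZ toward zero.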
void MacroAssembler::nearbyIntDouble(RoundingMode mode, FloatRegister src,
                                     FloatRegister dest) {
  switch (mode) {
    case RoundingMode::Up:
      frintp(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
      return;
    case RoundingMode::Down:
      frintm(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
      return;
    case RoundingMode::NearestTiesToEven:
      frintn(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
      return;
    case RoundingMode::TowardsZero:
      frintz(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
      return;
  }
  MOZ_CRASH("unexpected mode");
}

void MacroAssembler::nearbyIntFloat32(RoundingMode mode, FloatRegister src,
                                      FloatRegister dest) {
  switch (mode) {
    case RoundingMode::Up:
      frintp(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
      return;
    case RoundingMode::Down:
      frintm(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
      return;
    case RoundingMode::NearestTiesToEven:
      frintn(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
      return;
    case RoundingMode::TowardsZero:
      frintz(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
      return;
  }
  MOZ_CRASH("unexpected mode");
}

void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs,
                                    FloatRegister output) {
  ScratchDoubleScope scratch(*this);

  // Double with only the sign bit set (= negative zero).
  loadConstantDouble(0, scratch);
  negateDouble(scratch);

  if (lhs != output) {
    moveDouble(lhs, output);
  }

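  // BIT (bitwise insert if true): wherever the mask (scratch, which has only
  // the sign bit set) contains a 1, copy that bit from rhs into output; all
  // other bits keep the value already in output (i.e. lhs). The net effect is
  // output = copysign(lhs, rhs).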
  bit(ARMFPRegister(output.encoding(), vixl::VectorFormat::kFormat8B),
      ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B),
      ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B));
}

void MacroAssembler::copySignFloat32(FloatRegister lhs, FloatRegister rhs,
                                     FloatRegister output) {
  ScratchFloat32Scope scratch(*this);

  // Float with only the sign bit set (= negative zero).
  loadConstantFloat32(0, scratch);
  negateFloat(scratch);

  if (lhs != output) {
    moveFloat32(lhs, output);
  }

  bit(ARMFPRegister(output.encoding(), vixl::VectorFormat::kFormat8B),
      ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B),
      ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B));
}

//}}} check_macroassembler_style

}  // namespace jit
}  // namespace js