//! Implementation of a standard AArch64 ABI.

use crate::ir;
use crate::ir::types;
use crate::ir::types::*;
use crate::ir::MemFlags;
use crate::ir::Opcode;
use crate::ir::{ExternalName, LibCall};
use crate::isa;
use crate::isa::aarch64::{inst::EmitState, inst::*};
use crate::isa::unwind::UnwindInst;
use crate::machinst::*;
use crate::settings;
use crate::{CodegenError, CodegenResult};
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, Writable};
use smallvec::{smallvec, SmallVec};

// We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because
// these ABIs are very similar.

/// Support for the AArch64 ABI from the callee side (within a function body).
pub(crate) type AArch64ABICallee = ABICalleeImpl<AArch64MachineDeps>;

/// Support for the AArch64 ABI from the caller side (at a callsite).
pub(crate) type AArch64ABICaller = ABICallerImpl<AArch64MachineDeps>;
// SpiderMonkey-specific ABI convention.

/// This is SpiderMonkey's `WasmTableCallSigReg`.
static BALDRDASH_SIG_REG: u8 = 10;

/// This is SpiderMonkey's `WasmTlsReg`.
static BALDRDASH_TLS_REG: u8 = 23;

/// Offset in the stack-arg area to the callee-TLS slot in the Baldrdash-2020 calling convention.
static BALDRDASH_CALLEE_TLS_OFFSET: i64 = 0;
/// Offset in the stack-arg area to the caller-TLS slot in the Baldrdash-2020 calling convention.
static BALDRDASH_CALLER_TLS_OFFSET: i64 = 8;

// These two lists represent the registers the JIT may *not* use at any point in generated code.
//
// So these are callee-preserved from the JIT's point of view, and every register not in this list
// has to be caller-preserved by definition.
//
// Keep these lists in sync with the NonAllocatableMask set in SpiderMonkey's
// Architecture-arm64.cpp.

// Indexed by physical register number.
#[rustfmt::skip]
static BALDRDASH_JIT_CALLEE_SAVED_GPR: &[bool] = &[
    /* 0 = */ false, false, false, false, false, false, false, false,
    /* 8 = */ false, false, false, false, false, false, false, false,
    /* 16 = */ true /* x16 / ip0 */, true /* x17 / ip1 */, true /* x18 / TLS */, false,
    /* 20 = */ false, false, false, false,
    /* 24 = */ false, false, false, false,
    // x28, the pseudo stack pointer, should also be in this list; however, the wasm stubs
    // currently trash it freely.
    /* 28 = */ false, false, true /* x30 = LR */, false /* x31 = SP */
];

#[rustfmt::skip]
static BALDRDASH_JIT_CALLEE_SAVED_FPU: &[bool] = &[
    /* 0 = */ false, false, false, false, false, false, false, false,
    /* 8 = */ false, false, false, false, false, false, false, false,
    /* 16 = */ false, false, false, false, false, false, false, false,
    /* 24 = */ false, false, false, false, false, false, false, true /* v31 / d31 */
];

/// This is the limit for the size of argument and return-value areas on the
/// stack. We place a reasonable limit here to avoid integer overflow issues
/// with 32-bit arithmetic: for now, 128 MB.
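/// (For reference, 128 MB = 0x0800_0000 bytes, comfortably below `i32::MAX`,
/// so offsets bounded by this limit still fit in 32-bit arithmetic.)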
static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;

/// Try to fill a Baldrdash register, returning it if it was found.
fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Option<ABIArg> {
    if call_conv.extends_baldrdash() {
        match &param.purpose {
            &ir::ArgumentPurpose::VMContext => {
                // This is SpiderMonkey's `WasmTlsReg`.
                Some(ABIArg::reg(
                    xreg(BALDRDASH_TLS_REG).to_real_reg(),
                    ir::types::I64,
                    param.extension,
                    param.purpose,
                ))
            }
            &ir::ArgumentPurpose::SignatureId => {
                // This is SpiderMonkey's `WasmTableCallSigReg`.
                Some(ABIArg::reg(
                    xreg(BALDRDASH_SIG_REG).to_real_reg(),
                    ir::types::I64,
                    param.extension,
                    param.purpose,
                ))
            }
            &ir::ArgumentPurpose::CalleeTLS => {
                // This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
                assert!(call_conv == isa::CallConv::Baldrdash2020);
                Some(ABIArg::stack(
                    BALDRDASH_CALLEE_TLS_OFFSET,
                    ir::types::I64,
                    ir::ArgumentExtension::None,
                    param.purpose,
                ))
            }
            &ir::ArgumentPurpose::CallerTLS => {
                // This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
                assert!(call_conv == isa::CallConv::Baldrdash2020);
                Some(ABIArg::stack(
                    BALDRDASH_CALLER_TLS_OFFSET,
                    ir::types::I64,
                    ir::ArgumentExtension::None,
                    param.purpose,
                ))
            }
            _ => None,
        }
    } else {
        None
    }
}

impl From<StackAMode> for AMode {
    fn from(stack: StackAMode) -> AMode {
        match stack {
            StackAMode::FPOffset(off, ty) => AMode::FPOffset(off, ty),
            StackAMode::NominalSPOffset(off, ty) => AMode::NominalSPOffset(off, ty),
            StackAMode::SPOffset(off, ty) => AMode::SPOffset(off, ty),
        }
    }
}

/// Returns the sizes (in bytes) of stack space needed to save the registers
/// in `int_reg` and `vec_reg`, respectively.
fn saved_reg_stack_size(
    call_conv: isa::CallConv,
    int_reg: &[Writable<RealReg>],
    vec_reg: &[Writable<RealReg>],
) -> (usize, usize) {
    // Round up to a multiple of 2, to keep 16-byte stack alignment.
    let int_save_bytes = (int_reg.len() + (int_reg.len() & 1)) * 8;
    // The Baldrdash ABIs require saving and restoring the whole 16-byte
    // SIMD & FP registers, so the necessary stack space is always a
    // multiple of the mandatory 16-byte stack alignment. However, the
    // Procedure Call Standard for the Arm 64-bit Architecture (AAPCS64,
    // including several related ABIs such as the one used by Windows)
    // mandates saving only the bottom 8 bytes of the vector registers,
    // so in that case we round up the number of registers to ensure proper
    // stack alignment (similarly to the situation with `int_reg`).
    let vec_reg_size = if call_conv.extends_baldrdash() { 16 } else { 8 };
    let vec_save_padding = if call_conv.extends_baldrdash() {
        0
    } else {
        vec_reg.len() & 1
    };
    let vec_save_bytes = (vec_reg.len() + vec_save_padding) * vec_reg_size;
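
    // For example, with 3 integer and 3 vector clobbers under AAPCS64, the
    // integer count rounds up to 4 regs * 8 = 32 bytes, and the vector count
    // rounds up to 4 regs * 8 = 32 bytes. Under Baldrdash, the same 3 vector
    // clobbers take 3 * 16 = 48 bytes, which is already 16-byte aligned.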

    (int_save_bytes, vec_save_bytes)
}

/// AArch64-specific ABI behavior. This struct just serves as an implementation
/// point for the trait; it is never actually instantiated.
pub(crate) struct AArch64MachineDeps;

impl ABIMachineSpec for AArch64MachineDeps {
    type I = Inst;

    fn word_bits() -> u32 {
        64
    }

    /// Return the required stack alignment in bytes.
    fn stack_align(_call_conv: isa::CallConv) -> u32 {
        16
    }

    fn compute_arg_locs(
        call_conv: isa::CallConv,
        _flags: &settings::Flags,
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
    ) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
        let is_baldrdash = call_conv.extends_baldrdash();
        let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;

        // See the AArch64 ABI (https://c9x.me/compile/bib/abi-arm64.pdf), section 5.4.
        //
        // MacOS aarch64 is slightly different; see also
        // https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms.
        // We diverge from the MacOS aarch64 implementation in the
        // following ways:
        // - sign- and zero-extensions of data types less than 32 bits are not
        //   implemented yet;
        // - i128 argument passing isn't implemented yet in the standard (non-
        //   MacOS) aarch64 ABI;
        // - we align the argument stack space to a 16-byte boundary, while
        //   MacOS allows aligning only to 8 bytes. In practice this means we
        //   slightly overallocate when calling, which is fine and doesn't
        //   break our other invariant that the stack is always allocated in
        //   16-byte chunks.

        let mut next_xreg = 0;
        let mut next_vreg = 0;
        let mut next_stack: u64 = 0;
        let mut ret = vec![];

        if args_or_rets == ArgsOrRets::Args && has_baldrdash_tls {
            // Baldrdash ABI-2020 always has two stack-arg slots reserved, for the callee and
            // caller TLS-register values, respectively.
            next_stack = 16;
        }

        let (max_per_class_reg_vals, mut remaining_reg_vals) = match args_or_rets {
            ArgsOrRets::Args => (8, 16), // x0-x7 and v0-v7

            // Note on return values: in the regular ABI, we may return values
            // in up to 8 registers of each class, independently of the number
            // of register values returned in the other class. That is, we can
            // return values in up to 8 integer and 8 vector registers at once.
            //
            // In Baldrdash and Wasmtime, we can only use one register for a
            // return value across all register classes. That is, we can't
            // return values in both one integer and one vector register; only
            // one return value may be in a register at all.
            ArgsOrRets::Rets => {
                if is_baldrdash || call_conv.extends_wasmtime() {
                    (1, 1) // x0 or v0, but not both
                } else {
                    (8, 16) // x0-x7 and v0-v7
                }
            }
        };
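
        // As an illustration of AAPCS64 assignment: a signature
        // `(i64, f64, i64)` takes x0, v0, and x1, since the integer and
        // vector register counters advance independently.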

        for i in 0..params.len() {
            // Process returns backward, according to the SpiderMonkey ABI (which we
            // adopt internally if `is_baldrdash` is set).
            let param = match (args_or_rets, is_baldrdash) {
                (ArgsOrRets::Args, _) => &params[i],
                (ArgsOrRets::Rets, false) => &params[i],
                (ArgsOrRets::Rets, true) => &params[params.len() - 1 - i],
            };

            // Validate "purpose".
            match &param.purpose {
                &ir::ArgumentPurpose::VMContext
                | &ir::ArgumentPurpose::Normal
                | &ir::ArgumentPurpose::StackLimit
                | &ir::ArgumentPurpose::SignatureId
                | &ir::ArgumentPurpose::CallerTLS
                | &ir::ArgumentPurpose::CalleeTLS
                | &ir::ArgumentPurpose::StructReturn
                | &ir::ArgumentPurpose::StructArgument(_) => {}
                _ => panic!(
                    "Unsupported argument purpose {:?} in signature: {:?}",
                    param.purpose, params
                ),
            }

            assert!(
                legal_type_for_machine(param.value_type),
                "Invalid type for AArch64: {:?}",
                param.value_type
            );
            let (rcs, _) = Inst::rc_for_type(param.value_type).unwrap();
            assert!(rcs.len() == 1, "Multi-reg values not supported yet");
            let rc = rcs[0];

            let next_reg = match rc {
                RegClass::I64 => &mut next_xreg,
                RegClass::V128 => &mut next_vreg,
                _ => panic!("Invalid register class: {:?}", rc),
            };

            if let Some(param) = try_fill_baldrdash_reg(call_conv, param) {
                assert!(rc == RegClass::I64);
                ret.push(param);
            } else if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
                let offset = next_stack as i64;
                let size = size as u64;
                assert!(size % 8 == 0, "StructArgument size is not properly aligned");
                next_stack += size;
                ret.push(ABIArg::StructArg {
                    offset,
                    size,
                    purpose: param.purpose,
                });
            } else if *next_reg < max_per_class_reg_vals && remaining_reg_vals > 0 {
                let reg = match rc {
                    RegClass::I64 => xreg(*next_reg),
                    RegClass::V128 => vreg(*next_reg),
                    _ => unreachable!(),
                };
                ret.push(ABIArg::reg(
                    reg.to_real_reg(),
                    param.value_type,
                    param.extension,
                    param.purpose,
                ));
                *next_reg += 1;
                remaining_reg_vals -= 1;
            } else {
                // Compute the stack slot's size.
                let size = (ty_bits(param.value_type) / 8) as u64;

                let size = if call_conv == isa::CallConv::AppleAarch64
                    || (call_conv.extends_wasmtime() && args_or_rets == ArgsOrRets::Rets)
                {
                    // MacOS aarch64 and Wasmtime allow stack slots with
                    // sizes less than 8 bytes. They still need to be
                    // properly aligned on their natural data alignment,
                    // though.
                    size
                } else {
                    // Every arg takes a minimum slot of 8 bytes. (16-byte stack
                    // alignment happens separately after all args.)
                    std::cmp::max(size, 8)
                };

                // Align the stack slot.
                debug_assert!(size.is_power_of_two());
                next_stack = align_to(next_stack, size);

                ret.push(ABIArg::stack(
                    next_stack as i64,
                    param.value_type,
                    param.extension,
                    param.purpose,
                ));
                next_stack += size;
            }
        }

        if args_or_rets == ArgsOrRets::Rets && is_baldrdash {
            ret.reverse();
        }

        let extra_arg = if add_ret_area_ptr {
            debug_assert!(args_or_rets == ArgsOrRets::Args);
            if next_xreg < max_per_class_reg_vals && remaining_reg_vals > 0 {
                ret.push(ABIArg::reg(
                    xreg(next_xreg).to_real_reg(),
                    I64,
                    ir::ArgumentExtension::None,
                    ir::ArgumentPurpose::Normal,
                ));
            } else {
                ret.push(ABIArg::stack(
                    next_stack as i64,
                    I64,
                    ir::ArgumentExtension::None,
                    ir::ArgumentPurpose::Normal,
                ));
                next_stack += 8;
            }
            Some(ret.len() - 1)
        } else {
            None
        };

        next_stack = align_to(next_stack, 16);

        // To avoid overflow issues, limit the arg/return size to something
        // reasonable -- here, 128 MB.
        if next_stack > STACK_ARG_RET_SIZE_LIMIT {
            return Err(CodegenError::ImplLimitExceeded);
        }

        Ok((ret, next_stack as i64, extra_arg))
    }

    fn fp_to_arg_offset(call_conv: isa::CallConv, flags: &settings::Flags) -> i64 {
        if call_conv.extends_baldrdash() {
            let num_words = flags.baldrdash_prologue_words() as i64;
            debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words");
            debug_assert_eq!(num_words % 2, 0, "stack must be 16-aligned");
            num_words * 8
        } else {
            16 // frame pointer + return address.
        }
    }

    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
        Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted())
    }

    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {
        Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted())
    }

    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
        Inst::gen_move(to_reg, from_reg, ty)
    }

    fn gen_extend(
        to_reg: Writable<Reg>,
        from_reg: Reg,
        signed: bool,
        from_bits: u8,
        to_bits: u8,
    ) -> Inst {
        assert!(from_bits < to_bits);
        Inst::Extend {
            rd: to_reg,
            rn: from_reg,
            signed,
            from_bits,
            to_bits,
        }
    }

    fn gen_ret() -> Inst {
        Inst::Ret
    }

    fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallInstVec<Inst> {
        let imm = imm as u64;
        let mut insts = SmallVec::new();
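        // AArch64 arithmetic immediates are 12 bits, optionally shifted left
        // by 12; anything that doesn't fit must be materialized into a
        // scratch register first.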
        if let Some(imm12) = Imm12::maybe_from_u64(imm) {
            insts.push(Inst::AluRRImm12 {
                alu_op: ALUOp::Add64,
                rd: into_reg,
                rn: from_reg,
                imm12,
            });
        } else {
            let scratch2 = writable_tmp2_reg();
            assert_ne!(scratch2.to_reg(), from_reg);
            insts.extend(Inst::load_constant(scratch2, imm.into()));
            insts.push(Inst::AluRRRExtend {
                alu_op: ALUOp::Add64,
                rd: into_reg,
                rn: from_reg,
                rm: scratch2.to_reg(),
                extendop: ExtendOp::UXTX,
            });
        }
        insts
    }

    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
        let mut insts = SmallVec::new();
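        // `subs xzr, sp, limit_reg` is effectively `cmp sp, limit_reg`:
        // it sets the flags and discards the result.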
        insts.push(Inst::AluRRRExtend {
            alu_op: ALUOp::SubS64,
            rd: writable_zero_reg(),
            rn: stack_reg(),
            rm: limit_reg,
            extendop: ExtendOp::UXTX,
        });
        insts.push(Inst::TrapIf {
            trap_code: ir::TrapCode::StackOverflow,
            // Here `Lo` == "less than" when interpreting the two
            // operands as unsigned integers.
            kind: CondBrKind::Cond(Cond::Lo),
        });
        insts
    }

    fn gen_epilogue_placeholder() -> Inst {
        Inst::EpiloguePlaceholder
    }

    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, _ty: Type) -> Inst {
        let mem = mem.into();
        Inst::LoadAddr { rd: into_reg, mem }
    }

    fn get_stacklimit_reg() -> Reg {
        spilltmp_reg()
    }

    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
        let mem = AMode::RegOffset(base, offset as i64, ty);
        Inst::gen_load(into_reg, mem, ty, MemFlags::trusted())
    }

    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
        let mem = AMode::RegOffset(base, offset as i64, ty);
        Inst::gen_store(mem, from_reg, ty, MemFlags::trusted())
    }

    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> {
        if amount == 0 {
            return SmallVec::new();
        }

        let (amount, is_sub) = if amount > 0 {
            (amount as u64, false)
        } else {
            (-amount as u64, true)
        };
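
        // For example, an adjustment of -32 becomes `sub sp, sp, #32`;
        // adjustments too large for a 12-bit immediate load the amount into
        // the spill temporary and use a register-register add/sub instead.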

        let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 };

        let mut ret = SmallVec::new();
        if let Some(imm12) = Imm12::maybe_from_u64(amount) {
            let adj_inst = Inst::AluRRImm12 {
                alu_op,
                rd: writable_stack_reg(),
                rn: stack_reg(),
                imm12,
            };
            ret.push(adj_inst);
        } else {
            let tmp = writable_spilltmp_reg();
            let const_inst = Inst::load_constant(tmp, amount);
            let adj_inst = Inst::AluRRRExtend {
                alu_op,
                rd: writable_stack_reg(),
                rn: stack_reg(),
                rm: tmp.to_reg(),
                extendop: ExtendOp::UXTX,
            };
            ret.extend(const_inst);
            ret.push(adj_inst);
        }
        ret
    }

    fn gen_nominal_sp_adj(offset: i32) -> Inst {
        Inst::VirtualSPOffsetAdj {
            offset: offset as i64,
        }
    }

    fn gen_prologue_frame_setup(flags: &settings::Flags) -> SmallInstVec<Inst> {
        let mut insts = SmallVec::new();

        if flags.unwind_info() {
            insts.push(Inst::Unwind {
                inst: UnwindInst::Aarch64SetPointerAuth {
                    return_addresses: false,
                },
            });
        }

        // stp fp (x29), lr (x30), [sp, #-16]!
        insts.push(Inst::StoreP64 {
            rt: fp_reg(),
            rt2: link_reg(),
            mem: PairAMode::PreIndexed(
                writable_stack_reg(),
                SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
            ),
            flags: MemFlags::trusted(),
        });

        if flags.unwind_info() {
            insts.push(Inst::Unwind {
                inst: UnwindInst::PushFrameRegs {
                    offset_upward_to_caller_sp: 16, // FP, LR
                },
            });
        }

        // mov fp (x29), sp. This uses the `ADD rd, rn, #0` form of `MOV`
        // because the usual `ORR` encoding does not work with SP.
        insts.push(Inst::AluRRImm12 {
            alu_op: ALUOp::Add64,
            rd: writable_fp_reg(),
            rn: stack_reg(),
            imm12: Imm12 {
                bits: 0,
                shift12: false,
            },
        });
        insts
    }

    fn gen_epilogue_frame_restore(_: &settings::Flags) -> SmallInstVec<Inst> {
        let mut insts = SmallVec::new();

        // N.B.: sp is already adjusted to the appropriate place by the
        // clobber-restore code (which also frees the fixed frame). Hence,
        // there is no need for the usual `mov sp, fp` here.

        // `ldp fp, lr, [sp], #16`
        insts.push(Inst::LoadP64 {
            rt: writable_fp_reg(),
            rt2: writable_link_reg(),
            mem: PairAMode::PostIndexed(
                writable_stack_reg(),
                SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
            ),
            flags: MemFlags::trusted(),
        });
        insts
    }

    fn gen_probestack(_: u32) -> SmallInstVec<Self::I> {
        // TODO: implement if we ever require stack probes on an AArch64 host
        // (unlikely unless Lucet is ported)
        smallvec![]
    }

    // Returns stack bytes used as well as instructions. Does not adjust
    // the nominal SP offset; the generic abi_impl code will do that.
    fn gen_clobber_save(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        clobbers: &Set<Writable<RealReg>>,
        fixed_frame_storage_size: u32,
        _outgoing_args_size: u32,
    ) -> (u64, SmallVec<[Inst; 16]>) {
        let mut insts = SmallVec::new();
        let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);

        let (int_save_bytes, vec_save_bytes) =
            saved_reg_stack_size(call_conv, &clobbered_int, &clobbered_vec);
        let total_save_bytes = int_save_bytes + vec_save_bytes;
        let clobber_size = total_save_bytes as i32;

        if flags.unwind_info() {
            // The *unwind* frame (but not the actual frame) starts at the
            // clobbers, just below the saved FP/LR pair.
            insts.push(Inst::Unwind {
                inst: UnwindInst::DefineNewFrame {
                    offset_downward_to_clobbers: clobber_size as u32,
                    offset_upward_to_caller_sp: 16, // FP, LR
                },
            });
        }

        // We use pre-indexed addressing modes here, rather than the possibly
        // more efficient "subtract sp once, then use fixed offsets" scheme,
        // because (i) we cannot necessarily guarantee that the offset of a
        // clobber-save slot will be within the SImm7Scaled (+504-byte) offset
        // range of the whole frame including other slots, (ii) it is more
        // complex to conditionally generate a two-stage SP adjustment
        // (clobbers then fixed frame) otherwise, and (iii) we generally just
        // want to maintain simplicity here for maintainability. Because
        // clobbers are at the top of the frame, just below FP, all that is
        // necessary is to use the pre-indexed "push" `[sp, #-16]!` addressing
        // mode.
        //
        // `clobber_offset` tracks the offset above the start of clobbers for
        // unwind-info purposes.
        let mut clobber_offset = clobber_size as u32;
        let clobber_offset_change = 16;
        let iter = clobbered_int.chunks_exact(2);

        if let [rd] = iter.remainder() {
            let rd = rd.to_reg().to_reg();

            debug_assert_eq!(rd.get_class(), RegClass::I64);
            // str rd, [sp, #-16]!
            insts.push(Inst::Store64 {
                rd,
                mem: AMode::PreIndexed(
                    writable_stack_reg(),
                    SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
                ),
                flags: MemFlags::trusted(),
            });

            if flags.unwind_info() {
                clobber_offset -= clobber_offset_change as u32;
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset,
                        reg: rd.to_real_reg(),
                    },
                });
            }
        }

        let mut iter = iter.rev();

        while let Some([rt, rt2]) = iter.next() {
            // .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg
            let rt = rt.to_reg().to_reg();
            let rt2 = rt2.to_reg().to_reg();

            debug_assert!(rt.get_class() == RegClass::I64);
            debug_assert!(rt2.get_class() == RegClass::I64);

            // stp rt, rt2, [sp, #-16]!
            insts.push(Inst::StoreP64 {
                rt,
                rt2,
                mem: PairAMode::PreIndexed(
                    writable_stack_reg(),
                    SImm7Scaled::maybe_from_i64(-clobber_offset_change, types::I64).unwrap(),
                ),
                flags: MemFlags::trusted(),
            });

            if flags.unwind_info() {
                clobber_offset -= clobber_offset_change as u32;
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset,
                        reg: rt.to_real_reg(),
                    },
                });
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset: clobber_offset + (clobber_offset_change / 2) as u32,
                        reg: rt2.to_real_reg(),
                    },
                });
            }
        }

        let store_vec_reg = |rd| {
            if call_conv.extends_baldrdash() {
                Inst::FpuStore128 {
                    rd,
                    mem: AMode::PreIndexed(
                        writable_stack_reg(),
                        SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
                    ),
                    flags: MemFlags::trusted(),
                }
            } else {
                Inst::FpuStore64 {
                    rd,
                    mem: AMode::PreIndexed(
                        writable_stack_reg(),
                        SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
                    ),
                    flags: MemFlags::trusted(),
                }
            }
        };
        let iter = clobbered_vec.chunks_exact(2);

        if let [rd] = iter.remainder() {
            let rd = rd.to_reg().to_reg();

            debug_assert_eq!(rd.get_class(), RegClass::V128);
            insts.push(store_vec_reg(rd));

            if flags.unwind_info() {
                clobber_offset -= clobber_offset_change as u32;
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset,
                        reg: rd.to_real_reg(),
                    },
                });
            }
        }

        let store_vec_reg_pair = |rt, rt2| {
            if call_conv.extends_baldrdash() {
                let clobber_offset_change = 32;

                (
                    Inst::FpuStoreP128 {
                        rt,
                        rt2,
                        mem: PairAMode::PreIndexed(
                            writable_stack_reg(),
                            SImm7Scaled::maybe_from_i64(-clobber_offset_change, I8X16).unwrap(),
                        ),
                        flags: MemFlags::trusted(),
                    },
                    clobber_offset_change as u32,
                )
            } else {
                let clobber_offset_change = 16;

                (
                    Inst::FpuStoreP64 {
                        rt,
                        rt2,
                        mem: PairAMode::PreIndexed(
                            writable_stack_reg(),
                            SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64).unwrap(),
                        ),
                        flags: MemFlags::trusted(),
                    },
                    clobber_offset_change as u32,
                )
            }
        };
        let mut iter = iter.rev();

        while let Some([rt, rt2]) = iter.next() {
            let rt = rt.to_reg().to_reg();
            let rt2 = rt2.to_reg().to_reg();

            debug_assert_eq!(rt.get_class(), RegClass::V128);
            debug_assert_eq!(rt2.get_class(), RegClass::V128);

            let (inst, clobber_offset_change) = store_vec_reg_pair(rt, rt2);

            insts.push(inst);

            if flags.unwind_info() {
                clobber_offset -= clobber_offset_change;
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset,
                        reg: rt.to_real_reg(),
                    },
                });
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset: clobber_offset + clobber_offset_change / 2,
                        reg: rt2.to_real_reg(),
                    },
                });
            }
        }

        // Allocate the fixed frame below the clobbers if necessary.
        if fixed_frame_storage_size > 0 {
            insts.extend(Self::gen_sp_reg_adjust(-(fixed_frame_storage_size as i32)));
        }

        (total_save_bytes as u64, insts)
    }

    fn gen_clobber_restore(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        clobbers: &Set<Writable<RealReg>>,
        fixed_frame_storage_size: u32,
        _outgoing_args_size: u32,
    ) -> SmallVec<[Inst; 16]> {
        let mut insts = SmallVec::new();
        let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);

        // Free the fixed frame if necessary.
        if fixed_frame_storage_size > 0 {
            insts.extend(Self::gen_sp_reg_adjust(fixed_frame_storage_size as i32));
        }

        let load_vec_reg = |rd| {
            if call_conv.extends_baldrdash() {
                Inst::FpuLoad128 {
                    rd,
                    mem: AMode::PostIndexed(
                        writable_stack_reg(),
                        SImm9::maybe_from_i64(16).unwrap(),
                    ),
                    flags: MemFlags::trusted(),
                }
            } else {
                Inst::FpuLoad64 {
                    rd,
                    mem: AMode::PostIndexed(
                        writable_stack_reg(),
                        SImm9::maybe_from_i64(16).unwrap(),
                    ),
                    flags: MemFlags::trusted(),
                }
            }
        };
        let load_vec_reg_pair = |rt, rt2| {
            if call_conv.extends_baldrdash() {
                Inst::FpuLoadP128 {
                    rt,
                    rt2,
                    mem: PairAMode::PostIndexed(
                        writable_stack_reg(),
                        SImm7Scaled::maybe_from_i64(32, I8X16).unwrap(),
                    ),
                    flags: MemFlags::trusted(),
                }
            } else {
                Inst::FpuLoadP64 {
                    rt,
                    rt2,
                    mem: PairAMode::PostIndexed(
                        writable_stack_reg(),
                        SImm7Scaled::maybe_from_i64(16, F64).unwrap(),
                    ),
                    flags: MemFlags::trusted(),
                }
            }
        };

        let mut iter = clobbered_vec.chunks_exact(2);

        while let Some([rt, rt2]) = iter.next() {
            let rt = rt.map(|r| r.to_reg());
            let rt2 = rt2.map(|r| r.to_reg());

            debug_assert_eq!(rt.to_reg().get_class(), RegClass::V128);
            debug_assert_eq!(rt2.to_reg().get_class(), RegClass::V128);
            insts.push(load_vec_reg_pair(rt, rt2));
        }

        debug_assert!(iter.remainder().len() <= 1);

        if let [rd] = iter.remainder() {
            let rd = rd.map(|r| r.to_reg());

            debug_assert_eq!(rd.to_reg().get_class(), RegClass::V128);
            insts.push(load_vec_reg(rd));
        }

        let mut iter = clobbered_int.chunks_exact(2);

        while let Some([rt, rt2]) = iter.next() {
            let rt = rt.map(|r| r.to_reg());
            let rt2 = rt2.map(|r| r.to_reg());

            debug_assert_eq!(rt.to_reg().get_class(), RegClass::I64);
            debug_assert_eq!(rt2.to_reg().get_class(), RegClass::I64);
            // ldp rt, rt2, [sp], #16
            insts.push(Inst::LoadP64 {
                rt,
                rt2,
                mem: PairAMode::PostIndexed(
                    writable_stack_reg(),
                    SImm7Scaled::maybe_from_i64(16, I64).unwrap(),
                ),
                flags: MemFlags::trusted(),
            });
        }

        debug_assert!(iter.remainder().len() <= 1);

        if let [rd] = iter.remainder() {
            let rd = rd.map(|r| r.to_reg());

            debug_assert_eq!(rd.to_reg().get_class(), RegClass::I64);
            // ldr rd, [sp], #16
            insts.push(Inst::ULoad64 {
                rd,
                mem: AMode::PostIndexed(writable_stack_reg(), SImm9::maybe_from_i64(16).unwrap()),
                flags: MemFlags::trusted(),
            });
        }

        // If this is Baldrdash-2020, restore the callee (i.e., our) TLS
        // register. We may have allocated it for something else and clobbered
        // it, but the ABI expects us to leave the TLS register unchanged.
        if call_conv == isa::CallConv::Baldrdash2020 {
            let off = BALDRDASH_CALLEE_TLS_OFFSET + Self::fp_to_arg_offset(call_conv, flags);
            insts.push(Inst::gen_load(
                writable_xreg(BALDRDASH_TLS_REG),
                AMode::UnsignedOffset(fp_reg(), UImm12Scaled::maybe_from_i64(off, I64).unwrap()),
                I64,
                MemFlags::trusted(),
            ));
        }

        insts
    }

    fn gen_call(
        dest: &CallDest,
        uses: Vec<Reg>,
        defs: Vec<Writable<Reg>>,
        opcode: ir::Opcode,
        tmp: Writable<Reg>,
        callee_conv: isa::CallConv,
        caller_conv: isa::CallConv,
    ) -> SmallVec<[(InstIsSafepoint, Inst); 2]> {
        let mut insts = SmallVec::new();
        match &dest {
            &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push((
                InstIsSafepoint::Yes,
                Inst::Call {
                    info: Box::new(CallInfo {
                        dest: name.clone(),
                        uses,
                        defs,
                        opcode,
                        caller_callconv: caller_conv,
                        callee_callconv: callee_conv,
                    }),
                },
            )),
            &CallDest::ExtName(ref name, RelocDistance::Far) => {
                insts.push((
                    InstIsSafepoint::No,
                    Inst::LoadExtName {
                        rd: tmp,
                        name: Box::new(name.clone()),
                        offset: 0,
                    },
                ));
                insts.push((
                    InstIsSafepoint::Yes,
                    Inst::CallInd {
                        info: Box::new(CallIndInfo {
                            rn: tmp.to_reg(),
                            uses,
                            defs,
                            opcode,
                            caller_callconv: caller_conv,
                            callee_callconv: callee_conv,
                        }),
                    },
                ));
            }
            &CallDest::Reg(reg) => insts.push((
                InstIsSafepoint::Yes,
                Inst::CallInd {
                    info: Box::new(CallIndInfo {
                        rn: *reg,
                        uses,
                        defs,
                        opcode,
                        caller_callconv: caller_conv,
                        callee_callconv: callee_conv,
                    }),
                },
            )),
        }

        insts
    }

    fn gen_memcpy(
        call_conv: isa::CallConv,
        dst: Reg,
        src: Reg,
        size: usize,
    ) -> SmallVec<[Self::I; 8]> {
        // Baldrdash should not use struct args.
        assert!(!call_conv.extends_baldrdash());
        let mut insts = SmallVec::new();
        let arg0 = writable_xreg(0);
        let arg1 = writable_xreg(1);
        let arg2 = writable_xreg(2);
        insts.push(Inst::gen_move(arg0, dst, I64));
        insts.push(Inst::gen_move(arg1, src, I64));
        insts.extend(Inst::load_constant(arg2, size as u64).into_iter());
        insts.push(Inst::Call {
            info: Box::new(CallInfo {
                dest: ExternalName::LibCall(LibCall::Memcpy),
                uses: vec![arg0.to_reg(), arg1.to_reg(), arg2.to_reg()],
                defs: Self::get_regs_clobbered_by_call(call_conv),
                opcode: Opcode::Call,
                caller_callconv: call_conv,
                callee_callconv: call_conv,
            }),
        });
        insts
    }

    fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
        // We allocate in terms of 8-byte slots.
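        // For example, an F64 in a vector register fits in one slot, while a
        // full 128-bit vector value (e.g. an I8X16) needs two.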
        match (rc, ty) {
            (RegClass::I64, _) => 1,
            (RegClass::V128, F32) | (RegClass::V128, F64) => 1,
            (RegClass::V128, _) => 2,
            _ => panic!("Unexpected register class!"),
        }
    }

    /// Get the current virtual-SP offset from an instruction-emission state.
    fn get_virtual_sp_offset_from_state(s: &EmitState) -> i64 {
        s.virtual_sp_offset
    }

    /// Get the nominal-SP-to-FP offset from an instruction-emission state.
    fn get_nominal_sp_to_fp(s: &EmitState) -> i64 {
        s.nominal_sp_to_fp
    }

    fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>> {
        let mut caller_saved = Vec::new();
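        // Only x0..x28 are considered here; x29 (FP) and x30 (LR) are saved
        // and restored by the frame setup/teardown code rather than being
        // treated as ordinary clobbers.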
        for i in 0..29 {
            let x = writable_xreg(i);
            if is_reg_clobbered_by_call(call_conv_of_callee, x.to_reg().to_real_reg()) {
                caller_saved.push(x);
            }
        }
        for i in 0..32 {
            let v = writable_vreg(i);
            if is_reg_clobbered_by_call(call_conv_of_callee, v.to_reg().to_real_reg()) {
                caller_saved.push(v);
            }
        }
        caller_saved
    }

    fn get_ext_mode(
        call_conv: isa::CallConv,
        specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension {
        if call_conv.extends_baldrdash() {
            // Baldrdash (SpiderMonkey) always extends args and return values to the full register.
            specified
        } else {
            // No other supported ABI on AArch64 does so.
            ir::ArgumentExtension::None
        }
    }
}

/// Is this type supposed to be seen on this machine? E.g., references of the
/// wrong width are invalid.
fn legal_type_for_machine(ty: Type) -> bool {
    match ty {
        R32 => false,
        _ => true,
    }
}

/// Is the given register saved in the prologue if clobbered, i.e., is it a
/// callee-save?
fn is_reg_saved_in_prologue(call_conv: isa::CallConv, r: RealReg) -> bool {
    if call_conv.extends_baldrdash() {
        match r.get_class() {
            RegClass::I64 => {
                let enc = r.get_hw_encoding();
                return BALDRDASH_JIT_CALLEE_SAVED_GPR[enc];
            }
            RegClass::V128 => {
                let enc = r.get_hw_encoding();
                return BALDRDASH_JIT_CALLEE_SAVED_FPU[enc];
            }
            _ => unimplemented!("baldrdash callee-saved on non-i64/v128 reg classes"),
        };
    }

    match r.get_class() {
        RegClass::I64 => {
            // x19 - x28 inclusive are callee-saves.
            r.get_hw_encoding() >= 19 && r.get_hw_encoding() <= 28
        }
        RegClass::V128 => {
            // v8 - v15 inclusive are callee-saves.
            r.get_hw_encoding() >= 8 && r.get_hw_encoding() <= 15
        }
        _ => panic!("Unexpected RegClass"),
    }
}

/// Return the set of all integer and vector registers that must be saved in the
/// prologue and restored in the epilogue, given the set of all registers
/// written by the function's body.
fn get_regs_saved_in_prologue(
    call_conv: isa::CallConv,
    regs: &Set<Writable<RealReg>>,
) -> (Vec<Writable<RealReg>>, Vec<Writable<RealReg>>) {
    let mut int_saves = vec![];
    let mut vec_saves = vec![];
    for &reg in regs.iter() {
        if is_reg_saved_in_prologue(call_conv, reg.to_reg()) {
            match reg.to_reg().get_class() {
                RegClass::I64 => int_saves.push(reg),
                RegClass::V128 => vec_saves.push(reg),
                _ => panic!("Unexpected RegClass"),
            }
        }
    }
    // Sort registers for deterministic code output. We can do an unstable
    // sort because the registers will be unique (there are no duplicates).
    int_saves.sort_unstable_by_key(|r| r.to_reg().get_index());
    vec_saves.sort_unstable_by_key(|r| r.to_reg().get_index());
    (int_saves, vec_saves)
}

fn is_reg_clobbered_by_call(call_conv_of_callee: isa::CallConv, r: RealReg) -> bool {
    if call_conv_of_callee.extends_baldrdash() {
        match r.get_class() {
            RegClass::I64 => {
                let enc = r.get_hw_encoding();
                if !BALDRDASH_JIT_CALLEE_SAVED_GPR[enc] {
                    return true;
                }
                // Otherwise, fall through to preserve the native ABI's caller-saved set.
            }
            RegClass::V128 => {
                let enc = r.get_hw_encoding();
                if !BALDRDASH_JIT_CALLEE_SAVED_FPU[enc] {
                    return true;
                }
                // Otherwise, fall through to preserve the native ABI's caller-saved set.
            }
            _ => unimplemented!("baldrdash callee-saved on non-i64/v128 reg classes"),
        };
    }

    match r.get_class() {
        RegClass::I64 => {
            // x0 - x17 inclusive are caller-saves.
            r.get_hw_encoding() <= 17
        }
        RegClass::V128 => {
            // v0 - v7 inclusive and v16 - v31 inclusive are caller-saves. The
            // upper 64 bits of v8 - v15 inclusive are also caller-saves.
            // However, because we cannot currently represent partial registers
            // to regalloc.rs, we indicate here that every vector register is
            // caller-saved. Because this function is used at *callsites*,
            // approximating in this direction (saving more than necessary) is
            // conservative and thus safe.
            //
            // Note that we set the 'not included in clobber set' flag in the
            // regalloc.rs API when a call instruction's callee has the same ABI
            // as the caller (the current function body); this is safe (anything
            // clobbered by the callee can be clobbered by the caller as well) and
            // avoids unnecessary saves of v8-v15 in the prologue even though we
            // include them as defs here.
            true
        }
        _ => panic!("Unexpected RegClass"),
    }
}