1 //! Implementation of a standard AArch64 ABI.
2 
3 use crate::ir;
4 use crate::ir::types;
5 use crate::ir::types::*;
6 use crate::ir::MemFlags;
7 use crate::ir::Opcode;
8 use crate::ir::{ExternalName, LibCall};
9 use crate::isa;
10 use crate::isa::aarch64::{inst::EmitState, inst::*};
11 use crate::isa::unwind::UnwindInst;
12 use crate::machinst::*;
13 use crate::settings;
14 use crate::{CodegenError, CodegenResult};
15 use alloc::boxed::Box;
16 use alloc::vec::Vec;
17 use regalloc::{RealReg, Reg, RegClass, Set, Writable};
18 use smallvec::{smallvec, SmallVec};
19 
20 // We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because
21 // these ABIs are very similar.
22 
23 /// Support for the AArch64 ABI from the callee side (within a function body).
24 pub(crate) type AArch64ABICallee = ABICalleeImpl<AArch64MachineDeps>;
25 
26 /// Support for the AArch64 ABI from the caller side (at a callsite).
27 pub(crate) type AArch64ABICaller = ABICallerImpl<AArch64MachineDeps>;
28 
// SpiderMonkey-specific ABI convention.
30 
31 /// This is SpiderMonkey's `WasmTableCallSigReg`.
32 static BALDRDASH_SIG_REG: u8 = 10;
33 
34 /// This is SpiderMonkey's `WasmTlsReg`.
35 static BALDRDASH_TLS_REG: u8 = 23;
36 
37 /// Offset in stack-arg area to callee-TLS slot in Baldrdash-2020 calling convention.
38 static BALDRDASH_CALLEE_TLS_OFFSET: i64 = 0;
39 /// Offset in stack-arg area to caller-TLS slot in Baldrdash-2020 calling convention.
40 static BALDRDASH_CALLER_TLS_OFFSET: i64 = 8;
41 
42 // These two lists represent the registers the JIT may *not* use at any point in generated code.
43 //
44 // So these are callee-preserved from the JIT's point of view, and every register not in this list
45 // has to be caller-preserved by definition.
46 //
47 // Keep these lists in sync with the NonAllocatableMask set in Spidermonkey's
48 // Architecture-arm64.cpp.
49 
50 // Indexed by physical register number.
51 #[rustfmt::skip]
52 static BALDRDASH_JIT_CALLEE_SAVED_GPR: &[bool] = &[
53     /* 0 = */ false, false, false, false, false, false, false, false,
54     /* 8 = */ false, false, false, false, false, false, false, false,
55     /* 16 = */ true /* x16 / ip1 */, true /* x17 / ip2 */, true /* x18 / TLS */, false,
56     /* 20 = */ false, false, false, false,
57     /* 24 = */ false, false, false, false,
    // x28, the pseudo stack pointer, should also be in this list; however, the wasm
    // stubs happily trash it right now.
    /* 28 = */ false, false, true /* x30 = LR */, false /* x31 = SP */
61 ];
62 
63 #[rustfmt::skip]
64 static BALDRDASH_JIT_CALLEE_SAVED_FPU: &[bool] = &[
65     /* 0 = */ false, false, false, false, false, false, false, false,
66     /* 8 = */ false, false, false, false, false, false, false, false,
67     /* 16 = */ false, false, false, false, false, false, false, false,
68     /* 24 = */ false, false, false, false, false, false, false, true /* v31 / d31 */
69 ];
70 
71 /// This is the limit for the size of argument and return-value areas on the
72 /// stack. We place a reasonable limit here to avoid integer overflow issues
73 /// with 32-bit arithmetic: for now, 128 MB.
74 static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;
75 
76 /// Try to fill a Baldrdash register, returning it if it was found.
fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Option<ABIArg> {
78     if call_conv.extends_baldrdash() {
79         match &param.purpose {
80             &ir::ArgumentPurpose::VMContext => {
81                 // This is SpiderMonkey's `WasmTlsReg`.
82                 Some(ABIArg::reg(
83                     xreg(BALDRDASH_TLS_REG).to_real_reg(),
84                     ir::types::I64,
85                     param.extension,
86                     param.purpose,
87                 ))
88             }
89             &ir::ArgumentPurpose::SignatureId => {
90                 // This is SpiderMonkey's `WasmTableCallSigReg`.
91                 Some(ABIArg::reg(
92                     xreg(BALDRDASH_SIG_REG).to_real_reg(),
93                     ir::types::I64,
94                     param.extension,
95                     param.purpose,
96                 ))
97             }
98             &ir::ArgumentPurpose::CalleeTLS => {
99                 // This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
100                 assert!(call_conv == isa::CallConv::Baldrdash2020);
101                 Some(ABIArg::stack(
102                     BALDRDASH_CALLEE_TLS_OFFSET,
103                     ir::types::I64,
104                     ir::ArgumentExtension::None,
105                     param.purpose,
106                 ))
107             }
108             &ir::ArgumentPurpose::CallerTLS => {
109                 // This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
110                 assert!(call_conv == isa::CallConv::Baldrdash2020);
111                 Some(ABIArg::stack(
112                     BALDRDASH_CALLER_TLS_OFFSET,
113                     ir::types::I64,
114                     ir::ArgumentExtension::None,
115                     param.purpose,
116                 ))
117             }
118             _ => None,
119         }
120     } else {
121         None
122     }
123 }
124 
125 impl Into<AMode> for StackAMode {
    fn into(self) -> AMode {
127         match self {
128             StackAMode::FPOffset(off, ty) => AMode::FPOffset(off, ty),
129             StackAMode::NominalSPOffset(off, ty) => AMode::NominalSPOffset(off, ty),
130             StackAMode::SPOffset(off, ty) => AMode::SPOffset(off, ty),
131         }
132     }
133 }
134 
// Returns the sizes of the stack space needed to store the registers in
// `int_reg` and `vec_reg`, respectively.
fn saved_reg_stack_size(
138     call_conv: isa::CallConv,
139     int_reg: &[Writable<RealReg>],
140     vec_reg: &[Writable<RealReg>],
141 ) -> (usize, usize) {
142     // Round up to multiple of 2, to keep 16-byte stack alignment.
143     let int_save_bytes = (int_reg.len() + (int_reg.len() & 1)) * 8;
144     // The Baldrdash ABIs require saving and restoring the whole 16-byte
145     // SIMD & FP registers, so the necessary stack space is always a
146     // multiple of the mandatory 16-byte stack alignment. However, the
147     // Procedure Call Standard for the Arm 64-bit Architecture (AAPCS64,
148     // including several related ABIs such as the one used by Windows)
149     // mandates saving only the bottom 8 bytes of the vector registers,
150     // so in that case we round up the number of registers to ensure proper
151     // stack alignment (similarly to the situation with `int_reg`).
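    // For illustration (a worked example with assumed register counts): under
    // AAPCS64, saving 3 integer and 3 vector registers needs (3 + 1) * 8 = 32
    // bytes per class; under a Baldrdash ABI the same 3 vector registers need
    // 3 * 16 = 48 bytes, which is already a multiple of 16.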
152     let vec_reg_size = if call_conv.extends_baldrdash() { 16 } else { 8 };
153     let vec_save_padding = if call_conv.extends_baldrdash() {
154         0
155     } else {
156         vec_reg.len() & 1
157     };
158     let vec_save_bytes = (vec_reg.len() + vec_save_padding) * vec_reg_size;
159 
160     (int_save_bytes, vec_save_bytes)
161 }
162 
163 /// AArch64-specific ABI behavior. This struct just serves as an implementation
164 /// point for the trait; it is never actually instantiated.
165 pub(crate) struct AArch64MachineDeps;
166 
167 impl ABIMachineSpec for AArch64MachineDeps {
168     type I = Inst;
169 
    fn word_bits() -> u32 {
171         64
172     }
173 
174     /// Return required stack alignment in bytes.
    fn stack_align(_call_conv: isa::CallConv) -> u32 {
176         16
177     }
178 
    fn compute_arg_locs(
180         call_conv: isa::CallConv,
181         _flags: &settings::Flags,
182         params: &[ir::AbiParam],
183         args_or_rets: ArgsOrRets,
184         add_ret_area_ptr: bool,
185     ) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
186         let is_apple_cc = call_conv.extends_apple_aarch64();
187         let is_baldrdash = call_conv.extends_baldrdash();
188         let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;
189 
        // See the AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#64parameter-passing), section 6.4.
        //
        // MacOS aarch64 is slightly different; see also
        // https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms.
        // We diverge from the MacOS aarch64 implementation in the
        // following ways:
        // - sign- and zero-extensions of data types less than 32 bits are not
        //   implemented yet;
        // - we align the arguments' stack space to a 16-byte boundary, while
        //   MacOS allows aligning it on only 8 bytes. In practice this means we
        //   slightly overallocate when calling, which is fine and doesn't break
        //   our other invariant that the stack is always allocated in 16-byte
        //   chunks.
203 
204         let mut next_xreg = 0;
205         let mut next_vreg = 0;
206         let mut next_stack: u64 = 0;
207         let mut ret = vec![];
208 
209         if args_or_rets == ArgsOrRets::Args && has_baldrdash_tls {
210             // Baldrdash ABI-2020 always has two stack-arg slots reserved, for the callee and
211             // caller TLS-register values, respectively.
212             next_stack = 16;
213         }
214 
215         let (max_per_class_reg_vals, mut remaining_reg_vals) = match args_or_rets {
216             ArgsOrRets::Args => (8, 16), // x0-x7 and v0-v7
217 
            // Note on return values: under the regular ABI, we may return values
            // in up to 8 integer registers and up to 8 vector registers,
            // independently of the number of values returned in the other
            // class. That is, we can return values in up to 8 integer and
            // 8 vector registers at once.
            //
            // In Baldrdash and Wasmtime, we can use only one register for a
            // return value across all the register classes. That is, we can't
            // return values in both an integer and a vector register; only
            // one return value may be in a register.
228             ArgsOrRets::Rets => {
229                 if is_baldrdash || call_conv.extends_wasmtime() {
230                     (1, 1) // x0 or v0, but not both
231                 } else {
232                     (8, 16) // x0-x7 and v0-v7
233                 }
234             }
235         };
236 
237         for i in 0..params.len() {
238             // Process returns backward, according to the SpiderMonkey ABI (which we
239             // adopt internally if `is_baldrdash` is set).
240             let param = match (args_or_rets, is_baldrdash) {
241                 (ArgsOrRets::Args, _) => &params[i],
242                 (ArgsOrRets::Rets, false) => &params[i],
243                 (ArgsOrRets::Rets, true) => &params[params.len() - 1 - i],
244             };
245 
246             // Validate "purpose".
247             match &param.purpose {
248                 &ir::ArgumentPurpose::VMContext
249                 | &ir::ArgumentPurpose::Normal
250                 | &ir::ArgumentPurpose::StackLimit
251                 | &ir::ArgumentPurpose::SignatureId
252                 | &ir::ArgumentPurpose::CallerTLS
253                 | &ir::ArgumentPurpose::CalleeTLS
254                 | &ir::ArgumentPurpose::StructReturn
255                 | &ir::ArgumentPurpose::StructArgument(_) => {}
256                 _ => panic!(
257                     "Unsupported argument purpose {:?} in signature: {:?}",
258                     param.purpose, params
259                 ),
260             }
261 
262             assert!(
263                 legal_type_for_machine(param.value_type),
264                 "Invalid type for AArch64: {:?}",
265                 param.value_type
266             );
267 
268             let (rcs, reg_types) = Inst::rc_for_type(param.value_type)?;
269 
270             if let Some(param) = try_fill_baldrdash_reg(call_conv, param) {
271                 assert!(rcs[0] == RegClass::I64);
272                 ret.push(param);
273                 continue;
274             }
275 
276             if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
277                 let offset = next_stack as i64;
278                 let size = size as u64;
279                 assert!(size % 8 == 0, "StructArgument size is not properly aligned");
280                 next_stack += size;
281                 ret.push(ABIArg::StructArg {
282                     offset,
283                     size,
284                     purpose: param.purpose,
285                 });
286                 continue;
287             }
288 
            // Handle multi-register params
            //
            // See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#642parameter-passing-rules), (Section 6.4.2 Stage C).
            //
            // For arguments with an alignment of 16, we round up the register number
            // to the next even value. So we can never allocate, for example, an i128
            // to X1 and X2; we have to skip one register and use X2, X3 instead
            // (Stage C.8).
            // Note: the Apple ABI deviates a bit here. It doesn't respect Stage C.8
            // and will happily allocate an i128 to X1 and X2.
            //
            // For integer types with an alignment of 16 we also have the additional
            // restriction of passing the lower half in Xn and the upper half in Xn+1
            // (Stage C.9).
            //
            // For examples of how LLVM handles this: https://godbolt.org/z/bhd3vvEfh
            //
            // Under the Apple ABI it is unspecified whether we can split a value between
            // a register and the stack, i.e. load the lower half into x7 and the upper
            // half onto the stack. LLVM does not seem to do this, so we replicate that
            // behaviour.
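            // For illustration (an assumed signature, not from the ABI text): for
            // `fn f(a: i64, b: i128)`, `a` is passed in x0; under AAPCS64 the i128
            // skips x1 (Stage C.8) and goes in (x2, x3), whereas the Apple ABI
            // would pass it in (x1, x2).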
309             let is_multi_reg = rcs.len() >= 2;
310             if is_multi_reg {
311                 assert!(
312                     rcs.len() == 2,
313                     "Unable to handle multi reg params with more than 2 regs"
314                 );
315                 assert!(
316                     rcs == &[RegClass::I64, RegClass::I64],
317                     "Unable to handle non i64 regs"
318                 );
319 
320                 let reg_class_space = max_per_class_reg_vals - next_xreg;
321                 let reg_space = remaining_reg_vals;
322 
323                 if reg_space >= 2 && reg_class_space >= 2 {
                    // The AArch64 ABI does not allow us to start a split argument
                    // at an odd-numbered register, so we need to skip one register.
                    //
                    // TODO: the Fast ABI should probably not skip the register.
328                     if !is_apple_cc && next_xreg % 2 != 0 {
329                         next_xreg += 1;
330                     }
331 
332                     let lower_reg = xreg(next_xreg);
333                     let upper_reg = xreg(next_xreg + 1);
334 
335                     ret.push(ABIArg::Slots {
336                         slots: vec![
337                             ABIArgSlot::Reg {
338                                 reg: lower_reg.to_real_reg(),
339                                 ty: param.value_type,
340                                 extension: param.extension,
341                             },
342                             ABIArgSlot::Reg {
343                                 reg: upper_reg.to_real_reg(),
344                                 ty: param.value_type,
345                                 extension: param.extension,
346                             },
347                         ],
348                         purpose: param.purpose,
349                     });
350 
351                     next_xreg += 2;
352                     remaining_reg_vals -= 2;
353                     continue;
354                 }
355             } else {
356                 // Single Register parameters
357                 let rc = rcs[0];
358                 let next_reg = match rc {
359                     RegClass::I64 => &mut next_xreg,
360                     RegClass::V128 => &mut next_vreg,
361                     _ => panic!("Invalid register class: {:?}", rc),
362                 };
363 
364                 if *next_reg < max_per_class_reg_vals && remaining_reg_vals > 0 {
365                     let reg = match rc {
366                         RegClass::I64 => xreg(*next_reg),
367                         RegClass::V128 => vreg(*next_reg),
368                         _ => unreachable!(),
369                     };
370                     ret.push(ABIArg::reg(
371                         reg.to_real_reg(),
372                         param.value_type,
373                         param.extension,
374                         param.purpose,
375                     ));
376                     *next_reg += 1;
377                     remaining_reg_vals -= 1;
378                     continue;
379                 }
380             }
381 
382             // Spill to the stack
383 
384             // Compute the stack slot's size.
385             let size = (ty_bits(param.value_type) / 8) as u64;
386 
387             let size = if is_apple_cc
388                 || (call_conv.extends_wasmtime() && args_or_rets == ArgsOrRets::Rets)
389             {
390                 // MacOS aarch64 and Wasmtime allow stack slots with
391                 // sizes less than 8 bytes. They still need to be
392                 // properly aligned on their natural data alignment,
393                 // though.
394                 size
395             } else {
396                 // Every arg takes a minimum slot of 8 bytes. (16-byte stack
397                 // alignment happens separately after all args.)
398                 std::cmp::max(size, 8)
399             };
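            // For illustration: an I8 argument occupies a full 8-byte slot under
            // the default ABI, but only a single (naturally aligned) byte as a
            // Wasmtime return value or under the Apple ABI.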
400 
401             // Align the stack slot.
402             debug_assert!(size.is_power_of_two());
403             next_stack = align_to(next_stack, size);
404 
405             let slots = reg_types
406                 .iter()
407                 .copied()
408                 // Build the stack locations from each slot
409                 .scan(next_stack, |next_stack, ty| {
410                     let slot_offset = *next_stack as i64;
411                     *next_stack += (ty_bits(ty) / 8) as u64;
412 
413                     Some((ty, slot_offset))
414                 })
415                 .map(|(ty, offset)| ABIArgSlot::Stack {
416                     offset,
417                     ty,
418                     extension: param.extension,
419                 })
420                 .collect();
421 
422             ret.push(ABIArg::Slots {
423                 slots,
424                 purpose: param.purpose,
425             });
426 
427             next_stack += size;
428         }
429 
430         if args_or_rets == ArgsOrRets::Rets && is_baldrdash {
431             ret.reverse();
432         }
433 
434         let extra_arg = if add_ret_area_ptr {
435             debug_assert!(args_or_rets == ArgsOrRets::Args);
436             if next_xreg < max_per_class_reg_vals && remaining_reg_vals > 0 {
437                 ret.push(ABIArg::reg(
438                     xreg(next_xreg).to_real_reg(),
439                     I64,
440                     ir::ArgumentExtension::None,
441                     ir::ArgumentPurpose::Normal,
442                 ));
443             } else {
444                 ret.push(ABIArg::stack(
445                     next_stack as i64,
446                     I64,
447                     ir::ArgumentExtension::None,
448                     ir::ArgumentPurpose::Normal,
449                 ));
450                 next_stack += 8;
451             }
452             Some(ret.len() - 1)
453         } else {
454             None
455         };
456 
457         next_stack = align_to(next_stack, 16);
458 
459         // To avoid overflow issues, limit the arg/return size to something
460         // reasonable -- here, 128 MB.
461         if next_stack > STACK_ARG_RET_SIZE_LIMIT {
462             return Err(CodegenError::ImplLimitExceeded);
463         }
464 
465         Ok((ret, next_stack as i64, extra_arg))
466     }
467 
    fn fp_to_arg_offset(call_conv: isa::CallConv, flags: &settings::Flags) -> i64 {
469         if call_conv.extends_baldrdash() {
470             let num_words = flags.baldrdash_prologue_words() as i64;
471             debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words");
472             debug_assert_eq!(num_words % 2, 0, "stack must be 16-aligned");
473             num_words * 8
474         } else {
475             16 // frame pointer + return address.
476         }
477     }
478 
    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
480         Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted())
481     }
482 
    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {
484         Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted())
485     }
486 
    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
488         Inst::gen_move(to_reg, from_reg, ty)
489     }
490 
    fn gen_extend(
492         to_reg: Writable<Reg>,
493         from_reg: Reg,
494         signed: bool,
495         from_bits: u8,
496         to_bits: u8,
497     ) -> Inst {
498         assert!(from_bits < to_bits);
499         Inst::Extend {
500             rd: to_reg,
501             rn: from_reg,
502             signed,
503             from_bits,
504             to_bits,
505         }
506     }
507 
    fn gen_ret() -> Inst {
509         Inst::Ret
510     }
511 
    fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallInstVec<Inst> {
513         let imm = imm as u64;
514         let mut insts = SmallVec::new();
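        // An `Imm12` is a 12-bit unsigned immediate, optionally shifted left by 12
        // bits, so values such as 16 or 0x1000 fit directly; anything else is
        // materialized into the second scratch register below.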
515         if let Some(imm12) = Imm12::maybe_from_u64(imm) {
516             insts.push(Inst::AluRRImm12 {
517                 alu_op: ALUOp::Add64,
518                 rd: into_reg,
519                 rn: from_reg,
520                 imm12,
521             });
522         } else {
523             let scratch2 = writable_tmp2_reg();
524             assert_ne!(scratch2.to_reg(), from_reg);
525             insts.extend(Inst::load_constant(scratch2, imm.into()));
526             insts.push(Inst::AluRRRExtend {
527                 alu_op: ALUOp::Add64,
528                 rd: into_reg,
529                 rn: from_reg,
530                 rm: scratch2.to_reg(),
531                 extendop: ExtendOp::UXTX,
532             });
533         }
534         insts
535     }
536 
    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
538         let mut insts = SmallVec::new();
539         insts.push(Inst::AluRRRExtend {
540             alu_op: ALUOp::SubS64,
541             rd: writable_zero_reg(),
542             rn: stack_reg(),
543             rm: limit_reg,
544             extendop: ExtendOp::UXTX,
545         });
546         insts.push(Inst::TrapIf {
547             trap_code: ir::TrapCode::StackOverflow,
548             // Here `Lo` == "less than" when interpreting the two
549             // operands as unsigned integers.
550             kind: CondBrKind::Cond(Cond::Lo),
551         });
552         insts
553     }
554 
    fn gen_epilogue_placeholder() -> Inst {
556         Inst::EpiloguePlaceholder
557     }
558 
    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, _ty: Type) -> Inst {
560         let mem = mem.into();
561         Inst::LoadAddr { rd: into_reg, mem }
562     }
563 
    fn get_stacklimit_reg() -> Reg {
565         spilltmp_reg()
566     }
567 
    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
569         let mem = AMode::RegOffset(base, offset as i64, ty);
570         Inst::gen_load(into_reg, mem, ty, MemFlags::trusted())
571     }
572 
    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
574         let mem = AMode::RegOffset(base, offset as i64, ty);
575         Inst::gen_store(mem, from_reg, ty, MemFlags::trusted())
576     }
577 
    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> {
579         if amount == 0 {
580             return SmallVec::new();
581         }
582 
583         let (amount, is_sub) = if amount > 0 {
584             (amount as u64, false)
585         } else {
586             (-amount as u64, true)
587         };
588 
589         let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 };
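        // For illustration: an adjustment of -32 emits a single `sub sp, sp, #32`,
        // while an amount such as 0x12345 fits neither Imm12 form and is
        // materialized into the spill temporary first, then added or subtracted
        // with an extended-register ALU op.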
590 
591         let mut ret = SmallVec::new();
592         if let Some(imm12) = Imm12::maybe_from_u64(amount) {
593             let adj_inst = Inst::AluRRImm12 {
594                 alu_op,
595                 rd: writable_stack_reg(),
596                 rn: stack_reg(),
597                 imm12,
598             };
599             ret.push(adj_inst);
600         } else {
601             let tmp = writable_spilltmp_reg();
602             let const_inst = Inst::load_constant(tmp, amount);
603             let adj_inst = Inst::AluRRRExtend {
604                 alu_op,
605                 rd: writable_stack_reg(),
606                 rn: stack_reg(),
607                 rm: tmp.to_reg(),
608                 extendop: ExtendOp::UXTX,
609             };
610             ret.extend(const_inst);
611             ret.push(adj_inst);
612         }
613         ret
614     }
615 
    fn gen_nominal_sp_adj(offset: i32) -> Inst {
617         Inst::VirtualSPOffsetAdj {
618             offset: offset as i64,
619         }
620     }
621 
    fn gen_prologue_frame_setup(flags: &settings::Flags) -> SmallInstVec<Inst> {
623         let mut insts = SmallVec::new();
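        // The emitted prologue is, schematically:
        //   stp x29, x30, [sp, #-16]!   ; save FP and LR
        //   mov x29, sp                 ; establish the new frame pointer
        // with unwind pseudo-instructions interleaved when unwind info is enabled.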
624 
625         if flags.unwind_info() {
626             insts.push(Inst::Unwind {
627                 inst: UnwindInst::Aarch64SetPointerAuth {
628                     return_addresses: false,
629                 },
630             });
631         }
632 
633         // stp fp (x29), lr (x30), [sp, #-16]!
634         insts.push(Inst::StoreP64 {
635             rt: fp_reg(),
636             rt2: link_reg(),
637             mem: PairAMode::PreIndexed(
638                 writable_stack_reg(),
639                 SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
640             ),
641             flags: MemFlags::trusted(),
642         });
643 
644         if flags.unwind_info() {
645             insts.push(Inst::Unwind {
646                 inst: UnwindInst::PushFrameRegs {
647                     offset_upward_to_caller_sp: 16, // FP, LR
648                 },
649             });
650         }
651 
        // mov fp (x29), sp. This uses the `ADD rd, rn, #0` form of `MOV` because
        // the usual encoding (`ORR`) does not work with SP.
654         insts.push(Inst::AluRRImm12 {
655             alu_op: ALUOp::Add64,
656             rd: writable_fp_reg(),
657             rn: stack_reg(),
658             imm12: Imm12 {
659                 bits: 0,
660                 shift12: false,
661             },
662         });
663         insts
664     }
665 
    fn gen_epilogue_frame_restore(_: &settings::Flags) -> SmallInstVec<Inst> {
667         let mut insts = SmallVec::new();
668 
669         // N.B.: sp is already adjusted to the appropriate place by the
670         // clobber-restore code (which also frees the fixed frame). Hence, there
671         // is no need for the usual `mov sp, fp` here.
672 
673         // `ldp fp, lr, [sp], #16`
674         insts.push(Inst::LoadP64 {
675             rt: writable_fp_reg(),
676             rt2: writable_link_reg(),
677             mem: PairAMode::PostIndexed(
678                 writable_stack_reg(),
679                 SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
680             ),
681             flags: MemFlags::trusted(),
682         });
683         insts
684     }
685 
    fn gen_probestack(_: u32) -> SmallInstVec<Self::I> {
687         // TODO: implement if we ever require stack probes on an AArch64 host
688         // (unlikely unless Lucet is ported)
689         smallvec![]
690     }
691 
692     // Returns stack bytes used as well as instructions. Does not adjust
693     // nominal SP offset; abi_impl generic code will do that.
    fn gen_clobber_save(
695         call_conv: isa::CallConv,
696         flags: &settings::Flags,
697         clobbers: &Set<Writable<RealReg>>,
698         fixed_frame_storage_size: u32,
699         _outgoing_args_size: u32,
700     ) -> (u64, SmallVec<[Inst; 16]>) {
701         let mut insts = SmallVec::new();
702         let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
703 
704         let (int_save_bytes, vec_save_bytes) =
705             saved_reg_stack_size(call_conv, &clobbered_int, &clobbered_vec);
706         let total_save_bytes = int_save_bytes + vec_save_bytes;
707         let clobber_size = total_save_bytes as i32;
708 
709         if flags.unwind_info() {
710             // The *unwind* frame (but not the actual frame) starts at the
711             // clobbers, just below the saved FP/LR pair.
712             insts.push(Inst::Unwind {
713                 inst: UnwindInst::DefineNewFrame {
714                     offset_downward_to_clobbers: clobber_size as u32,
715                     offset_upward_to_caller_sp: 16, // FP, LR
716                 },
717             });
718         }
719 
        // We use pre-indexed addressing modes here, rather than the possibly
        // more efficient "subtract sp once then use fixed offsets" scheme,
        // because (i) we cannot necessarily guarantee that the offset of a
        // clobber-save slot will be within a SImm7Scaled (+504-byte) offset
        // range of the whole frame including other slots, (ii) it is more
        // complex to conditionally generate a two-stage SP adjustment (clobbers
        // then fixed frame) otherwise, and (iii) we generally just want to
        // maintain simplicity here for maintainability. Because clobbers are at
        // the top of the frame, just below FP, all that is necessary is to use
        // the pre-indexed "push" `[sp, #-16]!` addressing mode.
        //
        // `clobber_offset` tracks the offset above the start of the clobbers
        // for unwind-info purposes.
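        // For illustration (an assumed clobber set): with x19, x20, x21 and v8
        // clobbered under a non-Baldrdash ABI, the save sequence is roughly
        //   str x21, [sp, #-16]!
        //   stp x19, x20, [sp, #-16]!
        //   str d8, [sp, #-16]!
        // (pushing the odd remainder first keeps the saved registers at
        // increasing offsets in register order).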
733         let mut clobber_offset = clobber_size as u32;
734         let clobber_offset_change = 16;
735         let iter = clobbered_int.chunks_exact(2);
736 
737         if let [rd] = iter.remainder() {
738             let rd = rd.to_reg().to_reg();
739 
740             debug_assert_eq!(rd.get_class(), RegClass::I64);
741             // str rd, [sp, #-16]!
742             insts.push(Inst::Store64 {
743                 rd,
744                 mem: AMode::PreIndexed(
745                     writable_stack_reg(),
746                     SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
747                 ),
748                 flags: MemFlags::trusted(),
749             });
750 
751             if flags.unwind_info() {
752                 clobber_offset -= clobber_offset_change as u32;
753                 insts.push(Inst::Unwind {
754                     inst: UnwindInst::SaveReg {
755                         clobber_offset,
756                         reg: rd.to_real_reg(),
757                     },
758                 });
759             }
760         }
761 
762         let mut iter = iter.rev();
763 
764         while let Some([rt, rt2]) = iter.next() {
765             // .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg
766             let rt = rt.to_reg().to_reg();
767             let rt2 = rt2.to_reg().to_reg();
768 
769             debug_assert!(rt.get_class() == RegClass::I64);
770             debug_assert!(rt2.get_class() == RegClass::I64);
771 
772             // stp rt, rt2, [sp, #-16]!
773             insts.push(Inst::StoreP64 {
774                 rt,
775                 rt2,
776                 mem: PairAMode::PreIndexed(
777                     writable_stack_reg(),
778                     SImm7Scaled::maybe_from_i64(-clobber_offset_change, types::I64).unwrap(),
779                 ),
780                 flags: MemFlags::trusted(),
781             });
782 
783             if flags.unwind_info() {
784                 clobber_offset -= clobber_offset_change as u32;
785                 insts.push(Inst::Unwind {
786                     inst: UnwindInst::SaveReg {
787                         clobber_offset,
788                         reg: rt.to_real_reg(),
789                     },
790                 });
791                 insts.push(Inst::Unwind {
792                     inst: UnwindInst::SaveReg {
793                         clobber_offset: clobber_offset + (clobber_offset_change / 2) as u32,
794                         reg: rt2.to_real_reg(),
795                     },
796                 });
797             }
798         }
799 
800         let store_vec_reg = |rd| {
801             if call_conv.extends_baldrdash() {
802                 Inst::FpuStore128 {
803                     rd,
804                     mem: AMode::PreIndexed(
805                         writable_stack_reg(),
806                         SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
807                     ),
808                     flags: MemFlags::trusted(),
809                 }
810             } else {
811                 Inst::FpuStore64 {
812                     rd,
813                     mem: AMode::PreIndexed(
814                         writable_stack_reg(),
815                         SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
816                     ),
817                     flags: MemFlags::trusted(),
818                 }
819             }
820         };
821         let iter = clobbered_vec.chunks_exact(2);
822 
823         if let [rd] = iter.remainder() {
824             let rd = rd.to_reg().to_reg();
825 
826             debug_assert_eq!(rd.get_class(), RegClass::V128);
827             insts.push(store_vec_reg(rd));
828 
829             if flags.unwind_info() {
830                 clobber_offset -= clobber_offset_change as u32;
831                 insts.push(Inst::Unwind {
832                     inst: UnwindInst::SaveReg {
833                         clobber_offset,
834                         reg: rd.to_real_reg(),
835                     },
836                 });
837             }
838         }
839 
840         let store_vec_reg_pair = |rt, rt2| {
841             if call_conv.extends_baldrdash() {
842                 let clobber_offset_change = 32;
843 
844                 (
845                     Inst::FpuStoreP128 {
846                         rt,
847                         rt2,
848                         mem: PairAMode::PreIndexed(
849                             writable_stack_reg(),
850                             SImm7Scaled::maybe_from_i64(-clobber_offset_change, I8X16).unwrap(),
851                         ),
852                         flags: MemFlags::trusted(),
853                     },
854                     clobber_offset_change as u32,
855                 )
856             } else {
857                 let clobber_offset_change = 16;
858 
859                 (
860                     Inst::FpuStoreP64 {
861                         rt,
862                         rt2,
863                         mem: PairAMode::PreIndexed(
864                             writable_stack_reg(),
865                             SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64).unwrap(),
866                         ),
867                         flags: MemFlags::trusted(),
868                     },
869                     clobber_offset_change as u32,
870                 )
871             }
872         };
873         let mut iter = iter.rev();
874 
875         while let Some([rt, rt2]) = iter.next() {
876             let rt = rt.to_reg().to_reg();
877             let rt2 = rt2.to_reg().to_reg();
878 
879             debug_assert_eq!(rt.get_class(), RegClass::V128);
880             debug_assert_eq!(rt2.get_class(), RegClass::V128);
881 
882             let (inst, clobber_offset_change) = store_vec_reg_pair(rt, rt2);
883 
884             insts.push(inst);
885 
886             if flags.unwind_info() {
887                 clobber_offset -= clobber_offset_change;
888                 insts.push(Inst::Unwind {
889                     inst: UnwindInst::SaveReg {
890                         clobber_offset,
891                         reg: rt.to_real_reg(),
892                     },
893                 });
894                 insts.push(Inst::Unwind {
895                     inst: UnwindInst::SaveReg {
896                         clobber_offset: clobber_offset + clobber_offset_change / 2,
897                         reg: rt2.to_real_reg(),
898                     },
899                 });
900             }
901         }
902 
903         // Allocate the fixed frame below the clobbers if necessary.
904         if fixed_frame_storage_size > 0 {
905             insts.extend(Self::gen_sp_reg_adjust(-(fixed_frame_storage_size as i32)));
906         }
907 
908         (total_save_bytes as u64, insts)
909     }
910 
    fn gen_clobber_restore(
912         call_conv: isa::CallConv,
913         flags: &settings::Flags,
914         clobbers: &Set<Writable<RealReg>>,
915         fixed_frame_storage_size: u32,
916         _outgoing_args_size: u32,
917     ) -> SmallVec<[Inst; 16]> {
918         let mut insts = SmallVec::new();
919         let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
920 
921         // Free the fixed frame if necessary.
922         if fixed_frame_storage_size > 0 {
923             insts.extend(Self::gen_sp_reg_adjust(fixed_frame_storage_size as i32));
924         }
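        // Restores mirror the saves in `gen_clobber_save`: vector registers were
        // pushed last, so they are popped first, followed by the integer
        // registers, using post-indexed "pop" addressing modes.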
925 
926         let load_vec_reg = |rd| {
927             if call_conv.extends_baldrdash() {
928                 Inst::FpuLoad128 {
929                     rd,
930                     mem: AMode::PostIndexed(
931                         writable_stack_reg(),
932                         SImm9::maybe_from_i64(16).unwrap(),
933                     ),
934                     flags: MemFlags::trusted(),
935                 }
936             } else {
937                 Inst::FpuLoad64 {
938                     rd,
939                     mem: AMode::PostIndexed(
940                         writable_stack_reg(),
941                         SImm9::maybe_from_i64(16).unwrap(),
942                     ),
943                     flags: MemFlags::trusted(),
944                 }
945             }
946         };
947         let load_vec_reg_pair = |rt, rt2| {
948             if call_conv.extends_baldrdash() {
949                 Inst::FpuLoadP128 {
950                     rt,
951                     rt2,
952                     mem: PairAMode::PostIndexed(
953                         writable_stack_reg(),
954                         SImm7Scaled::maybe_from_i64(32, I8X16).unwrap(),
955                     ),
956                     flags: MemFlags::trusted(),
957                 }
958             } else {
959                 Inst::FpuLoadP64 {
960                     rt,
961                     rt2,
962                     mem: PairAMode::PostIndexed(
963                         writable_stack_reg(),
964                         SImm7Scaled::maybe_from_i64(16, F64).unwrap(),
965                     ),
966                     flags: MemFlags::trusted(),
967                 }
968             }
969         };
970 
971         let mut iter = clobbered_vec.chunks_exact(2);
972 
973         while let Some([rt, rt2]) = iter.next() {
974             let rt = rt.map(|r| r.to_reg());
975             let rt2 = rt2.map(|r| r.to_reg());
976 
977             debug_assert_eq!(rt.to_reg().get_class(), RegClass::V128);
978             debug_assert_eq!(rt2.to_reg().get_class(), RegClass::V128);
979             insts.push(load_vec_reg_pair(rt, rt2));
980         }
981 
982         debug_assert!(iter.remainder().len() <= 1);
983 
984         if let [rd] = iter.remainder() {
985             let rd = rd.map(|r| r.to_reg());
986 
987             debug_assert_eq!(rd.to_reg().get_class(), RegClass::V128);
988             insts.push(load_vec_reg(rd));
989         }
990 
991         let mut iter = clobbered_int.chunks_exact(2);
992 
993         while let Some([rt, rt2]) = iter.next() {
994             let rt = rt.map(|r| r.to_reg());
995             let rt2 = rt2.map(|r| r.to_reg());
996 
997             debug_assert_eq!(rt.to_reg().get_class(), RegClass::I64);
998             debug_assert_eq!(rt2.to_reg().get_class(), RegClass::I64);
999             // ldp rt, rt2, [sp], #16
1000             insts.push(Inst::LoadP64 {
1001                 rt,
1002                 rt2,
1003                 mem: PairAMode::PostIndexed(
1004                     writable_stack_reg(),
1005                     SImm7Scaled::maybe_from_i64(16, I64).unwrap(),
1006                 ),
1007                 flags: MemFlags::trusted(),
1008             });
1009         }
1010 
1011         debug_assert!(iter.remainder().len() <= 1);
1012 
1013         if let [rd] = iter.remainder() {
1014             let rd = rd.map(|r| r.to_reg());
1015 
1016             debug_assert_eq!(rd.to_reg().get_class(), RegClass::I64);
1017             // ldr rd, [sp], #16
1018             insts.push(Inst::ULoad64 {
1019                 rd,
1020                 mem: AMode::PostIndexed(writable_stack_reg(), SImm9::maybe_from_i64(16).unwrap()),
1021                 flags: MemFlags::trusted(),
1022             });
1023         }
1024 
1025         // If this is Baldrdash-2020, restore the callee (i.e., our) TLS
1026         // register. We may have allocated it for something else and clobbered
1027         // it, but the ABI expects us to leave the TLS register unchanged.
1028         if call_conv == isa::CallConv::Baldrdash2020 {
1029             let off = BALDRDASH_CALLEE_TLS_OFFSET + Self::fp_to_arg_offset(call_conv, flags);
1030             insts.push(Inst::gen_load(
1031                 writable_xreg(BALDRDASH_TLS_REG),
1032                 AMode::UnsignedOffset(fp_reg(), UImm12Scaled::maybe_from_i64(off, I64).unwrap()),
1033                 I64,
1034                 MemFlags::trusted(),
1035             ));
1036         }
1037 
1038         insts
1039     }
1040 
    fn gen_call(
1042         dest: &CallDest,
1043         uses: Vec<Reg>,
1044         defs: Vec<Writable<Reg>>,
1045         opcode: ir::Opcode,
1046         tmp: Writable<Reg>,
1047         callee_conv: isa::CallConv,
1048         caller_conv: isa::CallConv,
1049     ) -> SmallVec<[(InstIsSafepoint, Inst); 2]> {
1050         let mut insts = SmallVec::new();
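        // Near external names become a single direct call; far names are first
        // materialized into `tmp` with `LoadExtName` and then called indirectly,
        // as are register destinations.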
1051         match &dest {
1052             &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push((
1053                 InstIsSafepoint::Yes,
1054                 Inst::Call {
1055                     info: Box::new(CallInfo {
1056                         dest: name.clone(),
1057                         uses,
1058                         defs,
1059                         opcode,
1060                         caller_callconv: caller_conv,
1061                         callee_callconv: callee_conv,
1062                     }),
1063                 },
1064             )),
1065             &CallDest::ExtName(ref name, RelocDistance::Far) => {
1066                 insts.push((
1067                     InstIsSafepoint::No,
1068                     Inst::LoadExtName {
1069                         rd: tmp,
1070                         name: Box::new(name.clone()),
1071                         offset: 0,
1072                     },
1073                 ));
1074                 insts.push((
1075                     InstIsSafepoint::Yes,
1076                     Inst::CallInd {
1077                         info: Box::new(CallIndInfo {
1078                             rn: tmp.to_reg(),
1079                             uses,
1080                             defs,
1081                             opcode,
1082                             caller_callconv: caller_conv,
1083                             callee_callconv: callee_conv,
1084                         }),
1085                     },
1086                 ));
1087             }
1088             &CallDest::Reg(reg) => insts.push((
1089                 InstIsSafepoint::Yes,
1090                 Inst::CallInd {
1091                     info: Box::new(CallIndInfo {
1092                         rn: *reg,
1093                         uses,
1094                         defs,
1095                         opcode,
1096                         caller_callconv: caller_conv,
1097                         callee_callconv: callee_conv,
1098                     }),
1099                 },
1100             )),
1101         }
1102 
1103         insts
1104     }
1105 
    fn gen_memcpy(
1107         call_conv: isa::CallConv,
1108         dst: Reg,
1109         src: Reg,
1110         size: usize,
1111     ) -> SmallVec<[Self::I; 8]> {
1112         // Baldrdash should not use struct args.
1113         assert!(!call_conv.extends_baldrdash());
1114         let mut insts = SmallVec::new();
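        // This emits the moral equivalent of `memcpy(dst, src, size)`: the three
        // arguments are moved into x0-x2 and a call to the `Memcpy` libcall is
        // emitted, clobbering the usual caller-saved registers.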
1115         let arg0 = writable_xreg(0);
1116         let arg1 = writable_xreg(1);
1117         let arg2 = writable_xreg(2);
1118         insts.push(Inst::gen_move(arg0, dst, I64));
1119         insts.push(Inst::gen_move(arg1, src, I64));
1120         insts.extend(Inst::load_constant(arg2, size as u64).into_iter());
1121         insts.push(Inst::Call {
1122             info: Box::new(CallInfo {
1123                 dest: ExternalName::LibCall(LibCall::Memcpy),
1124                 uses: vec![arg0.to_reg(), arg1.to_reg(), arg2.to_reg()],
1125                 defs: Self::get_regs_clobbered_by_call(call_conv),
1126                 opcode: Opcode::Call,
1127                 caller_callconv: call_conv,
1128                 callee_callconv: call_conv,
1129             }),
1130         });
1131         insts
1132     }
1133 
    fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
1135         // We allocate in terms of 8-byte slots.
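        // For example, an I64 or F64 value takes one slot, while a full 128-bit
        // vector such as I8X16 takes two.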
1136         match (rc, ty) {
1137             (RegClass::I64, _) => 1,
1138             (RegClass::V128, F32) | (RegClass::V128, F64) => 1,
1139             (RegClass::V128, _) => 2,
1140             _ => panic!("Unexpected register class!"),
1141         }
1142     }
1143 
1144     /// Get the current virtual-SP offset from an instruction-emission state.
    fn get_virtual_sp_offset_from_state(s: &EmitState) -> i64 {
1146         s.virtual_sp_offset
1147     }
1148 
1149     /// Get the nominal-SP-to-FP offset from an instruction-emission state.
    fn get_nominal_sp_to_fp(s: &EmitState) -> i64 {
1151         s.nominal_sp_to_fp
1152     }
1153 
    fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>> {
1155         let mut caller_saved = Vec::new();
1156         for i in 0..29 {
1157             let x = writable_xreg(i);
1158             if is_reg_clobbered_by_call(call_conv_of_callee, x.to_reg().to_real_reg()) {
1159                 caller_saved.push(x);
1160             }
1161         }
1162         for i in 0..32 {
1163             let v = writable_vreg(i);
1164             if is_reg_clobbered_by_call(call_conv_of_callee, v.to_reg().to_real_reg()) {
1165                 caller_saved.push(v);
1166             }
1167         }
1168         caller_saved
1169     }
1170 
    fn get_ext_mode(
1172         call_conv: isa::CallConv,
1173         specified: ir::ArgumentExtension,
1174     ) -> ir::ArgumentExtension {
1175         if call_conv.extends_baldrdash() {
1176             // Baldrdash (SpiderMonkey) always extends args and return values to the full register.
1177             specified
1178         } else {
1179             // No other supported ABI on AArch64 does so.
1180             ir::ArgumentExtension::None
1181         }
1182     }
1183 }
1184 
1185 /// Is this type supposed to be seen on this machine? E.g. references of the
1186 /// wrong width are invalid.
fn legal_type_for_machine(ty: Type) -> bool {
1188     match ty {
1189         R32 => false,
1190         _ => true,
1191     }
1192 }
1193 
1194 /// Is the given register saved in the prologue if clobbered, i.e., is it a
1195 /// callee-save?
fn is_reg_saved_in_prologue(call_conv: isa::CallConv, r: RealReg) -> bool {
1197     if call_conv.extends_baldrdash() {
1198         match r.get_class() {
1199             RegClass::I64 => {
1200                 let enc = r.get_hw_encoding();
1201                 return BALDRDASH_JIT_CALLEE_SAVED_GPR[enc];
1202             }
1203             RegClass::V128 => {
1204                 let enc = r.get_hw_encoding();
1205                 return BALDRDASH_JIT_CALLEE_SAVED_FPU[enc];
1206             }
1207             _ => unimplemented!("baldrdash callee saved on non-i64 reg classes"),
1208         };
1209     }
1210 
1211     match r.get_class() {
1212         RegClass::I64 => {
1213             // x19 - x28 inclusive are callee-saves.
1214             r.get_hw_encoding() >= 19 && r.get_hw_encoding() <= 28
1215         }
1216         RegClass::V128 => {
1217             // v8 - v15 inclusive are callee-saves.
1218             r.get_hw_encoding() >= 8 && r.get_hw_encoding() <= 15
1219         }
1220         _ => panic!("Unexpected RegClass"),
1221     }
1222 }
1223 
1224 /// Return the set of all integer and vector registers that must be saved in the
1225 /// prologue and restored in the epilogue, given the set of all registers
1226 /// written by the function's body.
fn get_regs_saved_in_prologue(
1228     call_conv: isa::CallConv,
1229     regs: &Set<Writable<RealReg>>,
1230 ) -> (Vec<Writable<RealReg>>, Vec<Writable<RealReg>>) {
1231     let mut int_saves = vec![];
1232     let mut vec_saves = vec![];
1233     for &reg in regs.iter() {
1234         if is_reg_saved_in_prologue(call_conv, reg.to_reg()) {
1235             match reg.to_reg().get_class() {
1236                 RegClass::I64 => int_saves.push(reg),
1237                 RegClass::V128 => vec_saves.push(reg),
1238                 _ => panic!("Unexpected RegClass"),
1239             }
1240         }
1241     }
1242     // Sort registers for deterministic code output. We can do an unstable sort because the
1243     // registers will be unique (there are no dups).
1244     int_saves.sort_unstable_by_key(|r| r.to_reg().get_index());
1245     vec_saves.sort_unstable_by_key(|r| r.to_reg().get_index());
1246     (int_saves, vec_saves)
1247 }
1248 
fn is_reg_clobbered_by_call(call_conv_of_callee: isa::CallConv, r: RealReg) -> bool {
1250     if call_conv_of_callee.extends_baldrdash() {
1251         match r.get_class() {
1252             RegClass::I64 => {
1253                 let enc = r.get_hw_encoding();
1254                 if !BALDRDASH_JIT_CALLEE_SAVED_GPR[enc] {
1255                     return true;
1256                 }
                // Otherwise, fall through to the native ABI's caller-saved rules.
1258             }
1259             RegClass::V128 => {
1260                 let enc = r.get_hw_encoding();
1261                 if !BALDRDASH_JIT_CALLEE_SAVED_FPU[enc] {
1262                     return true;
1263                 }
                // Otherwise, fall through to the native ABI's caller-saved rules.
1265             }
1266             _ => unimplemented!("baldrdash callee saved on non-i64 reg classes"),
1267         };
1268     }
1269 
1270     match r.get_class() {
1271         RegClass::I64 => {
1272             // x0 - x17 inclusive are caller-saves.
1273             r.get_hw_encoding() <= 17
1274         }
1275         RegClass::V128 => {
1276             // v0 - v7 inclusive and v16 - v31 inclusive are caller-saves. The
1277             // upper 64 bits of v8 - v15 inclusive are also caller-saves.
1278             // However, because we cannot currently represent partial registers
1279             // to regalloc.rs, we indicate here that every vector register is
1280             // caller-save. Because this function is used at *callsites*,
1281             // approximating in this direction (save more than necessary) is
1282             // conservative and thus safe.
1283             //
1284             // Note that we set the 'not included in clobber set' flag in the
1285             // regalloc.rs API when a call instruction's callee has the same ABI
1286             // as the caller (the current function body); this is safe (anything
1287             // clobbered by callee can be clobbered by caller as well) and
1288             // avoids unnecessary saves of v8-v15 in the prologue even though we
1289             // include them as defs here.
1290             true
1291         }
1292         _ => panic!("Unexpected RegClass"),
1293     }
1294 }
1295