//! AArch64 ISA: binary code emission.

use crate::binemit::{CodeOffset, Reloc, StackMap};
use crate::ir::constant::ConstantData;
use crate::ir::types::*;
use crate::ir::{LibCall, MemFlags, TrapCode};
use crate::isa::aarch64::inst::*;
use crate::machinst::ty_bits;

use regalloc::{Reg, RegClass, Writable};

use core::convert::TryFrom;

/// Memory label/reference finalization: convert a MemLabel to a PC-relative
/// offset, possibly emitting relocation(s) as necessary.
pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 {
    match label {
        &MemLabel::PCRel(rel) => rel,
    }
}

/// Memory addressing mode finalization: convert "special" modes (e.g.,
/// generic arbitrary stack offset) into real addressing modes, possibly by
/// emitting some helper instructions that come immediately before the use
/// of this amode.
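///
/// For example (a sketch, not a doctest; the types involved are internal to
/// this module), an `FPOffset` whose offset fits in a signed 9-bit immediate
/// needs no helper instructions:
///
/// ```ignore
/// // FP + 16 fits in a SImm9, so no instructions are emitted and the
/// // amode becomes `AMode::Unscaled(fp_reg(), simm9)`.
/// let (insts, amode) = mem_finalize(0, &AMode::FPOffset(16, I64), &state);
/// assert!(insts.is_empty());
/// ```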
pub fn mem_finalize(
    insn_off: CodeOffset,
    mem: &AMode,
    state: &EmitState,
) -> (SmallVec<[Inst; 4]>, AMode) {
    match mem {
        &AMode::RegOffset(_, off, ty)
        | &AMode::SPOffset(off, ty)
        | &AMode::FPOffset(off, ty)
        | &AMode::NominalSPOffset(off, ty) => {
            let basereg = match mem {
                &AMode::RegOffset(reg, _, _) => reg,
                &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => stack_reg(),
                &AMode::FPOffset(..) => fp_reg(),
                _ => unreachable!(),
            };
            let adj = match mem {
                &AMode::NominalSPOffset(..) => {
                    log::trace!(
                        "mem_finalize: nominal SP offset {} + adj {} -> {}",
                        off,
                        state.virtual_sp_offset,
                        off + state.virtual_sp_offset
                    );
                    state.virtual_sp_offset
                }
                _ => 0,
            };
            let off = off + adj;

            if let Some(simm9) = SImm9::maybe_from_i64(off) {
                let mem = AMode::Unscaled(basereg, simm9);
                (smallvec![], mem)
            } else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(off, ty) {
                let mem = AMode::UnsignedOffset(basereg, uimm12s);
                (smallvec![], mem)
            } else {
                let tmp = writable_spilltmp_reg();
                let mut const_insts = Inst::load_constant(tmp, off as u64);
                // N.B.: we must use AluRRRExtend because AluRRR uses the "shifted register" form
                // (AluRRRShift) instead, which interprets register 31 as the zero reg, not SP. SP
                // is a valid base (for SPOffset) which we must handle here.
                // Also, SP needs to be the first arg, not second.
                let add_inst = Inst::AluRRRExtend {
                    alu_op: ALUOp::Add64,
                    rd: tmp,
                    rn: basereg,
                    rm: tmp.to_reg(),
                    extendop: ExtendOp::UXTX,
                };
                const_insts.push(add_inst);
                (const_insts, AMode::reg(tmp.to_reg()))
            }
        }

        &AMode::Label(ref label) => {
            let off = memlabel_finalize(insn_off, label);
            (smallvec![], AMode::Label(MemLabel::PCRel(off)))
        }

        _ => (smallvec![], mem.clone()),
    }
}

/// Helper: get a ConstantData from a u64.
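///
/// The bytes are stored little-endian: for example, `u64_constant(0x0102_0304)`
/// yields the byte sequence `[0x04, 0x03, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00]`.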
pub fn u64_constant(bits: u64) -> ConstantData {
    let data = bits.to_le_bytes();
    ConstantData::from(&data[..])
}

//=============================================================================
// Instructions and subcomponents: emission

fn machreg_to_gpr(m: Reg) -> u32 {
    assert_eq!(m.get_class(), RegClass::I64);
    u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
}

fn machreg_to_vec(m: Reg) -> u32 {
    assert_eq!(m.get_class(), RegClass::V128);
    u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
}

fn machreg_to_gpr_or_vec(m: Reg) -> u32 {
    u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
}

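/// A worked example (for reference): `ADD x0, x1, x2` (64-bit, no shift)
/// passes `bits_31_21 = 0b10001011_000` and `bits_15_10 = 0`, producing the
/// instruction word 0x8B02_0020.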
fn enc_arith_rrr(bits_31_21: u32, bits_15_10: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    (bits_31_21 << 21)
        | (bits_15_10 << 10)
        | machreg_to_gpr(rd.to_reg())
        | (machreg_to_gpr(rn) << 5)
        | (machreg_to_gpr(rm) << 16)
}

fn enc_arith_rr_imm12(
    bits_31_24: u32,
    immshift: u32,
    imm12: u32,
    rn: Reg,
    rd: Writable<Reg>,
) -> u32 {
    (bits_31_24 << 24)
        | (immshift << 22)
        | (imm12 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}

fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
}

fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {
    (top11 << 21)
        | (machreg_to_gpr(rm) << 16)
        | (bit15 << 15)
        | (machreg_to_gpr(ra) << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}

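/// A worked example (for reference): an unconditional `B` two instructions
/// forward (`b #8`) uses `op_31_26 = 0b000101` with a word offset of 2:
/// (0b000101 << 26) | 2 = 0x1400_0002. Note the offset is in 32-bit units.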
fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 {
    assert!(off_26_0 < (1 << 26));
    (op_31_26 << 26) | off_26_0
}

fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 {
    assert!(off_18_0 < (1 << 19));
    (op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg)
}

fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {
    assert!(off_18_0 < (1 << 19));
    assert!(cond < (1 << 4));
    (op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond
}

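/// A worked example (for reference): a `CondBrKind::Cond(Cond::Eq)` branch
/// two instructions forward encodes via `enc_cbr` as
/// (0b01010100 << 24) | (2 << 5) | 0b0000 = 0x5400_0040, i.e. `b.eq #8`.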
fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
    match kind {
        CondBrKind::Zero(reg) => enc_cmpbr(0b1_011010_0, taken.as_offset19_or_zero(), reg),
        CondBrKind::NotZero(reg) => enc_cmpbr(0b1_011010_1, taken.as_offset19_or_zero(), reg),
        CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),
    }
}

const MOVE_WIDE_FIXED: u32 = 0x12800000;

#[repr(u32)]
enum MoveWideOpcode {
    MOVN = 0b00,
    MOVZ = 0b10,
    MOVK = 0b11,
}

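/// A worked example (for reference): `MOVZ x0, #1` (64-bit, shift 0) is
/// MOVE_WIDE_FIXED with the sf bit and MOVZ opcode ORed in:
/// 0x12800000 | (1 << 31) | (0b10 << 29) | (1 << 5) = 0xD280_0020.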
fn enc_move_wide(
    op: MoveWideOpcode,
    rd: Writable<Reg>,
    imm: MoveWideConst,
    size: OperandSize,
) -> u32 {
    assert!(imm.shift <= 0b11);
    MOVE_WIDE_FIXED
        | size.sf_bit() << 31
        | (op as u32) << 29
        | u32::from(imm.shift) << 21
        | u32::from(imm.bits) << 5
        | machreg_to_gpr(rd.to_reg())
}

fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 {
    (op_31_22 << 22)
        | (simm7.bits() << 15)
        | (machreg_to_gpr(rt2) << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}

fn enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 {
    (op_31_22 << 22)
        | (simm9.bits() << 12)
        | (op_11_10 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 {
    (op_31_22 << 22)
        | (0b1 << 24)
        | (uimm12.bits() << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

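/// A worked example (a sketch, for reference): `LDR x0, [x1, x2]` (64-bit,
/// register offset, no extend) uses `op_31_22 = 0b1111100001`,
/// `extend_bits = 0b011` (LSL) and `s_bit = 0`, producing 0xF862_6820.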
fn enc_ldst_reg(
    op_31_22: u32,
    rn: Reg,
    rm: Reg,
    s_bit: bool,
    extendop: Option<ExtendOp>,
    rd: Reg,
) -> u32 {
    let s_bit = if s_bit { 1 } else { 0 };
    let extend_bits = match extendop {
        Some(ExtendOp::UXTW) => 0b010,
        Some(ExtendOp::SXTW) => 0b110,
        Some(ExtendOp::SXTX) => 0b111,
        None => 0b011, // LSL
        _ => panic!("bad extend mode for ld/st AMode"),
    };
    (op_31_22 << 22)
        | (1 << 21)
        | (machreg_to_gpr(rm) << 16)
        | (extend_bits << 13)
        | (s_bit << 12)
        | (0b10 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 {
    (op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(size & 0b11, size);
    0b0_0_0011010_10_00000_110_0_00_00000_00000
        | q << 30
        | size << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_vec(rt.to_reg())
}

fn enc_ldst_vec_pair(
    opc: u32,
    amode: u32,
    is_load: bool,
    simm7: SImm7Scaled,
    rn: Reg,
    rt: Reg,
    rt2: Reg,
) -> u32 {
    debug_assert_eq!(opc & 0b11, opc);
    debug_assert_eq!(amode & 0b11, amode);

    0b00_10110_00_0_0000000_00000_00000_00000
        | opc << 30
        | amode << 23
        | (is_load as u32) << 22
        | simm7.bits() << 15
        | machreg_to_vec(rt2) << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_vec(rt)
}

fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (top11 << 21)
        | (machreg_to_vec(rm) << 16)
        | (bit15_10 << 10)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rrr_long(
    q: u32,
    u: u32,
    size: u32,
    bit14: u32,
    rm: Reg,
    rn: Reg,
    rd: Writable<Reg>,
) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(bit14 & 0b1, bit14);

    0b0_0_0_01110_00_1_00000_100000_00000_00000
        | q << 30
        | u << 29
        | size << 22
        | bit14 << 14
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (0b01011010110 << 21)
        | size << 31
        | opcode2 << 16
        | opcode1 << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rd.to_reg())
}

fn enc_br(rn: Reg) -> u32 {
    0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)
}

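/// A worked example (for reference): `ADR x0, #4` splits the offset into
/// `immlo = 0` and `immhi = 1`, producing
/// (0b00010000 << 24) | (1 << 5) = 0x1000_0020.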
fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 {
    let off = u32::try_from(off).unwrap();
    let immlo = off & 3;
    let immhi = (off >> 2) & ((1 << 19) - 1);
    (0b00010000 << 24) | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())
}

fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond) -> u32 {
    0b100_11010100_00000_0000_00_00000_00000
        | (machreg_to_gpr(rm) << 16)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
        | (cond.bits() << 12)
}

fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
    0b000_11110_00_1_00000_0000_11_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
        | (cond.bits() << 12)
}

fn enc_cset(rd: Writable<Reg>, cond: Cond) -> u32 {
    0b100_11010100_11111_0000_01_11111_00000
        | machreg_to_gpr(rd.to_reg())
        | (cond.invert().bits() << 12)
}

fn enc_csetm(rd: Writable<Reg>, cond: Cond) -> u32 {
    0b110_11010100_11111_0000_00_11111_00000
        | machreg_to_gpr(rd.to_reg())
        | (cond.invert().bits() << 12)
}

fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
    0b0_1_1_11010010_00000_0000_10_00000_0_0000
        | size.sf_bit() << 31
        | imm.bits() << 16
        | cond.bits() << 12
        | machreg_to_gpr(rn) << 5
        | nzcv.bits()
}

fn enc_bfm(opc: u8, size: OperandSize, rd: Writable<Reg>, rn: Reg, immr: u8, imms: u8) -> u32 {
    match size {
        OperandSize::Size64 => {
            debug_assert!(immr <= 63);
            debug_assert!(imms <= 63);
        }
        OperandSize::Size32 => {
            debug_assert!(immr <= 31);
            debug_assert!(imms <= 31);
        }
    }
    debug_assert_eq!(opc & 0b11, opc);
    let n_bit = size.sf_bit();
    0b0_00_100110_0_000000_000000_00000_00000
        | size.sf_bit() << 31
        | u32::from(opc) << 29
        | n_bit << 22
        | u32::from(immr) << 16
        | u32::from(imms) << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rd.to_reg())
}

fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
    0b00001110_101_00000_00011_1_00000_00000
        | ((is_16b as u32) << 30)
        | machreg_to_vec(rd.to_reg())
        | (machreg_to_vec(rn) << 16)
        | (machreg_to_vec(rn) << 5)
}

fn enc_fpurr(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_fpurrr(top22: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    (top22 << 10)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32 {
    (top17 << 15)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(ra) << 10)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 {
    0b000_11110_00_1_00000_00_1000_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
}

fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg())
}

fn enc_inttofpu(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(qu & 0b11, qu);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
    let bits = 0b0_00_01110_00_10000_00000_10_00000_00000;
    bits | qu << 29
        | size << 22
        | bits_12_16 << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_pair(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);

    0b010_11110_11_11000_11011_10_00000_00000
        | bits_12_16 << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_pair_long(u: u32, enc_size: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(enc_size & 0b1, enc_size);

    0b0_1_0_01110_00_10000_00_0_10_10_00000_00000
        | u << 29
        | enc_size << 22
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(opcode & 0b11111, opcode);
    0b0_0_0_01110_00_11000_0_0000_10_00000_00000
        | q << 30
        | u << 29
        | size << 22
        | opcode << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    debug_assert_eq!(len & 0b11, len);
    0b0_1_001110_000_00000_0_00_0_00_00000_00000
        | (machreg_to_vec(rm) << 16)
        | len << 13
        | (is_extension as u32) << 12
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

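/// Fixed encoding of `DMB ISH` (data memory barrier, inner-shareable domain).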
fn enc_dmb_ish() -> u32 {
    0xD5033BBF
}

fn enc_ldar(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_1_1_0_11111_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt.to_reg())
}

fn enc_stlr(ty: Type, rt: Reg, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_100_11111_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}

fn enc_ldaxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_0_1_0_11111_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt.to_reg())
}

fn enc_stlxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_000_00000_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rs.to_reg()) << 16)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}

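/// Encode a compare-and-swap instruction; the fixed L and o0 bits appear to
/// select the acquire-release variant (`CASAL`).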
fn enc_cas(size: u32, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
    debug_assert_eq!(size & 0b11, size);

    0b00_0010001_1_1_00000_1_11111_00000_00000
        | size << 30
        | machreg_to_gpr(rs.to_reg()) << 16
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rt)
}

fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
    let abc = (imm >> 5) as u32;
    let defgh = (imm & 0b11111) as u32;

    debug_assert_eq!(cmode & 0b1111, cmode);
    debug_assert_eq!(q_op & 0b11, q_op);

    0b0_0_0_0111100000_000_0000_01_00000_00000
        | (q_op << 29)
        | (abc << 16)
        | (cmode << 12)
        | (defgh << 5)
        | machreg_to_vec(rd.to_reg())
}

/// State carried between emissions of a sequence of instructions.
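///
/// A sketch of the bookkeeping: if the call sequence emits `sub sp, sp, #16`
/// to allocate outgoing-argument space and bumps `virtual_sp_offset` to 16,
/// a subsequent `NominalSPOffset(8, ..)` amode resolves in `mem_finalize`
/// above to the real-SP-relative offset 8 + 16 = 24.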
#[derive(Default, Clone, Debug)]
pub struct EmitState {
    /// Addend to convert nominal-SP offsets to real-SP offsets at the current
    /// program point.
    pub(crate) virtual_sp_offset: i64,
    /// Offset of FP from nominal-SP.
    pub(crate) nominal_sp_to_fp: i64,
    /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`.
    stack_map: Option<StackMap>,
    /// Current source-code location corresponding to instruction to be emitted.
    cur_srcloc: SourceLoc,
}

impl MachInstEmitState<Inst> for EmitState {
    fn new(abi: &dyn ABICallee<I = Inst>) -> Self {
        EmitState {
            virtual_sp_offset: 0,
            nominal_sp_to_fp: abi.frame_size() as i64,
            stack_map: None,
            cur_srcloc: SourceLoc::default(),
        }
    }

    fn pre_safepoint(&mut self, stack_map: StackMap) {
        self.stack_map = Some(stack_map);
    }

    fn pre_sourceloc(&mut self, srcloc: SourceLoc) {
        self.cur_srcloc = srcloc;
    }
}

impl EmitState {
    fn take_stack_map(&mut self) -> Option<StackMap> {
        self.stack_map.take()
    }

    fn clear_post_insn(&mut self) {
        self.stack_map = None;
    }

    fn cur_srcloc(&self) -> SourceLoc {
        self.cur_srcloc
    }
}

/// Constant state used during function compilation.
pub struct EmitInfo(settings::Flags);

impl EmitInfo {
    pub(crate) fn new(flags: settings::Flags) -> Self {
        Self(flags)
    }
}

impl MachInstEmitInfo for EmitInfo {
    fn flags(&self) -> &settings::Flags {
        &self.0
    }
}

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
        // N.B.: we *must* not exceed the "worst-case size" used to compute
        // where to insert islands, except when islands are explicitly triggered
        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
        // to allow disabling the check for `JTSequence`, which is always
        // emitted following an `EmitIsland`.
        let mut start_off = sink.cur_offset();

        match self {
            &Inst::AluRRR { alu_op, rd, rn, rm } => {
                let top11 = match alu_op {
                    ALUOp::Add32 => 0b00001011_000,
                    ALUOp::Add64 => 0b10001011_000,
                    ALUOp::Adc32 => 0b00011010_000,
                    ALUOp::Adc64 => 0b10011010_000,
                    ALUOp::AdcS32 => 0b00111010_000,
                    ALUOp::AdcS64 => 0b10111010_000,
                    ALUOp::Sub32 => 0b01001011_000,
                    ALUOp::Sub64 => 0b11001011_000,
                    ALUOp::Sbc32 => 0b01011010_000,
                    ALUOp::Sbc64 => 0b11011010_000,
                    ALUOp::SbcS32 => 0b01111010_000,
                    ALUOp::SbcS64 => 0b11111010_000,
                    ALUOp::Orr32 => 0b00101010_000,
                    ALUOp::Orr64 => 0b10101010_000,
                    ALUOp::And32 => 0b00001010_000,
                    ALUOp::And64 => 0b10001010_000,
                    ALUOp::AndS32 => 0b01101010_000,
                    ALUOp::AndS64 => 0b11101010_000,
                    ALUOp::Eor32 => 0b01001010_000,
                    ALUOp::Eor64 => 0b11001010_000,
                    ALUOp::OrrNot32 => 0b00101010_001,
                    ALUOp::OrrNot64 => 0b10101010_001,
                    ALUOp::AndNot32 => 0b00001010_001,
                    ALUOp::AndNot64 => 0b10001010_001,
                    ALUOp::EorNot32 => 0b01001010_001,
                    ALUOp::EorNot64 => 0b11001010_001,
                    ALUOp::AddS32 => 0b00101011_000,
                    ALUOp::AddS64 => 0b10101011_000,
                    ALUOp::SubS32 => 0b01101011_000,
                    ALUOp::SubS64 => 0b11101011_000,
                    ALUOp::SDiv64 => 0b10011010_110,
                    ALUOp::UDiv64 => 0b10011010_110,
                    ALUOp::RotR32 | ALUOp::Lsr32 | ALUOp::Asr32 | ALUOp::Lsl32 => 0b00011010_110,
                    ALUOp::RotR64 | ALUOp::Lsr64 | ALUOp::Asr64 | ALUOp::Lsl64 => 0b10011010_110,
                    ALUOp::SMulH => 0b10011011_010,
                    ALUOp::UMulH => 0b10011011_110,
                };
                let bit15_10 = match alu_op {
                    ALUOp::SDiv64 => 0b000011,
                    ALUOp::UDiv64 => 0b000010,
                    ALUOp::RotR32 | ALUOp::RotR64 => 0b001011,
                    ALUOp::Lsr32 | ALUOp::Lsr64 => 0b001001,
                    ALUOp::Asr32 | ALUOp::Asr64 => 0b001010,
                    ALUOp::Lsl32 | ALUOp::Lsl64 => 0b001000,
                    ALUOp::SMulH | ALUOp::UMulH => 0b011111,
                    _ => 0b000000,
                };
                debug_assert_ne!(writable_stack_reg(), rd);
                // The stack pointer is the zero register in this context, so this might be an
                // indication that something is wrong.
                debug_assert_ne!(stack_reg(), rn);
                debug_assert_ne!(stack_reg(), rm);
                sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
            }
            &Inst::AluRRRR {
                alu_op,
                rd,
                rm,
                rn,
                ra,
            } => {
                let (top11, bit15) = match alu_op {
                    ALUOp3::MAdd32 => (0b0_00_11011_000, 0),
                    ALUOp3::MSub32 => (0b0_00_11011_000, 1),
                    ALUOp3::MAdd64 => (0b1_00_11011_000, 0),
                    ALUOp3::MSub64 => (0b1_00_11011_000, 1),
                };
                sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd));
            }
            &Inst::AluRRImm12 {
                alu_op,
                rd,
                rn,
                ref imm12,
            } => {
                let top8 = match alu_op {
                    ALUOp::Add32 => 0b000_10001,
                    ALUOp::Add64 => 0b100_10001,
                    ALUOp::Sub32 => 0b010_10001,
                    ALUOp::Sub64 => 0b110_10001,
                    ALUOp::AddS32 => 0b001_10001,
                    ALUOp::AddS64 => 0b101_10001,
                    ALUOp::SubS32 => 0b011_10001,
                    ALUOp::SubS64 => 0b111_10001,
                    _ => unimplemented!("{:?}", alu_op),
                };
                sink.put4(enc_arith_rr_imm12(
                    top8,
                    imm12.shift_bits(),
                    imm12.imm_bits(),
                    rn,
                    rd,
                ));
            }
            &Inst::AluRRImmLogic {
                alu_op,
                rd,
                rn,
                ref imml,
            } => {
                let (top9, inv) = match alu_op {
                    ALUOp::Orr32 => (0b001_100100, false),
                    ALUOp::Orr64 => (0b101_100100, false),
                    ALUOp::And32 => (0b000_100100, false),
                    ALUOp::And64 => (0b100_100100, false),
                    ALUOp::AndS32 => (0b011_100100, false),
                    ALUOp::AndS64 => (0b111_100100, false),
                    ALUOp::Eor32 => (0b010_100100, false),
                    ALUOp::Eor64 => (0b110_100100, false),
                    ALUOp::OrrNot32 => (0b001_100100, true),
                    ALUOp::OrrNot64 => (0b101_100100, true),
                    ALUOp::AndNot32 => (0b000_100100, true),
                    ALUOp::AndNot64 => (0b100_100100, true),
                    ALUOp::EorNot32 => (0b010_100100, true),
                    ALUOp::EorNot64 => (0b110_100100, true),
                    _ => unimplemented!("{:?}", alu_op),
                };
                let imml = if inv { imml.invert() } else { imml.clone() };
                sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd));
            }

            &Inst::AluRRImmShift {
                alu_op,
                rd,
                rn,
                ref immshift,
            } => {
                let amt = immshift.value();
                let (top10, immr, imms) = match alu_op {
                    ALUOp::RotR32 => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)),
                    ALUOp::RotR64 => (0b1001001111, machreg_to_gpr(rn), u32::from(amt)),
                    ALUOp::Lsr32 => (0b0101001100, u32::from(amt), 0b011111),
                    ALUOp::Lsr64 => (0b1101001101, u32::from(amt), 0b111111),
                    ALUOp::Asr32 => (0b0001001100, u32::from(amt), 0b011111),
                    ALUOp::Asr64 => (0b1001001101, u32::from(amt), 0b111111),
                    ALUOp::Lsl32 => (
                        0b0101001100,
                        u32::from((32 - amt) % 32),
                        u32::from(31 - amt),
                    ),
                    ALUOp::Lsl64 => (
                        0b1101001101,
                        u32::from((64 - amt) % 64),
                        u32::from(63 - amt),
                    ),
                    _ => unimplemented!("{:?}", alu_op),
                };
                sink.put4(
                    (top10 << 22)
                        | (immr << 16)
                        | (imms << 10)
                        | (machreg_to_gpr(rn) << 5)
                        | machreg_to_gpr(rd.to_reg()),
                );
            }

            &Inst::AluRRRShift {
                alu_op,
                rd,
                rn,
                rm,
                ref shiftop,
            } => {
                let top11: u32 = match alu_op {
                    ALUOp::Add32 => 0b000_01011000,
                    ALUOp::Add64 => 0b100_01011000,
                    ALUOp::AddS32 => 0b001_01011000,
                    ALUOp::AddS64 => 0b101_01011000,
                    ALUOp::Sub32 => 0b010_01011000,
                    ALUOp::Sub64 => 0b110_01011000,
                    ALUOp::SubS32 => 0b011_01011000,
                    ALUOp::SubS64 => 0b111_01011000,
                    ALUOp::Orr32 => 0b001_01010000,
                    ALUOp::Orr64 => 0b101_01010000,
                    ALUOp::And32 => 0b000_01010000,
                    ALUOp::And64 => 0b100_01010000,
                    ALUOp::AndS32 => 0b011_01010000,
                    ALUOp::AndS64 => 0b111_01010000,
                    ALUOp::Eor32 => 0b010_01010000,
                    ALUOp::Eor64 => 0b110_01010000,
                    ALUOp::OrrNot32 => 0b001_01010001,
                    ALUOp::OrrNot64 => 0b101_01010001,
                    ALUOp::EorNot32 => 0b010_01010001,
                    ALUOp::EorNot64 => 0b110_01010001,
                    ALUOp::AndNot32 => 0b000_01010001,
                    ALUOp::AndNot64 => 0b100_01010001,
                    _ => unimplemented!("{:?}", alu_op),
                };
                let top11 = top11 | (u32::from(shiftop.op().bits()) << 1);
                let bits_15_10 = u32::from(shiftop.amt().value());
                sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
            }

            &Inst::AluRRRExtend {
                alu_op,
                rd,
                rn,
                rm,
                extendop,
            } => {
                let top11: u32 = match alu_op {
                    ALUOp::Add32 => 0b00001011001,
                    ALUOp::Add64 => 0b10001011001,
                    ALUOp::Sub32 => 0b01001011001,
                    ALUOp::Sub64 => 0b11001011001,
                    ALUOp::AddS32 => 0b00101011001,
                    ALUOp::AddS64 => 0b10101011001,
                    ALUOp::SubS32 => 0b01101011001,
                    ALUOp::SubS64 => 0b11101011001,
                    _ => unimplemented!("{:?}", alu_op),
                };
                let bits_15_10 = u32::from(extendop.bits()) << 3;
                sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
            }

            &Inst::BitRR { op, rd, rn, .. } => {
                let size = if op.operand_size().is32() { 0b0 } else { 0b1 };
                let (op1, op2) = match op {
                    BitOp::RBit32 | BitOp::RBit64 => (0b00000, 0b000000),
                    BitOp::Clz32 | BitOp::Clz64 => (0b00000, 0b000100),
                    BitOp::Cls32 | BitOp::Cls64 => (0b00000, 0b000101),
                };
                sink.put4(enc_bit_rr(size, op1, op2, rn, rd))
            }

            &Inst::ULoad8 { rd, ref mem, flags }
            | &Inst::SLoad8 { rd, ref mem, flags }
            | &Inst::ULoad16 { rd, ref mem, flags }
            | &Inst::SLoad16 { rd, ref mem, flags }
            | &Inst::ULoad32 { rd, ref mem, flags }
            | &Inst::SLoad32 { rd, ref mem, flags }
            | &Inst::ULoad64 {
                rd, ref mem, flags, ..
            }
            | &Inst::FpuLoad32 { rd, ref mem, flags }
            | &Inst::FpuLoad64 { rd, ref mem, flags }
            | &Inst::FpuLoad128 { rd, ref mem, flags } => {
                let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);

                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                // ldst encoding helpers take Reg, not Writable<Reg>.
                let rd = rd.to_reg();

                // This is the base opcode (top 10 bits) for the "unscaled
                // immediate" form (Unscaled). Other addressing modes will OR in
                // other values for bits 24/25 (bits 1/2 of this constant).
                let (op, bits) = match self {
                    &Inst::ULoad8 { .. } => (0b0011100001, 8),
                    &Inst::SLoad8 { .. } => (0b0011100010, 8),
                    &Inst::ULoad16 { .. } => (0b0111100001, 16),
                    &Inst::SLoad16 { .. } => (0b0111100010, 16),
                    &Inst::ULoad32 { .. } => (0b1011100001, 32),
                    &Inst::SLoad32 { .. } => (0b1011100010, 32),
                    &Inst::ULoad64 { .. } => (0b1111100001, 64),
                    &Inst::FpuLoad32 { .. } => (0b1011110001, 32),
                    &Inst::FpuLoad64 { .. } => (0b1111110001, 64),
                    &Inst::FpuLoad128 { .. } => (0b0011110011, 128),
                    _ => unreachable!(),
                };

                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() && !flags.notrap() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }

                match &mem {
                    &AMode::Unscaled(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
                    }
                    &AMode::UnsignedOffset(reg, uimm12scaled) => {
                        if uimm12scaled.value() != 0 {
                            assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
                        }
                        sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
                    }
                    &AMode::RegReg(r1, r2) => {
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
                        ));
                    }
                    &AMode::RegScaled(r1, r2, ty) | &AMode::RegScaledExtended(r1, r2, ty, _) => {
                        assert_eq!(bits, ty_bits(ty));
                        let extendop = match &mem {
                            &AMode::RegScaled(..) => None,
                            &AMode::RegScaledExtended(_, _, _, op) => Some(op),
                            _ => unreachable!(),
                        };
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ true, extendop, rd,
                        ));
                    }
                    &AMode::RegExtended(r1, r2, extendop) => {
                        sink.put4(enc_ldst_reg(
                            op,
                            r1,
                            r2,
                            /* scaled = */ false,
                            Some(extendop),
                            rd,
                        ));
                    }
                    &AMode::Label(ref label) => {
                        let offset = match label {
                            // cast i32 to u32 (two's-complement)
                            &MemLabel::PCRel(off) => off as u32,
                        } / 4;
                        assert!(offset < (1 << 19));
                        match self {
                            &Inst::ULoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b00011000, offset, rd));
                            }
                            &Inst::SLoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b10011000, offset, rd));
                            }
                            &Inst::FpuLoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b00011100, offset, rd));
                            }
                            &Inst::ULoad64 { .. } => {
                                sink.put4(enc_ldst_imm19(0b01011000, offset, rd));
                            }
                            &Inst::FpuLoad64 { .. } => {
                                sink.put4(enc_ldst_imm19(0b01011100, offset, rd));
                            }
                            &Inst::FpuLoad128 { .. } => {
                                sink.put4(enc_ldst_imm19(0b10011100, offset, rd));
                            }
                            _ => panic!("Unsupported size for LDR from constant pool!"),
                        }
                    }
                    &AMode::PreIndexed(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd));
                    }
                    &AMode::PostIndexed(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
                    }
                    // Eliminated by `mem_finalize()` above.
                    &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => {
                        panic!("Should not see stack-offset here!")
                    }
                    &AMode::RegOffset(..) => panic!("Should not see generic reg-offset here!"),
                }
            }

            &Inst::Store8 { rd, ref mem, flags }
            | &Inst::Store16 { rd, ref mem, flags }
            | &Inst::Store32 { rd, ref mem, flags }
            | &Inst::Store64 { rd, ref mem, flags }
            | &Inst::FpuStore32 { rd, ref mem, flags }
            | &Inst::FpuStore64 { rd, ref mem, flags }
            | &Inst::FpuStore128 { rd, ref mem, flags } => {
                let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);

                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                let (op, bits) = match self {
                    &Inst::Store8 { .. } => (0b0011100000, 8),
                    &Inst::Store16 { .. } => (0b0111100000, 16),
                    &Inst::Store32 { .. } => (0b1011100000, 32),
                    &Inst::Store64 { .. } => (0b1111100000, 64),
                    &Inst::FpuStore32 { .. } => (0b1011110000, 32),
                    &Inst::FpuStore64 { .. } => (0b1111110000, 64),
                    &Inst::FpuStore128 { .. } => (0b0011110010, 128),
                    _ => unreachable!(),
                };

                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() && !flags.notrap() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }

                match &mem {
                    &AMode::Unscaled(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
                    }
                    &AMode::UnsignedOffset(reg, uimm12scaled) => {
                        if uimm12scaled.value() != 0 {
                            assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
                        }
                        sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
                    }
                    &AMode::RegReg(r1, r2) => {
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
                        ));
                    }
                    &AMode::RegScaled(r1, r2, _ty) | &AMode::RegScaledExtended(r1, r2, _ty, _) => {
                        let extendop = match &mem {
                            &AMode::RegScaled(..) => None,
                            &AMode::RegScaledExtended(_, _, _, op) => Some(op),
                            _ => unreachable!(),
                        };
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ true, extendop, rd,
                        ));
                    }
                    &AMode::RegExtended(r1, r2, extendop) => {
                        sink.put4(enc_ldst_reg(
                            op,
                            r1,
                            r2,
                            /* scaled = */ false,
                            Some(extendop),
                            rd,
                        ));
                    }
                    &AMode::Label(..) => {
                        panic!("Store to a MemLabel not implemented!");
                    }
                    &AMode::PreIndexed(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd));
                    }
                    &AMode::PostIndexed(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
                    }
                    // Eliminated by `mem_finalize()` above.
                    &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => {
                        panic!("Should not see stack-offset here!")
                    }
                    &AMode::RegOffset(..) => panic!("Should not see generic reg-offset here!"),
1086                 }
1087             }
1088 
1089             &Inst::StoreP64 {
1090                 rt,
1091                 rt2,
1092                 ref mem,
1093                 flags,
1094             } => {
1095                 let srcloc = state.cur_srcloc();
1096                 if srcloc != SourceLoc::default() && !flags.notrap() {
1097                     // Register the offset at which the actual store instruction starts.
1098                     sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1099                 }
1100                 match mem {
1101                     &PairAMode::SignedOffset(reg, simm7) => {
1102                         assert_eq!(simm7.scale_ty, I64);
1103                         sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2));
1104                     }
1105                     &PairAMode::PreIndexed(reg, simm7) => {
1106                         assert_eq!(simm7.scale_ty, I64);
1107                         sink.put4(enc_ldst_pair(0b1010100110, simm7, reg.to_reg(), rt, rt2));
1108                     }
1109                     &PairAMode::PostIndexed(reg, simm7) => {
1110                         assert_eq!(simm7.scale_ty, I64);
1111                         sink.put4(enc_ldst_pair(0b1010100010, simm7, reg.to_reg(), rt, rt2));
1112                     }
1113                 }
1114             }
1115             &Inst::LoadP64 {
1116                 rt,
1117                 rt2,
1118                 ref mem,
1119                 flags,
1120             } => {
1121                 let srcloc = state.cur_srcloc();
1122                 if srcloc != SourceLoc::default() && !flags.notrap() {
1123                     // Register the offset at which the actual load instruction starts.
1124                     sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1125                 }
1126 
1127                 let rt = rt.to_reg();
1128                 let rt2 = rt2.to_reg();
1129                 match mem {
1130                     &PairAMode::SignedOffset(reg, simm7) => {
1131                         assert_eq!(simm7.scale_ty, I64);
1132                         sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2));
1133                     }
1134                     &PairAMode::PreIndexed(reg, simm7) => {
1135                         assert_eq!(simm7.scale_ty, I64);
1136                         sink.put4(enc_ldst_pair(0b1010100111, simm7, reg.to_reg(), rt, rt2));
1137                     }
1138                     &PairAMode::PostIndexed(reg, simm7) => {
1139                         assert_eq!(simm7.scale_ty, I64);
1140                         sink.put4(enc_ldst_pair(0b1010100011, simm7, reg.to_reg(), rt, rt2));
1141                     }
1142                 }
1143             }
1144             &Inst::FpuLoadP64 {
1145                 rt,
1146                 rt2,
1147                 ref mem,
1148                 flags,
1149             }
1150             | &Inst::FpuLoadP128 {
1151                 rt,
1152                 rt2,
1153                 ref mem,
1154                 flags,
1155             } => {
1156                 let srcloc = state.cur_srcloc();
1157 
1158                 if srcloc != SourceLoc::default() && !flags.notrap() {
1159                     // Register the offset at which the actual load instruction starts.
1160                     sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1161                 }
1162 
1163                 let opc = match self {
1164                     &Inst::FpuLoadP64 { .. } => 0b01,
1165                     &Inst::FpuLoadP128 { .. } => 0b10,
1166                     _ => unreachable!(),
1167                 };
1168                 let rt = rt.to_reg();
1169                 let rt2 = rt2.to_reg();
1170 
1171                 match mem {
1172                     &PairAMode::SignedOffset(reg, simm7) => {
1173                         assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1174                         sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2));
1175                     }
1176                     &PairAMode::PreIndexed(reg, simm7) => {
1177                         assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1178                         sink.put4(enc_ldst_vec_pair(
1179                             opc,
1180                             0b11,
1181                             true,
1182                             simm7,
1183                             reg.to_reg(),
1184                             rt,
1185                             rt2,
1186                         ));
1187                     }
1188                     &PairAMode::PostIndexed(reg, simm7) => {
1189                         assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1190                         sink.put4(enc_ldst_vec_pair(
1191                             opc,
1192                             0b01,
1193                             true,
1194                             simm7,
1195                             reg.to_reg(),
1196                             rt,
1197                             rt2,
1198                         ));
1199                     }
1200                 }
1201             }
1202             &Inst::FpuStoreP64 {
1203                 rt,
1204                 rt2,
1205                 ref mem,
1206                 flags,
1207             }
1208             | &Inst::FpuStoreP128 {
1209                 rt,
1210                 rt2,
1211                 ref mem,
1212                 flags,
1213             } => {
1214                 let srcloc = state.cur_srcloc();
1215 
1216                 if srcloc != SourceLoc::default() && !flags.notrap() {
1217                     // Register the offset at which the actual store instruction starts.
1218                     sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1219                 }
1220 
1221                 let opc = match self {
1222                     &Inst::FpuStoreP64 { .. } => 0b01,
1223                     &Inst::FpuStoreP128 { .. } => 0b10,
1224                     _ => unreachable!(),
1225                 };
1226 
1227                 match mem {
1228                     &PairAMode::SignedOffset(reg, simm7) => {
1229                         assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1230                         sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2));
1231                     }
1232                     &PairAMode::PreIndexed(reg, simm7) => {
1233                         assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1234                         sink.put4(enc_ldst_vec_pair(
1235                             opc,
1236                             0b11,
1237                             false,
1238                             simm7,
1239                             reg.to_reg(),
1240                             rt,
1241                             rt2,
1242                         ));
1243                     }
1244                     &PairAMode::PostIndexed(reg, simm7) => {
1245                         assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1246                         sink.put4(enc_ldst_vec_pair(
1247                             opc,
1248                             0b01,
1249                             false,
1250                             simm7,
1251                             reg.to_reg(),
1252                             rt,
1253                             rt2,
1254                         ));
1255                     }
1256                 }
1257             }
1258             &Inst::Mov64 { rd, rm } => {
1259                 assert!(rd.to_reg().get_class() == rm.get_class());
1260                 assert!(rm.get_class() == RegClass::I64);
1261 
1262                 // MOV to SP is interpreted as MOV to XZR instead. And our codegen
1263                 // should never MOV to XZR.
1264                 assert!(rd.to_reg() != stack_reg());
1265 
1266                 if rm == stack_reg() {
1267                     // We can't use ORR here, so use an `add rd, sp, #0` instead.
1268                     let imm12 = Imm12::maybe_from_u64(0).unwrap();
1269                     sink.put4(enc_arith_rr_imm12(
1270                         0b100_10001,
1271                         imm12.shift_bits(),
1272                         imm12.imm_bits(),
1273                         rm,
1274                         rd,
1275                     ));
1276                 } else {
1277                     // Encoded as ORR rd, rm, zero.
1278                     sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
1279                 }
1280             }
1281             &Inst::Mov32 { rd, rm } => {
1282                 // As above: in the ORR encoding, register 31 is XZR, not SP, so a MOV
1283                 // to SP would become a MOV to XZR, which our codegen never wants.
1284                 assert!(machreg_to_gpr(rd.to_reg()) != 31);
1285                 // Encoded as ORR rd, rm, zero.
1286                 sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm));
1287             }
1288             &Inst::MovZ { rd, imm, size } => {
1289                 sink.put4(enc_move_wide(MoveWideOpcode::MOVZ, rd, imm, size))
1290             }
1291             &Inst::MovN { rd, imm, size } => {
1292                 sink.put4(enc_move_wide(MoveWideOpcode::MOVN, rd, imm, size))
1293             }
1294             &Inst::MovK { rd, imm, size } => {
1295                 sink.put4(enc_move_wide(MoveWideOpcode::MOVK, rd, imm, size))
1296             }
1297             &Inst::CSel { rd, rn, rm, cond } => {
1298                 sink.put4(enc_csel(rd, rn, rm, cond));
1299             }
1300             &Inst::CSet { rd, cond } => {
1301                 sink.put4(enc_cset(rd, cond));
1302             }
1303             &Inst::CSetm { rd, cond } => {
1304                 sink.put4(enc_csetm(rd, cond));
1305             }
1306             &Inst::CCmpImm {
1307                 size,
1308                 rn,
1309                 imm,
1310                 nzcv,
1311                 cond,
1312             } => {
1313                 sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
1314             }
1315             &Inst::AtomicRMW { ty, op } => {
1316                 /* Emit this:
1317                      again:
1318                       ldaxr{,b,h}  x/w27, [x25]
1319                       op          x28, x27, x26 // op is add,sub,and,orr,eor
1320                       stlxr{,b,h}  w24, x/w28, [x25]
1321                       cbnz        x24, again
1322 
1323                    Operand conventions:
1324                       IN:  x25 (addr), x26 (2nd arg for op)
1325                       OUT: x27 (old value), x24 (trashed), x28 (trashed)
1326 
1327                    It is unfortunate that, per the ARM documentation, x28 cannot be used for
1328                    both the store-data and success-flag operands of stlxr.  This causes the
1329                    instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24
1330                    instead for the success-flag.
1331 
1332                    In the case where the operation is 'xchg', the second insn is instead
1333                      mov          x28, x26
1334                    so that we simply write the "2nd arg for op" into the destination.
1335                 */
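                     // Likewise, for the min/max operations the single 'op' above becomes a
                     // two-instruction sequence (a cmp, i.e. subs into xzr, then a csel), and
                     // for 'nand' it becomes and + mvn, as the match below shows.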
1336                 // TODO: We should not hardcode registers here; a better approach would be to
1337                 // pass scratch registers in the AtomicRMW pseudo-instruction and use those.
1338                 let xzr = zero_reg();
1339                 let x24 = xreg(24);
1340                 let x25 = xreg(25);
1341                 let x26 = xreg(26);
1342                 let x27 = xreg(27);
1343                 let x28 = xreg(28);
1344                 let x24wr = writable_xreg(24);
1345                 let x27wr = writable_xreg(27);
1346                 let x28wr = writable_xreg(28);
1347                 let again_label = sink.get_label();
1348 
1349                 // again:
1350                 sink.bind_label(again_label);
1351                 let srcloc = state.cur_srcloc();
1352                 if srcloc != SourceLoc::default() {
1353                     sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1354                 }
1355                 sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25]
1356 
1357                 match op {
1358                     AtomicRmwOp::Xchg => {
1359                         // mov x28, x26
1360                         Inst::Mov64 { rd: x28wr, rm: x26 }.emit(sink, emit_info, state);
1361                     }
1362                     AtomicRmwOp::Nand => {
1363                         // and x28, x27, x26
1364                         // mvn x28, x28
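                             // (mvn is an alias of orn with xzr as the first source;
                             // the OrrNot64 op below encodes exactly that.)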
1365 
1366                         Inst::AluRRR {
1367                             alu_op: ALUOp::And64,
1368                             rd: x28wr,
1369                             rn: x27,
1370                             rm: x26,
1371                         }
1372                         .emit(sink, emit_info, state);
1373 
1374                         Inst::AluRRR {
1375                             alu_op: ALUOp::OrrNot64,
1376                             rd: x28wr,
1377                             rn: xzr,
1378                             rm: x28,
1379                         }
1380                         .emit(sink, emit_info, state);
1381                     }
1382                     AtomicRmwOp::Umin
1383                     | AtomicRmwOp::Umax
1384                     | AtomicRmwOp::Smin
1385                     | AtomicRmwOp::Smax => {
1386                         // cmp x27, x26
1387                         // csel.op x28, x27, x26
1388 
1389                         let cond = match op {
1390                             AtomicRmwOp::Umin => Cond::Lo,
1391                             AtomicRmwOp::Umax => Cond::Hi,
1392                             AtomicRmwOp::Smin => Cond::Lt,
1393                             AtomicRmwOp::Smax => Cond::Gt,
1394                             _ => unreachable!(),
1395                         };
1396 
1397                         Inst::AluRRR {
1398                             alu_op: if ty == I64 {
1399                                 ALUOp::SubS64
1400                             } else {
1401                                 ALUOp::SubS32
1402                             },
1403                             rd: writable_zero_reg(),
1404                             rn: x27,
1405                             rm: x26,
1406                         }
1407                         .emit(sink, emit_info, state);
1408 
1409                         Inst::CSel {
1410                             cond,
1411                             rd: x28wr,
1412                             rn: x27,
1413                             rm: x26,
1414                         }
1415                         .emit(sink, emit_info, state);
1416                     }
1417                     _ => {
1418                         // add/sub/and/orr/eor x28, x27, x26
1419                         let alu_op = match op {
1420                             AtomicRmwOp::Add => ALUOp::Add64,
1421                             AtomicRmwOp::Sub => ALUOp::Sub64,
1422                             AtomicRmwOp::And => ALUOp::And64,
1423                             AtomicRmwOp::Or => ALUOp::Orr64,
1424                             AtomicRmwOp::Xor => ALUOp::Eor64,
1425                             AtomicRmwOp::Nand
1426                             | AtomicRmwOp::Umin
1427                             | AtomicRmwOp::Umax
1428                             | AtomicRmwOp::Smin
1429                             | AtomicRmwOp::Smax
1430                             | AtomicRmwOp::Xchg => unreachable!(),
1431                         };
1432 
1433                         Inst::AluRRR {
1434                             alu_op,
1435                             rd: x28wr,
1436                             rn: x27,
1437                             rm: x26,
1438                         }
1439                         .emit(sink, emit_info, state);
1440                     }
1441                 }
1442 
1443                 let srcloc = state.cur_srcloc();
1444                 if srcloc != SourceLoc::default() {
1445                     sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1446                 }
1447                 sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
1448 
1449                 // cbnz w24, again
1450                 // Note that we actually test x24, relying on stlxr zeroing the upper half
1451                 // of x24 when it writes its 32-bit status result.
1452                 let br_offset = sink.cur_offset();
1453                 sink.put4(enc_conditional_br(
1454                     BranchTarget::Label(again_label),
1455                     CondBrKind::NotZero(x24),
1456                 ));
1457                 sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
1458             }
1459             &Inst::AtomicCAS { rs, rt, rn, ty } => {
1460                 let size = match ty {
1461                     I8 => 0b00,
1462                     I16 => 0b01,
1463                     I32 => 0b10,
1464                     I64 => 0b11,
1465                     _ => panic!("Unsupported type: {}", ty),
1466                 };
1467 
1468                 sink.put4(enc_cas(size, rs, rt, rn));
1469             }
1470             &Inst::AtomicCASLoop { ty } => {
1471                 /* Emit this:
1472                     again:
1473                      ldaxr{,b,h} x/w27, [x25]
1474                      cmp         x27, x/w26 uxt{b,h}
1475                      b.ne        out
1476                      stlxr{,b,h} w24, x/w28, [x25]
1477                      cbnz        x24, again
1478                     out:
1479 
1480                   Operand conventions:
1481                      IN:  x25 (addr), x26 (expected value), x28 (replacement value)
1482                      OUT: x27 (old value), x24 (trashed)
1483                 */
1484                 let x24 = xreg(24);
1485                 let x25 = xreg(25);
1486                 let x26 = xreg(26);
1487                 let x27 = xreg(27);
1488                 let x28 = xreg(28);
1489                 let xzrwr = writable_zero_reg();
1490                 let x24wr = writable_xreg(24);
1491                 let x27wr = writable_xreg(27);
1492                 let again_label = sink.get_label();
1493                 let out_label = sink.get_label();
1494 
1495                 // again:
1496                 sink.bind_label(again_label);
1497                 let srcloc = state.cur_srcloc();
1498                 if srcloc != SourceLoc::default() {
1499                     sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1500                 }
1501                 // ldaxr x27, [x25]
1502                 sink.put4(enc_ldaxr(ty, x27wr, x25));
1503 
1504                 // The upper 32 bits of x27 are zeroed by the ldaxr, so we don't need a
1505                 // UXTW here; we can just use the x-form of the register.
1506                 let (bit21, extend_op) = match ty {
1507                     I8 => (0b1, 0b000000),
1508                     I16 => (0b1, 0b001000),
1509                     _ => (0b0, 0b000000),
1510                 };
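                     // E.g. for I16, bit 21 selects the extended-register form of SUBS and
                     // the option field is UXTH, giving `cmp x27, w26, uxth`.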
1511                 let bits_31_21 = 0b111_01011_000 | bit21;
1512                 // cmp x27, x26 (== subs xzr, x27, x26)
1513                 sink.put4(enc_arith_rrr(bits_31_21, extend_op, xzrwr, x27, x26));
1514 
1515                 // b.ne out
1516                 let br_out_offset = sink.cur_offset();
1517                 sink.put4(enc_conditional_br(
1518                     BranchTarget::Label(out_label),
1519                     CondBrKind::Cond(Cond::Ne),
1520                 ));
1521                 sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19);
1522 
1523                 let srcloc = state.cur_srcloc();
1524                 if srcloc != SourceLoc::default() {
1525                     sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1526                 }
1527                 sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
1528 
1529                 // cbnz w24, again.
1530                 // Note that we actually test x24, relying on stlxr zeroing the upper half
1531                 // of x24 when it writes its 32-bit status result.
1532                 let br_again_offset = sink.cur_offset();
1533                 sink.put4(enc_conditional_br(
1534                     BranchTarget::Label(again_label),
1535                     CondBrKind::NotZero(x24),
1536                 ));
1537                 sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19);
1538 
1539                 // out:
1540                 sink.bind_label(out_label);
1541             }
1542             &Inst::LoadAcquire { access_ty, rt, rn } => {
1543                 sink.put4(enc_ldar(access_ty, rt, rn));
1544             }
1545             &Inst::StoreRelease { access_ty, rt, rn } => {
1546                 sink.put4(enc_stlr(access_ty, rt, rn));
1547             }
1548             &Inst::Fence {} => {
1549                 sink.put4(enc_dmb_ish()); // dmb ish
1550             }
1551             &Inst::FpuMove64 { rd, rn } => {
1552                 sink.put4(enc_fpurr(0b000_11110_01_1_000000_10000, rd, rn));
1553             }
1554             &Inst::FpuMove128 { rd, rn } => {
1555                 sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
1556             }
1557             &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
1558                 let (imm5, shift, mask) = match size.lane_size() {
1559                     ScalarSize::Size32 => (0b00100, 3, 0b011),
1560                     ScalarSize::Size64 => (0b01000, 4, 0b001),
1561                     _ => unimplemented!(),
1562                 };
1563                 debug_assert_eq!(idx & mask, idx);
1564                 let imm5 = imm5 | ((idx as u32) << shift);
1565                 sink.put4(
1566                     0b010_11110000_00000_000001_00000_00000
1567                         | (imm5 << 16)
1568                         | (machreg_to_vec(rn) << 5)
1569                         | machreg_to_vec(rd.to_reg()),
1570                 );
1571             }
1572             &Inst::FpuExtend { rd, rn, size } => {
1573                 sink.put4(enc_fpurr(
1574                     0b000_11110_00_1_000000_10000 | (size.ftype() << 13),
1575                     rd,
1576                     rn,
1577                 ));
1578             }
1579             &Inst::FpuRR { fpu_op, rd, rn } => {
1580                 let top22 = match fpu_op {
1581                     FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000,
1582                     FPUOp1::Abs64 => 0b000_11110_01_1_000001_10000,
1583                     FPUOp1::Neg32 => 0b000_11110_00_1_000010_10000,
1584                     FPUOp1::Neg64 => 0b000_11110_01_1_000010_10000,
1585                     FPUOp1::Sqrt32 => 0b000_11110_00_1_000011_10000,
1586                     FPUOp1::Sqrt64 => 0b000_11110_01_1_000011_10000,
1587                     FPUOp1::Cvt32To64 => 0b000_11110_00_1_000101_10000,
1588                     FPUOp1::Cvt64To32 => 0b000_11110_01_1_000100_10000,
1589                 };
1590                 sink.put4(enc_fpurr(top22, rd, rn));
1591             }
1592             &Inst::FpuRRR { fpu_op, rd, rn, rm } => {
1593                 let top22 = match fpu_op {
1594                     FPUOp2::Add32 => 0b000_11110_00_1_00000_001010,
1595                     FPUOp2::Add64 => 0b000_11110_01_1_00000_001010,
1596                     FPUOp2::Sub32 => 0b000_11110_00_1_00000_001110,
1597                     FPUOp2::Sub64 => 0b000_11110_01_1_00000_001110,
1598                     FPUOp2::Mul32 => 0b000_11110_00_1_00000_000010,
1599                     FPUOp2::Mul64 => 0b000_11110_01_1_00000_000010,
1600                     FPUOp2::Div32 => 0b000_11110_00_1_00000_000110,
1601                     FPUOp2::Div64 => 0b000_11110_01_1_00000_000110,
1602                     FPUOp2::Max32 => 0b000_11110_00_1_00000_010010,
1603                     FPUOp2::Max64 => 0b000_11110_01_1_00000_010010,
1604                     FPUOp2::Min32 => 0b000_11110_00_1_00000_010110,
1605                     FPUOp2::Min64 => 0b000_11110_01_1_00000_010110,
1606                     FPUOp2::Sqadd64 => 0b010_11110_11_1_00000_000011,
1607                     FPUOp2::Uqadd64 => 0b011_11110_11_1_00000_000011,
1608                     FPUOp2::Sqsub64 => 0b010_11110_11_1_00000_001011,
1609                     FPUOp2::Uqsub64 => 0b011_11110_11_1_00000_001011,
1610                 };
1611                 sink.put4(enc_fpurrr(top22, rd, rn, rm));
1612             }
1613             &Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {
1614                 FPUOpRI::UShr32(imm) => {
1615                     debug_assert_eq!(32, imm.lane_size_in_bits);
1616                     sink.put4(
1617                         0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000
1618                             | imm.enc() << 16
1619                             | machreg_to_vec(rn) << 5
1620                             | machreg_to_vec(rd.to_reg()),
1621                     )
1622                 }
1623                 FPUOpRI::UShr64(imm) => {
1624                     debug_assert_eq!(64, imm.lane_size_in_bits);
1625                     sink.put4(
1626                         0b01_1_111110_0000000_00_0_0_0_1_00000_00000
1627                             | imm.enc() << 16
1628                             | machreg_to_vec(rn) << 5
1629                             | machreg_to_vec(rd.to_reg()),
1630                     )
1631                 }
1632                 FPUOpRI::Sli64(imm) => {
1633                     debug_assert_eq!(64, imm.lane_size_in_bits);
1634                     sink.put4(
1635                         0b01_1_111110_0000000_010101_00000_00000
1636                             | imm.enc() << 16
1637                             | machreg_to_vec(rn) << 5
1638                             | machreg_to_vec(rd.to_reg()),
1639                     )
1640                 }
1641                 FPUOpRI::Sli32(imm) => {
1642                     debug_assert_eq!(32, imm.lane_size_in_bits);
1643                     sink.put4(
1644                         0b0_0_1_011110_0000000_010101_00000_00000
1645                             | imm.enc() << 16
1646                             | machreg_to_vec(rn) << 5
1647                             | machreg_to_vec(rd.to_reg()),
1648                     )
1649                 }
1650             },
1651             &Inst::FpuRRRR {
1652                 fpu_op,
1653                 rd,
1654                 rn,
1655                 rm,
1656                 ra,
1657             } => {
1658                 let top17 = match fpu_op {
1659                     FPUOp3::MAdd32 => 0b000_11111_00_0_00000_0,
1660                     FPUOp3::MAdd64 => 0b000_11111_01_0_00000_0,
1661                 };
1662                 sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
1663             }
1664             &Inst::VecMisc { op, rd, rn, size } => {
1665                 let (q, enc_size) = size.enc_size();
1666                 let (u, bits_12_16, size) = match op {
1667                     VecMisc2::Not => (0b1, 0b00101, 0b00),
1668                     VecMisc2::Neg => (0b1, 0b01011, enc_size),
1669                     VecMisc2::Abs => (0b0, 0b01011, enc_size),
1670                     VecMisc2::Fabs => {
1671                         debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1672                         (0b0, 0b01111, enc_size)
1673                     }
1674                     VecMisc2::Fneg => {
1675                         debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1676                         (0b1, 0b01111, enc_size)
1677                     }
1678                     VecMisc2::Fsqrt => {
1679                         debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1680                         (0b1, 0b11111, enc_size)
1681                     }
1682                     VecMisc2::Rev64 => {
1683                         debug_assert_ne!(VectorSize::Size64x2, size);
1684                         (0b0, 0b00000, enc_size)
1685                     }
1686                     VecMisc2::Fcvtzs => {
1687                         debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1688                         (0b0, 0b11011, enc_size)
1689                     }
1690                     VecMisc2::Fcvtzu => {
1691                         debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1692                         (0b1, 0b11011, enc_size)
1693                     }
1694                     VecMisc2::Scvtf => {
1695                         debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1696                         (0b0, 0b11101, enc_size & 0b1)
1697                     }
1698                     VecMisc2::Ucvtf => {
1699                         debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1700                         (0b1, 0b11101, enc_size & 0b1)
1701                     }
1702                     VecMisc2::Frintn => {
1703                         debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1704                         (0b0, 0b11000, enc_size & 0b01)
1705                     }
1706                     VecMisc2::Frintz => {
1707                         debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1708                         (0b0, 0b11001, enc_size | 0b10)
1709                     }
1710                     VecMisc2::Frintm => {
1711                         debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1712                         (0b0, 0b11001, enc_size & 0b01)
1713                     }
1714                     VecMisc2::Frintp => {
1715                         debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1716                         (0b0, 0b11000, enc_size | 0b10)
1717                     }
1718                     VecMisc2::Cnt => {
1719                         debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);
1720                         (0b0, 0b00101, enc_size)
1721                     }
1722                     VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size),
1723                 };
1724                 sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
1725             }
1726             &Inst::VecLanes { op, rd, rn, size } => {
1727                 let (q, size) = match size {
1728                     VectorSize::Size8x8 => (0b0, 0b00),
1729                     VectorSize::Size8x16 => (0b1, 0b00),
1730                     VectorSize::Size16x4 => (0b0, 0b01),
1731                     VectorSize::Size16x8 => (0b1, 0b01),
1732                     VectorSize::Size32x4 => (0b1, 0b10),
1733                     _ => unreachable!(),
1734                 };
1735                 let (u, opcode) = match op {
1736                     VecLanesOp::Uminv => (0b1, 0b11010),
1737                     VecLanesOp::Addv => (0b0, 0b11011),
1738                 };
1739                 sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
1740             }
1741             &Inst::VecShiftImm {
1742                 op,
1743                 rd,
1744                 rn,
1745                 size,
1746                 imm,
1747             } => {
1748                 let (is_shr, template) = match op {
1749                     VecShiftImmOp::Ushr => (true, 0b_011_011110_0000_000_000001_00000_00000_u32),
1750                     VecShiftImmOp::Sshr => (true, 0b_010_011110_0000_000_000001_00000_00000_u32),
1751                     VecShiftImmOp::Shl => (false, 0b_010_011110_0000_000_010101_00000_00000_u32),
1752                 };
1753                 let imm = imm as u32;
1754                 // Deal with the somewhat strange encoding scheme for, and limits on,
1755                 // the shift amount.
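                     // A worked example: ushr by 3 on 32-bit lanes gives
                     // immh:immb = 0b0100_000 | (32 - 3) = 0b0111101, which the hardware
                     // decodes back as 2 * esize - immh:immb = 64 - 61 = 3. Left shifts
                     // store the amount directly, decoded as immh:immb - esize:
                     // (0b0100_000 | 3) - 32 = 3.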
1756                 let immh_immb = match (size, is_shr) {
1757                     (VectorSize::Size64x2, true) if imm >= 1 && imm <= 64 => {
1758                         0b_1000_000_u32 | (64 - imm)
1759                     }
1760                     (VectorSize::Size32x4, true) if imm >= 1 && imm <= 32 => {
1761                         0b_0100_000_u32 | (32 - imm)
1762                     }
1763                     (VectorSize::Size16x8, true) if imm >= 1 && imm <= 16 => {
1764                         0b_0010_000_u32 | (16 - imm)
1765                     }
1766                     (VectorSize::Size8x16, true) if imm >= 1 && imm <= 8 => {
1767                         0b_0001_000_u32 | (8 - imm)
1768                     }
1769                     (VectorSize::Size64x2, false) if imm <= 63 => 0b_1000_000_u32 | imm,
1770                     (VectorSize::Size32x4, false) if imm <= 31 => 0b_0100_000_u32 | imm,
1771                     (VectorSize::Size16x8, false) if imm <= 15 => 0b_0010_000_u32 | imm,
1772                     (VectorSize::Size8x16, false) if imm <= 7 => 0b_0001_000_u32 | imm,
1773                     _ => panic!(
1774                         "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {:?}, {:?}, {:?}",
1775                         op, size, imm
1776                     ),
1777                 };
1778                 let rn_enc = machreg_to_vec(rn);
1779                 let rd_enc = machreg_to_vec(rd.to_reg());
1780                 sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
1781             }
1782             &Inst::VecExtract { rd, rn, rm, imm4 } => {
1783                 if imm4 < 16 {
1784                     let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;
1785                     let rm_enc = machreg_to_vec(rm);
1786                     let rn_enc = machreg_to_vec(rn);
1787                     let rd_enc = machreg_to_vec(rd.to_reg());
1788                     sink.put4(
1789                         template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,
1790                     );
1791                 } else {
1792                     panic!(
1793                         "aarch64: Inst::VecExtract: emit: invalid extract index {}",
1794                         imm4
1795                     );
1796                 }
1797             }
1798             &Inst::VecTbl {
1799                 rd,
1800                 rn,
1801                 rm,
1802                 is_extension,
1803             } => {
1804                 sink.put4(enc_tbl(is_extension, 0b00, rd, rn, rm));
1805             }
1806             &Inst::VecTbl2 {
1807                 rd,
1808                 rn,
1809                 rn2,
1810                 rm,
1811                 is_extension,
1812             } => {
1813                 assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
1814                 sink.put4(enc_tbl(is_extension, 0b01, rd, rn, rm));
1815             }
1816             &Inst::FpuCmp32 { rn, rm } => {
1817                 sink.put4(enc_fcmp(ScalarSize::Size32, rn, rm));
1818             }
1819             &Inst::FpuCmp64 { rn, rm } => {
1820                 sink.put4(enc_fcmp(ScalarSize::Size64, rn, rm));
1821             }
1822             &Inst::FpuToInt { op, rd, rn } => {
1823                 let top16 = match op {
1824                     // FCVTZS (32/32-bit)
1825                     FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000,
1826                     // FCVTZU (32/32-bit)
1827                     FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001,
1828                     // FCVTZS (32/64-bit)
1829                     FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000,
1830                     // FCVTZU (32/64-bit)
1831                     FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001,
1832                     // FCVTZS (64/32-bit)
1833                     FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000,
1834                     // FCVTZU (64/32-bit)
1835                     FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001,
1836                     // FCVTZS (64/64-bit)
1837                     FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000,
1838                     // FCVTZU (64/64-bit)
1839                     FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001,
1840                 };
1841                 sink.put4(enc_fputoint(top16, rd, rn));
1842             }
1843             &Inst::IntToFpu { op, rd, rn } => {
1844                 let top16 = match op {
1845                     // SCVTF (32/32-bit)
1846                     IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010,
1847                     // UCVTF (32/32-bit)
1848                     IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011,
1849                     // SCVTF (64/32-bit)
1850                     IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010,
1851                     // UCVTF (64/32-bit)
1852                     IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011,
1853                     // SCVTF (32/64-bit)
1854                     IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010,
1855                     // UCVTF (32/64-bit)
1856                     IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011,
1857                     // SCVTF (64/64-bit)
1858                     IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010,
1859                     // UCVTF (64/64-bit)
1860                     IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011,
1861                 };
1862                 sink.put4(enc_inttofpu(top16, rd, rn));
1863             }
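                 // The two FP-constant pseudo-insts below inline the constant into the
                 // instruction stream: a PC-relative load of the data, an unconditional
                 // branch over it, then the raw bytes. A sketch of the 64-bit layout:
                 //
                 //   ldr <rd>, pc+8     ; load the 8 data bytes
                 //   b   pc+12          ; skip over them
                 //   .8byte const_data
                 //
                 // The 128-bit variant is identical, but with 16 data bytes and a
                 // branch offset of 20.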
1864             &Inst::LoadFpuConst64 { rd, const_data } => {
1865                 let inst = Inst::FpuLoad64 {
1866                     rd,
1867                     mem: AMode::Label(MemLabel::PCRel(8)),
1868                     flags: MemFlags::trusted(),
1869                 };
1870                 inst.emit(sink, emit_info, state);
1871                 let inst = Inst::Jump {
1872                     dest: BranchTarget::ResolvedOffset(12),
1873                 };
1874                 inst.emit(sink, emit_info, state);
1875                 sink.put8(const_data);
1876             }
1877             &Inst::LoadFpuConst128 { rd, const_data } => {
1878                 let inst = Inst::FpuLoad128 {
1879                     rd,
1880                     mem: AMode::Label(MemLabel::PCRel(8)),
1881                     flags: MemFlags::trusted(),
1882                 };
1883                 inst.emit(sink, emit_info, state);
1884                 let inst = Inst::Jump {
1885                     dest: BranchTarget::ResolvedOffset(20),
1886                 };
1887                 inst.emit(sink, emit_info, state);
1888 
1889                 for i in const_data.to_le_bytes().iter() {
1890                     sink.put1(*i);
1891                 }
1892             }
1893             &Inst::FpuCSel32 { rd, rn, rm, cond } => {
1894                 sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32));
1895             }
1896             &Inst::FpuCSel64 { rd, rn, rm, cond } => {
1897                 sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64));
1898             }
1899             &Inst::FpuRound { op, rd, rn } => {
1900                 let top22 = match op {
1901                     FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000,
1902                     FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000,
1903                     FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000,
1904                     FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000,
1905                     FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000,
1906                     FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000,
1907                     FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000,
1908                     FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000,
1909                 };
1910                 sink.put4(enc_fround(top22, rd, rn));
1911             }
1912             &Inst::MovToFpu { rd, rn, size } => {
1913                 let template = match size {
1914                     ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000,
1915                     ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000,
1916                     _ => unreachable!(),
1917                 };
1918                 sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
1919             }
1920             &Inst::MovToVec { rd, rn, idx, size } => {
1921                 let (imm5, shift) = match size.lane_size() {
1922                     ScalarSize::Size8 => (0b00001, 1),
1923                     ScalarSize::Size16 => (0b00010, 2),
1924                     ScalarSize::Size32 => (0b00100, 3),
1925                     ScalarSize::Size64 => (0b01000, 4),
1926                     _ => unreachable!(),
1927                 };
1928                 debug_assert_eq!(idx & (0b11111 >> shift), idx);
1929                 let imm5 = imm5 | ((idx as u32) << shift);
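                     // E.g. inserting into 16-bit lane 5: imm5 = 0b00010 | (5 << 2) =
                     // 0b10110, i.e. `ins v_rd.h[5], w_rn`.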
1930                 sink.put4(
1931                     0b010_01110000_00000_0_0011_1_00000_00000
1932                         | (imm5 << 16)
1933                         | (machreg_to_gpr(rn) << 5)
1934                         | machreg_to_vec(rd.to_reg()),
1935                 );
1936             }
1937             &Inst::MovFromVec { rd, rn, idx, size } => {
1938                 let (q, imm5, shift, mask) = match size {
1939                     VectorSize::Size8x16 => (0b0, 0b00001, 1, 0b1111),
1940                     VectorSize::Size16x8 => (0b0, 0b00010, 2, 0b0111),
1941                     VectorSize::Size32x4 => (0b0, 0b00100, 3, 0b0011),
1942                     VectorSize::Size64x2 => (0b1, 0b01000, 4, 0b0001),
1943                     _ => unreachable!(),
1944                 };
1945                 debug_assert_eq!(idx & mask, idx);
1946                 let imm5 = imm5 | ((idx as u32) << shift);
1947                 sink.put4(
1948                     0b000_01110000_00000_0_0111_1_00000_00000
1949                         | (q << 30)
1950                         | (imm5 << 16)
1951                         | (machreg_to_vec(rn) << 5)
1952                         | machreg_to_gpr(rd.to_reg()),
1953                 );
1954             }
1955             &Inst::MovFromVecSigned {
1956                 rd,
1957                 rn,
1958                 idx,
1959                 size,
1960                 scalar_size,
1961             } => {
1962                 let (imm5, shift, half) = match size {
1963                     VectorSize::Size8x8 => (0b00001, 1, true),
1964                     VectorSize::Size8x16 => (0b00001, 1, false),
1965                     VectorSize::Size16x4 => (0b00010, 2, true),
1966                     VectorSize::Size16x8 => (0b00010, 2, false),
1967                     VectorSize::Size32x2 => {
1968                         debug_assert_ne!(scalar_size, OperandSize::Size32);
1969                         (0b00100, 3, true)
1970                     }
1971                     VectorSize::Size32x4 => {
1972                         debug_assert_ne!(scalar_size, OperandSize::Size32);
1973                         (0b00100, 3, false)
1974                     }
1975                     _ => panic!("Unexpected vector operand size"),
1976                 };
1977                 debug_assert_eq!(idx & (0b11111 >> (half as u32 + shift)), idx);
1978                 let imm5 = imm5 | ((idx as u32) << shift);
1979                 sink.put4(
1980                     0b000_01110000_00000_0_0101_1_00000_00000
1981                         | (scalar_size.is64() as u32) << 30
1982                         | (imm5 << 16)
1983                         | (machreg_to_vec(rn) << 5)
1984                         | machreg_to_gpr(rd.to_reg()),
1985                 );
1986             }
1987             &Inst::VecDup { rd, rn, size } => {
1988                 let imm5 = match size {
1989                     VectorSize::Size8x16 => 0b00001,
1990                     VectorSize::Size16x8 => 0b00010,
1991                     VectorSize::Size32x4 => 0b00100,
1992                     VectorSize::Size64x2 => 0b01000,
1993                     _ => unimplemented!(),
1994                 };
1995                 sink.put4(
1996                     0b010_01110000_00000_000011_00000_00000
1997                         | (imm5 << 16)
1998                         | (machreg_to_gpr(rn) << 5)
1999                         | machreg_to_vec(rd.to_reg()),
2000                 );
2001             }
2002             &Inst::VecDupFromFpu { rd, rn, size } => {
2003                 let imm5 = match size {
2004                     VectorSize::Size32x4 => 0b00100,
2005                     VectorSize::Size64x2 => 0b01000,
2006                     _ => unimplemented!(),
2007                 };
2008                 sink.put4(
2009                     0b010_01110000_00000_000001_00000_00000
2010                         | (imm5 << 16)
2011                         | (machreg_to_vec(rn) << 5)
2012                         | machreg_to_vec(rd.to_reg()),
2013                 );
2014             }
2015             &Inst::VecDupFPImm { rd, imm, size } => {
2016                 let imm = imm.enc_bits();
2017                 let op = match size.lane_size() {
2018                     ScalarSize::Size32 => 0,
2019                     ScalarSize::Size64 => 1,
2020                     _ => unimplemented!(),
2021                 };
2022                 let q_op = op | ((size.is_128bits() as u32) << 1);
2023 
2024                 sink.put4(enc_asimd_mod_imm(rd, q_op, 0b1111, imm));
2025             }
2026             &Inst::VecDupImm {
2027                 rd,
2028                 imm,
2029                 invert,
2030                 size,
2031             } => {
2032                 let (imm, shift, shift_ones) = imm.value();
2033                 let (op, cmode) = match size.lane_size() {
2034                     ScalarSize::Size8 => {
2035                         assert!(!invert);
2036                         assert_eq!(shift, 0);
2037 
2038                         (0, 0b1110)
2039                     }
2040                     ScalarSize::Size16 => {
2041                         let s = shift & 8;
2042 
2043                         assert!(!shift_ones);
2044                         assert_eq!(s, shift);
2045 
2046                         (invert as u32, 0b1000 | (s >> 2))
2047                     }
2048                     ScalarSize::Size32 => {
2049                         if shift_ones {
2050                             assert!(shift == 8 || shift == 16);
2051 
2052                             (invert as u32, 0b1100 | (shift >> 4))
2053                         } else {
2054                             let s = shift & 24;
2055 
2056                             assert_eq!(s, shift);
2057 
2058                             (invert as u32, 0b0000 | (s >> 2))
2059                         }
2060                     }
2061                     ScalarSize::Size64 => {
2062                         assert!(!invert);
2063                         assert_eq!(shift, 0);
2064 
2065                         (1, 0b1110)
2066                     }
2067                     _ => unreachable!(),
2068                 };
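                     // E.g. a 32-bit lane immediate with shift == 16 (and !shift_ones)
                     // yields cmode = 0b0000 | (16 >> 2) = 0b0100, the "shifted
                     // immediate, LSL #16" row of the modified-immediate cmode table.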
2069                 let q_op = op | ((size.is_128bits() as u32) << 1);
2070 
2071                 sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm));
2072             }
2073             &Inst::VecExtend {
2074                 t,
2075                 rd,
2076                 rn,
2077                 high_half,
2078             } => {
2079                 let (u, immh) = match t {
2080                     VecExtendOp::Sxtl8 => (0b0, 0b001),
2081                     VecExtendOp::Sxtl16 => (0b0, 0b010),
2082                     VecExtendOp::Sxtl32 => (0b0, 0b100),
2083                     VecExtendOp::Uxtl8 => (0b1, 0b001),
2084                     VecExtendOp::Uxtl16 => (0b1, 0b010),
2085                     VecExtendOp::Uxtl32 => (0b1, 0b100),
2086                 };
2087                 sink.put4(
2088                     0b000_011110_0000_000_101001_00000_00000
2089                         | ((high_half as u32) << 30)
2090                         | (u << 29)
2091                         | (immh << 19)
2092                         | (machreg_to_vec(rn) << 5)
2093                         | machreg_to_vec(rd.to_reg()),
2094                 );
2095             }
2096             &Inst::VecRRLong {
2097                 op,
2098                 rd,
2099                 rn,
2100                 high_half,
2101             } => {
2102                 let (u, size, bits_12_16) = match op {
2103                     VecRRLongOp::Fcvtl16 => (0b0, 0b00, 0b10111),
2104                     VecRRLongOp::Fcvtl32 => (0b0, 0b01, 0b10111),
2105                     VecRRLongOp::Shll8 => (0b1, 0b00, 0b10011),
2106                     VecRRLongOp::Shll16 => (0b1, 0b01, 0b10011),
2107                     VecRRLongOp::Shll32 => (0b1, 0b10, 0b10011),
2108                 };
2109 
2110                 sink.put4(enc_vec_rr_misc(
2111                     ((high_half as u32) << 1) | u,
2112                     size,
2113                     bits_12_16,
2114                     rd,
2115                     rn,
2116                 ));
2117             }
2118             &Inst::VecRRNarrow {
2119                 op,
2120                 rd,
2121                 rn,
2122                 high_half,
2123             } => {
2124                 let (u, size, bits_12_16) = match op {
2125                     VecRRNarrowOp::Xtn16 => (0b0, 0b00, 0b10010),
2126                     VecRRNarrowOp::Xtn32 => (0b0, 0b01, 0b10010),
2127                     VecRRNarrowOp::Xtn64 => (0b0, 0b10, 0b10010),
2128                     VecRRNarrowOp::Sqxtn16 => (0b0, 0b00, 0b10100),
2129                     VecRRNarrowOp::Sqxtn32 => (0b0, 0b01, 0b10100),
2130                     VecRRNarrowOp::Sqxtn64 => (0b0, 0b10, 0b10100),
2131                     VecRRNarrowOp::Sqxtun16 => (0b1, 0b00, 0b10010),
2132                     VecRRNarrowOp::Sqxtun32 => (0b1, 0b01, 0b10010),
2133                     VecRRNarrowOp::Sqxtun64 => (0b1, 0b10, 0b10010),
2134                     VecRRNarrowOp::Uqxtn16 => (0b1, 0b00, 0b10100),
2135                     VecRRNarrowOp::Uqxtn32 => (0b1, 0b01, 0b10100),
2136                     VecRRNarrowOp::Uqxtn64 => (0b1, 0b10, 0b10100),
2137                     VecRRNarrowOp::Fcvtn32 => (0b0, 0b00, 0b10110),
2138                     VecRRNarrowOp::Fcvtn64 => (0b0, 0b01, 0b10110),
2139                 };
2140 
2141                 sink.put4(enc_vec_rr_misc(
2142                     ((high_half as u32) << 1) | u,
2143                     size,
2144                     bits_12_16,
2145                     rd,
2146                     rn,
2147                 ));
2148             }
2149             &Inst::VecMovElement {
2150                 rd,
2151                 rn,
2152                 dest_idx,
2153                 src_idx,
2154                 size,
2155             } => {
2156                 let (imm5, shift) = match size.lane_size() {
2157                     ScalarSize::Size8 => (0b00001, 1),
2158                     ScalarSize::Size16 => (0b00010, 2),
2159                     ScalarSize::Size32 => (0b00100, 3),
2160                     ScalarSize::Size64 => (0b01000, 4),
2161                     _ => unreachable!(),
2162                 };
2163                 let mask = 0b11111 >> shift;
2164                 debug_assert_eq!(dest_idx & mask, dest_idx);
2165                 debug_assert_eq!(src_idx & mask, src_idx);
2166                 let imm4 = (src_idx as u32) << (shift - 1);
2167                 let imm5 = imm5 | ((dest_idx as u32) << shift);
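                     // E.g. moving 32-bit lane 1 into lane 3: imm5 = 0b00100 | (3 << 3) =
                     // 0b11100 and imm4 = 1 << 2 = 0b0100, i.e. `ins v_rd.s[3], v_rn.s[1]`.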
2168                 sink.put4(
2169                     0b011_01110000_00000_0_0000_1_00000_00000
2170                         | (imm5 << 16)
2171                         | (imm4 << 11)
2172                         | (machreg_to_vec(rn) << 5)
2173                         | machreg_to_vec(rd.to_reg()),
2174                 );
2175             }
2176             &Inst::VecRRPair { op, rd, rn } => {
2177                 let bits_12_16 = match op {
2178                     VecPairOp::Addp => 0b11011,
2179                 };
2180 
2181                 sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn));
2182             }
2183             &Inst::VecRRRLong {
2184                 rd,
2185                 rn,
2186                 rm,
2187                 alu_op,
2188                 high_half,
2189             } => {
2190                 let (u, size, bit14) = match alu_op {
2191                     VecRRRLongOp::Smull8 => (0b0, 0b00, 0b1),
2192                     VecRRRLongOp::Smull16 => (0b0, 0b01, 0b1),
2193                     VecRRRLongOp::Smull32 => (0b0, 0b10, 0b1),
2194                     VecRRRLongOp::Umull8 => (0b1, 0b00, 0b1),
2195                     VecRRRLongOp::Umull16 => (0b1, 0b01, 0b1),
2196                     VecRRRLongOp::Umull32 => (0b1, 0b10, 0b1),
2197                     VecRRRLongOp::Umlal8 => (0b1, 0b00, 0b0),
2198                     VecRRRLongOp::Umlal16 => (0b1, 0b01, 0b0),
2199                     VecRRRLongOp::Umlal32 => (0b1, 0b10, 0b0),
2200                 };
2201                 sink.put4(enc_vec_rrr_long(
2202                     high_half as u32,
2203                     u,
2204                     size,
2205                     bit14,
2206                     rm,
2207                     rn,
2208                     rd,
2209                 ));
2210             }
2211             &Inst::VecRRPairLong { op, rd, rn } => {
2212                 let (u, size) = match op {
2213                     VecRRPairLongOp::Saddlp8 => (0b0, 0b0),
2214                     VecRRPairLongOp::Uaddlp8 => (0b1, 0b0),
2215                     VecRRPairLongOp::Saddlp16 => (0b0, 0b1),
2216                     VecRRPairLongOp::Uaddlp16 => (0b1, 0b1),
2217                 };
2218 
2219                 sink.put4(enc_vec_rr_pair_long(u, size, rd, rn));
2220             }
2221             &Inst::VecRRR {
2222                 rd,
2223                 rn,
2224                 rm,
2225                 alu_op,
2226                 size,
2227             } => {
2228                 let (q, enc_size) = size.enc_size();
2229                 let is_float = match alu_op {
2230                     VecALUOp::Fcmeq
2231                     | VecALUOp::Fcmgt
2232                     | VecALUOp::Fcmge
2233                     | VecALUOp::Fadd
2234                     | VecALUOp::Fsub
2235                     | VecALUOp::Fdiv
2236                     | VecALUOp::Fmax
2237                     | VecALUOp::Fmin
2238                     | VecALUOp::Fmul => true,
2239                     _ => false,
2240                 };
2241                 let enc_float_size = match (is_float, size) {
2242                     (true, VectorSize::Size32x2) => 0b0,
2243                     (true, VectorSize::Size32x4) => 0b0,
2244                     (true, VectorSize::Size64x2) => 0b1,
2245                     (true, _) => unimplemented!(),
2246                     _ => 0,
2247                 };
2248 
2249                 let (top11, bit15_10) = match alu_op {
2250                     VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011),
2251                     VecALUOp::Sqsub => (0b000_01110_00_1 | enc_size << 1, 0b001011),
2252                     VecALUOp::Uqadd => (0b001_01110_00_1 | enc_size << 1, 0b000011),
2253                     VecALUOp::Uqsub => (0b001_01110_00_1 | enc_size << 1, 0b001011),
2254                     VecALUOp::Cmeq => (0b001_01110_00_1 | enc_size << 1, 0b100011),
2255                     VecALUOp::Cmge => (0b000_01110_00_1 | enc_size << 1, 0b001111),
2256                     VecALUOp::Cmgt => (0b000_01110_00_1 | enc_size << 1, 0b001101),
2257                     VecALUOp::Cmhi => (0b001_01110_00_1 | enc_size << 1, 0b001101),
2258                     VecALUOp::Cmhs => (0b001_01110_00_1 | enc_size << 1, 0b001111),
2259                     VecALUOp::Fcmeq => (0b000_01110_00_1, 0b111001),
2260                     VecALUOp::Fcmgt => (0b001_01110_10_1, 0b111001),
2261                     VecALUOp::Fcmge => (0b001_01110_00_1, 0b111001),
2262                     // The following logical instructions operate on bytes, so they are
2263                     // not encoded differently for the different vector types.
2264                     VecALUOp::And => (0b000_01110_00_1, 0b000111),
2265                     VecALUOp::Bic => (0b000_01110_01_1, 0b000111),
2266                     VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
2267                     VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
2268                     VecALUOp::Bsl => (0b001_01110_01_1, 0b000111),
2269                     VecALUOp::Umaxp => (0b001_01110_00_1 | enc_size << 1, 0b101001),
2270                     VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001),
2271                     VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001),
2272                     VecALUOp::Mul => {
2273                         debug_assert_ne!(size, VectorSize::Size64x2);
2274                         (0b000_01110_00_1 | enc_size << 1, 0b100111)
2275                     }
2276                     VecALUOp::Sshl => (0b000_01110_00_1 | enc_size << 1, 0b010001),
2277                     VecALUOp::Ushl => (0b001_01110_00_1 | enc_size << 1, 0b010001),
2278                     VecALUOp::Umin => (0b001_01110_00_1 | enc_size << 1, 0b011011),
2279                     VecALUOp::Smin => (0b000_01110_00_1 | enc_size << 1, 0b011011),
2280                     VecALUOp::Umax => (0b001_01110_00_1 | enc_size << 1, 0b011001),
2281                     VecALUOp::Smax => (0b000_01110_00_1 | enc_size << 1, 0b011001),
2282                     VecALUOp::Urhadd => (0b001_01110_00_1 | enc_size << 1, 0b000101),
2283                     VecALUOp::Fadd => (0b000_01110_00_1, 0b110101),
2284                     VecALUOp::Fsub => (0b000_01110_10_1, 0b110101),
2285                     VecALUOp::Fdiv => (0b001_01110_00_1, 0b111111),
2286                     VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),
2287                     VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
2288                     VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
2289                     VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
2290                     VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
2291                     VecALUOp::Sqrdmulh => {
2292                         debug_assert!(
2293                             size.lane_size() == ScalarSize::Size16
2294                                 || size.lane_size() == ScalarSize::Size32
2295                         );
2296 
2297                         (0b001_01110_00_1 | enc_size << 1, 0b101101)
2298                     }
2299                 };
2300                 let top11 = if is_float {
2301                     top11 | enc_float_size << 1
2302                 } else {
2303                     top11
2304                 };
2305                 sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
2306             }
2307             &Inst::VecLoadReplicate { rd, rn, size } => {
2308                 let (q, size) = size.enc_size();
2309 
2310                 let srcloc = state.cur_srcloc();
2311                 if srcloc != SourceLoc::default() {
2312                     // Register the offset at which the actual load instruction starts.
2313                     sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
2314                 }
2315 
2316                 sink.put4(enc_ldst_vec(q, size, rn, rd));
2317             }
2318             &Inst::VecCSel { rd, rn, rm, cond } => {
2319                 /* Emit this:
2320                       b.cond  else
2321                       mov     rd, rm
2322                       b       out
2323                      else:
2324                       mov     rd, rn
2325                      out:
2326 
2327                    Note that we could do better in the cases where rd == rn or rd == rm.
2328                 */
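                     // (One reason for a branchy expansion: NEON has no flag-based
                     // conditional select over whole vectors, so a branchless version
                     // would first have to materialize the condition as a vector mask
                     // for BSL.)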
2329                 let else_label = sink.get_label();
2330                 let out_label = sink.get_label();
2331 
2332                 // b.cond else
2333                 let br_else_offset = sink.cur_offset();
2334                 sink.put4(enc_conditional_br(
2335                     BranchTarget::Label(else_label),
2336                     CondBrKind::Cond(cond),
2337                 ));
2338                 sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19);
2339 
2340                 // mov rd, rm
2341                 sink.put4(enc_vecmov(/* 16b = */ true, rd, rm));
2342 
2343                 // b out
2344                 let b_out_offset = sink.cur_offset();
2345                 sink.use_label_at_offset(b_out_offset, out_label, LabelUse::Branch26);
2346                 sink.add_uncond_branch(b_out_offset, b_out_offset + 4, out_label);
2347                 sink.put4(enc_jump26(0b000101, 0 /* will be fixed up later */));
2348 
2349                 // else:
2350                 sink.bind_label(else_label);
2351 
2352                 // mov rd, rn
2353                 sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
2354 
2355                 // out:
2356                 sink.bind_label(out_label);
2357             }
2358             &Inst::MovToNZCV { rn } => {
2359                 sink.put4(0xd51b4200 | machreg_to_gpr(rn));
2360             }
2361             &Inst::MovFromNZCV { rd } => {
2362                 sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg()));
2363             }
2364             &Inst::Extend {
2365                 rd,
2366                 rn,
2367                 signed: false,
2368                 from_bits: 1,
2369                 to_bits,
2370             } => {
2371                 assert!(to_bits <= 64);
2372                 // Reduce zero-extend-from-1-bit to:
2373                 // - and rd, rn, #1
2374                 // Note: this is special-cased because UBFX may take more cycles than AND
2375                 // on smaller cores.
2376                 let imml = ImmLogic::maybe_from_u64(1, I32).unwrap();
2377                 Inst::AluRRImmLogic {
2378                     alu_op: ALUOp::And32,
2379                     rd,
2380                     rn,
2381                     imml,
2382                 }
2383                 .emit(sink, emit_info, state);
2384             }
2385             &Inst::Extend {
2386                 rd,
2387                 rn,
2388                 signed: false,
2389                 from_bits: 32,
2390                 to_bits: 64,
2391             } => {
2392                 let mov = Inst::Mov32 { rd, rm: rn };
2393                 mov.emit(sink, emit_info, state);
2394             }
2395             &Inst::Extend {
2396                 rd,
2397                 rn,
2398                 signed,
2399                 from_bits,
2400                 to_bits,
2401             } => {
2402                 let (opc, size) = if signed {
2403                     (0b00, OperandSize::from_bits(to_bits))
2404                 } else {
2405                     (0b10, OperandSize::Size32)
2406                 };
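                     // E.g. a signed i8 -> i64 extend emits `sbfm x_rd, x_rn, #0, #7`
                     // (the sxtb alias); the unsigned case can use the 32-bit form,
                     // since a write to a W register zeroes the upper 32 bits.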
2407                 sink.put4(enc_bfm(opc, size, rd, rn, 0, from_bits - 1));
2408             }
2409             &Inst::Jump { ref dest } => {
2410                 let off = sink.cur_offset();
2411                 // If the destination is a label, record the use so that a fixup can occur later.
2412                 if let Some(l) = dest.as_label() {
2413                     sink.use_label_at_offset(off, l, LabelUse::Branch26);
2414                     sink.add_uncond_branch(off, off + 4, l);
2415                 }
2416                 // Emit the jump itself.
2417                 sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
2418             }
2419             &Inst::Ret => {
2420                 sink.put4(0xd65f03c0);
2421             }
2422             &Inst::EpiloguePlaceholder => {
2423                 // Noop; this is just a placeholder for epilogues.
2424             }
2425             &Inst::Call { ref info } => {
2426                 if let Some(s) = state.take_stack_map() {
2427                     sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
2428                 }
2429                 let loc = state.cur_srcloc();
2430                 sink.add_reloc(loc, Reloc::Arm64Call, &info.dest, 0);
2431                 sink.put4(enc_jump26(0b100101, 0));
2432                 if info.opcode.is_call() {
2433                     sink.add_call_site(loc, info.opcode);
2434                 }
2435             }
2436             &Inst::CallInd { ref info } => {
2437                 if let Some(s) = state.take_stack_map() {
2438                     sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
2439                 }
                sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5));
                let loc = state.cur_srcloc();
                if info.opcode.is_call() {
                    sink.add_call_site(loc, info.opcode);
                }
            }
            &Inst::CondBr {
                taken,
                not_taken,
                kind,
            } => {
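                // A two-target conditional branch is emitted as a conditional
                // branch to `taken` followed by an unconditional branch to
                // `not_taken`. We hand the sink the *inverted* encoding of the
                // conditional part so that its branch-folding logic can flip
                // the condition and elide the unconditional jump when the
                // final block layout allows.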
                // Conditional part first.
                let cond_off = sink.cur_offset();
                if let Some(l) = taken.as_label() {
                    sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
                    let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
                    sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
                }
                sink.put4(enc_conditional_br(taken, kind));

                // Unconditional part next.
                let uncond_off = sink.cur_offset();
                if let Some(l) = not_taken.as_label() {
                    sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
                    sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
                }
                sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
            }
            &Inst::TrapIf { kind, trap_code } => {
                // condbr KIND, LABEL
                let off = sink.cur_offset();
                let label = sink.get_label();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(label),
                    kind.invert(),
                ));
                sink.use_label_at_offset(off, label, LabelUse::Branch19);
                // udf
                let trap = Inst::Udf { trap_code };
                trap.emit(sink, emit_info, state);
                // LABEL:
                sink.bind_label(label);
            }
            &Inst::IndirectBr { rn, .. } => {
                sink.put4(enc_br(rn));
            }
            &Inst::Nop0 => {}
            &Inst::Nop4 => {
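                // HINT #0, i.e. NOP.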
                sink.put4(0xd503201f);
            }
            &Inst::Brk => {
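                // BRK #0.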
                sink.put4(0xd4200000);
            }
            &Inst::Udf { trap_code } => {
                let srcloc = state.cur_srcloc();
                sink.add_trap(srcloc, trap_code);
                if let Some(s) = state.take_stack_map() {
                    sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
                }
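                // 0xd4a00000 lies in the exception-generation encoding class
                // but has an unallocated opc/LL combination, so executing it
                // reliably raises an undefined-instruction exception.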
                sink.put4(0xd4a00000);
            }
            &Inst::Adr { rd, off } => {
                assert!(off > -(1 << 20));
                assert!(off < (1 << 20));
                sink.put4(enc_adr(off, rd));
            }
            &Inst::Word4 { data } => {
                sink.put4(data);
            }
            &Inst::Word8 { data } => {
                sink.put8(data);
            }
            &Inst::JTSequence {
                ridx,
                rtmp1,
                rtmp2,
                ref info,
                ..
            } => {
                // This sequence is *one* instruction in the vcode, and is expanded only here at
                // emission time, because we cannot allow the regalloc to insert spills/reloads in
                // the middle; we depend on hardcoded PC-rel addressing below.

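                // The emitted sequence is:
                //   b.hs  default                          ; flags set by the prior bounds check
                //   mov   rtmp2, ridx
                //   adr   rtmp1, <table>
                //   ldrsw rtmp2, [rtmp1, rtmp2, uxtw #2]
                //   add   rtmp1, rtmp1, rtmp2
                //   br    rtmp1
                //   <table of 32-bit offsets relative to its own start>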
                // Branch to default when condition code from prior comparison indicates.
                let br = enc_conditional_br(info.default_target, CondBrKind::Cond(Cond::Hs));
                // No need to inform the sink's branch folding logic about this branch, because it
                // will not be merged with any other branch, flipped, or elided (it is not preceded
                // or succeeded by any other branch). Just emit it with the label use.
                let default_br_offset = sink.cur_offset();
                if let BranchTarget::Label(l) = info.default_target {
                    sink.use_label_at_offset(default_br_offset, l, LabelUse::Branch19);
                }
                sink.put4(br);

                // Save index in a tmp (the live range of ridx only goes to start of this
                // sequence; rtmp1 or rtmp2 may overwrite it).
                let inst = Inst::gen_move(rtmp2, ridx, I64);
                inst.emit(sink, emit_info, state);
                // Load address of jump table
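                // `off: 16` is measured from the ADR itself: it skips the ADR,
                // the table load, the add, and the BR below (4 instructions x
                // 4 bytes), landing on the first jump-table entry.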
                let inst = Inst::Adr { rd: rtmp1, off: 16 };
                inst.emit(sink, emit_info, state);
                // Load value out of jump table
                let inst = Inst::SLoad32 {
                    rd: rtmp2,
                    mem: AMode::reg_plus_reg_scaled_extended(
                        rtmp1.to_reg(),
                        rtmp2.to_reg(),
                        I32,
                        ExtendOp::UXTW,
                    ),
                    flags: MemFlags::trusted(),
                };
                inst.emit(sink, emit_info, state);
                // Add base of jump table to jump-table-sourced block offset
                let inst = Inst::AluRRR {
                    alu_op: ALUOp::Add64,
                    rd: rtmp1,
                    rn: rtmp1.to_reg(),
                    rm: rtmp2.to_reg(),
                };
                inst.emit(sink, emit_info, state);
                // Branch to computed address. (`targets` here is only used for successor queries
                // and is not needed for emission.)
                let inst = Inst::IndirectBr {
                    rn: rtmp1.to_reg(),
                    targets: vec![],
                };
                inst.emit(sink, emit_info, state);
                // Emit jump table (table of 32-bit offsets).
                let jt_off = sink.cur_offset();
                for &target in info.targets.iter() {
                    let word_off = sink.cur_offset();
                    // `off_into_table` is an addend embedded in the label use, to be patched at
                    // the end of codegen. The offset is initially relative to this jump-table
                    // entry; with the extra addend, it becomes relative to the jump table's
                    // start after patching.
                    let off_into_table = word_off - jt_off;
                    sink.use_label_at_offset(
                        word_off,
                        target.as_label().unwrap(),
                        LabelUse::PCRel32,
                    );
                    sink.put4(off_into_table);
                }

                // Lowering produces an EmitIsland before using a JTSequence, so we can safely
                // disable the worst-case-size check in this case.
                start_off = sink.cur_offset();
            }
            &Inst::LoadExtName {
                rd,
                ref name,
                offset,
            } => {
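                // The emitted layout is:
                //   ldr rd, #8          ; load the 8-byte literal below
                //   b   #12             ; skip over the literal
                //   .quad <name+offset> ; filled in by the Abs8 relocation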
                let inst = Inst::ULoad64 {
                    rd,
                    mem: AMode::Label(MemLabel::PCRel(8)),
                    flags: MemFlags::trusted(),
                };
                inst.emit(sink, emit_info, state);
                let inst = Inst::Jump {
                    dest: BranchTarget::ResolvedOffset(12),
                };
                inst.emit(sink, emit_info, state);
                let srcloc = state.cur_srcloc();
                sink.add_reloc(srcloc, Reloc::Abs8, name, offset);
                if emit_info.flags().emit_all_ones_funcaddrs() {
                    sink.put8(u64::max_value());
                } else {
                    sink.put8(0);
                }
            }
            &Inst::LoadAddr { rd, ref mem } => {
                let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                let (reg, index_reg, offset) = match mem {
                    AMode::RegExtended(r, idx, extendop) => (r, Some((idx, extendop)), 0),
                    AMode::Unscaled(r, simm9) => (r, None, simm9.value()),
                    AMode::UnsignedOffset(r, uimm12scaled) => {
                        (r, None, uimm12scaled.value() as i32)
                    }
                    _ => panic!("Unsupported case for LoadAddr: {:?}", mem),
                };
                let abs_offset = if offset < 0 {
                    -offset as u64
                } else {
                    offset as u64
                };
                let alu_op = if offset < 0 {
                    ALUOp::Sub64
                } else {
                    ALUOp::Add64
                };

                if let Some((idx, extendop)) = index_reg {
                    let add = Inst::AluRRRExtend {
                        alu_op: ALUOp::Add64,
                        rd,
                        rn: reg,
                        rm: idx,
                        extendop,
                    };

                    add.emit(sink, emit_info, state);
                } else if offset == 0 {
                    if reg != rd.to_reg() {
                        let mov = Inst::Mov64 { rd, rm: reg };

                        mov.emit(sink, emit_info, state);
                    }
                } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
                    let add = Inst::AluRRImm12 {
                        alu_op,
                        rd,
                        rn: reg,
                        imm12,
                    };
                    add.emit(sink, emit_info, state);
                } else {
                    // Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction
                    // was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
                    // that no other instructions will be inserted here (we're emitting directly),
                    // and a live range of `tmp2` should not span this instruction, so this use
                    // should otherwise be correct.
                    debug_assert!(rd.to_reg() != tmp2_reg());
                    debug_assert!(reg != tmp2_reg());
                    let tmp = writable_tmp2_reg();
                    for insn in Inst::load_constant(tmp, abs_offset).into_iter() {
                        insn.emit(sink, emit_info, state);
                    }
                    let add = Inst::AluRRR {
                        alu_op,
                        rd,
                        rn: reg,
                        rm: tmp.to_reg(),
                    };
                    add.emit(sink, emit_info, state);
                }
            }
            &Inst::VirtualSPOffsetAdj { offset } => {
                log::trace!(
                    "virtual sp offset adjusted by {} -> {}",
                    offset,
                    state.virtual_sp_offset + offset,
                );
                state.virtual_sp_offset += offset;
            }
            &Inst::EmitIsland { needed_space } => {
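                // Ask for `needed_space` plus room for the 4-byte jump below.
                // If an island is emitted here, branch around it: islands hold
                // out-of-line data (constants, veneers), not code that should
                // fall through.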
                if sink.island_needed(needed_space + 4) {
                    let jump_around_label = sink.get_label();
                    let jmp = Inst::Jump {
                        dest: BranchTarget::Label(jump_around_label),
                    };
                    jmp.emit(sink, emit_info, state);
                    sink.emit_island();
                    sink.bind_label(jump_around_label);
                }
            }

            &Inst::ElfTlsGetAddr { ref symbol } => {
                // This is the instruction sequence that GCC emits for ELF general-dynamic (GD)
                // TLS accesses on aarch64. See: https://gcc.godbolt.org/z/KhMh5Gvra

                // adrp x0, <label>
                sink.add_reloc(state.cur_srcloc(), Reloc::Aarch64TlsGdAdrPage21, symbol, 0);
                sink.put4(0x90000000);

                // add x0, x0, <label>
                sink.add_reloc(state.cur_srcloc(), Reloc::Aarch64TlsGdAddLo12Nc, symbol, 0);
                sink.put4(0x91000000);

                // bl __tls_get_addr
                sink.add_reloc(
                    state.cur_srcloc(),
                    Reloc::Arm64Call,
                    &ExternalName::LibCall(LibCall::ElfTlsGetAddr),
                    0,
                );
                sink.put4(0x94000000);

                // nop
                sink.put4(0xd503201f);
            }

            &Inst::ValueLabelMarker { .. } => {
                // Nothing; this is only used to compute debug info.
            }

            &Inst::Unwind { ref inst } => {
                sink.add_unwind(inst.clone());
            }
        }

        let end_off = sink.cur_offset();
        debug_assert!((end_off - start_off) <= Inst::worst_case_size());

        state.clear_post_insn();
    }

    fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String {
        self.print_with_state(mb_rru, state)
    }
}