//! AArch64 ISA: binary code emission.

use crate::binemit::{CodeOffset, Reloc, StackMap};
use crate::ir::constant::ConstantData;
use crate::ir::types::*;
use crate::ir::{MemFlags, TrapCode};
use crate::isa::aarch64::inst::*;
use crate::machinst::ty_bits;

use regalloc::{Reg, RegClass, Writable};

use core::convert::TryFrom;
use log::debug;

/// Memory label/reference finalization: convert a MemLabel to a PC-relative
/// offset, possibly emitting relocation(s) as necessary.
pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 {
    match label {
        &MemLabel::PCRel(rel) => rel,
    }
}

/// Memory addressing mode finalization: convert "special" modes (e.g.,
/// generic arbitrary stack offset) into real addressing modes, possibly by
/// emitting some helper instructions that come immediately before the use
/// of this amode.
pub fn mem_finalize(
    insn_off: CodeOffset,
    mem: &AMode,
    state: &EmitState,
) -> (SmallVec<[Inst; 4]>, AMode) {
    match mem {
        &AMode::RegOffset(_, off, ty)
        | &AMode::SPOffset(off, ty)
        | &AMode::FPOffset(off, ty)
        | &AMode::NominalSPOffset(off, ty) => {
            let basereg = match mem {
                &AMode::RegOffset(reg, _, _) => reg,
                &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => stack_reg(),
                &AMode::FPOffset(..) => fp_reg(),
                _ => unreachable!(),
            };
            let adj = match mem {
                &AMode::NominalSPOffset(..) => {
                    debug!(
                        "mem_finalize: nominal SP offset {} + adj {} -> {}",
                        off,
                        state.virtual_sp_offset,
                        off + state.virtual_sp_offset
                    );
                    state.virtual_sp_offset
                }
                _ => 0,
            };
            let off = off + adj;

            if let Some(simm9) = SImm9::maybe_from_i64(off) {
                let mem = AMode::Unscaled(basereg, simm9);
                (smallvec![], mem)
            } else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(off, ty) {
                let mem = AMode::UnsignedOffset(basereg, uimm12s);
                (smallvec![], mem)
            } else {
                let tmp = writable_spilltmp_reg();
                let mut const_insts = Inst::load_constant(tmp, off as u64);
                // N.B.: we must use AluRRRExtend because AluRRR uses the "shifted register" form
                // (AluRRRShift) instead, which interprets register 31 as the zero reg, not SP. SP
                // is a valid base (for SPOffset) which we must handle here.
                // Also, SP needs to be the first arg, not second.
                let add_inst = Inst::AluRRRExtend {
                    alu_op: ALUOp::Add64,
                    rd: tmp,
                    rn: basereg,
                    rm: tmp.to_reg(),
                    extendop: ExtendOp::UXTX,
                };
                const_insts.push(add_inst);
                (const_insts, AMode::reg(tmp.to_reg()))
            }
        }

        &AMode::Label(ref label) => {
            let off = memlabel_finalize(insn_off, label);
            (smallvec![], AMode::Label(MemLabel::PCRel(off)))
        }

        _ => (smallvec![], mem.clone()),
    }
}

/// Helper: get a ConstantData from a u64.
pub fn u64_constant(bits: u64) -> ConstantData {
    let data = bits.to_le_bytes();
    ConstantData::from(&data[..])
}

//=============================================================================
// Instructions and subcomponents: emission
fn machreg_to_gpr(m: Reg) -> u32 {
    assert_eq!(m.get_class(), RegClass::I64);
    u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
}

fn machreg_to_vec(m: Reg) -> u32 {
    assert_eq!(m.get_class(), RegClass::V128);
    u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
}

fn machreg_to_gpr_or_vec(m: Reg) -> u32 {
    u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
}

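/// Encode a three-register arithmetic/logical instruction. The caller supplies
/// the opcode fields; the registers land in the standard AArch64 slots
/// (Rd in bits 4:0, Rn in bits 9:5, Rm in bits 20:16).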
fn enc_arith_rrr(bits_31_21: u32, bits_15_10: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    (bits_31_21 << 21)
        | (bits_15_10 << 10)
        | machreg_to_gpr(rd.to_reg())
        | (machreg_to_gpr(rn) << 5)
        | (machreg_to_gpr(rm) << 16)
}

fn enc_arith_rr_imm12(
    bits_31_24: u32,
    immshift: u32,
    imm12: u32,
    rn: Reg,
    rd: Writable<Reg>,
) -> u32 {
    (bits_31_24 << 24)
        | (immshift << 22)
        | (imm12 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}

fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
}

fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {
    (top11 << 21)
        | (machreg_to_gpr(rm) << 16)
        | (bit15 << 15)
        | (machreg_to_gpr(ra) << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}

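/// Encode an unconditional branch (B/BL): a 26-bit signed word offset in bits
/// 25:0, giving a PC-relative range of +/-128 MiB.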
fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 {
    assert!(off_26_0 < (1 << 26));
    (op_31_26 << 26) | off_26_0
}

fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 {
    assert!(off_18_0 < (1 << 19));
    (op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg)
}

fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {
    assert!(off_18_0 < (1 << 19));
    assert!(cond < (1 << 4));
    (op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond
}

fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
    match kind {
        CondBrKind::Zero(reg) => enc_cmpbr(0b1_011010_0, taken.as_offset19_or_zero(), reg),
        CondBrKind::NotZero(reg) => enc_cmpbr(0b1_011010_1, taken.as_offset19_or_zero(), reg),
        CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),
    }
}

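/// Base opcode for the move-wide family (MOVN/MOVZ/MOVK). `enc_move_wide`
/// below ORs in the variant (bits 30:29), the `hw` field selecting which
/// 16-bit chunk of the register to write (bits 22:21), and the immediate
/// itself (bits 20:5).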
const MOVE_WIDE_FIXED: u32 = 0x12800000;

#[repr(u32)]
enum MoveWideOpcode {
    MOVN = 0b00,
    MOVZ = 0b10,
    MOVK = 0b11,
}

fn enc_move_wide(
    op: MoveWideOpcode,
    rd: Writable<Reg>,
    imm: MoveWideConst,
    size: OperandSize,
) -> u32 {
    assert!(imm.shift <= 0b11);
    MOVE_WIDE_FIXED
        | size.sf_bit() << 31
        | (op as u32) << 29
        | u32::from(imm.shift) << 21
        | u32::from(imm.bits) << 5
        | machreg_to_gpr(rd.to_reg())
}

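/// Encode a load/store-pair (LDP/STP-style) instruction: the scaled 7-bit
/// signed offset goes in bits 21:15, Rt2 in bits 14:10, and Rt in bits 4:0.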
fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 {
    (op_31_22 << 22)
        | (simm7.bits() << 15)
        | (machreg_to_gpr(rt2) << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}

fn enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 {
    (op_31_22 << 22)
        | (simm9.bits() << 12)
        | (op_11_10 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 {
    (op_31_22 << 22)
        | (0b1 << 24)
        | (uimm12.bits() << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_reg(
    op_31_22: u32,
    rn: Reg,
    rm: Reg,
    s_bit: bool,
    extendop: Option<ExtendOp>,
    rd: Reg,
) -> u32 {
    let s_bit = if s_bit { 1 } else { 0 };
    let extend_bits = match extendop {
        Some(ExtendOp::UXTW) => 0b010,
        Some(ExtendOp::SXTW) => 0b110,
        Some(ExtendOp::SXTX) => 0b111,
        None => 0b011, // LSL
        _ => panic!("bad extend mode for ld/st AMode"),
    };
    (op_31_22 << 22)
        | (1 << 21)
        | (machreg_to_gpr(rm) << 16)
        | (extend_bits << 13)
        | (s_bit << 12)
        | (0b10 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 {
    (op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd)
}

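/// Encode a vector load-and-replicate; this fixed pattern appears to be the
/// LD1R form (load one element and broadcast it to all lanes of `rt`).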
fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(size & 0b11, size);
    0b0_0_0011010_10_00000_110_0_00_00000_00000
        | q << 30
        | size << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_vec(rt.to_reg())
}

fn enc_ldst_vec_pair(
    opc: u32,
    amode: u32,
    is_load: bool,
    simm7: SImm7Scaled,
    rn: Reg,
    rt: Reg,
    rt2: Reg,
) -> u32 {
    debug_assert_eq!(opc & 0b11, opc);
    debug_assert_eq!(amode & 0b11, amode);

    0b00_10110_00_0_0000000_00000_00000_00000
        | opc << 30
        | amode << 23
        | (is_load as u32) << 22
        | simm7.bits() << 15
        | machreg_to_vec(rt2) << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_vec(rt)
}

fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (top11 << 21)
        | (machreg_to_vec(rm) << 16)
        | (bit15_10 << 10)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (0b01011010110 << 21)
        | size << 31
        | opcode2 << 16
        | opcode1 << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rd.to_reg())
}

fn enc_br(rn: Reg) -> u32 {
    0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)
}

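/// Encode ADR: the 21-bit PC-relative byte offset is split, with its low two
/// bits (immlo) in bits 30:29 and the remaining 19 bits (immhi) in bits 23:5.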
fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 {
    let off = u32::try_from(off).unwrap();
    let immlo = off & 3;
    let immhi = (off >> 2) & ((1 << 19) - 1);
    (0b00010000 << 24) | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())
}

fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond) -> u32 {
    0b100_11010100_00000_0000_00_00000_00000
        | (machreg_to_gpr(rm) << 16)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
        | (cond.bits() << 12)
}

fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
    0b000_11110_00_1_00000_0000_11_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
        | (cond.bits() << 12)
}

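/// Encode CSET via its underlying form, CSINC rd, xzr, xzr, &lt;cond'&gt;. The
/// condition is inverted so that rd receives 1 exactly when the original
/// condition holds (CSETM below is the analogous CSINV alias, yielding
/// all-ones instead of 1).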
fn enc_cset(rd: Writable<Reg>, cond: Cond) -> u32 {
    0b100_11010100_11111_0000_01_11111_00000
        | machreg_to_gpr(rd.to_reg())
        | (cond.invert().bits() << 12)
}

fn enc_csetm(rd: Writable<Reg>, cond: Cond) -> u32 {
    0b110_11010100_11111_0000_00_11111_00000
        | machreg_to_gpr(rd.to_reg())
        | (cond.invert().bits() << 12)
}

fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
    0b0_1_1_11010010_00000_0000_10_00000_0_0000
        | size.sf_bit() << 31
        | imm.bits() << 16
        | cond.bits() << 12
        | machreg_to_gpr(rn) << 5
        | nzcv.bits()
}

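/// Encode a bitfield-move instruction (SBFM/BFM/UBFM, selected by `opc`).
/// Note that the N bit (bit 22) must equal the sf bit for a valid encoding,
/// so it is likewise derived from the operand size.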
fn enc_bfm(opc: u8, size: OperandSize, rd: Writable<Reg>, rn: Reg, immr: u8, imms: u8) -> u32 {
    match size {
        OperandSize::Size64 => {
            debug_assert!(immr <= 63);
            debug_assert!(imms <= 63);
        }
        OperandSize::Size32 => {
            debug_assert!(immr <= 31);
            debug_assert!(imms <= 31);
        }
    }
    debug_assert_eq!(opc & 0b11, opc);
    let n_bit = size.sf_bit();
    0b0_00_100110_0_000000_000000_00000_00000
        | size.sf_bit() << 31
        | u32::from(opc) << 29
        | n_bit << 22
        | u32::from(immr) << 16
        | u32::from(imms) << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rd.to_reg())
}

fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
    0b00001110_101_00000_00011_1_00000_00000
        | ((is_16b as u32) << 30)
        | machreg_to_vec(rd.to_reg())
        | (machreg_to_vec(rn) << 16)
        | (machreg_to_vec(rn) << 5)
}

fn enc_fpurr(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_fpurrr(top22: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    (top22 << 10)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32 {
    (top17 << 15)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(ra) << 10)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 {
    0b000_11110_00_1_00000_00_1000_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
}

fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg())
}

fn enc_inttofpu(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(qu & 0b11, qu);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
    let bits = 0b0_00_01110_00_10000_00000_10_00000_00000;
    bits | qu << 29
        | size << 22
        | bits_12_16 << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_pair(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);

    0b010_11110_11_11000_11011_10_00000_00000
        | bits_12_16 << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(opcode & 0b11111, opcode);
    0b0_0_0_01110_00_11000_0_0000_10_00000_00000
        | q << 30
        | u << 29
        | size << 22
        | opcode << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    debug_assert_eq!(len & 0b11, len);
    0b0_1_001110_000_00000_0_00_0_00_00000_00000
        | (machreg_to_vec(rm) << 16)
        | len << 13
        | (is_extension as u32) << 12
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

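/// Encode `dmb ish`, a data memory barrier over the inner-shareable domain;
/// the encoding is a fixed 32-bit pattern with no operand fields.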
fn enc_dmb_ish() -> u32 {
    0xD5033BBF
}

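/// Encode a load-exclusive (LDXR/LDXRH/LDXRB): the access size goes in the
/// top two bits (31:30), selecting the byte/halfword/word/doubleword form.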
fn enc_ldxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00001000_01011111_01111100_00000000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt.to_reg())
}

fn enc_stxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00001000_00000000_01111100_00000000
        | (sz << 30)
        | (machreg_to_gpr(rs.to_reg()) << 16)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}

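/// Encode a compare-and-swap with acquire-release semantics (the CASAL form,
/// judging by the L and o0 bits set in the fixed pattern): `rs` supplies the
/// expected value and receives the observed one, `rt` is the value to store,
/// and `rn` holds the address.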
fn enc_cas(size: u32, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
    debug_assert_eq!(size & 0b11, size);

    0b00_0010001_1_1_00000_1_11111_00000_00000
        | size << 30
        | machreg_to_gpr(rs.to_reg()) << 16
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rt)
}

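/// Encode an ASIMD modified-immediate instruction: the 8-bit immediate is
/// split into its top three bits (`a:b:c`, bits 18:16) and bottom five
/// (`d:e:f:g:h`, bits 9:5), with `cmode` selecting how it is expanded.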
fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
    let abc = (imm >> 5) as u32;
    let defgh = (imm & 0b11111) as u32;

    debug_assert_eq!(cmode & 0b1111, cmode);
    debug_assert_eq!(q_op & 0b11, q_op);

    0b0_0_0_0111100000_000_0000_01_00000_00000
        | (q_op << 29)
        | (abc << 16)
        | (cmode << 12)
        | (defgh << 5)
        | machreg_to_vec(rd.to_reg())
}

/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
    /// Addend to convert nominal-SP offsets to real-SP offsets at the current
    /// program point.
    pub(crate) virtual_sp_offset: i64,
    /// Offset of FP from nominal-SP.
    pub(crate) nominal_sp_to_fp: i64,
    /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`.
    stack_map: Option<StackMap>,
    /// Current source-code location corresponding to instruction to be emitted.
    cur_srcloc: SourceLoc,
}

impl MachInstEmitState<Inst> for EmitState {
    fn new(abi: &dyn ABICallee<I = Inst>) -> Self {
        EmitState {
            virtual_sp_offset: 0,
            nominal_sp_to_fp: abi.frame_size() as i64,
            stack_map: None,
            cur_srcloc: SourceLoc::default(),
        }
    }

    fn pre_safepoint(&mut self, stack_map: StackMap) {
        self.stack_map = Some(stack_map);
    }

    fn pre_sourceloc(&mut self, srcloc: SourceLoc) {
        self.cur_srcloc = srcloc;
    }
}

impl EmitState {
    fn take_stack_map(&mut self) -> Option<StackMap> {
        self.stack_map.take()
    }

    fn clear_post_insn(&mut self) {
        self.stack_map = None;
    }

    fn cur_srcloc(&self) -> SourceLoc {
        self.cur_srcloc
    }
}

/// Constant state used during function compilation.
pub struct EmitInfo(settings::Flags);

impl EmitInfo {
    pub(crate) fn new(flags: settings::Flags) -> Self {
        Self(flags)
    }
}

impl MachInstEmitInfo for EmitInfo {
    fn flags(&self) -> &settings::Flags {
        &self.0
    }
}

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
        // N.B.: we *must* not exceed the "worst-case size" used to compute
        // where to insert islands, except when islands are explicitly triggered
        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
        // to allow disabling the check for `JTSequence`, which is always
        // emitted following an `EmitIsland`.
        let mut start_off = sink.cur_offset();

        match self {
            &Inst::AluRRR { alu_op, rd, rn, rm } => {
                let top11 = match alu_op {
                    ALUOp::Add32 => 0b00001011_000,
                    ALUOp::Add64 => 0b10001011_000,
                    ALUOp::Sub32 => 0b01001011_000,
                    ALUOp::Sub64 => 0b11001011_000,
                    ALUOp::Orr32 => 0b00101010_000,
                    ALUOp::Orr64 => 0b10101010_000,
                    ALUOp::And32 => 0b00001010_000,
                    ALUOp::And64 => 0b10001010_000,
                    ALUOp::Eor32 => 0b01001010_000,
                    ALUOp::Eor64 => 0b11001010_000,
                    ALUOp::OrrNot32 => 0b00101010_001,
                    ALUOp::OrrNot64 => 0b10101010_001,
                    ALUOp::AndNot32 => 0b00001010_001,
                    ALUOp::AndNot64 => 0b10001010_001,
                    ALUOp::EorNot32 => 0b01001010_001,
                    ALUOp::EorNot64 => 0b11001010_001,
                    ALUOp::AddS32 => 0b00101011_000,
                    ALUOp::AddS64 => 0b10101011_000,
                    ALUOp::SubS32 => 0b01101011_000,
                    ALUOp::SubS64 => 0b11101011_000,
                    ALUOp::SDiv64 => 0b10011010_110,
                    ALUOp::UDiv64 => 0b10011010_110,
                    ALUOp::RotR32 | ALUOp::Lsr32 | ALUOp::Asr32 | ALUOp::Lsl32 => 0b00011010_110,
                    ALUOp::RotR64 | ALUOp::Lsr64 | ALUOp::Asr64 | ALUOp::Lsl64 => 0b10011010_110,
                    ALUOp::SMulH => 0b10011011_010,
                    ALUOp::UMulH => 0b10011011_110,
                };
                let bit15_10 = match alu_op {
                    ALUOp::SDiv64 => 0b000011,
                    ALUOp::UDiv64 => 0b000010,
                    ALUOp::RotR32 | ALUOp::RotR64 => 0b001011,
                    ALUOp::Lsr32 | ALUOp::Lsr64 => 0b001001,
                    ALUOp::Asr32 | ALUOp::Asr64 => 0b001010,
                    ALUOp::Lsl32 | ALUOp::Lsl64 => 0b001000,
                    ALUOp::SMulH | ALUOp::UMulH => 0b011111,
                    _ => 0b000000,
                };
                debug_assert_ne!(writable_stack_reg(), rd);
                // The stack pointer is the zero register in this context, so this might be an
                // indication that something is wrong.
                debug_assert_ne!(stack_reg(), rn);
                debug_assert_ne!(stack_reg(), rm);
                sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
            }
            &Inst::AluRRRR {
                alu_op,
                rd,
                rm,
                rn,
                ra,
            } => {
                let (top11, bit15) = match alu_op {
                    ALUOp3::MAdd32 => (0b0_00_11011_000, 0),
                    ALUOp3::MSub32 => (0b0_00_11011_000, 1),
                    ALUOp3::MAdd64 => (0b1_00_11011_000, 0),
                    ALUOp3::MSub64 => (0b1_00_11011_000, 1),
                };
                sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd));
            }
            &Inst::AluRRImm12 {
                alu_op,
                rd,
                rn,
                ref imm12,
            } => {
                let top8 = match alu_op {
                    ALUOp::Add32 => 0b000_10001,
                    ALUOp::Add64 => 0b100_10001,
                    ALUOp::Sub32 => 0b010_10001,
                    ALUOp::Sub64 => 0b110_10001,
                    ALUOp::AddS32 => 0b001_10001,
                    ALUOp::AddS64 => 0b101_10001,
                    ALUOp::SubS32 => 0b011_10001,
                    ALUOp::SubS64 => 0b111_10001,
                    _ => unimplemented!("{:?}", alu_op),
                };
                sink.put4(enc_arith_rr_imm12(
                    top8,
                    imm12.shift_bits(),
                    imm12.imm_bits(),
                    rn,
                    rd,
                ));
            }
            &Inst::AluRRImmLogic {
                alu_op,
                rd,
                rn,
                ref imml,
            } => {
                let (top9, inv) = match alu_op {
                    ALUOp::Orr32 => (0b001_100100, false),
                    ALUOp::Orr64 => (0b101_100100, false),
                    ALUOp::And32 => (0b000_100100, false),
                    ALUOp::And64 => (0b100_100100, false),
                    ALUOp::Eor32 => (0b010_100100, false),
                    ALUOp::Eor64 => (0b110_100100, false),
                    ALUOp::OrrNot32 => (0b001_100100, true),
                    ALUOp::OrrNot64 => (0b101_100100, true),
                    ALUOp::AndNot32 => (0b000_100100, true),
                    ALUOp::AndNot64 => (0b100_100100, true),
                    ALUOp::EorNot32 => (0b010_100100, true),
                    ALUOp::EorNot64 => (0b110_100100, true),
                    _ => unimplemented!("{:?}", alu_op),
                };
                let imml = if inv { imml.invert() } else { imml.clone() };
                sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd));
            }

            &Inst::AluRRImmShift {
                alu_op,
                rd,
                rn,
                ref immshift,
            } => {
                let amt = immshift.value();
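                // These shifts are encoded via alias forms: RotR becomes EXTR
                // with both source operands set to `rn`, while Lsr/Asr/Lsl
                // become UBFM/SBFM bitfield-move operations.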
                let (top10, immr, imms) = match alu_op {
                    ALUOp::RotR32 => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)),
                    ALUOp::RotR64 => (0b1001001111, machreg_to_gpr(rn), u32::from(amt)),
                    ALUOp::Lsr32 => (0b0101001100, u32::from(amt), 0b011111),
                    ALUOp::Lsr64 => (0b1101001101, u32::from(amt), 0b111111),
                    ALUOp::Asr32 => (0b0001001100, u32::from(amt), 0b011111),
                    ALUOp::Asr64 => (0b1001001101, u32::from(amt), 0b111111),
                    ALUOp::Lsl32 => (
                        0b0101001100,
                        u32::from((32 - amt) % 32),
                        u32::from(31 - amt),
                    ),
                    ALUOp::Lsl64 => (
                        0b1101001101,
                        u32::from((64 - amt) % 64),
                        u32::from(63 - amt),
                    ),
                    _ => unimplemented!("{:?}", alu_op),
                };
                sink.put4(
                    (top10 << 22)
                        | (immr << 16)
                        | (imms << 10)
                        | (machreg_to_gpr(rn) << 5)
                        | machreg_to_gpr(rd.to_reg()),
                );
            }

            &Inst::AluRRRShift {
                alu_op,
                rd,
                rn,
                rm,
                ref shiftop,
            } => {
                let top11: u32 = match alu_op {
                    ALUOp::Add32 => 0b000_01011000,
                    ALUOp::Add64 => 0b100_01011000,
                    ALUOp::AddS32 => 0b001_01011000,
                    ALUOp::AddS64 => 0b101_01011000,
                    ALUOp::Sub32 => 0b010_01011000,
                    ALUOp::Sub64 => 0b110_01011000,
                    ALUOp::SubS32 => 0b011_01011000,
                    ALUOp::SubS64 => 0b111_01011000,
                    ALUOp::Orr32 => 0b001_01010000,
                    ALUOp::Orr64 => 0b101_01010000,
                    ALUOp::And32 => 0b000_01010000,
                    ALUOp::And64 => 0b100_01010000,
                    ALUOp::Eor32 => 0b010_01010000,
                    ALUOp::Eor64 => 0b110_01010000,
                    ALUOp::OrrNot32 => 0b001_01010001,
                    ALUOp::OrrNot64 => 0b101_01010001,
                    ALUOp::EorNot32 => 0b010_01010001,
                    ALUOp::EorNot64 => 0b110_01010001,
                    ALUOp::AndNot32 => 0b000_01010001,
                    ALUOp::AndNot64 => 0b100_01010001,
                    _ => unimplemented!("{:?}", alu_op),
                };
                let top11 = top11 | (u32::from(shiftop.op().bits()) << 1);
                let bits_15_10 = u32::from(shiftop.amt().value());
                sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
            }

            &Inst::AluRRRExtend {
                alu_op,
                rd,
                rn,
                rm,
                extendop,
            } => {
                let top11: u32 = match alu_op {
                    ALUOp::Add32 => 0b00001011001,
                    ALUOp::Add64 => 0b10001011001,
                    ALUOp::Sub32 => 0b01001011001,
                    ALUOp::Sub64 => 0b11001011001,
                    ALUOp::AddS32 => 0b00101011001,
                    ALUOp::AddS64 => 0b10101011001,
                    ALUOp::SubS32 => 0b01101011001,
                    ALUOp::SubS64 => 0b11101011001,
                    _ => unimplemented!("{:?}", alu_op),
                };
                let bits_15_10 = u32::from(extendop.bits()) << 3;
                sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
            }

            &Inst::BitRR { op, rd, rn, .. } => {
                let size = if op.operand_size().is32() { 0b0 } else { 0b1 };
                let (op1, op2) = match op {
                    BitOp::RBit32 | BitOp::RBit64 => (0b00000, 0b000000),
                    BitOp::Clz32 | BitOp::Clz64 => (0b00000, 0b000100),
                    BitOp::Cls32 | BitOp::Cls64 => (0b00000, 0b000101),
                };
                sink.put4(enc_bit_rr(size, op1, op2, rn, rd))
            }

            &Inst::ULoad8 { rd, ref mem, flags }
            | &Inst::SLoad8 { rd, ref mem, flags }
            | &Inst::ULoad16 { rd, ref mem, flags }
            | &Inst::SLoad16 { rd, ref mem, flags }
            | &Inst::ULoad32 { rd, ref mem, flags }
            | &Inst::SLoad32 { rd, ref mem, flags }
            | &Inst::ULoad64 {
                rd, ref mem, flags, ..
            }
            | &Inst::FpuLoad32 { rd, ref mem, flags }
            | &Inst::FpuLoad64 { rd, ref mem, flags }
            | &Inst::FpuLoad128 { rd, ref mem, flags } => {
                let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);

                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                // ldst encoding helpers take Reg, not Writable<Reg>.
                let rd = rd.to_reg();

                // This is the base opcode (top 10 bits) for the "unscaled
                // immediate" form (Unscaled). Other addressing modes will OR in
                // other values for bits 24/25 (bits 1/2 of this constant).
                let (op, bits) = match self {
                    &Inst::ULoad8 { .. } => (0b0011100001, 8),
                    &Inst::SLoad8 { .. } => (0b0011100010, 8),
                    &Inst::ULoad16 { .. } => (0b0111100001, 16),
                    &Inst::SLoad16 { .. } => (0b0111100010, 16),
                    &Inst::ULoad32 { .. } => (0b1011100001, 32),
                    &Inst::SLoad32 { .. } => (0b1011100010, 32),
                    &Inst::ULoad64 { .. } => (0b1111100001, 64),
                    &Inst::FpuLoad32 { .. } => (0b1011110001, 32),
                    &Inst::FpuLoad64 { .. } => (0b1111110001, 64),
                    &Inst::FpuLoad128 { .. } => (0b0011110011, 128),
                    _ => unreachable!(),
                };

                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() && !flags.notrap() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }

                match &mem {
                    &AMode::Unscaled(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
                    }
                    &AMode::UnsignedOffset(reg, uimm12scaled) => {
                        if uimm12scaled.value() != 0 {
                            assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
                        }
                        sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
                    }
                    &AMode::RegReg(r1, r2) => {
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
                        ));
                    }
                    &AMode::RegScaled(r1, r2, ty) | &AMode::RegScaledExtended(r1, r2, ty, _) => {
                        assert_eq!(bits, ty_bits(ty));
                        let extendop = match &mem {
                            &AMode::RegScaled(..) => None,
                            &AMode::RegScaledExtended(_, _, _, op) => Some(op),
                            _ => unreachable!(),
                        };
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ true, extendop, rd,
                        ));
                    }
                    &AMode::RegExtended(r1, r2, extendop) => {
                        sink.put4(enc_ldst_reg(
                            op,
                            r1,
                            r2,
                            /* scaled = */ false,
                            Some(extendop),
                            rd,
                        ));
                    }
                    &AMode::Label(ref label) => {
                        let offset = match label {
                            // cast i32 to u32 (two's-complement)
                            &MemLabel::PCRel(off) => off as u32,
                        } / 4;
                        assert!(offset < (1 << 19));
                        match self {
                            &Inst::ULoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b00011000, offset, rd));
                            }
                            &Inst::SLoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b10011000, offset, rd));
                            }
                            &Inst::FpuLoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b00011100, offset, rd));
                            }
                            &Inst::ULoad64 { .. } => {
                                sink.put4(enc_ldst_imm19(0b01011000, offset, rd));
                            }
                            &Inst::FpuLoad64 { .. } => {
                                sink.put4(enc_ldst_imm19(0b01011100, offset, rd));
                            }
                            &Inst::FpuLoad128 { .. } => {
                                sink.put4(enc_ldst_imm19(0b10011100, offset, rd));
                            }
                            _ => panic!("Unsupported size for LDR from constant pool!"),
                        }
                    }
                    &AMode::PreIndexed(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd));
                    }
                    &AMode::PostIndexed(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
                    }
                    // Eliminated by `mem_finalize()` above.
                    &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => {
                        panic!("Should not see stack-offset here!")
                    }
                    &AMode::RegOffset(..) => panic!("Should not see generic reg-offset here!"),
                }
            }

            &Inst::Store8 { rd, ref mem, flags }
            | &Inst::Store16 { rd, ref mem, flags }
            | &Inst::Store32 { rd, ref mem, flags }
            | &Inst::Store64 { rd, ref mem, flags }
            | &Inst::FpuStore32 { rd, ref mem, flags }
            | &Inst::FpuStore64 { rd, ref mem, flags }
            | &Inst::FpuStore128 { rd, ref mem, flags } => {
                let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);

                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                let (op, bits) = match self {
                    &Inst::Store8 { .. } => (0b0011100000, 8),
                    &Inst::Store16 { .. } => (0b0111100000, 16),
                    &Inst::Store32 { .. } => (0b1011100000, 32),
                    &Inst::Store64 { .. } => (0b1111100000, 64),
                    &Inst::FpuStore32 { .. } => (0b1011110000, 32),
                    &Inst::FpuStore64 { .. } => (0b1111110000, 64),
                    &Inst::FpuStore128 { .. } => (0b0011110010, 128),
                    _ => unreachable!(),
                };

                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() && !flags.notrap() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }

                match &mem {
                    &AMode::Unscaled(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
                    }
                    &AMode::UnsignedOffset(reg, uimm12scaled) => {
                        if uimm12scaled.value() != 0 {
                            assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
                        }
                        sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
                    }
                    &AMode::RegReg(r1, r2) => {
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
                        ));
                    }
                    &AMode::RegScaled(r1, r2, _ty) | &AMode::RegScaledExtended(r1, r2, _ty, _) => {
                        let extendop = match &mem {
                            &AMode::RegScaled(..) => None,
                            &AMode::RegScaledExtended(_, _, _, op) => Some(op),
                            _ => unreachable!(),
                        };
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ true, extendop, rd,
                        ));
                    }
                    &AMode::RegExtended(r1, r2, extendop) => {
                        sink.put4(enc_ldst_reg(
                            op,
                            r1,
                            r2,
                            /* scaled = */ false,
                            Some(extendop),
                            rd,
                        ));
                    }
                    &AMode::Label(..) => {
                        panic!("Store to a MemLabel not implemented!");
                    }
                    &AMode::PreIndexed(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd));
                    }
                    &AMode::PostIndexed(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
                    }
                    // Eliminated by `mem_finalize()` above.
                    &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => {
                        panic!("Should not see stack-offset here!")
                    }
                    &AMode::RegOffset(..) => panic!("Should not see generic reg-offset here!"),
1010                 }
1011             }
1012 
1013             &Inst::StoreP64 {
1014                 rt,
1015                 rt2,
1016                 ref mem,
1017                 flags,
1018             } => {
1019                 let srcloc = state.cur_srcloc();
1020                 if srcloc != SourceLoc::default() && !flags.notrap() {
1021                     // Register the offset at which the actual store instruction starts.
1022                     sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1023                 }
1024                 match mem {
1025                     &PairAMode::SignedOffset(reg, simm7) => {
1026                         assert_eq!(simm7.scale_ty, I64);
1027                         sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2));
1028                     }
1029                     &PairAMode::PreIndexed(reg, simm7) => {
1030                         assert_eq!(simm7.scale_ty, I64);
1031                         sink.put4(enc_ldst_pair(0b1010100110, simm7, reg.to_reg(), rt, rt2));
1032                     }
1033                     &PairAMode::PostIndexed(reg, simm7) => {
1034                         assert_eq!(simm7.scale_ty, I64);
1035                         sink.put4(enc_ldst_pair(0b1010100010, simm7, reg.to_reg(), rt, rt2));
1036                     }
1037                 }
1038             }
1039             &Inst::LoadP64 {
1040                 rt,
1041                 rt2,
1042                 ref mem,
1043                 flags,
1044             } => {
1045                 let srcloc = state.cur_srcloc();
1046                 if srcloc != SourceLoc::default() && !flags.notrap() {
1047                     // Register the offset at which the actual load instruction starts.
1048                     sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1049                 }
1050 
1051                 let rt = rt.to_reg();
1052                 let rt2 = rt2.to_reg();
1053                 match mem {
1054                     &PairAMode::SignedOffset(reg, simm7) => {
1055                         assert_eq!(simm7.scale_ty, I64);
1056                         sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2));
1057                     }
1058                     &PairAMode::PreIndexed(reg, simm7) => {
1059                         assert_eq!(simm7.scale_ty, I64);
1060                         sink.put4(enc_ldst_pair(0b1010100111, simm7, reg.to_reg(), rt, rt2));
1061                     }
1062                     &PairAMode::PostIndexed(reg, simm7) => {
1063                         assert_eq!(simm7.scale_ty, I64);
1064                         sink.put4(enc_ldst_pair(0b1010100011, simm7, reg.to_reg(), rt, rt2));
1065                     }
1066                 }
1067             }
1068             &Inst::FpuLoadP64 {
1069                 rt,
1070                 rt2,
1071                 ref mem,
1072                 flags,
1073             }
1074             | &Inst::FpuLoadP128 {
1075                 rt,
1076                 rt2,
1077                 ref mem,
1078                 flags,
1079             } => {
1080                 let srcloc = state.cur_srcloc();
1081 
1082                 if srcloc != SourceLoc::default() && !flags.notrap() {
1083                     // Register the offset at which the actual load instruction starts.
1084                     sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1085                 }
1086 
1087                 let opc = match self {
1088                     &Inst::FpuLoadP64 { .. } => 0b01,
1089                     &Inst::FpuLoadP128 { .. } => 0b10,
1090                     _ => unreachable!(),
1091                 };
1092                 let rt = rt.to_reg();
1093                 let rt2 = rt2.to_reg();
1094 
1095                 match mem {
1096                     &PairAMode::SignedOffset(reg, simm7) => {
1097                         assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1098                         sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2));
1099                     }
1100                     &PairAMode::PreIndexed(reg, simm7) => {
1101                         assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1102                         sink.put4(enc_ldst_vec_pair(
1103                             opc,
1104                             0b11,
1105                             true,
1106                             simm7,
1107                             reg.to_reg(),
1108                             rt,
1109                             rt2,
1110                         ));
1111                     }
1112                     &PairAMode::PostIndexed(reg, simm7) => {
1113                         assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1114                         sink.put4(enc_ldst_vec_pair(
1115                             opc,
1116                             0b01,
1117                             true,
1118                             simm7,
1119                             reg.to_reg(),
1120                             rt,
1121                             rt2,
1122                         ));
1123                     }
1124                 }
1125             }
1126             &Inst::FpuStoreP64 {
1127                 rt,
1128                 rt2,
1129                 ref mem,
1130                 flags,
1131             }
1132             | &Inst::FpuStoreP128 {
1133                 rt,
1134                 rt2,
1135                 ref mem,
1136                 flags,
1137             } => {
1138                 let srcloc = state.cur_srcloc();
1139 
1140                 if srcloc != SourceLoc::default() && !flags.notrap() {
1141                     // Register the offset at which the actual store instruction starts.
1142                     sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1143                 }
1144 
1145                 let opc = match self {
1146                     &Inst::FpuStoreP64 { .. } => 0b01,
1147                     &Inst::FpuStoreP128 { .. } => 0b10,
1148                     _ => unreachable!(),
1149                 };
1150 
1151                 match mem {
1152                     &PairAMode::SignedOffset(reg, simm7) => {
1153                         assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1154                         sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2));
1155                     }
1156                     &PairAMode::PreIndexed(reg, simm7) => {
1157                         assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1158                         sink.put4(enc_ldst_vec_pair(
1159                             opc,
1160                             0b11,
1161                             false,
1162                             simm7,
1163                             reg.to_reg(),
1164                             rt,
1165                             rt2,
1166                         ));
1167                     }
1168                     &PairAMode::PostIndexed(reg, simm7) => {
1169                         assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1170                         sink.put4(enc_ldst_vec_pair(
1171                             opc,
1172                             0b01,
1173                             false,
1174                             simm7,
1175                             reg.to_reg(),
1176                             rt,
1177                             rt2,
1178                         ));
1179                     }
1180                 }
1181             }
1182             &Inst::Mov64 { rd, rm } => {
1183                 assert!(rd.to_reg().get_class() == rm.get_class());
1184                 assert!(rm.get_class() == RegClass::I64);
1185 
1186                 // MOV to SP is interpreted as MOV to XZR instead. And our codegen
1187                 // should never MOV to XZR.
1188                 assert!(rd.to_reg() != stack_reg());
1189 
1190                 if rm == stack_reg() {
1191                     // We can't use ORR here, so use an `add rd, sp, #0` instead.
1192                     let imm12 = Imm12::maybe_from_u64(0).unwrap();
1193                     sink.put4(enc_arith_rr_imm12(
1194                         0b100_10001,
1195                         imm12.shift_bits(),
1196                         imm12.imm_bits(),
1197                         rm,
1198                         rd,
1199                     ));
1200                 } else {
1201                     // Encoded as ORR rd, rm, zero.
1202                     sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
1203                 }
1204             }
1205             &Inst::Mov32 { rd, rm } => {
1206                 // MOV to SP is interpreted as MOV to XZR instead. And our codegen
1207                 // should never MOV to XZR.
1208                 assert!(machreg_to_gpr(rd.to_reg()) != 31);
1209                 // Encoded as ORR rd, rm, zero.
1210                 sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm));
1211             }
1212             &Inst::MovZ { rd, imm, size } => {
1213                 sink.put4(enc_move_wide(MoveWideOpcode::MOVZ, rd, imm, size))
1214             }
1215             &Inst::MovN { rd, imm, size } => {
1216                 sink.put4(enc_move_wide(MoveWideOpcode::MOVN, rd, imm, size))
1217             }
1218             &Inst::MovK { rd, imm, size } => {
1219                 sink.put4(enc_move_wide(MoveWideOpcode::MOVK, rd, imm, size))
1220             }
1221             &Inst::CSel { rd, rn, rm, cond } => {
1222                 sink.put4(enc_csel(rd, rn, rm, cond));
1223             }
1224             &Inst::CSet { rd, cond } => {
1225                 sink.put4(enc_cset(rd, cond));
1226             }
1227             &Inst::CSetm { rd, cond } => {
1228                 sink.put4(enc_csetm(rd, cond));
1229             }
1230             &Inst::CCmpImm {
1231                 size,
1232                 rn,
1233                 imm,
1234                 nzcv,
1235                 cond,
1236             } => {
1237                 sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
1238             }
1239             &Inst::AtomicRMW { ty, op } => {
1240                 /* Emit this:
1241                       dmb         ish
1242                      again:
1243                       ldxr{,b,h}  x/w27, [x25]
1244                       op          x28, x27, x26 // op is add,sub,and,orr,eor
1245                       stxr{,b,h}  w24, x/w28, [x25]
1246                       cbnz        x24, again
1247                       dmb         ish
1248 
1249                    Operand conventions:
1250                       IN:  x25 (addr), x26 (2nd arg for op)
1251                       OUT: x27 (old value), x24 (trashed), x28 (trashed)
1252 
1253                    It is unfortunate that, per the ARM documentation, x28 cannot be used for
1254                    both the store-data and success-flag operands of stxr.  This causes the
1255                    instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24
1256                    instead for the success-flag.
1257 
1258                    In the case where the operation is 'xchg', the second insn is instead
1259                      mov          x28, x26
1260                    so that we simply write in the destination, the "2nd arg for op".
1261                 */
                let xzr = zero_reg();
                let x24 = xreg(24);
                let x25 = xreg(25);
                let x26 = xreg(26);
                let x27 = xreg(27);
                let x28 = xreg(28);
                let x24wr = writable_xreg(24);
                let x27wr = writable_xreg(27);
                let x28wr = writable_xreg(28);
                let again_label = sink.get_label();

                sink.put4(enc_dmb_ish()); // dmb ish

                // again:
                sink.bind_label(again_label);
                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() {
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }
                sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25]

                if op == inst_common::AtomicRmwOp::Xchg {
                    // mov x28, x26
                    sink.put4(enc_arith_rrr(0b101_01010_00_0, 0b000000, x28wr, xzr, x26))
                } else {
                    // add/sub/and/orr/eor x28, x27, x26
                    let bits_31_21 = match op {
                        inst_common::AtomicRmwOp::Add => 0b100_01011_00_0,
                        inst_common::AtomicRmwOp::Sub => 0b110_01011_00_0,
                        inst_common::AtomicRmwOp::And => 0b100_01010_00_0,
                        inst_common::AtomicRmwOp::Or => 0b101_01010_00_0,
                        inst_common::AtomicRmwOp::Xor => 0b110_01010_00_0,
                        inst_common::AtomicRmwOp::Nand
                        | inst_common::AtomicRmwOp::Umin
                        | inst_common::AtomicRmwOp::Umax
                        | inst_common::AtomicRmwOp::Smin
                        | inst_common::AtomicRmwOp::Smax => todo!("{:?}", op),
                        inst_common::AtomicRmwOp::Xchg => unreachable!(),
                    };
                    sink.put4(enc_arith_rrr(bits_31_21, 0b000000, x28wr, x27, x26));
                }

                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() {
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }
                sink.put4(enc_stxr(ty, x24wr, x28, x25)); // stxr w24, x28, [x25]

                // cbnz w24, again
                // Note: we test all of x24, relying on `stxr` zeroing the upper 32
                // bits of x24 when it writes its 32-bit status result.
                let br_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(again_label),
                    CondBrKind::NotZero(x24),
                ));
                sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);

                sink.put4(enc_dmb_ish()); // dmb ish
            }
            &Inst::AtomicCAS { rs, rt, rn, ty } => {
                let size = match ty {
                    I8 => 0b00,
                    I16 => 0b01,
                    I32 => 0b10,
                    I64 => 0b11,
                    _ => panic!("Unsupported type: {}", ty),
                };

                sink.put4(enc_cas(size, rs, rt, rn));
            }
            &Inst::AtomicCASLoop { ty } => {
                /* Emit this:
                     dmb         ish
                    again:
                     ldxr{,b,h}  x/w27, [x25]
                     and         x24, x26, MASK (= 2^size_bits - 1)
                     cmp         x27, x24
                     b.ne        out
                     stxr{,b,h}  w24, x/w28, [x25]
                     cbnz        x24, again
                    out:
                     dmb         ish

                  Operand conventions:
                     IN:  x25 (addr), x26 (expected value), x28 (replacement value)
                     OUT: x27 (old value), x24 (trashed)
                */
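                // The expected value must be masked to the access width: the
                // narrow forms of `ldxr` zero-extend what they load, so x26 is
                // reduced to the same width before the `cmp`; otherwise stray
                // upper bits in x26 would make the comparison always fail.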
                let xzr = zero_reg();
                let x24 = xreg(24);
                let x25 = xreg(25);
                let x26 = xreg(26);
                let x27 = xreg(27);
                let x28 = xreg(28);
                let xzrwr = writable_zero_reg();
                let x24wr = writable_xreg(24);
                let x27wr = writable_xreg(27);
                let again_label = sink.get_label();
                let out_label = sink.get_label();

                sink.put4(enc_dmb_ish()); // dmb ish

                // again:
                sink.bind_label(again_label);
                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() {
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }
                sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25]

                if ty == I64 {
                    // mov x24, x26
                    sink.put4(enc_arith_rrr(0b101_01010_00_0, 0b000000, x24wr, xzr, x26))
                } else {
                    // and x24, x26, 0xFF/0xFFFF/0xFFFFFFFF
                    let (mask, s) = match ty {
                        I8 => (0xFF, 7),
                        I16 => (0xFFFF, 15),
                        I32 => (0xFFFFFFFF, 31),
                        _ => unreachable!(),
                    };
                    sink.put4(enc_arith_rr_imml(
                        0b100_100100,
                        ImmLogic::from_n_r_s(mask, true, 0, s, OperandSize::Size64).enc_bits(),
                        x26,
                        x24wr,
                    ))
                }

                // cmp x27, x24 (== subs xzr, x27, x24)
                sink.put4(enc_arith_rrr(0b111_01011_00_0, 0b000000, xzrwr, x27, x24));

                // b.ne out
                let br_out_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(out_label),
                    CondBrKind::Cond(Cond::Ne),
                ));
                sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19);

                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() {
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }
                sink.put4(enc_stxr(ty, x24wr, x28, x25)); // stxr w24, x28, [x25]

                // cbnz w24, again.
                // Note: we test all of x24, relying on `stxr` zeroing the upper 32
                // bits of x24 when it writes its 32-bit status result.
                let br_again_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(again_label),
                    CondBrKind::NotZero(x24),
                ));
                sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19);

                // out:
                sink.bind_label(out_label);
                sink.put4(enc_dmb_ish()); // dmb ish
            }
            &Inst::AtomicLoad { ty, r_data, r_addr } => {
                let op = match ty {
                    I8 => 0b0011100001,
                    I16 => 0b0111100001,
                    I32 => 0b1011100001,
                    I64 => 0b1111100001,
                    _ => unreachable!(),
                };
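                // A plain load with a leading barrier: the `dmb ish` emitted
                // before the load keeps it from being reordered with earlier
                // memory accesses, while atomic stores (below) place their
                // barrier after the store.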
                sink.put4(enc_dmb_ish()); // dmb ish

                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() {
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }
                let uimm12scaled_zero = UImm12Scaled::zero(I8 /*irrelevant*/);
                sink.put4(enc_ldst_uimm12(
                    op,
                    uimm12scaled_zero,
                    r_addr,
                    r_data.to_reg(),
                ));
            }
            &Inst::AtomicStore { ty, r_data, r_addr } => {
                let op = match ty {
                    I8 => 0b0011100000,
                    I16 => 0b0111100000,
                    I32 => 0b1011100000,
                    I64 => 0b1111100000,
                    _ => unreachable!(),
                };

                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() {
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }
                let uimm12scaled_zero = UImm12Scaled::zero(I8 /*irrelevant*/);
                sink.put4(enc_ldst_uimm12(op, uimm12scaled_zero, r_addr, r_data));
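                // The trailing barrier orders the store before all later memory
                // accesses, mirroring the leading barrier on atomic loads above.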
                sink.put4(enc_dmb_ish()); // dmb ish
            }
            &Inst::Fence {} => {
                sink.put4(enc_dmb_ish()); // dmb ish
            }
            &Inst::FpuMove64 { rd, rn } => {
                sink.put4(enc_fpurr(0b000_11110_01_1_000000_10000, rd, rn));
            }
            &Inst::FpuMove128 { rd, rn } => {
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
            }
            &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
                let (imm5, shift, mask) = match size.lane_size() {
                    ScalarSize::Size32 => (0b00100, 3, 0b011),
                    ScalarSize::Size64 => (0b01000, 4, 0b001),
                    _ => unimplemented!(),
                };
                debug_assert_eq!(idx & mask, idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b010_11110000_00000_000001_00000_00000
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::FpuExtend { rd, rn, size } => {
                sink.put4(enc_fpurr(
                    0b000_11110_00_1_000000_10000 | (size.ftype() << 13),
                    rd,
                    rn,
                ));
            }
            &Inst::FpuRR { fpu_op, rd, rn } => {
                let top22 = match fpu_op {
                    FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000,
                    FPUOp1::Abs64 => 0b000_11110_01_1_000001_10000,
                    FPUOp1::Neg32 => 0b000_11110_00_1_000010_10000,
                    FPUOp1::Neg64 => 0b000_11110_01_1_000010_10000,
                    FPUOp1::Sqrt32 => 0b000_11110_00_1_000011_10000,
                    FPUOp1::Sqrt64 => 0b000_11110_01_1_000011_10000,
                    FPUOp1::Cvt32To64 => 0b000_11110_00_1_000101_10000,
                    FPUOp1::Cvt64To32 => 0b000_11110_01_1_000100_10000,
                };
                sink.put4(enc_fpurr(top22, rd, rn));
            }
            &Inst::FpuRRR { fpu_op, rd, rn, rm } => {
                let top22 = match fpu_op {
                    FPUOp2::Add32 => 0b000_11110_00_1_00000_001010,
                    FPUOp2::Add64 => 0b000_11110_01_1_00000_001010,
                    FPUOp2::Sub32 => 0b000_11110_00_1_00000_001110,
                    FPUOp2::Sub64 => 0b000_11110_01_1_00000_001110,
                    FPUOp2::Mul32 => 0b000_11110_00_1_00000_000010,
                    FPUOp2::Mul64 => 0b000_11110_01_1_00000_000010,
                    FPUOp2::Div32 => 0b000_11110_00_1_00000_000110,
                    FPUOp2::Div64 => 0b000_11110_01_1_00000_000110,
                    FPUOp2::Max32 => 0b000_11110_00_1_00000_010010,
                    FPUOp2::Max64 => 0b000_11110_01_1_00000_010010,
                    FPUOp2::Min32 => 0b000_11110_00_1_00000_010110,
                    FPUOp2::Min64 => 0b000_11110_01_1_00000_010110,
                    FPUOp2::Sqadd64 => 0b010_11110_11_1_00000_000011,
                    FPUOp2::Uqadd64 => 0b011_11110_11_1_00000_000011,
                    FPUOp2::Sqsub64 => 0b010_11110_11_1_00000_001011,
                    FPUOp2::Uqsub64 => 0b011_11110_11_1_00000_001011,
                };
                sink.put4(enc_fpurrr(top22, rd, rn, rm));
            }
            &Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {
                FPUOpRI::UShr32(imm) => {
                    debug_assert_eq!(32, imm.lane_size_in_bits);
                    sink.put4(
                        0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000
                            | imm.enc() << 16
                            | machreg_to_vec(rn) << 5
                            | machreg_to_vec(rd.to_reg()),
                    )
                }
                FPUOpRI::UShr64(imm) => {
                    debug_assert_eq!(64, imm.lane_size_in_bits);
                    sink.put4(
                        0b01_1_111110_0000000_00_0_0_0_1_00000_00000
                            | imm.enc() << 16
                            | machreg_to_vec(rn) << 5
                            | machreg_to_vec(rd.to_reg()),
                    )
                }
                FPUOpRI::Sli64(imm) => {
                    debug_assert_eq!(64, imm.lane_size_in_bits);
                    sink.put4(
                        0b01_1_111110_0000000_010101_00000_00000
                            | imm.enc() << 16
                            | machreg_to_vec(rn) << 5
                            | machreg_to_vec(rd.to_reg()),
                    )
                }
                FPUOpRI::Sli32(imm) => {
                    debug_assert_eq!(32, imm.lane_size_in_bits);
                    sink.put4(
                        0b0_0_1_011110_0000000_010101_00000_00000
                            | imm.enc() << 16
                            | machreg_to_vec(rn) << 5
                            | machreg_to_vec(rd.to_reg()),
                    )
                }
            },
            &Inst::FpuRRRR {
                fpu_op,
                rd,
                rn,
                rm,
                ra,
            } => {
                let top17 = match fpu_op {
                    FPUOp3::MAdd32 => 0b000_11111_00_0_00000_0,
                    FPUOp3::MAdd64 => 0b000_11111_01_0_00000_0,
                };
                sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
            }
            &Inst::VecMisc { op, rd, rn, size } => {
                let (q, enc_size) = size.enc_size();
                let (u, bits_12_16, size) = match op {
                    VecMisc2::Not => (0b1, 0b00101, 0b00),
                    VecMisc2::Neg => (0b1, 0b01011, enc_size),
                    VecMisc2::Abs => (0b0, 0b01011, enc_size),
                    VecMisc2::Fabs => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b0, 0b01111, enc_size)
                    }
                    VecMisc2::Fneg => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b1, 0b01111, enc_size)
                    }
                    VecMisc2::Fsqrt => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b1, 0b11111, enc_size)
                    }
                    VecMisc2::Rev64 => {
                        debug_assert_ne!(VectorSize::Size64x2, size);
                        (0b0, 0b00000, enc_size)
                    }
                    VecMisc2::Shll => {
                        debug_assert_ne!(VectorSize::Size64x2, size);
                        debug_assert!(!size.is_128bits());
                        (0b1, 0b10011, enc_size)
                    }
                    VecMisc2::Fcvtzs => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b0, 0b11011, enc_size)
                    }
                    VecMisc2::Fcvtzu => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b1, 0b11011, enc_size)
                    }
                    VecMisc2::Scvtf => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b0, 0b11101, enc_size & 0b1)
                    }
                    VecMisc2::Ucvtf => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b1, 0b11101, enc_size & 0b1)
                    }
                    VecMisc2::Frintn => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b0, 0b11000, enc_size & 0b01)
                    }
                    VecMisc2::Frintz => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b0, 0b11001, enc_size | 0b10)
                    }
                    VecMisc2::Frintm => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b0, 0b11001, enc_size & 0b01)
                    }
                    VecMisc2::Frintp => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b0, 0b11000, enc_size | 0b10)
                    }
                    VecMisc2::Cnt => {
                        debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);
                        (0b0, 0b00101, enc_size)
                    }
                    VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size),
                };
                sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
            }
            &Inst::VecLanes { op, rd, rn, size } => {
                let (q, size) = match size {
                    VectorSize::Size8x8 => (0b0, 0b00),
                    VectorSize::Size8x16 => (0b1, 0b00),
                    VectorSize::Size16x4 => (0b0, 0b01),
                    VectorSize::Size16x8 => (0b1, 0b01),
                    VectorSize::Size32x4 => (0b1, 0b10),
                    _ => unreachable!(),
                };
                let (u, opcode) = match op {
                    VecLanesOp::Uminv => (0b1, 0b11010),
                    VecLanesOp::Addv => (0b0, 0b11011),
                };
                sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
            }
            &Inst::VecShiftImm {
                op,
                rd,
                rn,
                size,
                imm,
            } => {
                let (is_shr, template) = match op {
                    VecShiftImmOp::Ushr => (true, 0b_011_011110_0000_000_000001_00000_00000_u32),
                    VecShiftImmOp::Sshr => (true, 0b_010_011110_0000_000_000001_00000_00000_u32),
                    VecShiftImmOp::Shl => (false, 0b_010_011110_0000_000_010101_00000_00000_u32),
                };
                let imm = imm as u32;
                // Deal with the somewhat strange encoding scheme for, and limits on,
                // the shift amount.
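                // immh:immb encodes both the lane width (the position of the
                // leading one) and the shift amount; right shifts store the
                // negated amount. E.g. a 32-bit lane shifted right by 3 gives
                // 0b0100_000 | (32 - 3) = 0b0111101.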
                let immh_immb = match (size, is_shr) {
                    (VectorSize::Size64x2, true) if imm >= 1 && imm <= 64 => {
                        0b_1000_000_u32 | (64 - imm)
                    }
                    (VectorSize::Size32x4, true) if imm >= 1 && imm <= 32 => {
                        0b_0100_000_u32 | (32 - imm)
                    }
                    (VectorSize::Size16x8, true) if imm >= 1 && imm <= 16 => {
                        0b_0010_000_u32 | (16 - imm)
                    }
                    (VectorSize::Size8x16, true) if imm >= 1 && imm <= 8 => {
                        0b_0001_000_u32 | (8 - imm)
                    }
                    (VectorSize::Size64x2, false) if imm <= 63 => 0b_1000_000_u32 | imm,
                    (VectorSize::Size32x4, false) if imm <= 31 => 0b_0100_000_u32 | imm,
                    (VectorSize::Size16x8, false) if imm <= 15 => 0b_0010_000_u32 | imm,
                    (VectorSize::Size8x16, false) if imm <= 7 => 0b_0001_000_u32 | imm,
                    _ => panic!(
                        "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {:?}, {:?}, {:?}",
                        op, size, imm
                    ),
                };
                let rn_enc = machreg_to_vec(rn);
                let rd_enc = machreg_to_vec(rd.to_reg());
                sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
            }
            &Inst::VecExtract { rd, rn, rm, imm4 } => {
                if imm4 < 16 {
                    let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;
                    let rm_enc = machreg_to_vec(rm);
                    let rn_enc = machreg_to_vec(rn);
                    let rd_enc = machreg_to_vec(rd.to_reg());
                    sink.put4(
                        template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,
                    );
                } else {
                    panic!(
                        "aarch64: Inst::VecExtract: emit: invalid extract index {}",
                        imm4
                    );
                }
            }
            &Inst::VecTbl {
                rd,
                rn,
                rm,
                is_extension,
            } => {
                sink.put4(enc_tbl(is_extension, 0b00, rd, rn, rm));
            }
            &Inst::VecTbl2 {
                rd,
                rn,
                rn2,
                rm,
                is_extension,
            } => {
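                // The two-register TBL/TBX form requires the table registers to
                // be consecutive (modulo 32) in the vector register file; the
                // encoding names only the first.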
                assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
                sink.put4(enc_tbl(is_extension, 0b01, rd, rn, rm));
            }
            &Inst::FpuCmp32 { rn, rm } => {
                sink.put4(enc_fcmp(ScalarSize::Size32, rn, rm));
            }
            &Inst::FpuCmp64 { rn, rm } => {
                sink.put4(enc_fcmp(ScalarSize::Size64, rn, rm));
            }
            &Inst::FpuToInt { op, rd, rn } => {
                let top16 = match op {
                    // FCVTZS (32/32-bit)
                    FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000,
                    // FCVTZU (32/32-bit)
                    FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001,
                    // FCVTZS (32/64-bit)
                    FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000,
                    // FCVTZU (32/64-bit)
                    FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001,
                    // FCVTZS (64/32-bit)
                    FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000,
                    // FCVTZU (64/32-bit)
                    FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001,
                    // FCVTZS (64/64-bit)
                    FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000,
                    // FCVTZU (64/64-bit)
                    FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001,
                };
                sink.put4(enc_fputoint(top16, rd, rn));
            }
            &Inst::IntToFpu { op, rd, rn } => {
                let top16 = match op {
                    // SCVTF (32/32-bit)
                    IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010,
                    // UCVTF (32/32-bit)
                    IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011,
                    // SCVTF (64/32-bit)
                    IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010,
                    // UCVTF (64/32-bit)
                    IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011,
                    // SCVTF (32/64-bit)
                    IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010,
                    // UCVTF (32/64-bit)
                    IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011,
                    // SCVTF (64/64-bit)
                    IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010,
                    // UCVTF (64/64-bit)
                    IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011,
                };
                sink.put4(enc_inttofpu(top16, rd, rn));
            }
            &Inst::LoadFpuConst64 { rd, const_data } => {
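                // Emitted layout (16 bytes total): a PC-relative load of the
                // literal 8 bytes ahead, a branch whose resolved offset (12 from
                // its own PC) skips over the literal, then the 8-byte constant
                // itself.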
                let inst = Inst::FpuLoad64 {
                    rd,
                    mem: AMode::Label(MemLabel::PCRel(8)),
                    flags: MemFlags::trusted(),
                };
                inst.emit(sink, emit_info, state);
                let inst = Inst::Jump {
                    dest: BranchTarget::ResolvedOffset(12),
                };
                inst.emit(sink, emit_info, state);
                sink.put8(const_data);
            }
            &Inst::LoadFpuConst128 { rd, const_data } => {
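                // Same literal-pool trick as LoadFpuConst64, but with a 16-byte
                // constant: the branch skips 20 bytes from its own PC.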
                let inst = Inst::FpuLoad128 {
                    rd,
                    mem: AMode::Label(MemLabel::PCRel(8)),
                    flags: MemFlags::trusted(),
                };
                inst.emit(sink, emit_info, state);
                let inst = Inst::Jump {
                    dest: BranchTarget::ResolvedOffset(20),
                };
                inst.emit(sink, emit_info, state);

                for i in const_data.to_le_bytes().iter() {
                    sink.put1(*i);
                }
            }
            &Inst::FpuCSel32 { rd, rn, rm, cond } => {
                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32));
            }
            &Inst::FpuCSel64 { rd, rn, rm, cond } => {
                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64));
            }
            &Inst::FpuRound { op, rd, rn } => {
                let top22 = match op {
                    FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000,
                    FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000,
                    FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000,
                    FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000,
                    FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000,
                    FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000,
                    FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000,
                    FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000,
                };
                sink.put4(enc_fround(top22, rd, rn));
            }
            &Inst::MovToFpu { rd, rn, size } => {
                let template = match size {
                    ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000,
                    ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000,
                    _ => unreachable!(),
                };
                sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
            }
            &Inst::MovToVec { rd, rn, idx, size } => {
                let (imm5, shift) = match size.lane_size() {
                    ScalarSize::Size8 => (0b00001, 1),
                    ScalarSize::Size16 => (0b00010, 2),
                    ScalarSize::Size32 => (0b00100, 3),
                    ScalarSize::Size64 => (0b01000, 4),
                    _ => unreachable!(),
                };
                debug_assert_eq!(idx & (0b11111 >> shift), idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b010_01110000_00000_0_0011_1_00000_00000
                        | (imm5 << 16)
                        | (machreg_to_gpr(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::MovFromVec { rd, rn, idx, size } => {
                let (q, imm5, shift, mask) = match size {
                    VectorSize::Size8x16 => (0b0, 0b00001, 1, 0b1111),
                    VectorSize::Size16x8 => (0b0, 0b00010, 2, 0b0111),
                    VectorSize::Size32x4 => (0b0, 0b00100, 3, 0b0011),
                    VectorSize::Size64x2 => (0b1, 0b01000, 4, 0b0001),
                    _ => unreachable!(),
                };
                debug_assert_eq!(idx & mask, idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b000_01110000_00000_0_0111_1_00000_00000
                        | (q << 30)
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_gpr(rd.to_reg()),
                );
            }
            &Inst::MovFromVecSigned {
                rd,
                rn,
                idx,
                size,
                scalar_size,
            } => {
                let (imm5, shift, half) = match size {
                    VectorSize::Size8x8 => (0b00001, 1, true),
                    VectorSize::Size8x16 => (0b00001, 1, false),
                    VectorSize::Size16x4 => (0b00010, 2, true),
                    VectorSize::Size16x8 => (0b00010, 2, false),
                    VectorSize::Size32x2 => {
                        debug_assert_ne!(scalar_size, OperandSize::Size32);
                        (0b00100, 3, true)
                    }
                    VectorSize::Size32x4 => {
                        debug_assert_ne!(scalar_size, OperandSize::Size32);
                        (0b00100, 3, false)
                    }
                    _ => panic!("Unexpected vector operand size"),
                };
                debug_assert_eq!(idx & (0b11111 >> (half as u32 + shift)), idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b000_01110000_00000_0_0101_1_00000_00000
                        | (scalar_size.is64() as u32) << 30
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_gpr(rd.to_reg()),
                );
            }
            &Inst::VecDup { rd, rn, size } => {
                let imm5 = match size {
                    VectorSize::Size8x16 => 0b00001,
                    VectorSize::Size16x8 => 0b00010,
                    VectorSize::Size32x4 => 0b00100,
                    VectorSize::Size64x2 => 0b01000,
                    _ => unimplemented!(),
                };
                sink.put4(
                    0b010_01110000_00000_000011_00000_00000
                        | (imm5 << 16)
                        | (machreg_to_gpr(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecDupFromFpu { rd, rn, size } => {
                let imm5 = match size {
                    VectorSize::Size32x4 => 0b00100,
                    VectorSize::Size64x2 => 0b01000,
                    _ => unimplemented!(),
                };
                sink.put4(
                    0b010_01110000_00000_000001_00000_00000
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecDupFPImm { rd, imm, size } => {
                let imm = imm.enc_bits();
                let op = match size.lane_size() {
                    ScalarSize::Size32 => 0,
                    ScalarSize::Size64 => 1,
                    _ => unimplemented!(),
                };
                let q_op = op | ((size.is_128bits() as u32) << 1);

                sink.put4(enc_asimd_mod_imm(rd, q_op, 0b1111, imm));
            }
            &Inst::VecDupImm {
                rd,
                imm,
                invert,
                size,
            } => {
                let (imm, shift, shift_ones) = imm.value();
                let (op, cmode) = match size.lane_size() {
                    ScalarSize::Size8 => {
                        assert!(!invert);
                        assert_eq!(shift, 0);

                        (0, 0b1110)
                    }
                    ScalarSize::Size16 => {
                        let s = shift & 8;

                        assert!(!shift_ones);
                        assert_eq!(s, shift);

                        (invert as u32, 0b1000 | (s >> 2))
                    }
                    ScalarSize::Size32 => {
                        if shift_ones {
                            assert!(shift == 8 || shift == 16);

                            (invert as u32, 0b1100 | (shift >> 4))
                        } else {
                            let s = shift & 24;

                            assert_eq!(s, shift);

                            (invert as u32, 0b0000 | (s >> 2))
                        }
                    }
                    ScalarSize::Size64 => {
                        assert!(!invert);
                        assert_eq!(shift, 0);

                        (1, 0b1110)
                    }
                    _ => unreachable!(),
                };
                let q_op = op | ((size.is_128bits() as u32) << 1);

                sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm));
            }
            &Inst::VecExtend {
                t,
                rd,
                rn,
                high_half,
            } => {
                let (u, immh) = match t {
                    VecExtendOp::Sxtl8 => (0b0, 0b001),
                    VecExtendOp::Sxtl16 => (0b0, 0b010),
                    VecExtendOp::Sxtl32 => (0b0, 0b100),
                    VecExtendOp::Uxtl8 => (0b1, 0b001),
                    VecExtendOp::Uxtl16 => (0b1, 0b010),
                    VecExtendOp::Uxtl32 => (0b1, 0b100),
                };
                sink.put4(
                    0b000_011110_0000_000_101001_00000_00000
                        | ((high_half as u32) << 30)
                        | (u << 29)
                        | (immh << 19)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecMiscNarrow {
                op,
                rd,
                rn,
                size,
                high_half,
            } => {
                let size = match size.lane_size() {
                    ScalarSize::Size8 => 0b00,
                    ScalarSize::Size16 => 0b01,
                    ScalarSize::Size32 => 0b10,
                    _ => panic!("Unexpected vector operand lane size!"),
                };
                let (u, bits_12_16) = match op {
                    VecMiscNarrowOp::Xtn => (0b0, 0b10010),
                    VecMiscNarrowOp::Sqxtn => (0b0, 0b10100),
                    VecMiscNarrowOp::Sqxtun => (0b1, 0b10010),
                };
                sink.put4(enc_vec_rr_misc(
                    ((high_half as u32) << 1) | u,
                    size,
                    bits_12_16,
                    rd,
                    rn,
                ));
            }
            &Inst::VecMovElement {
                rd,
                rn,
                dest_idx,
                src_idx,
                size,
            } => {
                let (imm5, shift) = match size.lane_size() {
                    ScalarSize::Size8 => (0b00001, 1),
                    ScalarSize::Size16 => (0b00010, 2),
                    ScalarSize::Size32 => (0b00100, 3),
                    ScalarSize::Size64 => (0b01000, 4),
                    _ => unreachable!(),
                };
                let mask = 0b11111 >> shift;
                debug_assert_eq!(dest_idx & mask, dest_idx);
                debug_assert_eq!(src_idx & mask, src_idx);
                let imm4 = (src_idx as u32) << (shift - 1);
                let imm5 = imm5 | ((dest_idx as u32) << shift);
                sink.put4(
                    0b011_01110000_00000_0_0000_1_00000_00000
                        | (imm5 << 16)
                        | (imm4 << 11)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecRRPair { op, rd, rn } => {
                let bits_12_16 = match op {
                    VecPairOp::Addp => 0b11011,
                };

                sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn));
            }
            &Inst::VecRRR {
                rd,
                rn,
                rm,
                alu_op,
                size,
            } => {
                let (q, enc_size) = size.enc_size();
                let is_float = match alu_op {
                    VecALUOp::Fcmeq
                    | VecALUOp::Fcmgt
                    | VecALUOp::Fcmge
                    | VecALUOp::Fadd
                    | VecALUOp::Fsub
                    | VecALUOp::Fdiv
                    | VecALUOp::Fmax
                    | VecALUOp::Fmin
                    | VecALUOp::Fmul => true,
                    _ => false,
                };
                let enc_float_size = match (is_float, size) {
                    (true, VectorSize::Size32x2) => 0b0,
                    (true, VectorSize::Size32x4) => 0b0,
                    (true, VectorSize::Size64x2) => 0b1,
                    (true, _) => unimplemented!(),
                    _ => 0,
                };

                let (top11, bit15_10) = match alu_op {
                    VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::Sqsub => (0b000_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::Uqadd => (0b001_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::Uqsub => (0b001_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::Cmeq => (0b001_01110_00_1 | enc_size << 1, 0b100011),
                    VecALUOp::Cmge => (0b000_01110_00_1 | enc_size << 1, 0b001111),
                    VecALUOp::Cmgt => (0b000_01110_00_1 | enc_size << 1, 0b001101),
                    VecALUOp::Cmhi => (0b001_01110_00_1 | enc_size << 1, 0b001101),
                    VecALUOp::Cmhs => (0b001_01110_00_1 | enc_size << 1, 0b001111),
                    VecALUOp::Fcmeq => (0b000_01110_00_1, 0b111001),
                    VecALUOp::Fcmgt => (0b001_01110_10_1, 0b111001),
                    VecALUOp::Fcmge => (0b001_01110_00_1, 0b111001),
                    // The following logical instructions operate on bytes, so are not encoded
                    // differently for the different vector types.
                    VecALUOp::And => (0b000_01110_00_1, 0b000111),
                    VecALUOp::Bic => (0b000_01110_01_1, 0b000111),
                    VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
                    VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
                    VecALUOp::Bsl => (0b001_01110_01_1, 0b000111),
                    VecALUOp::Umaxp => (0b001_01110_00_1 | enc_size << 1, 0b101001),
                    VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001),
                    VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001),
                    VecALUOp::Mul => {
                        debug_assert_ne!(size, VectorSize::Size64x2);
                        (0b000_01110_00_1 | enc_size << 1, 0b100111)
                    }
                    VecALUOp::Sshl => (0b000_01110_00_1 | enc_size << 1, 0b010001),
                    VecALUOp::Ushl => (0b001_01110_00_1 | enc_size << 1, 0b010001),
                    VecALUOp::Umin => (0b001_01110_00_1 | enc_size << 1, 0b011011),
                    VecALUOp::Smin => (0b000_01110_00_1 | enc_size << 1, 0b011011),
                    VecALUOp::Umax => (0b001_01110_00_1 | enc_size << 1, 0b011001),
                    VecALUOp::Smax => (0b000_01110_00_1 | enc_size << 1, 0b011001),
                    VecALUOp::Urhadd => (0b001_01110_00_1 | enc_size << 1, 0b000101),
                    VecALUOp::Fadd => (0b000_01110_00_1, 0b110101),
                    VecALUOp::Fsub => (0b000_01110_10_1, 0b110101),
                    VecALUOp::Fdiv => (0b001_01110_00_1, 0b111111),
                    VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),
                    VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
                    VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
                    VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
                    VecALUOp::Umlal => {
                        debug_assert!(!size.is_128bits());
                        (0b001_01110_00_1 | enc_size << 1, 0b100000)
                    }
                    VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
                    VecALUOp::Smull => (0b000_01110_00_1 | enc_size << 1, 0b110000),
                    VecALUOp::Smull2 => (0b010_01110_00_1 | enc_size << 1, 0b110000),
                };
                let top11 = match alu_op {
                    VecALUOp::Smull | VecALUOp::Smull2 => top11,
                    _ if is_float => top11 | (q << 9) | enc_float_size << 1,
                    _ => top11 | (q << 9),
                };
                sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
            }
            &Inst::VecLoadReplicate { rd, rn, size } => {
                let (q, size) = size.enc_size();

                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }

                sink.put4(enc_ldst_vec(q, size, rn, rd));
            }
            &Inst::VecCSel { rd, rn, rm, cond } => {
                /* Emit this:
                      b.cond  else
                      mov     rd, rm
                      b       out
                     else:
                      mov     rd, rn
                     out:

                   Note, we could do better in the cases where rd == rn or rd == rm.
                */
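                // AArch64 has no vector conditional-select on the condition
                // flags, so we branch around one of two full 16-byte vector
                // moves instead.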
                let else_label = sink.get_label();
                let out_label = sink.get_label();

                // b.cond else
                let br_else_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(else_label),
                    CondBrKind::Cond(cond),
                ));
                sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19);

                // mov rd, rm
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rm));

                // b out
                let b_out_offset = sink.cur_offset();
                sink.use_label_at_offset(b_out_offset, out_label, LabelUse::Branch26);
                sink.add_uncond_branch(b_out_offset, b_out_offset + 4, out_label);
                sink.put4(enc_jump26(0b000101, 0 /* will be fixed up later */));

                // else:
                sink.bind_label(else_label);

                // mov rd, rn
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));

                // out:
                sink.bind_label(out_label);
            }
            &Inst::MovToNZCV { rn } => {
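                // MSR NZCV, Xn: bits 31:28 of rn become the flags.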
                sink.put4(0xd51b4200 | machreg_to_gpr(rn));
            }
            &Inst::MovFromNZCV { rd } => {
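                // MRS Xd, NZCV: reads the flags into bits 31:28 of rd.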
                sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg()));
            }
            &Inst::Extend {
                rd,
                rn,
                signed: false,
                from_bits: 1,
                to_bits,
            } => {
                assert!(to_bits <= 64);
                // Reduce zero-extend-from-1-bit to:
                // - and rd, rn, #1
                // Note: This is special cased as UBFX may take more cycles
                // than AND on smaller cores.
                let imml = ImmLogic::maybe_from_u64(1, I32).unwrap();
                Inst::AluRRImmLogic {
                    alu_op: ALUOp::And32,
                    rd,
                    rn,
                    imml,
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Extend {
                rd,
                rn,
                signed: false,
                from_bits: 32,
                to_bits: 64,
            } => {
                let mov = Inst::Mov32 { rd, rm: rn };
                mov.emit(sink, emit_info, state);
            }
            &Inst::Extend {
                rd,
                rn,
                signed,
                from_bits,
                to_bits,
            } => {
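                // A general sign/zero-extend is a bitfield-move with immr = 0 and
                // imms = from_bits - 1. Signed extends must use the full target
                // width, while unsigned extends can always use the 32-bit form,
                // since a write to a W register zeroes the upper 32 bits.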
                let (opc, size) = if signed {
                    (0b00, OperandSize::from_bits(to_bits))
                } else {
                    (0b10, OperandSize::Size32)
                };
                sink.put4(enc_bfm(opc, size, rd, rn, 0, from_bits - 1));
            }
            &Inst::Jump { ref dest } => {
                let off = sink.cur_offset();
                // If the jump has a label target, record the use so that a fixup
                // can occur later.
                if let Some(l) = dest.as_label() {
                    sink.use_label_at_offset(off, l, LabelUse::Branch26);
                    sink.add_uncond_branch(off, off + 4, l);
                }
                // Emit the jump itself.
                sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
            }
            &Inst::Ret => {
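                // ret: branch to the return address in x30 (the link register).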
                sink.put4(0xd65f03c0);
            }
            &Inst::EpiloguePlaceholder => {
                // Noop; this is just a placeholder for epilogues.
            }
            &Inst::Call { ref info } => {
                if let Some(s) = state.take_stack_map() {
                    sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
                }
                let loc = state.cur_srcloc();
                sink.add_reloc(loc, Reloc::Arm64Call, &info.dest, 0);
                sink.put4(enc_jump26(0b100101, 0));
                if info.opcode.is_call() {
                    sink.add_call_site(loc, info.opcode);
                }
            }
            &Inst::CallInd { ref info } => {
                if let Some(s) = state.take_stack_map() {
                    sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
                }
                sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5));
                let loc = state.cur_srcloc();
                if info.opcode.is_call() {
                    sink.add_call_site(loc, info.opcode);
                }
            }
            &Inst::CondBr {
                taken,
                not_taken,
                kind,
            } => {
                // Conditional part first.
                let cond_off = sink.cur_offset();
                if let Some(l) = taken.as_label() {
                    sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
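                    // Hand the buffer the pre-inverted encoding of this branch so
                    // its branch-folding pass can flip the condition when the
                    // taken target turns out to be the fallthrough block.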
                    let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
                    sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
                }
                sink.put4(enc_conditional_br(taken, kind));

                // Unconditional part next.
                let uncond_off = sink.cur_offset();
                if let Some(l) = not_taken.as_label() {
                    sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
                    sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
                }
                sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
            }
            &Inst::TrapIf { kind, trap_code } => {
                // condbr !KIND, LABEL: skip over the trap when the trap
                // condition does not hold.
                let off = sink.cur_offset();
                let label = sink.get_label();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(label),
                    kind.invert(),
                ));
                sink.use_label_at_offset(off, label, LabelUse::Branch19);
                // udf
                let trap = Inst::Udf { trap_code };
                trap.emit(sink, emit_info, state);
                // LABEL:
                sink.bind_label(label);
            }
            &Inst::IndirectBr { rn, .. } => {
                sink.put4(enc_br(rn));
            }
            &Inst::Nop0 => {}
            &Inst::Nop4 => {
                sink.put4(0xd503201f);
            }
            &Inst::Brk => {
                sink.put4(0xd4200000);
            }
            &Inst::Udf { trap_code } => {
                let srcloc = state.cur_srcloc();
                sink.add_trap(srcloc, trap_code);
                if let Some(s) = state.take_stack_map() {
                    sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
                }
                sink.put4(0xd4a00000);
            }
            &Inst::Adr { rd, off } => {
                assert!(off > -(1 << 20));
                assert!(off < (1 << 20));
                sink.put4(enc_adr(off, rd));
            }
            &Inst::Word4 { data } => {
                sink.put4(data);
            }
            &Inst::Word8 { data } => {
                sink.put8(data);
            }
            &Inst::JTSequence {
                ridx,
                rtmp1,
                rtmp2,
                ref info,
                ..
            } => {
                // This sequence is *one* instruction in the vcode, and is expanded only here at
                // emission time, because we cannot allow the regalloc to insert spills/reloads in
                // the middle; we depend on hardcoded PC-rel addressing below.

                // Branch to the default target when the condition code from the
                // prior bounds comparison indicates an out-of-range index.
                let br = enc_conditional_br(info.default_target, CondBrKind::Cond(Cond::Hs));
                // No need to inform the sink's branch folding logic about this branch, because it
                // will not be merged with any other branch, flipped, or elided (it is not preceded
                // or succeeded by any other branch). Just emit it with the label use.
                let default_br_offset = sink.cur_offset();
                if let BranchTarget::Label(l) = info.default_target {
                    sink.use_label_at_offset(default_br_offset, l, LabelUse::Branch19);
                }
                sink.put4(br);

                // Save the index in a tmp (the live range of ridx only extends to the
                // start of this sequence; rtmp1 or rtmp2 may overwrite it).
                let inst = Inst::gen_move(rtmp2, ridx, I64);
                inst.emit(sink, emit_info, state);
                // Load the address of the jump table.
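                // The offset of 16 skips the four instructions emitted from here on
                // (this adr, the table load, the add, and the br), so rtmp1 points
                // at the first jump-table entry.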
2390                 let inst = Inst::Adr { rd: rtmp1, off: 16 };
2391                 inst.emit(sink, emit_info, state);
2392                 // Load value out of jump table
2393                 let inst = Inst::SLoad32 {
2394                     rd: rtmp2,
2395                     mem: AMode::reg_plus_reg_scaled_extended(
2396                         rtmp1.to_reg(),
2397                         rtmp2.to_reg(),
2398                         I32,
2399                         ExtendOp::UXTW,
2400                     ),
2401                     flags: MemFlags::trusted(),
2402                 };
2403                 inst.emit(sink, emit_info, state);
2404                 // Add base of jump table to jump-table-sourced block offset
2405                 let inst = Inst::AluRRR {
2406                     alu_op: ALUOp::Add64,
2407                     rd: rtmp1,
2408                     rn: rtmp1.to_reg(),
2409                     rm: rtmp2.to_reg(),
2410                 };
2411                 inst.emit(sink, emit_info, state);
                // Branch to the computed address. (`targets` here is only used for successor
                // queries and is not needed for emission.)
                let inst = Inst::IndirectBr {
                    rn: rtmp1.to_reg(),
                    targets: vec![],
                };
                inst.emit(sink, emit_info, state);
                // Emit the jump table itself: a table of 32-bit offsets.
                let jt_off = sink.cur_offset();
                for &target in info.targets.iter() {
                    let word_off = sink.cur_offset();
                    // `off_into_table` is an addend embedded in the label, to be patched in at
                    // the end of codegen. The patched offset is initially relative to this
                    // jump-table entry; adding this addend makes it relative to the jump
                    // table's start instead.
                    let off_into_table = word_off - jt_off;
                    sink.use_label_at_offset(
                        word_off,
                        target.as_label().unwrap(),
                        LabelUse::PCRel32,
                    );
                    sink.put4(off_into_table);
                }

                // Lowering emits an EmitIsland before each JTSequence, so no island (and hence
                // no constant-pool data) can land in the middle of this sequence. Resetting
                // `start_off` here therefore safely disables the worst-case-size check below,
                // which an arbitrarily long jump table would otherwise trip.
                start_off = sink.cur_offset();
            }
            &Inst::LoadExtName {
                rd,
                ref name,
                offset,
            } => {
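                // The emitted pattern is a load from an inline literal:
                //
                //   ldr rd, <PC+8>   ; load the 8-byte literal just below
                //   b   <PC+12>      ; jump over the literal
                //   .quad <name+offset, filled in by the Abs8 relocation>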
                let inst = Inst::ULoad64 {
                    rd,
                    mem: AMode::Label(MemLabel::PCRel(8)),
                    flags: MemFlags::trusted(),
                };
                inst.emit(sink, emit_info, state);
                let inst = Inst::Jump {
                    dest: BranchTarget::ResolvedOffset(12),
                };
                inst.emit(sink, emit_info, state);
                let srcloc = state.cur_srcloc();
                sink.add_reloc(srcloc, Reloc::Abs8, name, offset);
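                // With `emit_all_ones_funcaddrs` set, fill the literal with all-ones
                // (presumably so that an address that never got relocated is easy to
                // spot at runtime); otherwise fill it with zero.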
                if emit_info.flags().emit_all_ones_funcaddrs() {
                    sink.put8(u64::max_value());
                } else {
                    sink.put8(0);
                }
            }
            &Inst::LoadAddr { rd, ref mem } => {
                let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                let (reg, index_reg, offset) = match mem {
                    AMode::RegExtended(r, idx, extendop) => (r, Some((idx, extendop)), 0),
                    AMode::Unscaled(r, simm9) => (r, None, simm9.value()),
                    AMode::UnsignedOffset(r, uimm12scaled) => {
                        (r, None, uimm12scaled.value() as i32)
                    }
                    _ => panic!("Unsupported case for LoadAddr: {:?}", mem),
                };
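                // Fold the sign of the offset into the choice of opcode (ADD vs. SUB)
                // so that the magnitude can be encoded as an unsigned immediate below.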
                let abs_offset = if offset < 0 {
                    -offset as u64
                } else {
                    offset as u64
                };
                let alu_op = if offset < 0 {
                    ALUOp::Sub64
                } else {
                    ALUOp::Add64
                };

                if let Some((idx, extendop)) = index_reg {
                    let add = Inst::AluRRRExtend {
                        alu_op: ALUOp::Add64,
                        rd,
                        rn: reg,
                        rm: idx,
                        extendop,
                    };

                    add.emit(sink, emit_info, state);
                } else if offset == 0 {
                    if reg != rd.to_reg() {
                        let mov = Inst::Mov64 { rd, rm: reg };

                        mov.emit(sink, emit_info, state);
                    }
                } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
                    let add = Inst::AluRRImm12 {
                        alu_op,
                        rd,
                        rn: reg,
                        imm12,
                    };
                    add.emit(sink, emit_info, state);
                } else {
                    // Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction
                    // was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
                    // that no other instructions will be inserted here (we're emitting directly),
                    // and a live range of `tmp2` should not span this instruction, so this use
                    // should otherwise be correct.
                    debug_assert!(rd.to_reg() != tmp2_reg());
                    debug_assert!(reg != tmp2_reg());
                    let tmp = writable_tmp2_reg();
                    for insn in Inst::load_constant(tmp, abs_offset).into_iter() {
                        insn.emit(sink, emit_info, state);
                    }
                    let add = Inst::AluRRR {
                        alu_op,
                        rd,
                        rn: reg,
                        rm: tmp.to_reg(),
                    };
                    add.emit(sink, emit_info, state);
                }
            }
            &Inst::VirtualSPOffsetAdj { offset } => {
                debug!(
                    "virtual sp offset adjusted by {} -> {}",
                    offset,
                    state.virtual_sp_offset + offset,
                );
                state.virtual_sp_offset += offset;
            }
            &Inst::EmitIsland { needed_space } => {
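                // Ask for 4 bytes beyond `needed_space` to account for the jump we must
                // emit around the island itself.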
                if sink.island_needed(needed_space + 4) {
                    let jump_around_label = sink.get_label();
                    let jmp = Inst::Jump {
                        dest: BranchTarget::Label(jump_around_label),
                    };
                    jmp.emit(sink, emit_info, state);
                    sink.emit_island();
                    sink.bind_label(jump_around_label);
                }
            }
            &Inst::ValueLabelMarker { .. } => {
                // Nothing to emit; this marker is used only to compute debug info.
            }

            &Inst::Unwind { ref inst } => {
                sink.add_unwind(inst.clone());
            }
        }

        let end_off = sink.cur_offset();
        debug_assert!((end_off - start_off) <= Inst::worst_case_size());

        state.clear_post_insn();
    }

    fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String {
        self.print_with_state(mb_rru, state)
    }
}