//! AArch64 ISA: binary code emission.

use crate::binemit::{CodeOffset, Reloc, StackMap};
use crate::ir::constant::ConstantData;
use crate::ir::types::*;
use crate::ir::{MemFlags, TrapCode};
use crate::isa::aarch64::inst::*;
use crate::machinst::ty_bits;

use regalloc::{Reg, RegClass, Writable};

use core::convert::TryFrom;
use log::debug;

/// Memory label/reference finalization: convert a MemLabel to a PC-relative
/// offset, possibly emitting relocation(s) as necessary.
pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 {
    match label {
        &MemLabel::PCRel(rel) => rel,
    }
}

/// Memory addressing mode finalization: convert "special" modes (e.g.,
/// generic arbitrary stack offset) into real addressing modes, possibly by
/// emitting some helper instructions that come immediately before the use
/// of this amode.
pub fn mem_finalize(
    insn_off: CodeOffset,
    mem: &AMode,
    state: &EmitState,
) -> (SmallVec<[Inst; 4]>, AMode) {
    match mem {
        &AMode::RegOffset(_, off, ty)
        | &AMode::SPOffset(off, ty)
        | &AMode::FPOffset(off, ty)
        | &AMode::NominalSPOffset(off, ty) => {
            let basereg = match mem {
                &AMode::RegOffset(reg, _, _) => reg,
                &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => stack_reg(),
                &AMode::FPOffset(..) => fp_reg(),
                _ => unreachable!(),
            };
            let adj = match mem {
                &AMode::NominalSPOffset(..) => {
                    debug!(
                        "mem_finalize: nominal SP offset {} + adj {} -> {}",
                        off,
                        state.virtual_sp_offset,
                        off + state.virtual_sp_offset
                    );
                    state.virtual_sp_offset
                }
                _ => 0,
            };
            let off = off + adj;

            if let Some(simm9) = SImm9::maybe_from_i64(off) {
                let mem = AMode::Unscaled(basereg, simm9);
                (smallvec![], mem)
            } else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(off, ty) {
                let mem = AMode::UnsignedOffset(basereg, uimm12s);
                (smallvec![], mem)
            } else {
                let tmp = writable_spilltmp_reg();
                let mut const_insts = Inst::load_constant(tmp, off as u64);
                // N.B.: we must use AluRRRExtend because AluRRR uses the "shifted register" form
                // (AluRRRShift) instead, which interprets register 31 as the zero reg, not SP. SP
                // is a valid base (for SPOffset) which we must handle here.
                // Also, SP needs to be the first arg, not second.
                let add_inst = Inst::AluRRRExtend {
                    alu_op: ALUOp::Add64,
                    rd: tmp,
                    rn: basereg,
                    rm: tmp.to_reg(),
                    extendop: ExtendOp::UXTX,
                };
                const_insts.push(add_inst);
                (const_insts, AMode::reg(tmp.to_reg()))
            }
        }

        &AMode::Label(ref label) => {
            let off = memlabel_finalize(insn_off, label);
            (smallvec![], AMode::Label(MemLabel::PCRel(off)))
        }

        _ => (smallvec![], mem.clone()),
    }
}
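
// For illustration (a sketch, not emitted verbatim): finalizing
// `NominalSPOffset(1 << 20, I64)` while `state.virtual_sp_offset` is 16 gives
// an adjusted offset of 0x100010, which fits neither an SImm9 nor a
// UImm12Scaled, so the fallback above produces a `load_constant` sequence
// into the spill temporary followed by `add tmp, base, tmp, UXTX`, and the
// returned amode is simply `[tmp]`.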

/// Helper: get a ConstantData from a u64.
pub fn u64_constant(bits: u64) -> ConstantData {
    let data = bits.to_le_bytes();
    ConstantData::from(&data[..])
}
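
// For example (byte order is guaranteed by `to_le_bytes`):
// `u64_constant(0x0123_4567_89AB_CDEF)` yields the little-endian byte string
// `EF CD AB 89 67 45 23 01`.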

//=============================================================================
// Instructions and subcomponents: emission

fn machreg_to_gpr(m: Reg) -> u32 {
    assert_eq!(m.get_class(), RegClass::I64);
    u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
}

fn machreg_to_vec(m: Reg) -> u32 {
    assert_eq!(m.get_class(), RegClass::V128);
    u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
}

fn machreg_to_gpr_or_vec(m: Reg) -> u32 {
    u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
}

fn enc_arith_rrr(bits_31_21: u32, bits_15_10: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    (bits_31_21 << 21)
        | (bits_15_10 << 10)
        | machreg_to_gpr(rd.to_reg())
        | (machreg_to_gpr(rn) << 5)
        | (machreg_to_gpr(rm) << 16)
}

fn enc_arith_rr_imm12(
    bits_31_24: u32,
    immshift: u32,
    imm12: u32,
    rn: Reg,
    rd: Writable<Reg>,
) -> u32 {
    (bits_31_24 << 24)
        | (immshift << 22)
        | (imm12 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}

fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
}

fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {
    (top11 << 21)
        | (machreg_to_gpr(rm) << 16)
        | (bit15 << 15)
        | (machreg_to_gpr(ra) << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}

fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 {
    assert!(off_26_0 < (1 << 26));
    (op_31_26 << 26) | off_26_0
}

fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 {
    assert!(off_18_0 < (1 << 19));
    (op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg)
}

fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {
    assert!(off_18_0 < (1 << 19));
    assert!(cond < (1 << 4));
    (op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond
}

fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
    match kind {
        CondBrKind::Zero(reg) => enc_cmpbr(0b1_011010_0, taken.as_offset19_or_zero(), reg),
        CondBrKind::NotZero(reg) => enc_cmpbr(0b1_011010_1, taken.as_offset19_or_zero(), reg),
        CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),
    }
}

const MOVE_WIDE_FIXED: u32 = 0x12800000;

#[repr(u32)]
enum MoveWideOpcode {
    MOVN = 0b00,
    MOVZ = 0b10,
    MOVK = 0b11,
}

fn enc_move_wide(
    op: MoveWideOpcode,
    rd: Writable<Reg>,
    imm: MoveWideConst,
    size: OperandSize,
) -> u32 {
    assert!(imm.shift <= 0b11);
    MOVE_WIDE_FIXED
        | size.sf_bit() << 31
        | (op as u32) << 29
        | u32::from(imm.shift) << 21
        | u32::from(imm.bits) << 5
        | machreg_to_gpr(rd.to_reg())
}
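
// Worked example (a sketch): `movz x0, #0x1234` is MOVE_WIDE_FIXED with
// sf = 1 (64-bit) at bit 31, op = MOVZ (0b10) at bit 29, shift = 0, and
// imm.bits = 0x1234 at bit 5, i.e.
// 0x1280_0000 | 0x8000_0000 | 0x4000_0000 | (0x1234 << 5) = 0xD282_4680.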

fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 {
    (op_31_22 << 22)
        | (simm7.bits() << 15)
        | (machreg_to_gpr(rt2) << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}

fn enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 {
    (op_31_22 << 22)
        | (simm9.bits() << 12)
        | (op_11_10 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 {
    (op_31_22 << 22)
        | (0b1 << 24)
        | (uimm12.bits() << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_reg(
    op_31_22: u32,
    rn: Reg,
    rm: Reg,
    s_bit: bool,
    extendop: Option<ExtendOp>,
    rd: Reg,
) -> u32 {
    let s_bit = if s_bit { 1 } else { 0 };
    let extend_bits = match extendop {
        Some(ExtendOp::UXTW) => 0b010,
        Some(ExtendOp::SXTW) => 0b110,
        Some(ExtendOp::SXTX) => 0b111,
        None => 0b011, // LSL
        _ => panic!("bad extend mode for ld/st AMode"),
    };
    (op_31_22 << 22)
        | (1 << 21)
        | (machreg_to_gpr(rm) << 16)
        | (extend_bits << 13)
        | (s_bit << 12)
        | (0b10 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 {
    (op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(size & 0b11, size);
    0b0_0_0011010_10_00000_110_0_00_00000_00000
        | q << 30
        | size << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_vec(rt.to_reg())
}

fn enc_ldst_vec_pair(
    opc: u32,
    amode: u32,
    is_load: bool,
    simm7: SImm7Scaled,
    rn: Reg,
    rt: Reg,
    rt2: Reg,
) -> u32 {
    debug_assert_eq!(opc & 0b11, opc);
    debug_assert_eq!(amode & 0b11, amode);

    0b00_10110_00_0_0000000_00000_00000_00000
        | opc << 30
        | amode << 23
        | (is_load as u32) << 22
        | simm7.bits() << 15
        | machreg_to_vec(rt2) << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_vec(rt)
}

fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (top11 << 21)
        | (machreg_to_vec(rm) << 16)
        | (bit15_10 << 10)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (0b01011010110 << 21)
        | size << 31
        | opcode2 << 16
        | opcode1 << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rd.to_reg())
}

fn enc_br(rn: Reg) -> u32 {
    0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)
}

fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 {
    let off = u32::try_from(off).unwrap();
    let immlo = off & 3;
    let immhi = (off >> 2) & ((1 << 19) - 1);
    (0b00010000 << 24) | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())
}
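
// For example (a sketch): an ADR byte offset of 0x1001 splits into
// immlo = 0x1001 & 3 = 1 (placed at bits 30:29) and immhi = 0x1001 >> 2 =
// 0x400 (placed at bits 23:5), per the A64 ADR immediate layout.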

fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond) -> u32 {
    0b100_11010100_00000_0000_00_00000_00000
        | (machreg_to_gpr(rm) << 16)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
        | (cond.bits() << 12)
}

fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
    0b000_11110_00_1_00000_0000_11_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
        | (cond.bits() << 12)
}

fn enc_cset(rd: Writable<Reg>, cond: Cond) -> u32 {
    0b100_11010100_11111_0000_01_11111_00000
        | machreg_to_gpr(rd.to_reg())
        | (cond.invert().bits() << 12)
}

fn enc_csetm(rd: Writable<Reg>, cond: Cond) -> u32 {
    0b110_11010100_11111_0000_00_11111_00000
        | machreg_to_gpr(rd.to_reg())
        | (cond.invert().bits() << 12)
}

fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
    0b0_1_1_11010010_00000_0000_10_00000_0_0000
        | size.sf_bit() << 31
        | imm.bits() << 16
        | cond.bits() << 12
        | machreg_to_gpr(rn) << 5
        | nzcv.bits()
}

fn enc_bfm(opc: u8, size: OperandSize, rd: Writable<Reg>, rn: Reg, immr: u8, imms: u8) -> u32 {
    match size {
        OperandSize::Size64 => {
            debug_assert!(immr <= 63);
            debug_assert!(imms <= 63);
        }
        OperandSize::Size32 => {
            debug_assert!(immr <= 31);
            debug_assert!(imms <= 31);
        }
    }
    debug_assert_eq!(opc & 0b11, opc);
    let n_bit = size.sf_bit();
    0b0_00_100110_0_000000_000000_00000_00000
        | size.sf_bit() << 31
        | u32::from(opc) << 29
        | n_bit << 22
        | u32::from(immr) << 16
        | u32::from(imms) << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rd.to_reg())
}

fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
    0b00001110_101_00000_00011_1_00000_00000
        | ((is_16b as u32) << 30)
        | machreg_to_vec(rd.to_reg())
        | (machreg_to_vec(rn) << 16)
        | (machreg_to_vec(rn) << 5)
}

fn enc_fpurr(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_fpurrr(top22: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    (top22 << 10)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32 {
    (top17 << 15)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(ra) << 10)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 {
    0b000_11110_00_1_00000_00_1000_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
}

fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg())
}

fn enc_inttofpu(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(qu & 0b11, qu);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
    let bits = 0b0_00_01110_00_10000_00000_10_00000_00000;
    bits | qu << 29
        | size << 22
        | bits_12_16 << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_pair(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);

    0b010_11110_11_11000_11011_10_00000_00000
        | bits_12_16 << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(opcode & 0b11111, opcode);
    0b0_0_0_01110_00_11000_0_0000_10_00000_00000
        | q << 30
        | u << 29
        | size << 22
        | opcode << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    debug_assert_eq!(len & 0b11, len);
    0b0_1_001110_000_00000_0_00_0_00_00000_00000
        | (machreg_to_vec(rm) << 16)
        | len << 13
        | (is_extension as u32) << 12
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

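/// `dmb ish`: data memory barrier, inner-shareable domain, reads and writes.
/// This is a fixed (operand-free) A64 encoding.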
fn enc_dmb_ish() -> u32 {
    0xD5033BBF
}

fn enc_ldxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00001000_01011111_01111100_00000000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt.to_reg())
}

fn enc_stxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00001000_00000000_01111100_00000000
        | (sz << 30)
        | (machreg_to_gpr(rs.to_reg()) << 16)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}

fn enc_cas(size: u32, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
    debug_assert_eq!(size & 0b11, size);

    0b00_0010001_1_1_00000_1_11111_00000_00000
        | size << 30
        | machreg_to_gpr(rs.to_reg()) << 16
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rt)
}

fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
    let abc = (imm >> 5) as u32;
    let defgh = (imm & 0b11111) as u32;

    debug_assert_eq!(cmode & 0b1111, cmode);
    debug_assert_eq!(q_op & 0b11, q_op);

    0b0_0_0_0111100000_000_0000_01_00000_00000
        | (q_op << 29)
        | (abc << 16)
        | (cmode << 12)
        | (defgh << 5)
        | machreg_to_vec(rd.to_reg())
}

/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
    /// Addend to convert nominal-SP offsets to real-SP offsets at the current
    /// program point.
    pub(crate) virtual_sp_offset: i64,
    /// Offset of FP from nominal-SP.
    pub(crate) nominal_sp_to_fp: i64,
    /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`.
    stack_map: Option<StackMap>,
    /// Current source-code location corresponding to instruction to be emitted.
    cur_srcloc: SourceLoc,
}

impl MachInstEmitState<Inst> for EmitState {
    fn new(abi: &dyn ABICallee<I = Inst>) -> Self {
        EmitState {
            virtual_sp_offset: 0,
            nominal_sp_to_fp: abi.frame_size() as i64,
            stack_map: None,
            cur_srcloc: SourceLoc::default(),
        }
    }

    fn pre_safepoint(&mut self, stack_map: StackMap) {
        self.stack_map = Some(stack_map);
    }

    fn pre_sourceloc(&mut self, srcloc: SourceLoc) {
        self.cur_srcloc = srcloc;
    }
}

impl EmitState {
    fn take_stack_map(&mut self) -> Option<StackMap> {
        self.stack_map.take()
    }

    fn clear_post_insn(&mut self) {
        self.stack_map = None;
    }

    fn cur_srcloc(&self) -> SourceLoc {
        self.cur_srcloc
    }
}

/// Constant state used during function compilation.
pub struct EmitInfo(settings::Flags);

impl EmitInfo {
    pub(crate) fn new(flags: settings::Flags) -> Self {
        Self(flags)
    }
}

impl MachInstEmitInfo for EmitInfo {
    fn flags(&self) -> &settings::Flags {
        &self.0
    }
}

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
        // N.B.: we *must* not exceed the "worst-case size" used to compute
        // where to insert islands, except when islands are explicitly triggered
        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
        // to allow disabling the check for `JTSequence`, which is always
        // emitted following an `EmitIsland`.
        let mut start_off = sink.cur_offset();

        match self {
            &Inst::AluRRR { alu_op, rd, rn, rm } => {
                let top11 = match alu_op {
                    ALUOp::Add32 => 0b00001011_000,
                    ALUOp::Add64 => 0b10001011_000,
                    ALUOp::Sub32 => 0b01001011_000,
                    ALUOp::Sub64 => 0b11001011_000,
                    ALUOp::Orr32 => 0b00101010_000,
                    ALUOp::Orr64 => 0b10101010_000,
                    ALUOp::And32 => 0b00001010_000,
                    ALUOp::And64 => 0b10001010_000,
                    ALUOp::Eor32 => 0b01001010_000,
                    ALUOp::Eor64 => 0b11001010_000,
                    ALUOp::OrrNot32 => 0b00101010_001,
                    ALUOp::OrrNot64 => 0b10101010_001,
                    ALUOp::AndNot32 => 0b00001010_001,
                    ALUOp::AndNot64 => 0b10001010_001,
                    ALUOp::EorNot32 => 0b01001010_001,
                    ALUOp::EorNot64 => 0b11001010_001,
                    ALUOp::AddS32 => 0b00101011_000,
                    ALUOp::AddS64 => 0b10101011_000,
                    ALUOp::SubS32 => 0b01101011_000,
                    ALUOp::SubS64 => 0b11101011_000,
                    ALUOp::SDiv64 => 0b10011010_110,
                    ALUOp::UDiv64 => 0b10011010_110,
                    ALUOp::RotR32 | ALUOp::Lsr32 | ALUOp::Asr32 | ALUOp::Lsl32 => 0b00011010_110,
                    ALUOp::RotR64 | ALUOp::Lsr64 | ALUOp::Asr64 | ALUOp::Lsl64 => 0b10011010_110,
                    ALUOp::SMulH => 0b10011011_010,
                    ALUOp::UMulH => 0b10011011_110,
                };
                let bit15_10 = match alu_op {
                    ALUOp::SDiv64 => 0b000011,
                    ALUOp::UDiv64 => 0b000010,
                    ALUOp::RotR32 | ALUOp::RotR64 => 0b001011,
                    ALUOp::Lsr32 | ALUOp::Lsr64 => 0b001001,
                    ALUOp::Asr32 | ALUOp::Asr64 => 0b001010,
                    ALUOp::Lsl32 | ALUOp::Lsl64 => 0b001000,
                    ALUOp::SMulH | ALUOp::UMulH => 0b011111,
                    _ => 0b000000,
                };
                debug_assert_ne!(writable_stack_reg(), rd);
                // The stack pointer is the zero register in this context, so this might be an
                // indication that something is wrong.
                debug_assert_ne!(stack_reg(), rn);
                debug_assert_ne!(stack_reg(), rm);
                sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
            }
            &Inst::AluRRRR {
                alu_op,
                rd,
                rm,
                rn,
                ra,
            } => {
                let (top11, bit15) = match alu_op {
                    ALUOp3::MAdd32 => (0b0_00_11011_000, 0),
                    ALUOp3::MSub32 => (0b0_00_11011_000, 1),
                    ALUOp3::MAdd64 => (0b1_00_11011_000, 0),
                    ALUOp3::MSub64 => (0b1_00_11011_000, 1),
                };
                sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd));
            }
            &Inst::AluRRImm12 {
                alu_op,
                rd,
                rn,
                ref imm12,
            } => {
                let top8 = match alu_op {
                    ALUOp::Add32 => 0b000_10001,
                    ALUOp::Add64 => 0b100_10001,
                    ALUOp::Sub32 => 0b010_10001,
                    ALUOp::Sub64 => 0b110_10001,
                    ALUOp::AddS32 => 0b001_10001,
                    ALUOp::AddS64 => 0b101_10001,
                    ALUOp::SubS32 => 0b011_10001,
                    ALUOp::SubS64 => 0b111_10001,
                    _ => unimplemented!("{:?}", alu_op),
                };
                sink.put4(enc_arith_rr_imm12(
                    top8,
                    imm12.shift_bits(),
                    imm12.imm_bits(),
                    rn,
                    rd,
                ));
            }
            &Inst::AluRRImmLogic {
                alu_op,
                rd,
                rn,
                ref imml,
            } => {
                let (top9, inv) = match alu_op {
                    ALUOp::Orr32 => (0b001_100100, false),
                    ALUOp::Orr64 => (0b101_100100, false),
                    ALUOp::And32 => (0b000_100100, false),
                    ALUOp::And64 => (0b100_100100, false),
                    ALUOp::Eor32 => (0b010_100100, false),
                    ALUOp::Eor64 => (0b110_100100, false),
                    ALUOp::OrrNot32 => (0b001_100100, true),
                    ALUOp::OrrNot64 => (0b101_100100, true),
                    ALUOp::AndNot32 => (0b000_100100, true),
                    ALUOp::AndNot64 => (0b100_100100, true),
                    ALUOp::EorNot32 => (0b010_100100, true),
                    ALUOp::EorNot64 => (0b110_100100, true),
                    _ => unimplemented!("{:?}", alu_op),
                };
                let imml = if inv { imml.invert() } else { imml.clone() };
                sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd));
            }

            &Inst::AluRRImmShift {
                alu_op,
                rd,
                rn,
                ref immshift,
            } => {
                let amt = immshift.value();
                let (top10, immr, imms) = match alu_op {
                    ALUOp::RotR32 => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)),
                    ALUOp::RotR64 => (0b1001001111, machreg_to_gpr(rn), u32::from(amt)),
                    ALUOp::Lsr32 => (0b0101001100, u32::from(amt), 0b011111),
                    ALUOp::Lsr64 => (0b1101001101, u32::from(amt), 0b111111),
                    ALUOp::Asr32 => (0b0001001100, u32::from(amt), 0b011111),
                    ALUOp::Asr64 => (0b1001001101, u32::from(amt), 0b111111),
                    ALUOp::Lsl32 => (
                        0b0101001100,
                        u32::from((32 - amt) % 32),
                        u32::from(31 - amt),
                    ),
                    ALUOp::Lsl64 => (
                        0b1101001101,
                        u32::from((64 - amt) % 64),
                        u32::from(63 - amt),
                    ),
                    _ => unimplemented!("{:?}", alu_op),
                };
                sink.put4(
                    (top10 << 22)
                        | (immr << 16)
                        | (imms << 10)
                        | (machreg_to_gpr(rn) << 5)
                        | machreg_to_gpr(rd.to_reg()),
                );
            }

            &Inst::AluRRRShift {
                alu_op,
                rd,
                rn,
                rm,
                ref shiftop,
            } => {
                let top11: u32 = match alu_op {
                    ALUOp::Add32 => 0b000_01011000,
                    ALUOp::Add64 => 0b100_01011000,
                    ALUOp::AddS32 => 0b001_01011000,
                    ALUOp::AddS64 => 0b101_01011000,
                    ALUOp::Sub32 => 0b010_01011000,
                    ALUOp::Sub64 => 0b110_01011000,
                    ALUOp::SubS32 => 0b011_01011000,
                    ALUOp::SubS64 => 0b111_01011000,
                    ALUOp::Orr32 => 0b001_01010000,
                    ALUOp::Orr64 => 0b101_01010000,
                    ALUOp::And32 => 0b000_01010000,
                    ALUOp::And64 => 0b100_01010000,
                    ALUOp::Eor32 => 0b010_01010000,
                    ALUOp::Eor64 => 0b110_01010000,
                    ALUOp::OrrNot32 => 0b001_01010001,
                    ALUOp::OrrNot64 => 0b101_01010001,
                    ALUOp::EorNot32 => 0b010_01010001,
                    ALUOp::EorNot64 => 0b110_01010001,
                    ALUOp::AndNot32 => 0b000_01010001,
                    ALUOp::AndNot64 => 0b100_01010001,
                    _ => unimplemented!("{:?}", alu_op),
                };
                let top11 = top11 | (u32::from(shiftop.op().bits()) << 1);
                let bits_15_10 = u32::from(shiftop.amt().value());
                sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
            }

            &Inst::AluRRRExtend {
                alu_op,
                rd,
                rn,
                rm,
                extendop,
            } => {
                let top11: u32 = match alu_op {
                    ALUOp::Add32 => 0b00001011001,
                    ALUOp::Add64 => 0b10001011001,
                    ALUOp::Sub32 => 0b01001011001,
                    ALUOp::Sub64 => 0b11001011001,
                    ALUOp::AddS32 => 0b00101011001,
                    ALUOp::AddS64 => 0b10101011001,
                    ALUOp::SubS32 => 0b01101011001,
                    ALUOp::SubS64 => 0b11101011001,
                    _ => unimplemented!("{:?}", alu_op),
                };
                let bits_15_10 = u32::from(extendop.bits()) << 3;
                sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
            }

            &Inst::BitRR { op, rd, rn, .. } => {
                let size = if op.operand_size().is32() { 0b0 } else { 0b1 };
                let (op1, op2) = match op {
                    BitOp::RBit32 | BitOp::RBit64 => (0b00000, 0b000000),
                    BitOp::Clz32 | BitOp::Clz64 => (0b00000, 0b000100),
                    BitOp::Cls32 | BitOp::Cls64 => (0b00000, 0b000101),
                };
                sink.put4(enc_bit_rr(size, op1, op2, rn, rd))
            }

            &Inst::ULoad8 { rd, ref mem, flags }
            | &Inst::SLoad8 { rd, ref mem, flags }
            | &Inst::ULoad16 { rd, ref mem, flags }
            | &Inst::SLoad16 { rd, ref mem, flags }
            | &Inst::ULoad32 { rd, ref mem, flags }
            | &Inst::SLoad32 { rd, ref mem, flags }
            | &Inst::ULoad64 {
                rd, ref mem, flags, ..
            }
            | &Inst::FpuLoad32 { rd, ref mem, flags }
            | &Inst::FpuLoad64 { rd, ref mem, flags }
            | &Inst::FpuLoad128 { rd, ref mem, flags } => {
                let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);

                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                // ldst encoding helpers take Reg, not Writable<Reg>.
                let rd = rd.to_reg();

                // This is the base opcode (top 10 bits) for the "unscaled
                // immediate" form (Unscaled). Other addressing modes will OR in
                // other values for bits 24/25 (bits 1/2 of this constant).
                let (op, bits) = match self {
                    &Inst::ULoad8 { .. } => (0b0011100001, 8),
                    &Inst::SLoad8 { .. } => (0b0011100010, 8),
                    &Inst::ULoad16 { .. } => (0b0111100001, 16),
                    &Inst::SLoad16 { .. } => (0b0111100010, 16),
                    &Inst::ULoad32 { .. } => (0b1011100001, 32),
                    &Inst::SLoad32 { .. } => (0b1011100010, 32),
                    &Inst::ULoad64 { .. } => (0b1111100001, 64),
                    &Inst::FpuLoad32 { .. } => (0b1011110001, 32),
                    &Inst::FpuLoad64 { .. } => (0b1111110001, 64),
                    &Inst::FpuLoad128 { .. } => (0b0011110011, 128),
                    _ => unreachable!(),
                };

                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() && !flags.notrap() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }

                match &mem {
                    &AMode::Unscaled(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
                    }
                    &AMode::UnsignedOffset(reg, uimm12scaled) => {
                        if uimm12scaled.value() != 0 {
                            assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
                        }
                        sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
                    }
                    &AMode::RegReg(r1, r2) => {
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
                        ));
                    }
                    &AMode::RegScaled(r1, r2, ty) | &AMode::RegScaledExtended(r1, r2, ty, _) => {
                        assert_eq!(bits, ty_bits(ty));
                        let extendop = match &mem {
                            &AMode::RegScaled(..) => None,
                            &AMode::RegScaledExtended(_, _, _, op) => Some(op),
                            _ => unreachable!(),
                        };
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ true, extendop, rd,
                        ));
                    }
                    &AMode::RegExtended(r1, r2, extendop) => {
                        sink.put4(enc_ldst_reg(
                            op,
                            r1,
                            r2,
                            /* scaled = */ false,
                            Some(extendop),
                            rd,
                        ));
                    }
                    &AMode::Label(ref label) => {
                        let offset = match label {
                            // cast i32 to u32 (two's-complement)
                            &MemLabel::PCRel(off) => off as u32,
                        } / 4;
                        assert!(offset < (1 << 19));
                        match self {
                            &Inst::ULoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b00011000, offset, rd));
                            }
                            &Inst::SLoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b10011000, offset, rd));
                            }
                            &Inst::FpuLoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b00011100, offset, rd));
                            }
                            &Inst::ULoad64 { .. } => {
                                sink.put4(enc_ldst_imm19(0b01011000, offset, rd));
                            }
                            &Inst::FpuLoad64 { .. } => {
                                sink.put4(enc_ldst_imm19(0b01011100, offset, rd));
                            }
                            &Inst::FpuLoad128 { .. } => {
                                sink.put4(enc_ldst_imm19(0b10011100, offset, rd));
                            }
                            _ => panic!("Unsupported size for LDR from constant pool!"),
                        }
                    }
                    &AMode::PreIndexed(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd));
                    }
                    &AMode::PostIndexed(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
                    }
                    // Eliminated by `mem_finalize()` above.
                    &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => {
                        panic!("Should not see stack-offset here!")
                    }
                    &AMode::RegOffset(..) => panic!("Should not see generic reg-offset here!"),
                }
            }

            &Inst::Store8 { rd, ref mem, flags }
            | &Inst::Store16 { rd, ref mem, flags }
            | &Inst::Store32 { rd, ref mem, flags }
            | &Inst::Store64 { rd, ref mem, flags }
            | &Inst::FpuStore32 { rd, ref mem, flags }
            | &Inst::FpuStore64 { rd, ref mem, flags }
            | &Inst::FpuStore128 { rd, ref mem, flags } => {
                let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);

                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                let (op, bits) = match self {
                    &Inst::Store8 { .. } => (0b0011100000, 8),
                    &Inst::Store16 { .. } => (0b0111100000, 16),
                    &Inst::Store32 { .. } => (0b1011100000, 32),
                    &Inst::Store64 { .. } => (0b1111100000, 64),
                    &Inst::FpuStore32 { .. } => (0b1011110000, 32),
                    &Inst::FpuStore64 { .. } => (0b1111110000, 64),
                    &Inst::FpuStore128 { .. } => (0b0011110010, 128),
                    _ => unreachable!(),
                };

                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() && !flags.notrap() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }

                match &mem {
                    &AMode::Unscaled(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
                    }
                    &AMode::UnsignedOffset(reg, uimm12scaled) => {
                        if uimm12scaled.value() != 0 {
                            assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
                        }
                        sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
                    }
                    &AMode::RegReg(r1, r2) => {
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
                        ));
                    }
                    &AMode::RegScaled(r1, r2, _ty) | &AMode::RegScaledExtended(r1, r2, _ty, _) => {
                        let extendop = match &mem {
                            &AMode::RegScaled(..) => None,
                            &AMode::RegScaledExtended(_, _, _, op) => Some(op),
                            _ => unreachable!(),
                        };
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ true, extendop, rd,
                        ));
                    }
                    &AMode::RegExtended(r1, r2, extendop) => {
                        sink.put4(enc_ldst_reg(
                            op,
                            r1,
                            r2,
                            /* scaled = */ false,
                            Some(extendop),
                            rd,
                        ));
                    }
                    &AMode::Label(..) => {
                        panic!("Store to a MemLabel not implemented!");
                    }
                    &AMode::PreIndexed(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd));
                    }
                    &AMode::PostIndexed(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
                    }
                    // Eliminated by `mem_finalize()` above.
                    &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => {
                        panic!("Should not see stack-offset here!")
                    }
                    &AMode::RegOffset(..) => panic!("Should not see generic reg-offset here!"),
1010 }
1011 }
1012
1013 &Inst::StoreP64 {
1014 rt,
1015 rt2,
1016 ref mem,
1017 flags,
1018 } => {
1019 let srcloc = state.cur_srcloc();
1020 if srcloc != SourceLoc::default() && !flags.notrap() {
1021 // Register the offset at which the actual store instruction starts.
1022 sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1023 }
1024 match mem {
1025 &PairAMode::SignedOffset(reg, simm7) => {
1026 assert_eq!(simm7.scale_ty, I64);
1027 sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2));
1028 }
1029 &PairAMode::PreIndexed(reg, simm7) => {
1030 assert_eq!(simm7.scale_ty, I64);
1031 sink.put4(enc_ldst_pair(0b1010100110, simm7, reg.to_reg(), rt, rt2));
1032 }
1033 &PairAMode::PostIndexed(reg, simm7) => {
1034 assert_eq!(simm7.scale_ty, I64);
1035 sink.put4(enc_ldst_pair(0b1010100010, simm7, reg.to_reg(), rt, rt2));
1036 }
1037 }
1038 }
1039 &Inst::LoadP64 {
1040 rt,
1041 rt2,
1042 ref mem,
1043 flags,
1044 } => {
1045 let srcloc = state.cur_srcloc();
1046 if srcloc != SourceLoc::default() && !flags.notrap() {
1047 // Register the offset at which the actual load instruction starts.
1048 sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1049 }
1050
1051 let rt = rt.to_reg();
1052 let rt2 = rt2.to_reg();
1053 match mem {
1054 &PairAMode::SignedOffset(reg, simm7) => {
1055 assert_eq!(simm7.scale_ty, I64);
1056 sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2));
1057 }
1058 &PairAMode::PreIndexed(reg, simm7) => {
1059 assert_eq!(simm7.scale_ty, I64);
1060 sink.put4(enc_ldst_pair(0b1010100111, simm7, reg.to_reg(), rt, rt2));
1061 }
1062 &PairAMode::PostIndexed(reg, simm7) => {
1063 assert_eq!(simm7.scale_ty, I64);
1064 sink.put4(enc_ldst_pair(0b1010100011, simm7, reg.to_reg(), rt, rt2));
1065 }
1066 }
1067 }
1068 &Inst::FpuLoadP64 {
1069 rt,
1070 rt2,
1071 ref mem,
1072 flags,
1073 }
1074 | &Inst::FpuLoadP128 {
1075 rt,
1076 rt2,
1077 ref mem,
1078 flags,
1079 } => {
1080 let srcloc = state.cur_srcloc();
1081
1082 if srcloc != SourceLoc::default() && !flags.notrap() {
1083 // Register the offset at which the actual load instruction starts.
1084 sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1085 }
1086
1087 let opc = match self {
1088 &Inst::FpuLoadP64 { .. } => 0b01,
1089 &Inst::FpuLoadP128 { .. } => 0b10,
1090 _ => unreachable!(),
1091 };
1092 let rt = rt.to_reg();
1093 let rt2 = rt2.to_reg();
1094
1095 match mem {
1096 &PairAMode::SignedOffset(reg, simm7) => {
1097 assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1098 sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2));
1099 }
1100 &PairAMode::PreIndexed(reg, simm7) => {
1101 assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1102 sink.put4(enc_ldst_vec_pair(
1103 opc,
1104 0b11,
1105 true,
1106 simm7,
1107 reg.to_reg(),
1108 rt,
1109 rt2,
1110 ));
1111 }
1112 &PairAMode::PostIndexed(reg, simm7) => {
1113 assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1114 sink.put4(enc_ldst_vec_pair(
1115 opc,
1116 0b01,
1117 true,
1118 simm7,
1119 reg.to_reg(),
1120 rt,
1121 rt2,
1122 ));
1123 }
1124 }
1125 }
1126 &Inst::FpuStoreP64 {
1127 rt,
1128 rt2,
1129 ref mem,
1130 flags,
1131 }
1132 | &Inst::FpuStoreP128 {
1133 rt,
1134 rt2,
1135 ref mem,
1136 flags,
1137 } => {
1138 let srcloc = state.cur_srcloc();
1139
1140 if srcloc != SourceLoc::default() && !flags.notrap() {
1141 // Register the offset at which the actual store instruction starts.
1142 sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1143 }
1144
1145 let opc = match self {
1146 &Inst::FpuStoreP64 { .. } => 0b01,
1147 &Inst::FpuStoreP128 { .. } => 0b10,
1148 _ => unreachable!(),
1149 };
1150
1151 match mem {
1152 &PairAMode::SignedOffset(reg, simm7) => {
1153 assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1154 sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2));
1155 }
1156 &PairAMode::PreIndexed(reg, simm7) => {
1157 assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1158 sink.put4(enc_ldst_vec_pair(
1159 opc,
1160 0b11,
1161 false,
1162 simm7,
1163 reg.to_reg(),
1164 rt,
1165 rt2,
1166 ));
1167 }
1168 &PairAMode::PostIndexed(reg, simm7) => {
1169 assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1170 sink.put4(enc_ldst_vec_pair(
1171 opc,
1172 0b01,
1173 false,
1174 simm7,
1175 reg.to_reg(),
1176 rt,
1177 rt2,
1178 ));
1179 }
1180 }
1181 }
1182 &Inst::Mov64 { rd, rm } => {
1183 assert!(rd.to_reg().get_class() == rm.get_class());
1184 assert!(rm.get_class() == RegClass::I64);
1185
1186 // MOV to SP is interpreted as MOV to XZR instead. And our codegen
1187 // should never MOV to XZR.
1188 assert!(rd.to_reg() != stack_reg());
1189
1190 if rm == stack_reg() {
1191 // We can't use ORR here, so use an `add rd, sp, #0` instead.
1192 let imm12 = Imm12::maybe_from_u64(0).unwrap();
1193 sink.put4(enc_arith_rr_imm12(
1194 0b100_10001,
1195 imm12.shift_bits(),
1196 imm12.imm_bits(),
1197 rm,
1198 rd,
1199 ));
1200 } else {
1201 // Encoded as ORR rd, rm, zero.
1202 sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
1203 }
1204 }
1205 &Inst::Mov32 { rd, rm } => {
1206 // MOV to SP is interpreted as MOV to XZR instead. And our codegen
1207 // should never MOV to XZR.
1208 assert!(machreg_to_gpr(rd.to_reg()) != 31);
1209 // Encoded as ORR rd, rm, zero.
1210 sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm));
1211 }
1212 &Inst::MovZ { rd, imm, size } => {
1213 sink.put4(enc_move_wide(MoveWideOpcode::MOVZ, rd, imm, size))
1214 }
1215 &Inst::MovN { rd, imm, size } => {
1216 sink.put4(enc_move_wide(MoveWideOpcode::MOVN, rd, imm, size))
1217 }
1218 &Inst::MovK { rd, imm, size } => {
1219 sink.put4(enc_move_wide(MoveWideOpcode::MOVK, rd, imm, size))
1220 }
1221 &Inst::CSel { rd, rn, rm, cond } => {
1222 sink.put4(enc_csel(rd, rn, rm, cond));
1223 }
1224 &Inst::CSet { rd, cond } => {
1225 sink.put4(enc_cset(rd, cond));
1226 }
1227 &Inst::CSetm { rd, cond } => {
1228 sink.put4(enc_csetm(rd, cond));
1229 }
1230 &Inst::CCmpImm {
1231 size,
1232 rn,
1233 imm,
1234 nzcv,
1235 cond,
1236 } => {
1237 sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
1238 }
1239 &Inst::AtomicRMW { ty, op } => {
1240 /* Emit this:
1241 dmb ish
1242 again:
1243 ldxr{,b,h} x/w27, [x25]
1244 op x28, x27, x26 // op is add,sub,and,orr,eor
1245 stxr{,b,h} w24, x/w28, [x25]
1246 cbnz x24, again
1247 dmb ish
1248
1249 Operand conventions:
1250 IN: x25 (addr), x26 (2nd arg for op)
1251 OUT: x27 (old value), x24 (trashed), x28 (trashed)
1252
1253 It is unfortunate that, per the ARM documentation, x28 cannot be used for
1254 both the store-data and success-flag operands of stxr. This causes the
1255 instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24
1256 instead for the success-flag.
1257
1258 In the case where the operation is 'xchg', the second insn is instead
1259 mov x28, x26
1260 so that we simply write in the destination, the "2nd arg for op".
1261 */
1262 let xzr = zero_reg();
1263 let x24 = xreg(24);
1264 let x25 = xreg(25);
1265 let x26 = xreg(26);
1266 let x27 = xreg(27);
1267 let x28 = xreg(28);
1268 let x24wr = writable_xreg(24);
1269 let x27wr = writable_xreg(27);
1270 let x28wr = writable_xreg(28);
1271 let again_label = sink.get_label();
1272
1273 sink.put4(enc_dmb_ish()); // dmb ish
1274
1275 // again:
1276 sink.bind_label(again_label);
1277 let srcloc = state.cur_srcloc();
1278 if srcloc != SourceLoc::default() {
1279 sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1280 }
1281 sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25]
1282
1283 if op == inst_common::AtomicRmwOp::Xchg {
1284 // mov x28, x26
1285 sink.put4(enc_arith_rrr(0b101_01010_00_0, 0b000000, x28wr, xzr, x26))
1286 } else {
1287 // add/sub/and/orr/eor x28, x27, x26
1288 let bits_31_21 = match op {
1289 inst_common::AtomicRmwOp::Add => 0b100_01011_00_0,
1290 inst_common::AtomicRmwOp::Sub => 0b110_01011_00_0,
1291 inst_common::AtomicRmwOp::And => 0b100_01010_00_0,
1292 inst_common::AtomicRmwOp::Or => 0b101_01010_00_0,
1293 inst_common::AtomicRmwOp::Xor => 0b110_01010_00_0,
1294 inst_common::AtomicRmwOp::Nand
1295 | inst_common::AtomicRmwOp::Umin
1296 | inst_common::AtomicRmwOp::Umax
1297 | inst_common::AtomicRmwOp::Smin
1298 | inst_common::AtomicRmwOp::Smax => todo!("{:?}", op),
1299 inst_common::AtomicRmwOp::Xchg => unreachable!(),
1300 };
1301 sink.put4(enc_arith_rrr(bits_31_21, 0b000000, x28wr, x27, x26));
1302 }
1303
1304 let srcloc = state.cur_srcloc();
1305 if srcloc != SourceLoc::default() {
1306 sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1307 }
1308 sink.put4(enc_stxr(ty, x24wr, x28, x25)); // stxr w24, x28, [x25]
1309
1310 // cbnz w24, again
1311 // Note, we're actually testing x24, and relying on the default zero-high-half
1312 // rule in the assignment that `stxr` does.
1313 let br_offset = sink.cur_offset();
1314 sink.put4(enc_conditional_br(
1315 BranchTarget::Label(again_label),
1316 CondBrKind::NotZero(x24),
1317 ));
1318 sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
1319
1320 sink.put4(enc_dmb_ish()); // dmb ish
1321 }
1322 &Inst::AtomicCAS { rs, rt, rn, ty } => {
1323 let size = match ty {
1324 I8 => 0b00,
1325 I16 => 0b01,
1326 I32 => 0b10,
1327 I64 => 0b11,
1328 _ => panic!("Unsupported type: {}", ty),
1329 };
1330
1331 sink.put4(enc_cas(size, rs, rt, rn));
1332 }
1333 &Inst::AtomicCASLoop { ty } => {
1334 /* Emit this:
1335 dmb ish
1336 again:
1337 ldxr{,b,h} x/w27, [x25]
1338 and x24, x26, MASK (= 2^size_bits - 1)
1339 cmp x27, x24
1340 b.ne out
1341 stxr{,b,h} w24, x/w28, [x25]
1342 cbnz x24, again
1343 out:
1344 dmb ish
1345
1346 Operand conventions:
1347 IN: x25 (addr), x26 (expected value), x28 (replacement value)
1348 OUT: x27 (old value), x24 (trashed)
1349 */
1350 let xzr = zero_reg();
1351 let x24 = xreg(24);
1352 let x25 = xreg(25);
1353 let x26 = xreg(26);
1354 let x27 = xreg(27);
1355 let x28 = xreg(28);
1356 let xzrwr = writable_zero_reg();
1357 let x24wr = writable_xreg(24);
1358 let x27wr = writable_xreg(27);
1359 let again_label = sink.get_label();
1360 let out_label = sink.get_label();
1361
1362 sink.put4(enc_dmb_ish()); // dmb ish
1363
1364 // again:
1365 sink.bind_label(again_label);
1366 let srcloc = state.cur_srcloc();
1367 if srcloc != SourceLoc::default() {
1368 sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1369 }
1370 sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25]
1371
1372 if ty == I64 {
1373 // mov x24, x26
1374 sink.put4(enc_arith_rrr(0b101_01010_00_0, 0b000000, x24wr, xzr, x26))
1375 } else {
1376 // and x24, x26, 0xFF/0xFFFF/0xFFFFFFFF
1377 let (mask, s) = match ty {
1378 I8 => (0xFF, 7),
1379 I16 => (0xFFFF, 15),
1380 I32 => (0xFFFFFFFF, 31),
1381 _ => unreachable!(),
1382 };
1383 sink.put4(enc_arith_rr_imml(
1384 0b100_100100,
1385 ImmLogic::from_n_r_s(mask, true, 0, s, OperandSize::Size64).enc_bits(),
1386 x26,
1387 x24wr,
1388 ))
1389 }
1390
1391 // cmp x27, x24 (== subs xzr, x27, x24)
1392 sink.put4(enc_arith_rrr(0b111_01011_00_0, 0b000000, xzrwr, x27, x24));
1393
1394 // b.ne out
1395 let br_out_offset = sink.cur_offset();
1396 sink.put4(enc_conditional_br(
1397 BranchTarget::Label(out_label),
1398 CondBrKind::Cond(Cond::Ne),
1399 ));
1400 sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19);
1401
1402 let srcloc = state.cur_srcloc();
1403 if srcloc != SourceLoc::default() {
1404 sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1405 }
1406 sink.put4(enc_stxr(ty, x24wr, x28, x25)); // stxr w24, x28, [x25]
1407
1408 // cbnz w24, again.
1409 // Note, we're actually testing x24, and relying on the default zero-high-half
1410 // rule in the assignment that `stxr` does.
1411 let br_again_offset = sink.cur_offset();
1412 sink.put4(enc_conditional_br(
1413 BranchTarget::Label(again_label),
1414 CondBrKind::NotZero(x24),
1415 ));
1416 sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19);
1417
1418 // out:
1419 sink.bind_label(out_label);
1420 sink.put4(enc_dmb_ish()); // dmb ish
1421 }
1422 &Inst::AtomicLoad { ty, r_data, r_addr } => {
1423 let op = match ty {
1424 I8 => 0b0011100001,
1425 I16 => 0b0111100001,
1426 I32 => 0b1011100001,
1427 I64 => 0b1111100001,
1428 _ => unreachable!(),
1429 };
1430 sink.put4(enc_dmb_ish()); // dmb ish
1431
1432 let srcloc = state.cur_srcloc();
1433 if srcloc != SourceLoc::default() {
1434 sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1435 }
1436 let uimm12scaled_zero = UImm12Scaled::zero(I8 /*irrelevant*/);
1437 sink.put4(enc_ldst_uimm12(
1438 op,
1439 uimm12scaled_zero,
1440 r_addr,
1441 r_data.to_reg(),
1442 ));
1443 }
1444 &Inst::AtomicStore { ty, r_data, r_addr } => {
1445 let op = match ty {
1446 I8 => 0b0011100000,
1447 I16 => 0b0111100000,
1448 I32 => 0b1011100000,
1449 I64 => 0b1111100000,
1450 _ => unreachable!(),
1451 };
1452
1453 let srcloc = state.cur_srcloc();
1454 if srcloc != SourceLoc::default() {
1455 sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
1456 }
1457 let uimm12scaled_zero = UImm12Scaled::zero(I8 /*irrelevant*/);
1458 sink.put4(enc_ldst_uimm12(op, uimm12scaled_zero, r_addr, r_data));
1459 sink.put4(enc_dmb_ish()); // dmb ish
1460 }
1461 &Inst::Fence {} => {
1462 sink.put4(enc_dmb_ish()); // dmb ish
1463 }
1464 &Inst::FpuMove64 { rd, rn } => {
1465 sink.put4(enc_fpurr(0b000_11110_01_1_000000_10000, rd, rn));
1466 }
1467 &Inst::FpuMove128 { rd, rn } => {
1468 sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
1469 }
1470 &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
1471 let (imm5, shift, mask) = match size.lane_size() {
1472 ScalarSize::Size32 => (0b00100, 3, 0b011),
1473 ScalarSize::Size64 => (0b01000, 4, 0b001),
1474 _ => unimplemented!(),
1475 };
1476 debug_assert_eq!(idx & mask, idx);
1477 let imm5 = imm5 | ((idx as u32) << shift);
1478 sink.put4(
1479 0b010_11110000_00000_000001_00000_00000
1480 | (imm5 << 16)
1481 | (machreg_to_vec(rn) << 5)
1482 | machreg_to_vec(rd.to_reg()),
1483 );
1484 }
1485 &Inst::FpuExtend { rd, rn, size } => {
1486 sink.put4(enc_fpurr(
1487 0b000_11110_00_1_000000_10000 | (size.ftype() << 13),
1488 rd,
1489 rn,
1490 ));
1491 }
1492 &Inst::FpuRR { fpu_op, rd, rn } => {
1493 let top22 = match fpu_op {
1494 FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000,
1495 FPUOp1::Abs64 => 0b000_11110_01_1_000001_10000,
1496 FPUOp1::Neg32 => 0b000_11110_00_1_000010_10000,
1497 FPUOp1::Neg64 => 0b000_11110_01_1_000010_10000,
1498 FPUOp1::Sqrt32 => 0b000_11110_00_1_000011_10000,
1499 FPUOp1::Sqrt64 => 0b000_11110_01_1_000011_10000,
1500 FPUOp1::Cvt32To64 => 0b000_11110_00_1_000101_10000,
1501 FPUOp1::Cvt64To32 => 0b000_11110_01_1_000100_10000,
1502 };
1503 sink.put4(enc_fpurr(top22, rd, rn));
1504 }
1505 &Inst::FpuRRR { fpu_op, rd, rn, rm } => {
1506 let top22 = match fpu_op {
1507 FPUOp2::Add32 => 0b000_11110_00_1_00000_001010,
1508 FPUOp2::Add64 => 0b000_11110_01_1_00000_001010,
1509 FPUOp2::Sub32 => 0b000_11110_00_1_00000_001110,
1510 FPUOp2::Sub64 => 0b000_11110_01_1_00000_001110,
1511 FPUOp2::Mul32 => 0b000_11110_00_1_00000_000010,
1512 FPUOp2::Mul64 => 0b000_11110_01_1_00000_000010,
1513 FPUOp2::Div32 => 0b000_11110_00_1_00000_000110,
1514 FPUOp2::Div64 => 0b000_11110_01_1_00000_000110,
1515 FPUOp2::Max32 => 0b000_11110_00_1_00000_010010,
1516 FPUOp2::Max64 => 0b000_11110_01_1_00000_010010,
1517 FPUOp2::Min32 => 0b000_11110_00_1_00000_010110,
1518 FPUOp2::Min64 => 0b000_11110_01_1_00000_010110,
1519 FPUOp2::Sqadd64 => 0b010_11110_11_1_00000_000011,
1520 FPUOp2::Uqadd64 => 0b011_11110_11_1_00000_000011,
1521 FPUOp2::Sqsub64 => 0b010_11110_11_1_00000_001011,
1522 FPUOp2::Uqsub64 => 0b011_11110_11_1_00000_001011,
1523 };
1524 sink.put4(enc_fpurrr(top22, rd, rn, rm));
1525 }
1526 &Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {
1527 FPUOpRI::UShr32(imm) => {
1528 debug_assert_eq!(32, imm.lane_size_in_bits);
1529 sink.put4(
1530 0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000
1531 | imm.enc() << 16
1532 | machreg_to_vec(rn) << 5
1533 | machreg_to_vec(rd.to_reg()),
1534 )
1535 }
1536 FPUOpRI::UShr64(imm) => {
1537 debug_assert_eq!(64, imm.lane_size_in_bits);
1538 sink.put4(
1539 0b01_1_111110_0000000_00_0_0_0_1_00000_00000
1540 | imm.enc() << 16
1541 | machreg_to_vec(rn) << 5
1542 | machreg_to_vec(rd.to_reg()),
1543 )
1544 }
1545 FPUOpRI::Sli64(imm) => {
1546 debug_assert_eq!(64, imm.lane_size_in_bits);
1547 sink.put4(
1548 0b01_1_111110_0000000_010101_00000_00000
1549 | imm.enc() << 16
1550 | machreg_to_vec(rn) << 5
1551 | machreg_to_vec(rd.to_reg()),
1552 )
1553 }
1554 FPUOpRI::Sli32(imm) => {
1555 debug_assert_eq!(32, imm.lane_size_in_bits);
1556 sink.put4(
1557 0b0_0_1_011110_0000000_010101_00000_00000
1558 | imm.enc() << 16
1559 | machreg_to_vec(rn) << 5
1560 | machreg_to_vec(rd.to_reg()),
1561 )
1562 }
1563 },
1564 &Inst::FpuRRRR {
1565 fpu_op,
1566 rd,
1567 rn,
1568 rm,
1569 ra,
1570 } => {
1571 let top17 = match fpu_op {
1572 FPUOp3::MAdd32 => 0b000_11111_00_0_00000_0,
1573 FPUOp3::MAdd64 => 0b000_11111_01_0_00000_0,
1574 };
1575 sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
1576 }
1577 &Inst::VecMisc { op, rd, rn, size } => {
1578 let (q, enc_size) = size.enc_size();
1579 let (u, bits_12_16, size) = match op {
1580 VecMisc2::Not => (0b1, 0b00101, 0b00),
1581 VecMisc2::Neg => (0b1, 0b01011, enc_size),
1582 VecMisc2::Abs => (0b0, 0b01011, enc_size),
1583 VecMisc2::Fabs => {
1584 debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1585 (0b0, 0b01111, enc_size)
1586 }
1587 VecMisc2::Fneg => {
1588 debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1589 (0b1, 0b01111, enc_size)
1590 }
1591 VecMisc2::Fsqrt => {
1592 debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1593 (0b1, 0b11111, enc_size)
1594 }
1595 VecMisc2::Rev64 => {
1596 debug_assert_ne!(VectorSize::Size64x2, size);
1597 (0b0, 0b00000, enc_size)
1598 }
1599 VecMisc2::Shll => {
1600 debug_assert_ne!(VectorSize::Size64x2, size);
1601 debug_assert!(!size.is_128bits());
1602 (0b1, 0b10011, enc_size)
1603 }
1604 VecMisc2::Fcvtzs => {
1605 debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1606 (0b0, 0b11011, enc_size)
1607 }
1608 VecMisc2::Fcvtzu => {
1609 debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1610 (0b1, 0b11011, enc_size)
1611 }
1612 VecMisc2::Scvtf => {
1613 debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1614 (0b0, 0b11101, enc_size & 0b1)
1615 }
1616 VecMisc2::Ucvtf => {
1617 debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1618 (0b1, 0b11101, enc_size & 0b1)
1619 }
1620 VecMisc2::Frintn => {
1621 debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1622 (0b0, 0b11000, enc_size & 0b01)
1623 }
1624 VecMisc2::Frintz => {
1625 debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1626 (0b0, 0b11001, enc_size | 0b10)
1627 }
1628 VecMisc2::Frintm => {
1629 debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1630 (0b0, 0b11001, enc_size & 0b01)
1631 }
1632 VecMisc2::Frintp => {
1633 debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1634 (0b0, 0b11000, enc_size | 0b10)
1635 }
1636 VecMisc2::Cnt => {
1637 debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);
1638 (0b0, 0b00101, enc_size)
1639 }
1640 VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size),
1641 };
1642 sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
1643 }
1644 &Inst::VecLanes { op, rd, rn, size } => {
1645 let (q, size) = match size {
1646 VectorSize::Size8x8 => (0b0, 0b00),
1647 VectorSize::Size8x16 => (0b1, 0b00),
1648 VectorSize::Size16x4 => (0b0, 0b01),
1649 VectorSize::Size16x8 => (0b1, 0b01),
1650 VectorSize::Size32x4 => (0b1, 0b10),
1651 _ => unreachable!(),
1652 };
1653 let (u, opcode) = match op {
1654 VecLanesOp::Uminv => (0b1, 0b11010),
1655 VecLanesOp::Addv => (0b0, 0b11011),
1656 };
1657 sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
1658 }
1659 &Inst::VecShiftImm {
1660 op,
1661 rd,
1662 rn,
1663 size,
1664 imm,
1665 } => {
1666 let (is_shr, template) = match op {
1667 VecShiftImmOp::Ushr => (true, 0b_011_011110_0000_000_000001_00000_00000_u32),
1668 VecShiftImmOp::Sshr => (true, 0b_010_011110_0000_000_000001_00000_00000_u32),
1669 VecShiftImmOp::Shl => (false, 0b_010_011110_0000_000_010101_00000_00000_u32),
1670 };
1671 let imm = imm as u32;
1672 // Deal with the somewhat strange encoding scheme for, and limits on,
1673 // the shift amount.
                let immh_immb = match (size, is_shr) {
                    (VectorSize::Size64x2, true) if imm >= 1 && imm <= 64 => {
                        0b_1000_000_u32 | (64 - imm)
                    }
                    (VectorSize::Size32x4, true) if imm >= 1 && imm <= 32 => {
                        0b_0100_000_u32 | (32 - imm)
                    }
                    (VectorSize::Size16x8, true) if imm >= 1 && imm <= 16 => {
                        0b_0010_000_u32 | (16 - imm)
                    }
                    (VectorSize::Size8x16, true) if imm >= 1 && imm <= 8 => {
                        0b_0001_000_u32 | (8 - imm)
                    }
                    (VectorSize::Size64x2, false) if imm <= 63 => 0b_1000_000_u32 | imm,
                    (VectorSize::Size32x4, false) if imm <= 31 => 0b_0100_000_u32 | imm,
                    (VectorSize::Size16x8, false) if imm <= 15 => 0b_0010_000_u32 | imm,
                    (VectorSize::Size8x16, false) if imm <= 7 => 0b_0001_000_u32 | imm,
                    _ => panic!(
                        "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {:?}, {:?}, {:?}",
                        op, size, imm
                    ),
                };
                let rn_enc = machreg_to_vec(rn);
                let rd_enc = machreg_to_vec(rd.to_reg());
                sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
            }
            &Inst::VecExtract { rd, rn, rm, imm4 } => {
                if imm4 < 16 {
                    let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;
                    let rm_enc = machreg_to_vec(rm);
                    let rn_enc = machreg_to_vec(rn);
                    let rd_enc = machreg_to_vec(rd.to_reg());
                    sink.put4(
                        template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,
                    );
                } else {
                    panic!(
                        "aarch64: Inst::VecExtract: emit: invalid extract index {}",
                        imm4
                    );
                }
            }
            &Inst::VecTbl {
                rd,
                rn,
                rm,
                is_extension,
            } => {
                sink.put4(enc_tbl(is_extension, 0b00, rd, rn, rm));
            }
            &Inst::VecTbl2 {
                rd,
                rn,
                rn2,
                rm,
                is_extension,
            } => {
                assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
                sink.put4(enc_tbl(is_extension, 0b01, rd, rn, rm));
            }
            &Inst::FpuCmp32 { rn, rm } => {
                sink.put4(enc_fcmp(ScalarSize::Size32, rn, rm));
            }
            &Inst::FpuCmp64 { rn, rm } => {
                sink.put4(enc_fcmp(ScalarSize::Size64, rn, rm));
            }
            &Inst::FpuToInt { op, rd, rn } => {
                let top16 = match op {
                    // FCVTZS (32/32-bit)
                    FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000,
                    // FCVTZU (32/32-bit)
                    FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001,
                    // FCVTZS (32/64-bit)
                    FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000,
                    // FCVTZU (32/64-bit)
                    FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001,
                    // FCVTZS (64/32-bit)
                    FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000,
                    // FCVTZU (64/32-bit)
                    FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001,
                    // FCVTZS (64/64-bit)
                    FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000,
                    // FCVTZU (64/64-bit)
                    FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001,
                };
                sink.put4(enc_fputoint(top16, rd, rn));
            }
            &Inst::IntToFpu { op, rd, rn } => {
                let top16 = match op {
                    // SCVTF (32/32-bit)
                    IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010,
                    // UCVTF (32/32-bit)
                    IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011,
                    // SCVTF (64/32-bit)
                    IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010,
                    // UCVTF (64/32-bit)
                    IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011,
                    // SCVTF (32/64-bit)
                    IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010,
                    // UCVTF (32/64-bit)
                    IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011,
                    // SCVTF (64/64-bit)
                    IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010,
                    // UCVTF (64/64-bit)
                    IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011,
                };
                sink.put4(enc_inttofpu(top16, rd, rn));
            }
            &Inst::LoadFpuConst64 { rd, const_data } => {
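                // Emit the constant inline: a 4-byte PC-relative load, a 4-byte
                // branch over the constant data, then the 8-byte constant
                // itself. The load's PCRel(8) points past the branch to the
                // data; the branch's offset (12, relative to the branch) lands
                // just past the constant.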
                let inst = Inst::FpuLoad64 {
                    rd,
                    mem: AMode::Label(MemLabel::PCRel(8)),
                    flags: MemFlags::trusted(),
                };
                inst.emit(sink, emit_info, state);
                let inst = Inst::Jump {
                    dest: BranchTarget::ResolvedOffset(12),
                };
                inst.emit(sink, emit_info, state);
                sink.put8(const_data);
            }
            &Inst::LoadFpuConst128 { rd, const_data } => {
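                // Same layout as LoadFpuConst64, but with a 16-byte constant,
                // so the branch (offset 20 from itself) skips over 16 bytes of
                // constant data.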
                let inst = Inst::FpuLoad128 {
                    rd,
                    mem: AMode::Label(MemLabel::PCRel(8)),
                    flags: MemFlags::trusted(),
                };
                inst.emit(sink, emit_info, state);
                let inst = Inst::Jump {
                    dest: BranchTarget::ResolvedOffset(20),
                };
                inst.emit(sink, emit_info, state);

                for i in const_data.to_le_bytes().iter() {
                    sink.put1(*i);
                }
            }
            &Inst::FpuCSel32 { rd, rn, rm, cond } => {
                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32));
            }
            &Inst::FpuCSel64 { rd, rn, rm, cond } => {
                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64));
            }
            &Inst::FpuRound { op, rd, rn } => {
                let top22 = match op {
                    FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000,
                    FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000,
                    FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000,
                    FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000,
                    FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000,
                    FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000,
                    FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000,
                    FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000,
                };
                sink.put4(enc_fround(top22, rd, rn));
            }
            &Inst::MovToFpu { rd, rn, size } => {
                let template = match size {
                    ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000,
                    ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000,
                    _ => unreachable!(),
                };
                sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
            }
            &Inst::MovToVec { rd, rn, idx, size } => {
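                // INS (general): imm5 encodes both the lane size (via the
                // position of its lowest set bit) and the lane index (in the
                // bits above it).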
                let (imm5, shift) = match size.lane_size() {
                    ScalarSize::Size8 => (0b00001, 1),
                    ScalarSize::Size16 => (0b00010, 2),
                    ScalarSize::Size32 => (0b00100, 3),
                    ScalarSize::Size64 => (0b01000, 4),
                    _ => unreachable!(),
                };
                debug_assert_eq!(idx & (0b11111 >> shift), idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b010_01110000_00000_0_0011_1_00000_00000
                        | (imm5 << 16)
                        | (machreg_to_gpr(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::MovFromVec { rd, rn, idx, size } => {
                let (q, imm5, shift, mask) = match size {
                    VectorSize::Size8x16 => (0b0, 0b00001, 1, 0b1111),
                    VectorSize::Size16x8 => (0b0, 0b00010, 2, 0b0111),
                    VectorSize::Size32x4 => (0b0, 0b00100, 3, 0b0011),
                    VectorSize::Size64x2 => (0b1, 0b01000, 4, 0b0001),
                    _ => unreachable!(),
                };
                debug_assert_eq!(idx & mask, idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b000_01110000_00000_0_0111_1_00000_00000
                        | (q << 30)
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_gpr(rd.to_reg()),
                );
            }
            &Inst::MovFromVecSigned {
                rd,
                rn,
                idx,
                size,
                scalar_size,
            } => {
                let (imm5, shift, half) = match size {
                    VectorSize::Size8x8 => (0b00001, 1, true),
                    VectorSize::Size8x16 => (0b00001, 1, false),
                    VectorSize::Size16x4 => (0b00010, 2, true),
                    VectorSize::Size16x8 => (0b00010, 2, false),
                    VectorSize::Size32x2 => {
                        debug_assert_ne!(scalar_size, OperandSize::Size32);
                        (0b00100, 3, true)
                    }
                    VectorSize::Size32x4 => {
                        debug_assert_ne!(scalar_size, OperandSize::Size32);
                        (0b00100, 3, false)
                    }
                    _ => panic!("Unexpected vector operand size"),
                };
                debug_assert_eq!(idx & (0b11111 >> (half as u32 + shift)), idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b000_01110000_00000_0_0101_1_00000_00000
                        | (scalar_size.is64() as u32) << 30
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_gpr(rd.to_reg()),
                );
            }
            &Inst::VecDup { rd, rn, size } => {
                let imm5 = match size {
                    VectorSize::Size8x16 => 0b00001,
                    VectorSize::Size16x8 => 0b00010,
                    VectorSize::Size32x4 => 0b00100,
                    VectorSize::Size64x2 => 0b01000,
                    _ => unimplemented!(),
                };
                sink.put4(
                    0b010_01110000_00000_000011_00000_00000
                        | (imm5 << 16)
                        | (machreg_to_gpr(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecDupFromFpu { rd, rn, size } => {
                let imm5 = match size {
                    VectorSize::Size32x4 => 0b00100,
                    VectorSize::Size64x2 => 0b01000,
                    _ => unimplemented!(),
                };
                sink.put4(
                    0b010_01110000_00000_000001_00000_00000
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecDupFPImm { rd, imm, size } => {
                let imm = imm.enc_bits();
                let op = match size.lane_size() {
                    ScalarSize::Size32 => 0,
                    ScalarSize::Size64 => 1,
                    _ => unimplemented!(),
                };
                let q_op = op | ((size.is_128bits() as u32) << 1);

                sink.put4(enc_asimd_mod_imm(rd, q_op, 0b1111, imm));
            }
            &Inst::VecDupImm {
                rd,
                imm,
                invert,
                size,
            } => {
                let (imm, shift, shift_ones) = imm.value();
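                // Select op and cmode per the AdvSIMD "modified immediate"
                // table: cmode distinguishes the lane size, the optional left
                // shift of the 8-bit immediate, and whether the shift brings in
                // ones (MSL) or zeroes (LSL).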
                let (op, cmode) = match size.lane_size() {
                    ScalarSize::Size8 => {
                        assert!(!invert);
                        assert_eq!(shift, 0);

                        (0, 0b1110)
                    }
                    ScalarSize::Size16 => {
                        let s = shift & 8;

                        assert!(!shift_ones);
                        assert_eq!(s, shift);

                        (invert as u32, 0b1000 | (s >> 2))
                    }
                    ScalarSize::Size32 => {
                        if shift_ones {
                            assert!(shift == 8 || shift == 16);

                            (invert as u32, 0b1100 | (shift >> 4))
                        } else {
                            let s = shift & 24;

                            assert_eq!(s, shift);

                            (invert as u32, 0b0000 | (s >> 2))
                        }
                    }
                    ScalarSize::Size64 => {
                        assert!(!invert);
                        assert_eq!(shift, 0);

                        (1, 0b1110)
                    }
                    _ => unreachable!(),
                };
                let q_op = op | ((size.is_128bits() as u32) << 1);

                sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm));
            }
            &Inst::VecExtend {
                t,
                rd,
                rn,
                high_half,
            } => {
                let (u, immh) = match t {
                    VecExtendOp::Sxtl8 => (0b0, 0b001),
                    VecExtendOp::Sxtl16 => (0b0, 0b010),
                    VecExtendOp::Sxtl32 => (0b0, 0b100),
                    VecExtendOp::Uxtl8 => (0b1, 0b001),
                    VecExtendOp::Uxtl16 => (0b1, 0b010),
                    VecExtendOp::Uxtl32 => (0b1, 0b100),
                };
                sink.put4(
                    0b000_011110_0000_000_101001_00000_00000
                        | ((high_half as u32) << 30)
                        | (u << 29)
                        | (immh << 19)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecMiscNarrow {
                op,
                rd,
                rn,
                size,
                high_half,
            } => {
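                // high_half becomes the Q bit, selecting the second-half
                // variant (e.g. XTN2 rather than XTN), which writes the upper
                // 64 bits of the destination.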
                let size = match size.lane_size() {
                    ScalarSize::Size8 => 0b00,
                    ScalarSize::Size16 => 0b01,
                    ScalarSize::Size32 => 0b10,
                    _ => panic!("Unexpected vector operand lane size!"),
                };
                let (u, bits_12_16) = match op {
                    VecMiscNarrowOp::Xtn => (0b0, 0b10010),
                    VecMiscNarrowOp::Sqxtn => (0b0, 0b10100),
                    VecMiscNarrowOp::Sqxtun => (0b1, 0b10010),
                };
                sink.put4(enc_vec_rr_misc(
                    ((high_half as u32) << 1) | u,
                    size,
                    bits_12_16,
                    rd,
                    rn,
                ));
            }
            &Inst::VecMovElement {
                rd,
                rn,
                dest_idx,
                src_idx,
                size,
            } => {
                let (imm5, shift) = match size.lane_size() {
                    ScalarSize::Size8 => (0b00001, 1),
                    ScalarSize::Size16 => (0b00010, 2),
                    ScalarSize::Size32 => (0b00100, 3),
                    ScalarSize::Size64 => (0b01000, 4),
                    _ => unreachable!(),
                };
                let mask = 0b11111 >> shift;
                debug_assert_eq!(dest_idx & mask, dest_idx);
                debug_assert_eq!(src_idx & mask, src_idx);
                let imm4 = (src_idx as u32) << (shift - 1);
                let imm5 = imm5 | ((dest_idx as u32) << shift);
                sink.put4(
                    0b011_01110000_00000_0_0000_1_00000_00000
                        | (imm5 << 16)
                        | (imm4 << 11)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecRRPair { op, rd, rn } => {
                let bits_12_16 = match op {
                    VecPairOp::Addp => 0b11011,
                };

                sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn));
            }
            &Inst::VecRRR {
                rd,
                rn,
                rm,
                alu_op,
                size,
            } => {
                let (q, enc_size) = size.enc_size();
                let is_float = match alu_op {
                    VecALUOp::Fcmeq
                    | VecALUOp::Fcmgt
                    | VecALUOp::Fcmge
                    | VecALUOp::Fadd
                    | VecALUOp::Fsub
                    | VecALUOp::Fdiv
                    | VecALUOp::Fmax
                    | VecALUOp::Fmin
                    | VecALUOp::Fmul => true,
                    _ => false,
                };
                let enc_float_size = match (is_float, size) {
                    (true, VectorSize::Size32x2) => 0b0,
                    (true, VectorSize::Size32x4) => 0b0,
                    (true, VectorSize::Size64x2) => 0b1,
                    (true, _) => unimplemented!(),
                    _ => 0,
                };

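                // As the names suggest, top11 supplies the instruction's top
                // eleven bits (31:21) and bit15_10 its bits 15:10;
                // enc_vec_rrr shifts them into place and fills in the register
                // fields.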
                let (top11, bit15_10) = match alu_op {
                    VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::Sqsub => (0b000_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::Uqadd => (0b001_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::Uqsub => (0b001_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::Cmeq => (0b001_01110_00_1 | enc_size << 1, 0b100011),
                    VecALUOp::Cmge => (0b000_01110_00_1 | enc_size << 1, 0b001111),
                    VecALUOp::Cmgt => (0b000_01110_00_1 | enc_size << 1, 0b001101),
                    VecALUOp::Cmhi => (0b001_01110_00_1 | enc_size << 1, 0b001101),
                    VecALUOp::Cmhs => (0b001_01110_00_1 | enc_size << 1, 0b001111),
                    VecALUOp::Fcmeq => (0b000_01110_00_1, 0b111001),
                    VecALUOp::Fcmgt => (0b001_01110_10_1, 0b111001),
                    VecALUOp::Fcmge => (0b001_01110_00_1, 0b111001),
                    // The following logical instructions operate on bytes, so are not encoded
                    // differently for the different vector types.
                    VecALUOp::And => (0b000_01110_00_1, 0b000111),
                    VecALUOp::Bic => (0b000_01110_01_1, 0b000111),
                    VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
                    VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
                    VecALUOp::Bsl => (0b001_01110_01_1, 0b000111),
                    VecALUOp::Umaxp => (0b001_01110_00_1 | enc_size << 1, 0b101001),
                    VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001),
                    VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001),
                    VecALUOp::Mul => {
                        debug_assert_ne!(size, VectorSize::Size64x2);
                        (0b000_01110_00_1 | enc_size << 1, 0b100111)
                    }
                    VecALUOp::Sshl => (0b000_01110_00_1 | enc_size << 1, 0b010001),
                    VecALUOp::Ushl => (0b001_01110_00_1 | enc_size << 1, 0b010001),
                    VecALUOp::Umin => (0b001_01110_00_1 | enc_size << 1, 0b011011),
                    VecALUOp::Smin => (0b000_01110_00_1 | enc_size << 1, 0b011011),
                    VecALUOp::Umax => (0b001_01110_00_1 | enc_size << 1, 0b011001),
                    VecALUOp::Smax => (0b000_01110_00_1 | enc_size << 1, 0b011001),
                    VecALUOp::Urhadd => (0b001_01110_00_1 | enc_size << 1, 0b000101),
                    VecALUOp::Fadd => (0b000_01110_00_1, 0b110101),
                    VecALUOp::Fsub => (0b000_01110_10_1, 0b110101),
                    VecALUOp::Fdiv => (0b001_01110_00_1, 0b111111),
                    VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),
                    VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
                    VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
                    VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
                    VecALUOp::Umlal => {
                        debug_assert!(!size.is_128bits());
                        (0b001_01110_00_1 | enc_size << 1, 0b100000)
                    }
                    VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
                    VecALUOp::Smull => (0b000_01110_00_1 | enc_size << 1, 0b110000),
                    VecALUOp::Smull2 => (0b010_01110_00_1 | enc_size << 1, 0b110000),
                };
                let top11 = match alu_op {
                    VecALUOp::Smull | VecALUOp::Smull2 => top11,
                    _ if is_float => top11 | (q << 9) | enc_float_size << 1,
                    _ => top11 | (q << 9),
                };
                sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
            }
            &Inst::VecLoadReplicate { rd, rn, size } => {
                let (q, size) = size.enc_size();

                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }

                sink.put4(enc_ldst_vec(q, size, rn, rd));
            }
            &Inst::VecCSel { rd, rn, rm, cond } => {
                /* Emit this:
                   b.cond else
                   mov rd, rm
                   b out
                 else:
                   mov rd, rn
                 out:

                 Note that we could do better in the cases where rd == rn or rd == rm.
                */
                let else_label = sink.get_label();
                let out_label = sink.get_label();

                // b.cond else
                let br_else_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(else_label),
                    CondBrKind::Cond(cond),
                ));
                sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19);

                // mov rd, rm
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rm));

                // b out
                let b_out_offset = sink.cur_offset();
                sink.use_label_at_offset(b_out_offset, out_label, LabelUse::Branch26);
                sink.add_uncond_branch(b_out_offset, b_out_offset + 4, out_label);
                sink.put4(enc_jump26(0b000101, 0 /* will be fixed up later */));

                // else:
                sink.bind_label(else_label);

                // mov rd, rn
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));

                // out:
                sink.bind_label(out_label);
            }
            &Inst::MovToNZCV { rn } => {
                sink.put4(0xd51b4200 | machreg_to_gpr(rn));
            }
            &Inst::MovFromNZCV { rd } => {
                sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg()));
            }
            &Inst::Extend {
                rd,
                rn,
                signed: false,
                from_bits: 1,
                to_bits,
            } => {
                assert!(to_bits <= 64);
                // Reduce zero-extend-from-1-bit to:
                // - and rd, rn, #1
                // Note: this is special-cased because UBFX may take more cycles
                // than AND on smaller cores.
                let imml = ImmLogic::maybe_from_u64(1, I32).unwrap();
                Inst::AluRRImmLogic {
                    alu_op: ALUOp::And32,
                    rd,
                    rn,
                    imml,
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Extend {
                rd,
                rn,
                signed: false,
                from_bits: 32,
                to_bits: 64,
            } => {
                let mov = Inst::Mov32 { rd, rm: rn };
                mov.emit(sink, emit_info, state);
            }
            &Inst::Extend {
                rd,
                rn,
                signed,
                from_bits,
                to_bits,
            } => {
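                // General case: a bitfield move (SBFM for sign-extension, UBFM
                // for zero-extension) with immr = 0 and imms = from_bits - 1,
                // i.e. the SXT{B,H,W}/UXT{B,H} aliases.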
                let (opc, size) = if signed {
                    (0b00, OperandSize::from_bits(to_bits))
                } else {
                    (0b10, OperandSize::Size32)
                };
                sink.put4(enc_bfm(opc, size, rd, rn, 0, from_bits - 1));
            }
            &Inst::Jump { ref dest } => {
                let off = sink.cur_offset();
                // If the jump is to a label, record the use so that a fixup can occur later.
                if let Some(l) = dest.as_label() {
                    sink.use_label_at_offset(off, l, LabelUse::Branch26);
                    sink.add_uncond_branch(off, off + 4, l);
                }
                // Emit the jump itself.
                sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
            }
            &Inst::Ret => {
                sink.put4(0xd65f03c0);
            }
            &Inst::EpiloguePlaceholder => {
                // No-op; this is just a placeholder for epilogues.
            }
            &Inst::Call { ref info } => {
                if let Some(s) = state.take_stack_map() {
                    sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
                }
                let loc = state.cur_srcloc();
                sink.add_reloc(loc, Reloc::Arm64Call, &info.dest, 0);
                sink.put4(enc_jump26(0b100101, 0));
                if info.opcode.is_call() {
                    sink.add_call_site(loc, info.opcode);
                }
            }
            &Inst::CallInd { ref info } => {
                if let Some(s) = state.take_stack_map() {
                    sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
                }
                sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5));
                let loc = state.cur_srcloc();
                if info.opcode.is_call() {
                    sink.add_call_site(loc, info.opcode);
                }
            }
            &Inst::CondBr {
                taken,
                not_taken,
                kind,
            } => {
                // Conditional part first.
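                // The inverted encoding is recorded alongside the label use so
                // that the sink's branch-folding logic can flip the branch's
                // sense later, e.g. when the unconditional part below can be
                // elided.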
                let cond_off = sink.cur_offset();
                if let Some(l) = taken.as_label() {
                    sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
                    let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
                    sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
                }
                sink.put4(enc_conditional_br(taken, kind));

                // Unconditional part next.
                let uncond_off = sink.cur_offset();
                if let Some(l) = not_taken.as_label() {
                    sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
                    sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
                }
                sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
            }
            &Inst::TrapIf { kind, trap_code } => {
                // condbr KIND, LABEL
                let off = sink.cur_offset();
                let label = sink.get_label();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(label),
                    kind.invert(),
                ));
                sink.use_label_at_offset(off, label, LabelUse::Branch19);
                // udf
                let trap = Inst::Udf { trap_code };
                trap.emit(sink, emit_info, state);
                // LABEL:
                sink.bind_label(label);
            }
            &Inst::IndirectBr { rn, .. } => {
                sink.put4(enc_br(rn));
            }
            &Inst::Nop0 => {}
            &Inst::Nop4 => {
                sink.put4(0xd503201f);
            }
            &Inst::Brk => {
                sink.put4(0xd4200000);
            }
            &Inst::Udf { trap_code } => {
                let srcloc = state.cur_srcloc();
                sink.add_trap(srcloc, trap_code);
                if let Some(s) = state.take_stack_map() {
                    sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
                }
                sink.put4(0xd4a00000);
            }
            &Inst::Adr { rd, off } => {
                assert!(off > -(1 << 20));
                assert!(off < (1 << 20));
                sink.put4(enc_adr(off, rd));
            }
            &Inst::Word4 { data } => {
                sink.put4(data);
            }
            &Inst::Word8 { data } => {
                sink.put8(data);
            }
            &Inst::JTSequence {
                ridx,
                rtmp1,
                rtmp2,
                ref info,
                ..
            } => {
                // This sequence is *one* instruction in the vcode, and is expanded only here at
                // emission time, because we cannot allow the regalloc to insert spills/reloads in
                // the middle; we depend on hardcoded PC-rel addressing below.
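                //
                // The emitted sequence is roughly:
                //   b.hs  default              ; index out of range
                //   mov   rtmp2, ridx
                //   adr   rtmp1, <jt>          ; base of the table below
                //   ldrsw rtmp2, [rtmp1, rtmp2, uxtw #2]
                //   add   rtmp1, rtmp1, rtmp2
                //   br    rtmp1
                // <jt>: one 32-bit offset per target.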

                // Branch to the default target if the condition code from the prior
                // comparison indicates the index is out of range.
                let br = enc_conditional_br(info.default_target, CondBrKind::Cond(Cond::Hs));
                // No need to inform the sink's branch folding logic about this branch, because it
                // will not be merged with any other branch, flipped, or elided (it is not preceded
                // or succeeded by any other branch). Just emit it with the label use.
                let default_br_offset = sink.cur_offset();
                if let BranchTarget::Label(l) = info.default_target {
                    sink.use_label_at_offset(default_br_offset, l, LabelUse::Branch19);
                }
                sink.put4(br);

                // Save index in a tmp (the live range of ridx only goes to start of this
                // sequence; rtmp1 or rtmp2 may overwrite it).
                let inst = Inst::gen_move(rtmp2, ridx, I64);
                inst.emit(sink, emit_info, state);
                // Load address of jump table.
                let inst = Inst::Adr { rd: rtmp1, off: 16 };
                inst.emit(sink, emit_info, state);
                // Load value out of jump table.
                let inst = Inst::SLoad32 {
                    rd: rtmp2,
                    mem: AMode::reg_plus_reg_scaled_extended(
                        rtmp1.to_reg(),
                        rtmp2.to_reg(),
                        I32,
                        ExtendOp::UXTW,
                    ),
                    flags: MemFlags::trusted(),
                };
                inst.emit(sink, emit_info, state);
                // Add base of jump table to jump-table-sourced block offset.
                let inst = Inst::AluRRR {
                    alu_op: ALUOp::Add64,
                    rd: rtmp1,
                    rn: rtmp1.to_reg(),
                    rm: rtmp2.to_reg(),
                };
                inst.emit(sink, emit_info, state);
                // Branch to computed address. (`targets` here is only used for successor queries
                // and is not needed for emission.)
                let inst = Inst::IndirectBr {
                    rn: rtmp1.to_reg(),
                    targets: vec![],
                };
                inst.emit(sink, emit_info, state);
                // Emit jump table (table of 32-bit offsets).
                let jt_off = sink.cur_offset();
                for &target in info.targets.iter() {
                    let word_off = sink.cur_offset();
                    // off_into_table is an addend embedded in the label, to be patched at the
                    // end of codegen. The offset is initially relative to this jump-table
                    // entry; with the extra addend, it'll be relative to the jump table's
                    // start, after patching.
                    let off_into_table = word_off - jt_off;
                    sink.use_label_at_offset(
                        word_off,
                        target.as_label().unwrap(),
                        LabelUse::PCRel32,
                    );
                    sink.put4(off_into_table);
                }

                // Lowering produces an EmitIsland before using a JTSequence, so we can safely
                // disable the worst-case-size check in this case.
                start_off = sink.cur_offset();
            }
            &Inst::LoadExtName {
                rd,
                ref name,
                offset,
            } => {
                let inst = Inst::ULoad64 {
                    rd,
                    mem: AMode::Label(MemLabel::PCRel(8)),
                    flags: MemFlags::trusted(),
                };
                inst.emit(sink, emit_info, state);
                let inst = Inst::Jump {
                    dest: BranchTarget::ResolvedOffset(12),
                };
                inst.emit(sink, emit_info, state);
                let srcloc = state.cur_srcloc();
                sink.add_reloc(srcloc, Reloc::Abs8, name, offset);
                if emit_info.flags().emit_all_ones_funcaddrs() {
                    sink.put8(u64::max_value());
                } else {
                    sink.put8(0);
                }
            }
            &Inst::LoadAddr { rd, ref mem } => {
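                // Materialize the address into rd in the cheapest way available:
                // a register move, a single add/sub of an immediate, or, in the
                // worst case, a constant loaded into a temporary plus an add.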
                let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                let (reg, index_reg, offset) = match mem {
                    AMode::RegExtended(r, idx, extendop) => (r, Some((idx, extendop)), 0),
                    AMode::Unscaled(r, simm9) => (r, None, simm9.value()),
                    AMode::UnsignedOffset(r, uimm12scaled) => {
                        (r, None, uimm12scaled.value() as i32)
                    }
                    _ => panic!("Unsupported case for LoadAddr: {:?}", mem),
                };
                let abs_offset = if offset < 0 {
                    -offset as u64
                } else {
                    offset as u64
                };
                let alu_op = if offset < 0 {
                    ALUOp::Sub64
                } else {
                    ALUOp::Add64
                };

                if let Some((idx, extendop)) = index_reg {
                    let add = Inst::AluRRRExtend {
                        alu_op: ALUOp::Add64,
                        rd,
                        rn: reg,
                        rm: idx,
                        extendop,
                    };

                    add.emit(sink, emit_info, state);
                } else if offset == 0 {
                    if reg != rd.to_reg() {
                        let mov = Inst::Mov64 { rd, rm: reg };

                        mov.emit(sink, emit_info, state);
                    }
                } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
                    let add = Inst::AluRRImm12 {
                        alu_op,
                        rd,
                        rn: reg,
                        imm12,
                    };
                    add.emit(sink, emit_info, state);
                } else {
                    // Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction
                    // was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
                    // that no other instructions will be inserted here (we're emitting directly),
                    // and a live range of `tmp2` should not span this instruction, so this use
                    // should otherwise be correct.
                    debug_assert!(rd.to_reg() != tmp2_reg());
                    debug_assert!(reg != tmp2_reg());
                    let tmp = writable_tmp2_reg();
                    for insn in Inst::load_constant(tmp, abs_offset).into_iter() {
                        insn.emit(sink, emit_info, state);
                    }
                    let add = Inst::AluRRR {
                        alu_op,
                        rd,
                        rn: reg,
                        rm: tmp.to_reg(),
                    };
                    add.emit(sink, emit_info, state);
                }
            }
            &Inst::VirtualSPOffsetAdj { offset } => {
                debug!(
                    "virtual sp offset adjusted by {} -> {}",
                    offset,
                    state.virtual_sp_offset + offset,
                );
                state.virtual_sp_offset += offset;
            }
            &Inst::EmitIsland { needed_space } => {
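                // The extra 4 bytes account for the branch emitted below to
                // jump over the island itself.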
                if sink.island_needed(needed_space + 4) {
                    let jump_around_label = sink.get_label();
                    let jmp = Inst::Jump {
                        dest: BranchTarget::Label(jump_around_label),
                    };
                    jmp.emit(sink, emit_info, state);
                    sink.emit_island();
                    sink.bind_label(jump_around_label);
                }
            }
            &Inst::ValueLabelMarker { .. } => {
                // Nothing; this is only used to compute debug info.
            }

            &Inst::Unwind { ref inst } => {
                sink.add_unwind(inst.clone());
            }
        }

        let end_off = sink.cur_offset();
        debug_assert!((end_off - start_off) <= Inst::worst_case_size());

        state.clear_post_insn();
    }

    fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String {
        self.print_with_state(mb_rru, state)
    }
}