//! AArch64 ISA: binary code emission.

use crate::binemit::{CodeOffset, Reloc, StackMap};
use crate::ir::constant::ConstantData;
use crate::ir::types::*;
use crate::ir::{LibCall, MemFlags, TrapCode};
use crate::isa::aarch64::inst::*;
use crate::machinst::ty_bits;

use regalloc::{Reg, RegClass, Writable};

use core::convert::TryFrom;

/// Memory label/reference finalization: convert a MemLabel to a PC-relative
/// offset, possibly emitting relocation(s) as necessary.
pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 {
    match label {
        &MemLabel::PCRel(rel) => rel,
    }
}

/// Memory addressing mode finalization: convert "special" modes (e.g.,
/// generic arbitrary stack offset) into real addressing modes, possibly by
/// emitting some helper instructions that come immediately before the use
/// of this amode.
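///
/// For example (a sketch of the fallback path below): an offset such as
/// `AMode::SPOffset(1 << 20, I64)` fits neither a `SImm9` nor a scaled
/// `UImm12`, so we return a `load_constant` of the offset into the spill
/// temp plus an extended `ADD`, together with an amode that addresses
/// through that temp.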
pub fn mem_finalize(
    insn_off: CodeOffset,
    mem: &AMode,
    state: &EmitState,
) -> (SmallVec<[Inst; 4]>, AMode) {
    match mem {
        &AMode::RegOffset(_, off, ty)
        | &AMode::SPOffset(off, ty)
        | &AMode::FPOffset(off, ty)
        | &AMode::NominalSPOffset(off, ty) => {
            let basereg = match mem {
                &AMode::RegOffset(reg, _, _) => reg,
                &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => stack_reg(),
                &AMode::FPOffset(..) => fp_reg(),
                _ => unreachable!(),
            };
            let adj = match mem {
                &AMode::NominalSPOffset(..) => {
                    log::trace!(
                        "mem_finalize: nominal SP offset {} + adj {} -> {}",
                        off,
                        state.virtual_sp_offset,
                        off + state.virtual_sp_offset
                    );
                    state.virtual_sp_offset
                }
                _ => 0,
            };
            let off = off + adj;

            if let Some(simm9) = SImm9::maybe_from_i64(off) {
                let mem = AMode::Unscaled(basereg, simm9);
                (smallvec![], mem)
            } else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(off, ty) {
                let mem = AMode::UnsignedOffset(basereg, uimm12s);
                (smallvec![], mem)
            } else {
                let tmp = writable_spilltmp_reg();
                let mut const_insts = Inst::load_constant(tmp, off as u64);
                // N.B.: we must use AluRRRExtend because AluRRR uses the "shifted register" form
                // (AluRRRShift) instead, which interprets register 31 as the zero reg, not SP. SP
                // is a valid base (for SPOffset) which we must handle here.
                // Also, SP needs to be the first arg, not second.
                let add_inst = Inst::AluRRRExtend {
                    alu_op: ALUOp::Add64,
                    rd: tmp,
                    rn: basereg,
                    rm: tmp.to_reg(),
                    extendop: ExtendOp::UXTX,
                };
                const_insts.push(add_inst);
                (const_insts, AMode::reg(tmp.to_reg()))
            }
        }

        &AMode::Label(ref label) => {
            let off = memlabel_finalize(insn_off, label);
            (smallvec![], AMode::Label(MemLabel::PCRel(off)))
        }

        _ => (smallvec![], mem.clone()),
    }
}

/// Helper: get a ConstantData from a u64.
pub fn u64_constant(bits: u64) -> ConstantData {
    let data = bits.to_le_bytes();
    ConstantData::from(&data[..])
}
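
// Example: `u64_constant(0x0102_0304)` yields the eight little-endian bytes
// 04 03 02 01 00 00 00 00, suitable for placement in the constant pool.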

//=============================================================================
// Instructions and subcomponents: emission

fn machreg_to_gpr(m: Reg) -> u32 {
    assert_eq!(m.get_class(), RegClass::I64);
    u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
}

fn machreg_to_vec(m: Reg) -> u32 {
    assert_eq!(m.get_class(), RegClass::V128);
    u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
}

fn machreg_to_gpr_or_vec(m: Reg) -> u32 {
    u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
}

fn enc_arith_rrr(bits_31_21: u32, bits_15_10: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    (bits_31_21 << 21)
        | (bits_15_10 << 10)
        | machreg_to_gpr(rd.to_reg())
        | (machreg_to_gpr(rn) << 5)
        | (machreg_to_gpr(rm) << 16)
}
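
// Sanity-check example: with `bits_31_21 = 0b10001011_000` (64-bit ADD,
// shifted-register form), `bits_15_10 = 0` (shift amount zero), and x0/x1/x2
// as rd/rn/rm, this produces 0x8B020020, i.e. `add x0, x1, x2`.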
fn enc_arith_rr_imm12(
    bits_31_24: u32,
    immshift: u32,
    imm12: u32,
    rn: Reg,
    rd: Writable<Reg>,
) -> u32 {
    (bits_31_24 << 24)
        | (immshift << 22)
        | (imm12 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}

fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
}

fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {
    (top11 << 21)
        | (machreg_to_gpr(rm) << 16)
        | (bit15 << 15)
        | (machreg_to_gpr(ra) << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}

fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 {
    assert!(off_26_0 < (1 << 26));
    (op_31_26 << 26) | off_26_0
}

fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 {
    assert!(off_18_0 < (1 << 19));
    (op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg)
}

fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {
    assert!(off_18_0 < (1 << 19));
    assert!(cond < (1 << 4));
    (op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond
}

fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
    match kind {
        CondBrKind::Zero(reg) => enc_cmpbr(0b1_011010_0, taken.as_offset19_or_zero(), reg),
        CondBrKind::NotZero(reg) => enc_cmpbr(0b1_011010_1, taken.as_offset19_or_zero(), reg),
        CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),
    }
}

const MOVE_WIDE_FIXED: u32 = 0x12800000;

#[repr(u32)]
enum MoveWideOpcode {
    MOVN = 0b00,
    MOVZ = 0b10,
    MOVK = 0b11,
}
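
// Sanity-check example: `enc_move_wide(MOVZ, x0, imm16 = 1 with shift 0,
// Size64)` produces 0xD2800020, i.e. `movz x0, #1`.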
fn enc_move_wide(
    op: MoveWideOpcode,
    rd: Writable<Reg>,
    imm: MoveWideConst,
    size: OperandSize,
) -> u32 {
    assert!(imm.shift <= 0b11);
    MOVE_WIDE_FIXED
        | size.sf_bit() << 31
        | (op as u32) << 29
        | u32::from(imm.shift) << 21
        | u32::from(imm.bits) << 5
        | machreg_to_gpr(rd.to_reg())
}

fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 {
    (op_31_22 << 22)
        | (simm7.bits() << 15)
        | (machreg_to_gpr(rt2) << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}

fn enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 {
    (op_31_22 << 22)
        | (simm9.bits() << 12)
        | (op_11_10 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 {
    (op_31_22 << 22)
        | (0b1 << 24)
        | (uimm12.bits() << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_reg(
    op_31_22: u32,
    rn: Reg,
    rm: Reg,
    s_bit: bool,
    extendop: Option<ExtendOp>,
    rd: Reg,
) -> u32 {
    let s_bit = if s_bit { 1 } else { 0 };
    let extend_bits = match extendop {
        Some(ExtendOp::UXTW) => 0b010,
        Some(ExtendOp::SXTW) => 0b110,
        Some(ExtendOp::SXTX) => 0b111,
        None => 0b011, // LSL
        _ => panic!("bad extend mode for ld/st AMode"),
    };
    (op_31_22 << 22)
        | (1 << 21)
        | (machreg_to_gpr(rm) << 16)
        | (extend_bits << 13)
        | (s_bit << 12)
        | (0b10 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 {
    (op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(size & 0b11, size);
    0b0_0_0011010_10_00000_110_0_00_00000_00000
        | q << 30
        | size << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_vec(rt.to_reg())
}

fn enc_ldst_vec_pair(
    opc: u32,
    amode: u32,
    is_load: bool,
    simm7: SImm7Scaled,
    rn: Reg,
    rt: Reg,
    rt2: Reg,
) -> u32 {
    debug_assert_eq!(opc & 0b11, opc);
    debug_assert_eq!(amode & 0b11, amode);

    0b00_10110_00_0_0000000_00000_00000_00000
        | opc << 30
        | amode << 23
        | (is_load as u32) << 22
        | simm7.bits() << 15
        | machreg_to_vec(rt2) << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_vec(rt)
}

fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (top11 << 21)
        | (machreg_to_vec(rm) << 16)
        | (bit15_10 << 10)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rrr_long(
    q: u32,
    u: u32,
    size: u32,
    bit14: u32,
    rm: Reg,
    rn: Reg,
    rd: Writable<Reg>,
) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(bit14 & 0b1, bit14);

    0b0_0_0_01110_00_1_00000_100000_00000_00000
        | q << 30
        | u << 29
        | size << 22
        | bit14 << 14
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (0b01011010110 << 21)
        | size << 31
        | opcode2 << 16
        | opcode1 << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rd.to_reg())
}

fn enc_br(rn: Reg) -> u32 {
    0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)
}

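// ADR's 21-bit signed immediate (range ±1MiB) is split across the
// instruction as immlo (2 bits at [30:29]) and immhi (19 bits at [23:5]).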
fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 {
    let off = u32::try_from(off).unwrap();
    let immlo = off & 3;
    let immhi = (off >> 2) & ((1 << 19) - 1);
    (0b00010000 << 24) | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())
}

fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond) -> u32 {
    0b100_11010100_00000_0000_00_00000_00000
        | (machreg_to_gpr(rm) << 16)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
        | (cond.bits() << 12)
}

fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
    0b000_11110_00_1_00000_0000_11_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
        | (cond.bits() << 12)
}

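// `CSET Rd, cond` is an alias of `CSINC Rd, XZR, XZR, invert(cond)`, and
// `CSETM Rd, cond` (below) of `CSINV Rd, XZR, XZR, invert(cond)`; hence the
// `cond.invert()` in both encodings.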
fn enc_cset(rd: Writable<Reg>, cond: Cond) -> u32 {
    0b100_11010100_11111_0000_01_11111_00000
        | machreg_to_gpr(rd.to_reg())
        | (cond.invert().bits() << 12)
}

fn enc_csetm(rd: Writable<Reg>, cond: Cond) -> u32 {
    0b110_11010100_11111_0000_00_11111_00000
        | machreg_to_gpr(rd.to_reg())
        | (cond.invert().bits() << 12)
}

fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
    0b0_1_1_11010010_00000_0000_10_00000_0_0000
        | size.sf_bit() << 31
        | imm.bits() << 16
        | cond.bits() << 12
        | machreg_to_gpr(rn) << 5
        | nzcv.bits()
}

fn enc_bfm(opc: u8, size: OperandSize, rd: Writable<Reg>, rn: Reg, immr: u8, imms: u8) -> u32 {
    match size {
        OperandSize::Size64 => {
            debug_assert!(immr <= 63);
            debug_assert!(imms <= 63);
        }
        OperandSize::Size32 => {
            debug_assert!(immr <= 31);
            debug_assert!(imms <= 31);
        }
    }
    debug_assert_eq!(opc & 0b11, opc);
    let n_bit = size.sf_bit();
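    // In the SBFM/BFM/UBFM encodings, the N field (bit 22) must equal sf for
    // the forms we emit: N=1 for 64-bit, N=0 for 32-bit.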
    0b0_00_100110_0_000000_000000_00000_00000
        | size.sf_bit() << 31
        | u32::from(opc) << 29
        | n_bit << 22
        | u32::from(immr) << 16
        | u32::from(imms) << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rd.to_reg())
}

fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
    0b00001110_101_00000_00011_1_00000_00000
        | ((is_16b as u32) << 30)
        | machreg_to_vec(rd.to_reg())
        | (machreg_to_vec(rn) << 16)
        | (machreg_to_vec(rn) << 5)
}

fn enc_fpurr(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_fpurrr(top22: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    (top22 << 10)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32 {
    (top17 << 15)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(ra) << 10)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 {
    0b000_11110_00_1_00000_00_1000_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
}

fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg())
}

fn enc_inttofpu(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(qu & 0b11, qu);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
    let bits = 0b0_00_01110_00_10000_00000_10_00000_00000;
    bits | qu << 29
        | size << 22
        | bits_12_16 << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_pair(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);

    0b010_11110_11_11000_11011_10_00000_00000
        | bits_12_16 << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_pair_long(u: u32, enc_size: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(enc_size & 0b1, enc_size);

    0b0_1_0_01110_00_10000_00_0_10_10_00000_00000
        | u << 29
        | enc_size << 22
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(opcode & 0b11111, opcode);
    0b0_0_0_01110_00_11000_0_0000_10_00000_00000
        | q << 30
        | u << 29
        | size << 22
        | opcode << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    debug_assert_eq!(len & 0b11, len);
    0b0_1_001110_000_00000_0_00_0_00_00000_00000
        | (machreg_to_vec(rm) << 16)
        | len << 13
        | (is_extension as u32) << 12
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

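// 0xD5033BBF decodes as the system instruction `dmb ish`: CRm = 0b1011
// selects the inner-shareable (ISH) domain, op2 = 0b101 selects DMB.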
fn enc_dmb_ish() -> u32 {
    0xD5033BBF
}

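// For the acquire/release and exclusive load/store encodings below, the `sz`
// field follows the usual load/store size convention: 0b00 = byte, 0b01 =
// halfword, 0b10 = word, 0b11 = doubleword.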
fn enc_ldar(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_1_1_0_11111_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt.to_reg())
}

fn enc_stlr(ty: Type, rt: Reg, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_100_11111_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}

fn enc_ldaxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_0_1_0_11111_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt.to_reg())
}

fn enc_stlxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_000_00000_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rs.to_reg()) << 16)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}

fn enc_cas(size: u32, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
    debug_assert_eq!(size & 0b11, size);

    0b00_0010001_1_1_00000_1_11111_00000_00000
        | size << 30
        | machreg_to_gpr(rs.to_reg()) << 16
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rt)
}

fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
    let abc = (imm >> 5) as u32;
    let defgh = (imm & 0b11111) as u32;

    debug_assert_eq!(cmode & 0b1111, cmode);
    debug_assert_eq!(q_op & 0b11, q_op);

    0b0_0_0_0111100000_000_0000_01_00000_00000
        | (q_op << 29)
        | (abc << 16)
        | (cmode << 12)
        | (defgh << 5)
        | machreg_to_vec(rd.to_reg())
}

/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
    /// Addend to convert nominal-SP offsets to real-SP offsets at the current
    /// program point.
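    /// For example (informally): when SP is moved down to make room for
    /// outgoing call arguments, this addend grows by the same amount so that
    /// nominal-SP references continue to name the same stack slots.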
    pub(crate) virtual_sp_offset: i64,
    /// Offset of FP from nominal-SP.
    pub(crate) nominal_sp_to_fp: i64,
    /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`.
    stack_map: Option<StackMap>,
    /// Current source-code location corresponding to instruction to be emitted.
    cur_srcloc: SourceLoc,
}

impl MachInstEmitState<Inst> for EmitState {
    fn new(abi: &dyn ABICallee<I = Inst>) -> Self {
        EmitState {
            virtual_sp_offset: 0,
            nominal_sp_to_fp: abi.frame_size() as i64,
            stack_map: None,
            cur_srcloc: SourceLoc::default(),
        }
    }

    fn pre_safepoint(&mut self, stack_map: StackMap) {
        self.stack_map = Some(stack_map);
    }

    fn pre_sourceloc(&mut self, srcloc: SourceLoc) {
        self.cur_srcloc = srcloc;
    }
}

impl EmitState {
    fn take_stack_map(&mut self) -> Option<StackMap> {
        self.stack_map.take()
    }

    fn clear_post_insn(&mut self) {
        self.stack_map = None;
    }

    fn cur_srcloc(&self) -> SourceLoc {
        self.cur_srcloc
    }
}

/// Constant state used during function compilation.
pub struct EmitInfo(settings::Flags);

impl EmitInfo {
    pub(crate) fn new(flags: settings::Flags) -> Self {
        Self(flags)
    }
}

impl MachInstEmitInfo for EmitInfo {
    fn flags(&self) -> &settings::Flags {
        &self.0
    }
}

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
        // N.B.: we *must* not exceed the "worst-case size" used to compute
        // where to insert islands, except when islands are explicitly triggered
        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
        // to allow disabling the check for `JTSequence`, which is always
        // emitted following an `EmitIsland`.
        let mut start_off = sink.cur_offset();

        match self {
            &Inst::AluRRR { alu_op, rd, rn, rm } => {
                let top11 = match alu_op {
                    ALUOp::Add32 => 0b00001011_000,
                    ALUOp::Add64 => 0b10001011_000,
                    ALUOp::Adc32 => 0b00011010_000,
                    ALUOp::Adc64 => 0b10011010_000,
                    ALUOp::AdcS32 => 0b00111010_000,
                    ALUOp::AdcS64 => 0b10111010_000,
                    ALUOp::Sub32 => 0b01001011_000,
                    ALUOp::Sub64 => 0b11001011_000,
                    ALUOp::Sbc32 => 0b01011010_000,
                    ALUOp::Sbc64 => 0b11011010_000,
                    ALUOp::SbcS32 => 0b01111010_000,
                    ALUOp::SbcS64 => 0b11111010_000,
                    ALUOp::Orr32 => 0b00101010_000,
                    ALUOp::Orr64 => 0b10101010_000,
                    ALUOp::And32 => 0b00001010_000,
                    ALUOp::And64 => 0b10001010_000,
                    ALUOp::AndS32 => 0b01101010_000,
                    ALUOp::AndS64 => 0b11101010_000,
                    ALUOp::Eor32 => 0b01001010_000,
                    ALUOp::Eor64 => 0b11001010_000,
                    ALUOp::OrrNot32 => 0b00101010_001,
                    ALUOp::OrrNot64 => 0b10101010_001,
                    ALUOp::AndNot32 => 0b00001010_001,
                    ALUOp::AndNot64 => 0b10001010_001,
                    ALUOp::EorNot32 => 0b01001010_001,
                    ALUOp::EorNot64 => 0b11001010_001,
                    ALUOp::AddS32 => 0b00101011_000,
                    ALUOp::AddS64 => 0b10101011_000,
                    ALUOp::SubS32 => 0b01101011_000,
                    ALUOp::SubS64 => 0b11101011_000,
                    ALUOp::SDiv64 => 0b10011010_110,
                    ALUOp::UDiv64 => 0b10011010_110,
                    ALUOp::RotR32 | ALUOp::Lsr32 | ALUOp::Asr32 | ALUOp::Lsl32 => 0b00011010_110,
                    ALUOp::RotR64 | ALUOp::Lsr64 | ALUOp::Asr64 | ALUOp::Lsl64 => 0b10011010_110,
                    ALUOp::SMulH => 0b10011011_010,
                    ALUOp::UMulH => 0b10011011_110,
                };
                let bit15_10 = match alu_op {
                    ALUOp::SDiv64 => 0b000011,
                    ALUOp::UDiv64 => 0b000010,
                    ALUOp::RotR32 | ALUOp::RotR64 => 0b001011,
                    ALUOp::Lsr32 | ALUOp::Lsr64 => 0b001001,
                    ALUOp::Asr32 | ALUOp::Asr64 => 0b001010,
                    ALUOp::Lsl32 | ALUOp::Lsl64 => 0b001000,
                    ALUOp::SMulH | ALUOp::UMulH => 0b011111,
                    _ => 0b000000,
                };
                debug_assert_ne!(writable_stack_reg(), rd);
                // The stack pointer is the zero register in this context, so this might be an
                // indication that something is wrong.
                debug_assert_ne!(stack_reg(), rn);
                debug_assert_ne!(stack_reg(), rm);
                sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
            }
            &Inst::AluRRRR {
                alu_op,
                rd,
                rm,
                rn,
                ra,
            } => {
                let (top11, bit15) = match alu_op {
                    ALUOp3::MAdd32 => (0b0_00_11011_000, 0),
                    ALUOp3::MSub32 => (0b0_00_11011_000, 1),
                    ALUOp3::MAdd64 => (0b1_00_11011_000, 0),
                    ALUOp3::MSub64 => (0b1_00_11011_000, 1),
                };
                sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd));
            }
            &Inst::AluRRImm12 {
                alu_op,
                rd,
                rn,
                ref imm12,
            } => {
                let top8 = match alu_op {
                    ALUOp::Add32 => 0b000_10001,
                    ALUOp::Add64 => 0b100_10001,
                    ALUOp::Sub32 => 0b010_10001,
                    ALUOp::Sub64 => 0b110_10001,
                    ALUOp::AddS32 => 0b001_10001,
                    ALUOp::AddS64 => 0b101_10001,
                    ALUOp::SubS32 => 0b011_10001,
                    ALUOp::SubS64 => 0b111_10001,
                    _ => unimplemented!("{:?}", alu_op),
                };
                sink.put4(enc_arith_rr_imm12(
                    top8,
                    imm12.shift_bits(),
                    imm12.imm_bits(),
                    rn,
                    rd,
                ));
            }
            &Inst::AluRRImmLogic {
                alu_op,
                rd,
                rn,
                ref imml,
            } => {
                let (top9, inv) = match alu_op {
                    ALUOp::Orr32 => (0b001_100100, false),
                    ALUOp::Orr64 => (0b101_100100, false),
                    ALUOp::And32 => (0b000_100100, false),
                    ALUOp::And64 => (0b100_100100, false),
                    ALUOp::AndS32 => (0b011_100100, false),
                    ALUOp::AndS64 => (0b111_100100, false),
                    ALUOp::Eor32 => (0b010_100100, false),
                    ALUOp::Eor64 => (0b110_100100, false),
                    ALUOp::OrrNot32 => (0b001_100100, true),
                    ALUOp::OrrNot64 => (0b101_100100, true),
                    ALUOp::AndNot32 => (0b000_100100, true),
                    ALUOp::AndNot64 => (0b100_100100, true),
                    ALUOp::EorNot32 => (0b010_100100, true),
                    ALUOp::EorNot64 => (0b110_100100, true),
                    _ => unimplemented!("{:?}", alu_op),
                };
                let imml = if inv { imml.invert() } else { imml.clone() };
                sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd));
            }

            &Inst::AluRRImmShift {
                alu_op,
                rd,
                rn,
                ref immshift,
            } => {
                let amt = immshift.value();
                let (top10, immr, imms) = match alu_op {
                    ALUOp::RotR32 => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)),
                    ALUOp::RotR64 => (0b1001001111, machreg_to_gpr(rn), u32::from(amt)),
                    ALUOp::Lsr32 => (0b0101001100, u32::from(amt), 0b011111),
                    ALUOp::Lsr64 => (0b1101001101, u32::from(amt), 0b111111),
                    ALUOp::Asr32 => (0b0001001100, u32::from(amt), 0b011111),
                    ALUOp::Asr64 => (0b1001001101, u32::from(amt), 0b111111),
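                    // LSL-immediate is an alias of UBFM with immr = (size - amt) % size
                    // and imms = size - 1 - amt, which the two arms below compute.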
                    ALUOp::Lsl32 => (
                        0b0101001100,
                        u32::from((32 - amt) % 32),
                        u32::from(31 - amt),
                    ),
                    ALUOp::Lsl64 => (
                        0b1101001101,
                        u32::from((64 - amt) % 64),
                        u32::from(63 - amt),
                    ),
                    _ => unimplemented!("{:?}", alu_op),
                };
                sink.put4(
                    (top10 << 22)
                        | (immr << 16)
                        | (imms << 10)
                        | (machreg_to_gpr(rn) << 5)
                        | machreg_to_gpr(rd.to_reg()),
                );
            }

            &Inst::AluRRRShift {
                alu_op,
                rd,
                rn,
                rm,
                ref shiftop,
            } => {
                let top11: u32 = match alu_op {
                    ALUOp::Add32 => 0b000_01011000,
                    ALUOp::Add64 => 0b100_01011000,
                    ALUOp::AddS32 => 0b001_01011000,
                    ALUOp::AddS64 => 0b101_01011000,
                    ALUOp::Sub32 => 0b010_01011000,
                    ALUOp::Sub64 => 0b110_01011000,
                    ALUOp::SubS32 => 0b011_01011000,
                    ALUOp::SubS64 => 0b111_01011000,
                    ALUOp::Orr32 => 0b001_01010000,
                    ALUOp::Orr64 => 0b101_01010000,
                    ALUOp::And32 => 0b000_01010000,
                    ALUOp::And64 => 0b100_01010000,
                    ALUOp::AndS32 => 0b011_01010000,
                    ALUOp::AndS64 => 0b111_01010000,
                    ALUOp::Eor32 => 0b010_01010000,
                    ALUOp::Eor64 => 0b110_01010000,
                    ALUOp::OrrNot32 => 0b001_01010001,
                    ALUOp::OrrNot64 => 0b101_01010001,
                    ALUOp::EorNot32 => 0b010_01010001,
                    ALUOp::EorNot64 => 0b110_01010001,
                    ALUOp::AndNot32 => 0b000_01010001,
                    ALUOp::AndNot64 => 0b100_01010001,
                    _ => unimplemented!("{:?}", alu_op),
                };
                let top11 = top11 | (u32::from(shiftop.op().bits()) << 1);
                let bits_15_10 = u32::from(shiftop.amt().value());
                sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
            }

            &Inst::AluRRRExtend {
                alu_op,
                rd,
                rn,
                rm,
                extendop,
            } => {
                let top11: u32 = match alu_op {
                    ALUOp::Add32 => 0b00001011001,
                    ALUOp::Add64 => 0b10001011001,
                    ALUOp::Sub32 => 0b01001011001,
                    ALUOp::Sub64 => 0b11001011001,
                    ALUOp::AddS32 => 0b00101011001,
                    ALUOp::AddS64 => 0b10101011001,
                    ALUOp::SubS32 => 0b01101011001,
                    ALUOp::SubS64 => 0b11101011001,
                    _ => unimplemented!("{:?}", alu_op),
                };
                let bits_15_10 = u32::from(extendop.bits()) << 3;
                sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
            }

            &Inst::BitRR { op, rd, rn, .. } => {
                let size = if op.operand_size().is32() { 0b0 } else { 0b1 };
                let (op1, op2) = match op {
                    BitOp::RBit32 | BitOp::RBit64 => (0b00000, 0b000000),
                    BitOp::Clz32 | BitOp::Clz64 => (0b00000, 0b000100),
                    BitOp::Cls32 | BitOp::Cls64 => (0b00000, 0b000101),
                };
                sink.put4(enc_bit_rr(size, op1, op2, rn, rd))
            }

            &Inst::ULoad8 { rd, ref mem, flags }
            | &Inst::SLoad8 { rd, ref mem, flags }
            | &Inst::ULoad16 { rd, ref mem, flags }
            | &Inst::SLoad16 { rd, ref mem, flags }
            | &Inst::ULoad32 { rd, ref mem, flags }
            | &Inst::SLoad32 { rd, ref mem, flags }
            | &Inst::ULoad64 {
                rd, ref mem, flags, ..
            }
            | &Inst::FpuLoad32 { rd, ref mem, flags }
            | &Inst::FpuLoad64 { rd, ref mem, flags }
            | &Inst::FpuLoad128 { rd, ref mem, flags } => {
                let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);

                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                // ldst encoding helpers take Reg, not Writable<Reg>.
                let rd = rd.to_reg();

                // This is the base opcode (top 10 bits) for the "unscaled
                // immediate" form (Unscaled). Other addressing modes will OR in
                // other values for bits 24/25 (bits 1/2 of this constant).
                let (op, bits) = match self {
                    &Inst::ULoad8 { .. } => (0b0011100001, 8),
                    &Inst::SLoad8 { .. } => (0b0011100010, 8),
                    &Inst::ULoad16 { .. } => (0b0111100001, 16),
                    &Inst::SLoad16 { .. } => (0b0111100010, 16),
                    &Inst::ULoad32 { .. } => (0b1011100001, 32),
                    &Inst::SLoad32 { .. } => (0b1011100010, 32),
                    &Inst::ULoad64 { .. } => (0b1111100001, 64),
                    &Inst::FpuLoad32 { .. } => (0b1011110001, 32),
                    &Inst::FpuLoad64 { .. } => (0b1111110001, 64),
                    &Inst::FpuLoad128 { .. } => (0b0011110011, 128),
                    _ => unreachable!(),
                };

                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() && !flags.notrap() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }

                match &mem {
                    &AMode::Unscaled(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
                    }
                    &AMode::UnsignedOffset(reg, uimm12scaled) => {
                        if uimm12scaled.value() != 0 {
                            assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
                        }
                        sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
                    }
                    &AMode::RegReg(r1, r2) => {
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
                        ));
                    }
                    &AMode::RegScaled(r1, r2, ty) | &AMode::RegScaledExtended(r1, r2, ty, _) => {
                        assert_eq!(bits, ty_bits(ty));
                        let extendop = match &mem {
                            &AMode::RegScaled(..) => None,
                            &AMode::RegScaledExtended(_, _, _, op) => Some(op),
                            _ => unreachable!(),
                        };
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ true, extendop, rd,
                        ));
                    }
                    &AMode::RegExtended(r1, r2, extendop) => {
                        sink.put4(enc_ldst_reg(
                            op,
                            r1,
                            r2,
                            /* scaled = */ false,
                            Some(extendop),
                            rd,
                        ));
                    }
                    &AMode::Label(ref label) => {
                        let offset = match label {
                            // cast i32 to u32 (two's-complement)
                            &MemLabel::PCRel(off) => off as u32,
                        } / 4;
                        assert!(offset < (1 << 19));
                        match self {
                            &Inst::ULoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b00011000, offset, rd));
                            }
                            &Inst::SLoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b10011000, offset, rd));
                            }
                            &Inst::FpuLoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b00011100, offset, rd));
                            }
                            &Inst::ULoad64 { .. } => {
                                sink.put4(enc_ldst_imm19(0b01011000, offset, rd));
                            }
                            &Inst::FpuLoad64 { .. } => {
                                sink.put4(enc_ldst_imm19(0b01011100, offset, rd));
                            }
                            &Inst::FpuLoad128 { .. } => {
                                sink.put4(enc_ldst_imm19(0b10011100, offset, rd));
                            }
                            _ => panic!("Unsupported size for LDR from constant pool!"),
                        }
                    }
                    &AMode::PreIndexed(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd));
                    }
                    &AMode::PostIndexed(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
                    }
                    // Eliminated by `mem_finalize()` above.
                    &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => {
                        panic!("Should not see stack-offset here!")
                    }
                    &AMode::RegOffset(..) => panic!("Should not see generic reg-offset here!"),
                }
            }

            &Inst::Store8 { rd, ref mem, flags }
            | &Inst::Store16 { rd, ref mem, flags }
            | &Inst::Store32 { rd, ref mem, flags }
            | &Inst::Store64 { rd, ref mem, flags }
            | &Inst::FpuStore32 { rd, ref mem, flags }
            | &Inst::FpuStore64 { rd, ref mem, flags }
            | &Inst::FpuStore128 { rd, ref mem, flags } => {
                let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);

                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                let (op, bits) = match self {
                    &Inst::Store8 { .. } => (0b0011100000, 8),
                    &Inst::Store16 { .. } => (0b0111100000, 16),
                    &Inst::Store32 { .. } => (0b1011100000, 32),
                    &Inst::Store64 { .. } => (0b1111100000, 64),
                    &Inst::FpuStore32 { .. } => (0b1011110000, 32),
                    &Inst::FpuStore64 { .. } => (0b1111110000, 64),
                    &Inst::FpuStore128 { .. } => (0b0011110010, 128),
                    _ => unreachable!(),
                };

                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() && !flags.notrap() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }

                match &mem {
                    &AMode::Unscaled(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
                    }
                    &AMode::UnsignedOffset(reg, uimm12scaled) => {
                        if uimm12scaled.value() != 0 {
                            assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
                        }
                        sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
                    }
                    &AMode::RegReg(r1, r2) => {
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
                        ));
                    }
                    &AMode::RegScaled(r1, r2, _ty) | &AMode::RegScaledExtended(r1, r2, _ty, _) => {
                        let extendop = match &mem {
                            &AMode::RegScaled(..) => None,
                            &AMode::RegScaledExtended(_, _, _, op) => Some(op),
                            _ => unreachable!(),
                        };
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ true, extendop, rd,
                        ));
                    }
                    &AMode::RegExtended(r1, r2, extendop) => {
                        sink.put4(enc_ldst_reg(
                            op,
                            r1,
                            r2,
                            /* scaled = */ false,
                            Some(extendop),
                            rd,
                        ));
                    }
                    &AMode::Label(..) => {
                        panic!("Store to a MemLabel not implemented!");
                    }
                    &AMode::PreIndexed(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd));
                    }
                    &AMode::PostIndexed(reg, simm9) => {
                        sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
                    }
                    // Eliminated by `mem_finalize()` above.
                    &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => {
                        panic!("Should not see stack-offset here!")
                    }
                    &AMode::RegOffset(..) => panic!("Should not see generic reg-offset here!"),
                }
            }

            &Inst::StoreP64 {
                rt,
                rt2,
                ref mem,
                flags,
            } => {
                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() && !flags.notrap() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }
                match mem {
                    &PairAMode::SignedOffset(reg, simm7) => {
                        assert_eq!(simm7.scale_ty, I64);
                        sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2));
                    }
                    &PairAMode::PreIndexed(reg, simm7) => {
                        assert_eq!(simm7.scale_ty, I64);
                        sink.put4(enc_ldst_pair(0b1010100110, simm7, reg.to_reg(), rt, rt2));
                    }
                    &PairAMode::PostIndexed(reg, simm7) => {
                        assert_eq!(simm7.scale_ty, I64);
                        sink.put4(enc_ldst_pair(0b1010100010, simm7, reg.to_reg(), rt, rt2));
                    }
                }
            }
            &Inst::LoadP64 {
                rt,
                rt2,
                ref mem,
                flags,
            } => {
                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() && !flags.notrap() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }

                let rt = rt.to_reg();
                let rt2 = rt2.to_reg();
                match mem {
                    &PairAMode::SignedOffset(reg, simm7) => {
                        assert_eq!(simm7.scale_ty, I64);
                        sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2));
                    }
                    &PairAMode::PreIndexed(reg, simm7) => {
                        assert_eq!(simm7.scale_ty, I64);
                        sink.put4(enc_ldst_pair(0b1010100111, simm7, reg.to_reg(), rt, rt2));
                    }
                    &PairAMode::PostIndexed(reg, simm7) => {
                        assert_eq!(simm7.scale_ty, I64);
                        sink.put4(enc_ldst_pair(0b1010100011, simm7, reg.to_reg(), rt, rt2));
                    }
                }
            }
            &Inst::FpuLoadP64 {
                rt,
                rt2,
                ref mem,
                flags,
            }
            | &Inst::FpuLoadP128 {
                rt,
                rt2,
                ref mem,
                flags,
            } => {
                let srcloc = state.cur_srcloc();

                if srcloc != SourceLoc::default() && !flags.notrap() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }

                let opc = match self {
                    &Inst::FpuLoadP64 { .. } => 0b01,
                    &Inst::FpuLoadP128 { .. } => 0b10,
                    _ => unreachable!(),
                };
                let rt = rt.to_reg();
                let rt2 = rt2.to_reg();

                match mem {
                    &PairAMode::SignedOffset(reg, simm7) => {
                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
                        sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2));
                    }
                    &PairAMode::PreIndexed(reg, simm7) => {
                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
                        sink.put4(enc_ldst_vec_pair(
                            opc,
                            0b11,
                            true,
                            simm7,
                            reg.to_reg(),
                            rt,
                            rt2,
                        ));
                    }
                    &PairAMode::PostIndexed(reg, simm7) => {
                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
                        sink.put4(enc_ldst_vec_pair(
                            opc,
                            0b01,
                            true,
                            simm7,
                            reg.to_reg(),
                            rt,
                            rt2,
                        ));
                    }
                }
            }
            &Inst::FpuStoreP64 {
                rt,
                rt2,
                ref mem,
                flags,
            }
            | &Inst::FpuStoreP128 {
                rt,
                rt2,
                ref mem,
                flags,
            } => {
                let srcloc = state.cur_srcloc();

                if srcloc != SourceLoc::default() && !flags.notrap() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }

                let opc = match self {
                    &Inst::FpuStoreP64 { .. } => 0b01,
                    &Inst::FpuStoreP128 { .. } => 0b10,
                    _ => unreachable!(),
                };

                match mem {
                    &PairAMode::SignedOffset(reg, simm7) => {
                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
                        sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2));
                    }
                    &PairAMode::PreIndexed(reg, simm7) => {
                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
                        sink.put4(enc_ldst_vec_pair(
                            opc,
                            0b11,
                            false,
                            simm7,
                            reg.to_reg(),
                            rt,
                            rt2,
                        ));
                    }
                    &PairAMode::PostIndexed(reg, simm7) => {
                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
                        sink.put4(enc_ldst_vec_pair(
                            opc,
                            0b01,
                            false,
                            simm7,
                            reg.to_reg(),
                            rt,
                            rt2,
                        ));
                    }
                }
            }
            &Inst::Mov64 { rd, rm } => {
                assert!(rd.to_reg().get_class() == rm.get_class());
                assert!(rm.get_class() == RegClass::I64);

                // MOV to SP is interpreted as MOV to XZR instead. And our codegen
                // should never MOV to XZR.
                assert!(rd.to_reg() != stack_reg());

                if rm == stack_reg() {
                    // We can't use ORR here, so use an `add rd, sp, #0` instead.
                    let imm12 = Imm12::maybe_from_u64(0).unwrap();
                    sink.put4(enc_arith_rr_imm12(
                        0b100_10001,
                        imm12.shift_bits(),
                        imm12.imm_bits(),
                        rm,
                        rd,
                    ));
                } else {
                    // Encoded as ORR rd, rm, zero.
                    sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
                }
            }
            &Inst::Mov32 { rd, rm } => {
                // MOV to SP is interpreted as MOV to XZR instead. And our codegen
                // should never MOV to XZR.
                assert!(machreg_to_gpr(rd.to_reg()) != 31);
                // Encoded as ORR rd, rm, zero.
                sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm));
            }
            &Inst::MovZ { rd, imm, size } => {
                sink.put4(enc_move_wide(MoveWideOpcode::MOVZ, rd, imm, size))
            }
            &Inst::MovN { rd, imm, size } => {
                sink.put4(enc_move_wide(MoveWideOpcode::MOVN, rd, imm, size))
            }
            &Inst::MovK { rd, imm, size } => {
                sink.put4(enc_move_wide(MoveWideOpcode::MOVK, rd, imm, size))
            }
            &Inst::CSel { rd, rn, rm, cond } => {
                sink.put4(enc_csel(rd, rn, rm, cond));
            }
            &Inst::CSet { rd, cond } => {
                sink.put4(enc_cset(rd, cond));
            }
            &Inst::CSetm { rd, cond } => {
                sink.put4(enc_csetm(rd, cond));
            }
            &Inst::CCmpImm {
                size,
                rn,
                imm,
                nzcv,
                cond,
            } => {
                sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
            }
            &Inst::AtomicRMW { ty, op } => {
                /* Emit this:
                     again:
                      ldaxr{,b,h}  x/w27, [x25]
                      op           x28, x27, x26 // op is add,sub,and,orr,eor
                      stlxr{,b,h}  w24, x/w28, [x25]
                      cbnz         x24, again

                   Operand conventions:
                      IN:  x25 (addr), x26 (2nd arg for op)
                      OUT: x27 (old value), x24 (trashed), x28 (trashed)

                   It is unfortunate that, per the ARM documentation, x28 cannot be used for
                   both the store-data and success-flag operands of stlxr. This causes the
                   instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24
                   instead for the success-flag.

                   In the case where the operation is 'xchg', the second insn is instead
                      mov          x28, x26
                   so that we simply write in the destination, the "2nd arg for op".
                */
                // TODO: We should not hardcode registers here; a better idea would be to
                // pass some scratch registers in the AtomicRMW pseudo-instruction, and use those.
                let xzr = zero_reg();
                let x24 = xreg(24);
                let x25 = xreg(25);
                let x26 = xreg(26);
                let x27 = xreg(27);
                let x28 = xreg(28);
                let x24wr = writable_xreg(24);
                let x27wr = writable_xreg(27);
                let x28wr = writable_xreg(28);
                let again_label = sink.get_label();

                // again:
                sink.bind_label(again_label);
                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() {
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }
                sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25]

                match op {
                    AtomicRmwOp::Xchg => {
                        // mov x28, x26
                        Inst::Mov64 { rd: x28wr, rm: x26 }.emit(sink, emit_info, state);
                    }
                    AtomicRmwOp::Nand => {
                        // and x28, x27, x26
                        // mvn x28, x28

                        Inst::AluRRR {
                            alu_op: ALUOp::And64,
                            rd: x28wr,
                            rn: x27,
                            rm: x26,
                        }
                        .emit(sink, emit_info, state);

                        Inst::AluRRR {
                            alu_op: ALUOp::OrrNot64,
                            rd: x28wr,
                            rn: xzr,
                            rm: x28,
                        }
                        .emit(sink, emit_info, state);
                    }
                    AtomicRmwOp::Umin
                    | AtomicRmwOp::Umax
                    | AtomicRmwOp::Smin
                    | AtomicRmwOp::Smax => {
                        // cmp x27, x26
                        // csel.op x28, x27, x26

                        let cond = match op {
                            AtomicRmwOp::Umin => Cond::Lo,
                            AtomicRmwOp::Umax => Cond::Hi,
                            AtomicRmwOp::Smin => Cond::Lt,
                            AtomicRmwOp::Smax => Cond::Gt,
                            _ => unreachable!(),
                        };

                        Inst::AluRRR {
                            alu_op: if ty == I64 {
                                ALUOp::SubS64
                            } else {
                                ALUOp::SubS32
                            },
                            rd: writable_zero_reg(),
                            rn: x27,
                            rm: x26,
                        }
                        .emit(sink, emit_info, state);

                        Inst::CSel {
                            cond,
                            rd: x28wr,
                            rn: x27,
                            rm: x26,
                        }
                        .emit(sink, emit_info, state);
                    }
                    _ => {
                        // add/sub/and/orr/eor x28, x27, x26
                        let alu_op = match op {
                            AtomicRmwOp::Add => ALUOp::Add64,
                            AtomicRmwOp::Sub => ALUOp::Sub64,
                            AtomicRmwOp::And => ALUOp::And64,
                            AtomicRmwOp::Or => ALUOp::Orr64,
                            AtomicRmwOp::Xor => ALUOp::Eor64,
                            AtomicRmwOp::Nand
                            | AtomicRmwOp::Umin
                            | AtomicRmwOp::Umax
                            | AtomicRmwOp::Smin
                            | AtomicRmwOp::Smax
                            | AtomicRmwOp::Xchg => unreachable!(),
                        };

                        Inst::AluRRR {
                            alu_op,
                            rd: x28wr,
                            rn: x27,
                            rm: x26,
                        }
                        .emit(sink, emit_info, state);
                    }
                }

                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() {
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }
                sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]

                // cbnz w24, again
                // Note, we're actually testing x24, and relying on the default zero-high-half
                // rule in the assignment that `stlxr` does.
                let br_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(again_label),
                    CondBrKind::NotZero(x24),
                ));
                sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
            }
            &Inst::AtomicCAS { rs, rt, rn, ty } => {
                let size = match ty {
                    I8 => 0b00,
                    I16 => 0b01,
                    I32 => 0b10,
                    I64 => 0b11,
                    _ => panic!("Unsupported type: {}", ty),
                };

                sink.put4(enc_cas(size, rs, rt, rn));
            }
            &Inst::AtomicCASLoop { ty } => {
                /* Emit this:
                     again:
                      ldaxr{,b,h} x/w27, [x25]
                      cmp         x27, x/w26 uxt{b,h}
                      b.ne        out
                      stlxr{,b,h} w24, x/w28, [x25]
                      cbnz        x24, again
                     out:

                   Operand conventions:
                      IN:  x25 (addr), x26 (expected value), x28 (replacement value)
                      OUT: x27 (old value), x24 (trashed)
                */
                let x24 = xreg(24);
                let x25 = xreg(25);
                let x26 = xreg(26);
                let x27 = xreg(27);
                let x28 = xreg(28);
                let xzrwr = writable_zero_reg();
                let x24wr = writable_xreg(24);
                let x27wr = writable_xreg(27);
                let again_label = sink.get_label();
                let out_label = sink.get_label();

                // again:
                sink.bind_label(again_label);
                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() {
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }
                // ldaxr x27, [x25]
                sink.put4(enc_ldaxr(ty, x27wr, x25));

                // The top 32 bits are zero-extended by the ldaxr so we don't
                // have to use UXTW, just the x-form of the register.
                let (bit21, extend_op) = match ty {
                    I8 => (0b1, 0b000000),
                    I16 => (0b1, 0b001000),
                    _ => (0b0, 0b000000),
                };
                let bits_31_21 = 0b111_01011_000 | bit21;
                // cmp x27, x26 (== subs xzr, x27, x26)
                sink.put4(enc_arith_rrr(bits_31_21, extend_op, xzrwr, x27, x26));

                // b.ne out
                let br_out_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(out_label),
                    CondBrKind::Cond(Cond::Ne),
                ));
                sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19);

                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() {
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }
                sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]

                // cbnz w24, again.
                // Note, we're actually testing x24, and relying on the default zero-high-half
                // rule in the assignment that `stlxr` does.
                let br_again_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(again_label),
                    CondBrKind::NotZero(x24),
                ));
                sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19);

                // out:
                sink.bind_label(out_label);
            }
            &Inst::LoadAcquire { access_ty, rt, rn } => {
                sink.put4(enc_ldar(access_ty, rt, rn));
            }
            &Inst::StoreRelease { access_ty, rt, rn } => {
                sink.put4(enc_stlr(access_ty, rt, rn));
            }
            &Inst::Fence {} => {
                sink.put4(enc_dmb_ish()); // dmb ish
            }
            &Inst::FpuMove64 { rd, rn } => {
                sink.put4(enc_fpurr(0b000_11110_01_1_000000_10000, rd, rn));
            }
            &Inst::FpuMove128 { rd, rn } => {
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
            }
            &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
                let (imm5, shift, mask) = match size.lane_size() {
                    ScalarSize::Size32 => (0b00100, 3, 0b011),
                    ScalarSize::Size64 => (0b01000, 4, 0b001),
                    _ => unimplemented!(),
                };
                debug_assert_eq!(idx & mask, idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b010_11110000_00000_000001_00000_00000
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::FpuExtend { rd, rn, size } => {
                sink.put4(enc_fpurr(
                    0b000_11110_00_1_000000_10000 | (size.ftype() << 13),
                    rd,
                    rn,
                ));
            }
            &Inst::FpuRR { fpu_op, rd, rn } => {
                let top22 = match fpu_op {
                    FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000,
                    FPUOp1::Abs64 => 0b000_11110_01_1_000001_10000,
                    FPUOp1::Neg32 => 0b000_11110_00_1_000010_10000,
                    FPUOp1::Neg64 => 0b000_11110_01_1_000010_10000,
                    FPUOp1::Sqrt32 => 0b000_11110_00_1_000011_10000,
                    FPUOp1::Sqrt64 => 0b000_11110_01_1_000011_10000,
                    FPUOp1::Cvt32To64 => 0b000_11110_00_1_000101_10000,
                    FPUOp1::Cvt64To32 => 0b000_11110_01_1_000100_10000,
                };
                sink.put4(enc_fpurr(top22, rd, rn));
            }
            &Inst::FpuRRR { fpu_op, rd, rn, rm } => {
                let top22 = match fpu_op {
                    FPUOp2::Add32 => 0b000_11110_00_1_00000_001010,
                    FPUOp2::Add64 => 0b000_11110_01_1_00000_001010,
                    FPUOp2::Sub32 => 0b000_11110_00_1_00000_001110,
                    FPUOp2::Sub64 => 0b000_11110_01_1_00000_001110,
                    FPUOp2::Mul32 => 0b000_11110_00_1_00000_000010,
                    FPUOp2::Mul64 => 0b000_11110_01_1_00000_000010,
                    FPUOp2::Div32 => 0b000_11110_00_1_00000_000110,
                    FPUOp2::Div64 => 0b000_11110_01_1_00000_000110,
                    FPUOp2::Max32 => 0b000_11110_00_1_00000_010010,
                    FPUOp2::Max64 => 0b000_11110_01_1_00000_010010,
                    FPUOp2::Min32 => 0b000_11110_00_1_00000_010110,
                    FPUOp2::Min64 => 0b000_11110_01_1_00000_010110,
                    FPUOp2::Sqadd64 => 0b010_11110_11_1_00000_000011,
                    FPUOp2::Uqadd64 => 0b011_11110_11_1_00000_000011,
                    FPUOp2::Sqsub64 => 0b010_11110_11_1_00000_001011,
                    FPUOp2::Uqsub64 => 0b011_11110_11_1_00000_001011,
                };
                sink.put4(enc_fpurrr(top22, rd, rn, rm));
            }
            &Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {
                FPUOpRI::UShr32(imm) => {
                    debug_assert_eq!(32, imm.lane_size_in_bits);
                    sink.put4(
                        0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000
                            | imm.enc() << 16
                            | machreg_to_vec(rn) << 5
                            | machreg_to_vec(rd.to_reg()),
                    )
                }
                FPUOpRI::UShr64(imm) => {
                    debug_assert_eq!(64, imm.lane_size_in_bits);
                    sink.put4(
                        0b01_1_111110_0000000_00_0_0_0_1_00000_00000
                            | imm.enc() << 16
                            | machreg_to_vec(rn) << 5
                            | machreg_to_vec(rd.to_reg()),
                    )
                }
                FPUOpRI::Sli64(imm) => {
                    debug_assert_eq!(64, imm.lane_size_in_bits);
                    sink.put4(
                        0b01_1_111110_0000000_010101_00000_00000
                            | imm.enc() << 16
                            | machreg_to_vec(rn) << 5
                            | machreg_to_vec(rd.to_reg()),
                    )
                }
                FPUOpRI::Sli32(imm) => {
                    debug_assert_eq!(32, imm.lane_size_in_bits);
                    sink.put4(
                        0b0_0_1_011110_0000000_010101_00000_00000
                            | imm.enc() << 16
                            | machreg_to_vec(rn) << 5
                            | machreg_to_vec(rd.to_reg()),
                    )
                }
            },
            &Inst::FpuRRRR {
                fpu_op,
                rd,
                rn,
                rm,
                ra,
            } => {
                let top17 = match fpu_op {
                    FPUOp3::MAdd32 => 0b000_11111_00_0_00000_0,
                    FPUOp3::MAdd64 => 0b000_11111_01_0_00000_0,
                };
                sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
            }
            &Inst::VecMisc { op, rd, rn, size } => {
                let (q, enc_size) = size.enc_size();
                let (u, bits_12_16, size) = match op {
                    VecMisc2::Not => (0b1, 0b00101, 0b00),
                    VecMisc2::Neg => (0b1, 0b01011, enc_size),
                    VecMisc2::Abs => (0b0, 0b01011, enc_size),
                    VecMisc2::Fabs => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b0, 0b01111, enc_size)
                    }
                    VecMisc2::Fneg => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b1, 0b01111, enc_size)
                    }
                    VecMisc2::Fsqrt => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b1, 0b11111, enc_size)
                    }
                    VecMisc2::Rev64 => {
                        debug_assert_ne!(VectorSize::Size64x2, size);
                        (0b0, 0b00000, enc_size)
                    }
                    VecMisc2::Fcvtzs => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b0, 0b11011, enc_size)
                    }
                    VecMisc2::Fcvtzu => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b1, 0b11011, enc_size)
                    }
                    VecMisc2::Scvtf => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b0, 0b11101, enc_size & 0b1)
                    }
                    VecMisc2::Ucvtf => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b1, 0b11101, enc_size & 0b1)
                    }
                    VecMisc2::Frintn => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b0, 0b11000, enc_size & 0b01)
                    }
                    VecMisc2::Frintz => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b0, 0b11001, enc_size | 0b10)
                    }
                    VecMisc2::Frintm => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b0, 0b11001, enc_size & 0b01)
                    }
                    VecMisc2::Frintp => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b0, 0b11000, enc_size | 0b10)
                    }
                    VecMisc2::Cnt => {
                        debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);
                        (0b0, 0b00101, enc_size)
                    }
                    VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size),
                };
                sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
            }
            &Inst::VecLanes { op, rd, rn, size } => {
                let (q, size) = match size {
                    VectorSize::Size8x8 => (0b0, 0b00),
                    VectorSize::Size8x16 => (0b1, 0b00),
                    VectorSize::Size16x4 => (0b0, 0b01),
                    VectorSize::Size16x8 => (0b1, 0b01),
                    VectorSize::Size32x4 => (0b1, 0b10),
                    _ => unreachable!(),
                };
                let (u, opcode) = match op {
                    VecLanesOp::Uminv => (0b1, 0b11010),
                    VecLanesOp::Addv => (0b0, 0b11011),
                };
                sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
            }
            &Inst::VecShiftImm {
                op,
                rd,
                rn,
                size,
                imm,
            } => {
                let (is_shr, template) = match op {
                    VecShiftImmOp::Ushr => (true, 0b_011_011110_0000_000_000001_00000_00000_u32),
                    VecShiftImmOp::Sshr => (true, 0b_010_011110_0000_000_000001_00000_00000_u32),
                    VecShiftImmOp::Shl => (false, 0b_010_011110_0000_000_010101_00000_00000_u32),
                };
                let imm = imm as u32;
                // Deal with the somewhat strange encoding scheme for, and limits on,
                // the shift amount.
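                // In the immh:immb field, a right shift by `imm` on a lane of
                // `esize` bits is encoded as `2 * esize - imm`, while a left
                // shift is encoded as `esize + imm`; the arms below spell this
                // out per lane size.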
                let immh_immb = match (size, is_shr) {
                    (VectorSize::Size64x2, true) if imm >= 1 && imm <= 64 => {
                        0b_1000_000_u32 | (64 - imm)
                    }
                    (VectorSize::Size32x4, true) if imm >= 1 && imm <= 32 => {
                        0b_0100_000_u32 | (32 - imm)
                    }
                    (VectorSize::Size16x8, true) if imm >= 1 && imm <= 16 => {
                        0b_0010_000_u32 | (16 - imm)
                    }
                    (VectorSize::Size8x16, true) if imm >= 1 && imm <= 8 => {
                        0b_0001_000_u32 | (8 - imm)
                    }
                    (VectorSize::Size64x2, false) if imm <= 63 => 0b_1000_000_u32 | imm,
                    (VectorSize::Size32x4, false) if imm <= 31 => 0b_0100_000_u32 | imm,
                    (VectorSize::Size16x8, false) if imm <= 15 => 0b_0010_000_u32 | imm,
                    (VectorSize::Size8x16, false) if imm <= 7 => 0b_0001_000_u32 | imm,
                    _ => panic!(
                        "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {:?}, {:?}, {:?}",
                        op, size, imm
                    ),
                };
                let rn_enc = machreg_to_vec(rn);
                let rd_enc = machreg_to_vec(rd.to_reg());
                sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
            }
            &Inst::VecExtract { rd, rn, rm, imm4 } => {
                if imm4 < 16 {
                    let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;
                    let rm_enc = machreg_to_vec(rm);
                    let rn_enc = machreg_to_vec(rn);
                    let rd_enc = machreg_to_vec(rd.to_reg());
                    sink.put4(
                        template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,
                    );
                } else {
                    panic!(
                        "aarch64: Inst::VecExtract: emit: invalid extract index {}",
                        imm4
                    );
                }
            }
            &Inst::VecTbl {
                rd,
                rn,
                rm,
                is_extension,
            } => {
                sink.put4(enc_tbl(is_extension, 0b00, rd, rn, rm));
            }
            &Inst::VecTbl2 {
                rd,
                rn,
                rn2,
                rm,
                is_extension,
            } => {
                assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
                sink.put4(enc_tbl(is_extension, 0b01, rd, rn, rm));
            }
            &Inst::FpuCmp32 { rn, rm } => {
                sink.put4(enc_fcmp(ScalarSize::Size32, rn, rm));
            }
            &Inst::FpuCmp64 { rn, rm } => {
                sink.put4(enc_fcmp(ScalarSize::Size64, rn, rm));
            }
            &Inst::FpuToInt { op, rd, rn } => {
                let top16 = match op {
                    // FCVTZS (32/32-bit)
                    FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000,
                    // FCVTZU (32/32-bit)
                    FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001,
                    // FCVTZS (32/64-bit)
                    FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000,
                    // FCVTZU (32/64-bit)
                    FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001,
                    // FCVTZS (64/32-bit)
                    FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000,
                    // FCVTZU (64/32-bit)
                    FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001,
                    // FCVTZS (64/64-bit)
                    FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000,
                    // FCVTZU (64/64-bit)
                    FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001,
                };
                sink.put4(enc_fputoint(top16, rd, rn));
            }
            &Inst::IntToFpu { op, rd, rn } => {
                let top16 = match op {
                    // SCVTF (32/32-bit)
                    IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010,
                    // UCVTF (32/32-bit)
                    IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011,
                    // SCVTF (64/32-bit)
                    IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010,
                    // UCVTF (64/32-bit)
                    IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011,
                    // SCVTF (32/64-bit)
                    IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010,
                    // UCVTF (32/64-bit)
                    IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011,
                    // SCVTF (64/64-bit)
                    IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010,
                    // UCVTF (64/64-bit)
                    IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011,
                };
                sink.put4(enc_inttofpu(top16, rd, rn));
            }
            &Inst::LoadFpuConst64 { rd, const_data } => {
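                // Emitted as a short sequence with an inline literal:
                //   ldr rd, pc+8      ; load the 8-byte literal below
                //   b   pc+12        ; branch over the literal
                //   <8 bytes of const_data>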
                let inst = Inst::FpuLoad64 {
                    rd,
                    mem: AMode::Label(MemLabel::PCRel(8)),
                    flags: MemFlags::trusted(),
                };
                inst.emit(sink, emit_info, state);
                let inst = Inst::Jump {
                    dest: BranchTarget::ResolvedOffset(12),
                };
                inst.emit(sink, emit_info, state);
                sink.put8(const_data);
            }
            &Inst::LoadFpuConst128 { rd, const_data } => {
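                // Same idea as `LoadFpuConst64`, but with a 16-byte literal:
                //   ldr rd, pc+8      ; load the 16-byte literal below
                //   b   pc+20        ; branch over the literal
                //   <16 bytes of const_data>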
                let inst = Inst::FpuLoad128 {
                    rd,
                    mem: AMode::Label(MemLabel::PCRel(8)),
                    flags: MemFlags::trusted(),
                };
                inst.emit(sink, emit_info, state);
                let inst = Inst::Jump {
                    dest: BranchTarget::ResolvedOffset(20),
                };
                inst.emit(sink, emit_info, state);

                for i in const_data.to_le_bytes().iter() {
                    sink.put1(*i);
                }
            }
            &Inst::FpuCSel32 { rd, rn, rm, cond } => {
                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32));
            }
            &Inst::FpuCSel64 { rd, rn, rm, cond } => {
                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64));
            }
            &Inst::FpuRound { op, rd, rn } => {
                let top22 = match op {
                    FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000,
                    FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000,
                    FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000,
                    FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000,
                    FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000,
                    FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000,
                    FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000,
                    FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000,
                };
                sink.put4(enc_fround(top22, rd, rn));
            }
            &Inst::MovToFpu { rd, rn, size } => {
                let template = match size {
                    ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000,
                    ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000,
                    _ => unreachable!(),
                };
                sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
            }
            &Inst::MovToVec { rd, rn, idx, size } => {
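                // INS (general): imm5 encodes both the element size (the position
                // of its lowest set bit) and the destination lane index (the bits
                // above that).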
                let (imm5, shift) = match size.lane_size() {
                    ScalarSize::Size8 => (0b00001, 1),
                    ScalarSize::Size16 => (0b00010, 2),
                    ScalarSize::Size32 => (0b00100, 3),
                    ScalarSize::Size64 => (0b01000, 4),
                    _ => unreachable!(),
                };
                debug_assert_eq!(idx & (0b11111 >> shift), idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b010_01110000_00000_0_0011_1_00000_00000
                        | (imm5 << 16)
                        | (machreg_to_gpr(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::MovFromVec { rd, rn, idx, size } => {
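                // UMOV (general): as with INS, imm5 encodes the element size and
                // lane index; the `q` bit must be set for the 64-bit variant.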
                let (q, imm5, shift, mask) = match size {
                    VectorSize::Size8x16 => (0b0, 0b00001, 1, 0b1111),
                    VectorSize::Size16x8 => (0b0, 0b00010, 2, 0b0111),
                    VectorSize::Size32x4 => (0b0, 0b00100, 3, 0b0011),
                    VectorSize::Size64x2 => (0b1, 0b01000, 4, 0b0001),
                    _ => unreachable!(),
                };
                debug_assert_eq!(idx & mask, idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b000_01110000_00000_0_0111_1_00000_00000
                        | (q << 30)
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_gpr(rd.to_reg()),
                );
            }
            &Inst::MovFromVecSigned {
                rd,
                rn,
                idx,
                size,
                scalar_size,
            } => {
                let (imm5, shift, half) = match size {
                    VectorSize::Size8x8 => (0b00001, 1, true),
                    VectorSize::Size8x16 => (0b00001, 1, false),
                    VectorSize::Size16x4 => (0b00010, 2, true),
                    VectorSize::Size16x8 => (0b00010, 2, false),
                    VectorSize::Size32x2 => {
                        debug_assert_ne!(scalar_size, OperandSize::Size32);
                        (0b00100, 3, true)
                    }
                    VectorSize::Size32x4 => {
                        debug_assert_ne!(scalar_size, OperandSize::Size32);
                        (0b00100, 3, false)
                    }
                    _ => panic!("Unexpected vector operand size"),
                };
                debug_assert_eq!(idx & (0b11111 >> (half as u32 + shift)), idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b000_01110000_00000_0_0101_1_00000_00000
                        | (scalar_size.is64() as u32) << 30
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_gpr(rd.to_reg()),
                );
            }
            &Inst::VecDup { rd, rn, size } => {
                let imm5 = match size {
                    VectorSize::Size8x16 => 0b00001,
                    VectorSize::Size16x8 => 0b00010,
                    VectorSize::Size32x4 => 0b00100,
                    VectorSize::Size64x2 => 0b01000,
                    _ => unimplemented!(),
                };
                sink.put4(
                    0b010_01110000_00000_000011_00000_00000
                        | (imm5 << 16)
                        | (machreg_to_gpr(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecDupFromFpu { rd, rn, size } => {
                let imm5 = match size {
                    VectorSize::Size32x4 => 0b00100,
                    VectorSize::Size64x2 => 0b01000,
                    _ => unimplemented!(),
                };
                sink.put4(
                    0b010_01110000_00000_000001_00000_00000
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecDupFPImm { rd, imm, size } => {
                let imm = imm.enc_bits();
                let op = match size.lane_size() {
                    ScalarSize::Size32 => 0,
                    ScalarSize::Size64 => 1,
                    _ => unimplemented!(),
                };
                let q_op = op | ((size.is_128bits() as u32) << 1);

                sink.put4(enc_asimd_mod_imm(rd, q_op, 0b1111, imm));
            }
            &Inst::VecDupImm {
                rd,
                imm,
                invert,
                size,
            } => {
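                // MOVI/MVNI (vector, modified immediate): `cmode` selects the lane
                // size and the shift applied to the 8-bit immediate, while `op`
                // (derived from `invert` for 16/32-bit lanes) distinguishes MOVI
                // from MVNI; for 64-bit lanes, op == 1 with cmode == 0b1110 selects
                // the per-byte-mask MOVI form.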
                let (imm, shift, shift_ones) = imm.value();
                let (op, cmode) = match size.lane_size() {
                    ScalarSize::Size8 => {
                        assert!(!invert);
                        assert_eq!(shift, 0);

                        (0, 0b1110)
                    }
                    ScalarSize::Size16 => {
                        let s = shift & 8;

                        assert!(!shift_ones);
                        assert_eq!(s, shift);

                        (invert as u32, 0b1000 | (s >> 2))
                    }
                    ScalarSize::Size32 => {
                        if shift_ones {
                            assert!(shift == 8 || shift == 16);

                            (invert as u32, 0b1100 | (shift >> 4))
                        } else {
                            let s = shift & 24;

                            assert_eq!(s, shift);

                            (invert as u32, 0b0000 | (s >> 2))
                        }
                    }
                    ScalarSize::Size64 => {
                        assert!(!invert);
                        assert_eq!(shift, 0);

                        (1, 0b1110)
                    }
                    _ => unreachable!(),
                };
                let q_op = op | ((size.is_128bits() as u32) << 1);

                sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm));
            }
            &Inst::VecExtend {
                t,
                rd,
                rn,
                high_half,
            } => {
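                // SXTL/UXTL are aliases of SSHLL/USHLL with a zero shift: the
                // single set bit of immh encodes the source element size, `u`
                // selects signed vs. unsigned extension, and the Q bit
                // (`high_half`) selects the SXTL2/UXTL2 forms.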
                let (u, immh) = match t {
                    VecExtendOp::Sxtl8 => (0b0, 0b001),
                    VecExtendOp::Sxtl16 => (0b0, 0b010),
                    VecExtendOp::Sxtl32 => (0b0, 0b100),
                    VecExtendOp::Uxtl8 => (0b1, 0b001),
                    VecExtendOp::Uxtl16 => (0b1, 0b010),
                    VecExtendOp::Uxtl32 => (0b1, 0b100),
                };
                sink.put4(
                    0b000_011110_0000_000_101001_00000_00000
                        | ((high_half as u32) << 30)
                        | (u << 29)
                        | (immh << 19)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecRRLong {
                op,
                rd,
                rn,
                high_half,
            } => {
                let (u, size, bits_12_16) = match op {
                    VecRRLongOp::Fcvtl16 => (0b0, 0b00, 0b10111),
                    VecRRLongOp::Fcvtl32 => (0b0, 0b01, 0b10111),
                    VecRRLongOp::Shll8 => (0b1, 0b00, 0b10011),
                    VecRRLongOp::Shll16 => (0b1, 0b01, 0b10011),
                    VecRRLongOp::Shll32 => (0b1, 0b10, 0b10011),
                };

                sink.put4(enc_vec_rr_misc(
                    ((high_half as u32) << 1) | u,
                    size,
                    bits_12_16,
                    rd,
                    rn,
                ));
            }
            &Inst::VecRRNarrow {
                op,
                rd,
                rn,
                high_half,
            } => {
                let (u, size, bits_12_16) = match op {
                    VecRRNarrowOp::Xtn16 => (0b0, 0b00, 0b10010),
                    VecRRNarrowOp::Xtn32 => (0b0, 0b01, 0b10010),
                    VecRRNarrowOp::Xtn64 => (0b0, 0b10, 0b10010),
                    VecRRNarrowOp::Sqxtn16 => (0b0, 0b00, 0b10100),
                    VecRRNarrowOp::Sqxtn32 => (0b0, 0b01, 0b10100),
                    VecRRNarrowOp::Sqxtn64 => (0b0, 0b10, 0b10100),
                    VecRRNarrowOp::Sqxtun16 => (0b1, 0b00, 0b10010),
                    VecRRNarrowOp::Sqxtun32 => (0b1, 0b01, 0b10010),
                    VecRRNarrowOp::Sqxtun64 => (0b1, 0b10, 0b10010),
                    VecRRNarrowOp::Uqxtn16 => (0b1, 0b00, 0b10100),
                    VecRRNarrowOp::Uqxtn32 => (0b1, 0b01, 0b10100),
                    VecRRNarrowOp::Uqxtn64 => (0b1, 0b10, 0b10100),
                    VecRRNarrowOp::Fcvtn32 => (0b0, 0b00, 0b10110),
                    VecRRNarrowOp::Fcvtn64 => (0b0, 0b01, 0b10110),
                };

                sink.put4(enc_vec_rr_misc(
                    ((high_half as u32) << 1) | u,
                    size,
                    bits_12_16,
                    rd,
                    rn,
                ));
            }
            &Inst::VecMovElement {
                rd,
                rn,
                dest_idx,
                src_idx,
                size,
            } => {
                let (imm5, shift) = match size.lane_size() {
                    ScalarSize::Size8 => (0b00001, 1),
                    ScalarSize::Size16 => (0b00010, 2),
                    ScalarSize::Size32 => (0b00100, 3),
                    ScalarSize::Size64 => (0b01000, 4),
                    _ => unreachable!(),
                };
                let mask = 0b11111 >> shift;
                debug_assert_eq!(dest_idx & mask, dest_idx);
                debug_assert_eq!(src_idx & mask, src_idx);
                let imm4 = (src_idx as u32) << (shift - 1);
                let imm5 = imm5 | ((dest_idx as u32) << shift);
                sink.put4(
                    0b011_01110000_00000_0_0000_1_00000_00000
                        | (imm5 << 16)
                        | (imm4 << 11)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecRRPair { op, rd, rn } => {
                let bits_12_16 = match op {
                    VecPairOp::Addp => 0b11011,
                };

                sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn));
            }
            &Inst::VecRRRLong {
                rd,
                rn,
                rm,
                alu_op,
                high_half,
            } => {
                let (u, size, bit14) = match alu_op {
                    VecRRRLongOp::Smull8 => (0b0, 0b00, 0b1),
                    VecRRRLongOp::Smull16 => (0b0, 0b01, 0b1),
                    VecRRRLongOp::Smull32 => (0b0, 0b10, 0b1),
                    VecRRRLongOp::Umull8 => (0b1, 0b00, 0b1),
                    VecRRRLongOp::Umull16 => (0b1, 0b01, 0b1),
                    VecRRRLongOp::Umull32 => (0b1, 0b10, 0b1),
                    VecRRRLongOp::Umlal8 => (0b1, 0b00, 0b0),
                    VecRRRLongOp::Umlal16 => (0b1, 0b01, 0b0),
                    VecRRRLongOp::Umlal32 => (0b1, 0b10, 0b0),
                };
                sink.put4(enc_vec_rrr_long(
                    high_half as u32,
                    u,
                    size,
                    bit14,
                    rm,
                    rn,
                    rd,
                ));
            }
            &Inst::VecRRPairLong { op, rd, rn } => {
                let (u, size) = match op {
                    VecRRPairLongOp::Saddlp8 => (0b0, 0b0),
                    VecRRPairLongOp::Uaddlp8 => (0b1, 0b0),
                    VecRRPairLongOp::Saddlp16 => (0b0, 0b1),
                    VecRRPairLongOp::Uaddlp16 => (0b1, 0b1),
                };

                sink.put4(enc_vec_rr_pair_long(u, size, rd, rn));
            }
            &Inst::VecRRR {
                rd,
                rn,
                rm,
                alu_op,
                size,
            } => {
                let (q, enc_size) = size.enc_size();
                let is_float = match alu_op {
                    VecALUOp::Fcmeq
                    | VecALUOp::Fcmgt
                    | VecALUOp::Fcmge
                    | VecALUOp::Fadd
                    | VecALUOp::Fsub
                    | VecALUOp::Fdiv
                    | VecALUOp::Fmax
                    | VecALUOp::Fmin
                    | VecALUOp::Fmul => true,
                    _ => false,
                };
                let enc_float_size = match (is_float, size) {
                    (true, VectorSize::Size32x2) => 0b0,
                    (true, VectorSize::Size32x4) => 0b0,
                    (true, VectorSize::Size64x2) => 0b1,
                    (true, _) => unimplemented!(),
                    _ => 0,
                };

                let (top11, bit15_10) = match alu_op {
                    VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::Sqsub => (0b000_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::Uqadd => (0b001_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::Uqsub => (0b001_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::Cmeq => (0b001_01110_00_1 | enc_size << 1, 0b100011),
                    VecALUOp::Cmge => (0b000_01110_00_1 | enc_size << 1, 0b001111),
                    VecALUOp::Cmgt => (0b000_01110_00_1 | enc_size << 1, 0b001101),
                    VecALUOp::Cmhi => (0b001_01110_00_1 | enc_size << 1, 0b001101),
                    VecALUOp::Cmhs => (0b001_01110_00_1 | enc_size << 1, 0b001111),
                    VecALUOp::Fcmeq => (0b000_01110_00_1, 0b111001),
                    VecALUOp::Fcmgt => (0b001_01110_10_1, 0b111001),
                    VecALUOp::Fcmge => (0b001_01110_00_1, 0b111001),
                    // The following logical instructions operate on bytes, so are not encoded
                    // differently for the different vector types.
                    VecALUOp::And => (0b000_01110_00_1, 0b000111),
                    VecALUOp::Bic => (0b000_01110_01_1, 0b000111),
                    VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
                    VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
                    VecALUOp::Bsl => (0b001_01110_01_1, 0b000111),
                    VecALUOp::Umaxp => (0b001_01110_00_1 | enc_size << 1, 0b101001),
                    VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001),
                    VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001),
                    VecALUOp::Mul => {
                        debug_assert_ne!(size, VectorSize::Size64x2);
                        (0b000_01110_00_1 | enc_size << 1, 0b100111)
                    }
                    VecALUOp::Sshl => (0b000_01110_00_1 | enc_size << 1, 0b010001),
                    VecALUOp::Ushl => (0b001_01110_00_1 | enc_size << 1, 0b010001),
                    VecALUOp::Umin => (0b001_01110_00_1 | enc_size << 1, 0b011011),
                    VecALUOp::Smin => (0b000_01110_00_1 | enc_size << 1, 0b011011),
                    VecALUOp::Umax => (0b001_01110_00_1 | enc_size << 1, 0b011001),
                    VecALUOp::Smax => (0b000_01110_00_1 | enc_size << 1, 0b011001),
                    VecALUOp::Urhadd => (0b001_01110_00_1 | enc_size << 1, 0b000101),
                    VecALUOp::Fadd => (0b000_01110_00_1, 0b110101),
                    VecALUOp::Fsub => (0b000_01110_10_1, 0b110101),
                    VecALUOp::Fdiv => (0b001_01110_00_1, 0b111111),
                    VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),
                    VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
                    VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
                    VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
                    VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
                    VecALUOp::Sqrdmulh => {
                        debug_assert!(
                            size.lane_size() == ScalarSize::Size16
                                || size.lane_size() == ScalarSize::Size32
                        );

                        (0b001_01110_00_1 | enc_size << 1, 0b101101)
                    }
                };
                let top11 = if is_float {
                    top11 | enc_float_size << 1
                } else {
                    top11
                };
                sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
            }
            &Inst::VecLoadReplicate { rd, rn, size } => {
                let (q, size) = size.enc_size();

                let srcloc = state.cur_srcloc();
                if srcloc != SourceLoc::default() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                }

                sink.put4(enc_ldst_vec(q, size, rn, rd));
            }
            &Inst::VecCSel { rd, rn, rm, cond } => {
                /* Emit this:
                     b.cond  else
                     mov rd, rm
                     b   out
                   else:
                     mov rd, rn
                   out:

                   Note, we could do better in the cases where rd == rn or rd == rm.
                */
                let else_label = sink.get_label();
                let out_label = sink.get_label();

                // b.cond else
                let br_else_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(else_label),
                    CondBrKind::Cond(cond),
                ));
                sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19);

                // mov rd, rm
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rm));

                // b out
                let b_out_offset = sink.cur_offset();
                sink.use_label_at_offset(b_out_offset, out_label, LabelUse::Branch26);
                sink.add_uncond_branch(b_out_offset, b_out_offset + 4, out_label);
                sink.put4(enc_jump26(0b000101, 0 /* will be fixed up later */));

                // else:
                sink.bind_label(else_label);

                // mov rd, rn
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));

                // out:
                sink.bind_label(out_label);
            }
            &Inst::MovToNZCV { rn } => {
                sink.put4(0xd51b4200 | machreg_to_gpr(rn));
            }
            &Inst::MovFromNZCV { rd } => {
                sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg()));
            }
            &Inst::Extend {
                rd,
                rn,
                signed: false,
                from_bits: 1,
                to_bits,
            } => {
                assert!(to_bits <= 64);
                // Reduce zero-extend-from-1-bit to:
                // - and rd, rn, #1
                // Note: This is special cased as UBFX may take more cycles
                // than AND on smaller cores.
                let imml = ImmLogic::maybe_from_u64(1, I32).unwrap();
                Inst::AluRRImmLogic {
                    alu_op: ALUOp::And32,
                    rd,
                    rn,
                    imml,
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Extend {
                rd,
                rn,
                signed: false,
                from_bits: 32,
                to_bits: 64,
            } => {
                let mov = Inst::Mov32 { rd, rm: rn };
                mov.emit(sink, emit_info, state);
            }
            &Inst::Extend {
                rd,
                rn,
                signed,
                from_bits,
                to_bits,
            } => {
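                // General case: a bitfield-move with immr == 0 and
                // imms == from_bits - 1, i.e. SBFM for sign-extension and UBFM for
                // zero-extension. The unsigned case can always use the 32-bit form,
                // since a write to a W register zeroes the upper 32 bits.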
                let (opc, size) = if signed {
                    (0b00, OperandSize::from_bits(to_bits))
                } else {
                    (0b10, OperandSize::Size32)
                };
                sink.put4(enc_bfm(opc, size, rd, rn, 0, from_bits - 1));
            }
            &Inst::Jump { ref dest } => {
                let off = sink.cur_offset();
                // If the jump target is a label, register the use so that a fixup
                // can occur later.
                if let Some(l) = dest.as_label() {
                    sink.use_label_at_offset(off, l, LabelUse::Branch26);
                    sink.add_uncond_branch(off, off + 4, l);
                }
                // Emit the jump itself.
                sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
            }
            &Inst::Ret => {
                sink.put4(0xd65f03c0);
            }
            &Inst::EpiloguePlaceholder => {
                // Noop; this is just a placeholder for epilogues.
            }
            &Inst::Call { ref info } => {
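                // If a safepoint stack map is pending, attach it to the 4-byte
                // call instruction that is about to be emitted.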
                if let Some(s) = state.take_stack_map() {
                    sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
                }
                let loc = state.cur_srcloc();
                sink.add_reloc(loc, Reloc::Arm64Call, &info.dest, 0);
                sink.put4(enc_jump26(0b100101, 0));
                if info.opcode.is_call() {
                    sink.add_call_site(loc, info.opcode);
                }
            }
            &Inst::CallInd { ref info } => {
                if let Some(s) = state.take_stack_map() {
                    sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
                }
                sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5));
                let loc = state.cur_srcloc();
                if info.opcode.is_call() {
                    sink.add_call_site(loc, info.opcode);
                }
            }
            &Inst::CondBr {
                taken,
                not_taken,
                kind,
            } => {
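                // A two-target conditional branch lowers to a conditional branch to
                // `taken` followed by an unconditional branch to `not_taken`; the
                // MachBuffer's branch-folding logic can elide the latter when
                // `not_taken` is the fallthrough block.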
                // Conditional part first.
                let cond_off = sink.cur_offset();
                if let Some(l) = taken.as_label() {
                    sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
                    let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
                    sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
                }
                sink.put4(enc_conditional_br(taken, kind));

                // Unconditional part next.
                let uncond_off = sink.cur_offset();
                if let Some(l) = not_taken.as_label() {
                    sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
                    sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
                }
                sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
            }
            &Inst::TrapIf { kind, trap_code } => {
                // Branch over the trap with the *inverted* condition:
                //   b.!KIND LABEL
                let off = sink.cur_offset();
                let label = sink.get_label();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(label),
                    kind.invert(),
                ));
                sink.use_label_at_offset(off, label, LabelUse::Branch19);
                // udf
                let trap = Inst::Udf { trap_code };
                trap.emit(sink, emit_info, state);
                // LABEL:
                sink.bind_label(label);
            }
            &Inst::IndirectBr { rn, .. } => {
                sink.put4(enc_br(rn));
            }
            &Inst::Nop0 => {}
            &Inst::Nop4 => {
                sink.put4(0xd503201f);
            }
            &Inst::Brk => {
                sink.put4(0xd4200000);
            }
            &Inst::Udf { trap_code } => {
                let srcloc = state.cur_srcloc();
                sink.add_trap(srcloc, trap_code);
                if let Some(s) = state.take_stack_map() {
                    sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
                }
                sink.put4(0xd4a00000);
            }
            &Inst::Adr { rd, off } => {
                assert!(off > -(1 << 20));
                assert!(off < (1 << 20));
                sink.put4(enc_adr(off, rd));
            }
            &Inst::Word4 { data } => {
                sink.put4(data);
            }
            &Inst::Word8 { data } => {
                sink.put8(data);
            }
            &Inst::JTSequence {
                ridx,
                rtmp1,
                rtmp2,
                ref info,
                ..
            } => {
                // This sequence is *one* instruction in the vcode, and is expanded only here at
                // emission time, because we cannot allow the regalloc to insert spills/reloads in
                // the middle; we depend on hardcoded PC-rel addressing below.

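                // Schematically, the emitted sequence is:
                //   b.hs   default          ; index out of bounds => default target
                //   mov    rtmp2, ridx
                //   adr    rtmp1, jt        ; jump-table base, 16 bytes ahead
                //   ldrsw  rtmp2, [rtmp1, rtmp2, uxtw #2]
                //   add    rtmp1, rtmp1, rtmp2
                //   br     rtmp1
                // jt:
                //   .word  target0 - jt, target1 - jt, ...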
                // Branch to default when condition code from prior comparison indicates.
                let br = enc_conditional_br(info.default_target, CondBrKind::Cond(Cond::Hs));
                // No need to inform the sink's branch folding logic about this branch, because it
                // will not be merged with any other branch, flipped, or elided (it is not preceded
                // or succeeded by any other branch). Just emit it with the label use.
                let default_br_offset = sink.cur_offset();
                if let BranchTarget::Label(l) = info.default_target {
                    sink.use_label_at_offset(default_br_offset, l, LabelUse::Branch19);
                }
                sink.put4(br);

                // Save index in a tmp (the live range of ridx only goes to start of this
                // sequence; rtmp1 or rtmp2 may overwrite it).
                let inst = Inst::gen_move(rtmp2, ridx, I64);
                inst.emit(sink, emit_info, state);
                // Load address of jump table
                let inst = Inst::Adr { rd: rtmp1, off: 16 };
                inst.emit(sink, emit_info, state);
                // Load value out of jump table
                let inst = Inst::SLoad32 {
                    rd: rtmp2,
                    mem: AMode::reg_plus_reg_scaled_extended(
                        rtmp1.to_reg(),
                        rtmp2.to_reg(),
                        I32,
                        ExtendOp::UXTW,
                    ),
                    flags: MemFlags::trusted(),
                };
                inst.emit(sink, emit_info, state);
                // Add base of jump table to jump-table-sourced block offset
                let inst = Inst::AluRRR {
                    alu_op: ALUOp::Add64,
                    rd: rtmp1,
                    rn: rtmp1.to_reg(),
                    rm: rtmp2.to_reg(),
                };
                inst.emit(sink, emit_info, state);
                // Branch to computed address. (`targets` here is only used for successor queries
                // and is not needed for emission.)
                let inst = Inst::IndirectBr {
                    rn: rtmp1.to_reg(),
                    targets: vec![],
                };
                inst.emit(sink, emit_info, state);
                // Emit jump table (table of 32-bit offsets).
                let jt_off = sink.cur_offset();
                for &target in info.targets.iter() {
                    let word_off = sink.cur_offset();
                    // off_into_table is an addend here embedded in the label to be later patched
                    // at the end of codegen. The offset is initially relative to this jump table
                    // entry; with the extra addend, it'll be relative to the jump table's start,
                    // after patching.
                    let off_into_table = word_off - jt_off;
                    sink.use_label_at_offset(
                        word_off,
                        target.as_label().unwrap(),
                        LabelUse::PCRel32,
                    );
                    sink.put4(off_into_table);
                }

                // Lowering produces an EmitIsland before using a JTSequence, so we can safely
                // disable the worst-case-size check in this case.
                start_off = sink.cur_offset();
            }
            &Inst::LoadExtName {
                rd,
                ref name,
                offset,
            } => {
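                // Emitted as a load of a PC-relative 8-byte literal, a branch over
                // it, and the literal itself, which is later filled in by the Abs8
                // relocation against `name` + `offset`:
                //   ldr rd, pc+8
                //   b   pc+12
                //   .quad <name + offset>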
                let inst = Inst::ULoad64 {
                    rd,
                    mem: AMode::Label(MemLabel::PCRel(8)),
                    flags: MemFlags::trusted(),
                };
                inst.emit(sink, emit_info, state);
                let inst = Inst::Jump {
                    dest: BranchTarget::ResolvedOffset(12),
                };
                inst.emit(sink, emit_info, state);
                let srcloc = state.cur_srcloc();
                sink.add_reloc(srcloc, Reloc::Abs8, name, offset);
                if emit_info.flags().emit_all_ones_funcaddrs() {
                    sink.put8(u64::max_value());
                } else {
                    sink.put8(0);
                }
            }
            &Inst::LoadAddr { rd, ref mem } => {
                let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

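                // `mem_finalize` has reduced the amode to one of the basic forms
                // handled below; materialize the effective address into `rd` with a
                // register move, an extended-register add, an immediate add/sub,
                // or (for large offsets) an add of a constant loaded into `tmp2`.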
                let (reg, index_reg, offset) = match mem {
                    AMode::RegExtended(r, idx, extendop) => (r, Some((idx, extendop)), 0),
                    AMode::Unscaled(r, simm9) => (r, None, simm9.value()),
                    AMode::UnsignedOffset(r, uimm12scaled) => {
                        (r, None, uimm12scaled.value() as i32)
                    }
                    _ => panic!("Unsupported case for LoadAddr: {:?}", mem),
                };
                let abs_offset = if offset < 0 {
                    -offset as u64
                } else {
                    offset as u64
                };
                let alu_op = if offset < 0 {
                    ALUOp::Sub64
                } else {
                    ALUOp::Add64
                };

                if let Some((idx, extendop)) = index_reg {
                    let add = Inst::AluRRRExtend {
                        alu_op: ALUOp::Add64,
                        rd,
                        rn: reg,
                        rm: idx,
                        extendop,
                    };

                    add.emit(sink, emit_info, state);
                } else if offset == 0 {
                    if reg != rd.to_reg() {
                        let mov = Inst::Mov64 { rd, rm: reg };

                        mov.emit(sink, emit_info, state);
                    }
                } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
                    let add = Inst::AluRRImm12 {
                        alu_op,
                        rd,
                        rn: reg,
                        imm12,
                    };
                    add.emit(sink, emit_info, state);
                } else {
                    // Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction
                    // was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
                    // that no other instructions will be inserted here (we're emitting directly),
                    // and a live range of `tmp2` should not span this instruction, so this use
                    // should otherwise be correct.
                    debug_assert!(rd.to_reg() != tmp2_reg());
                    debug_assert!(reg != tmp2_reg());
                    let tmp = writable_tmp2_reg();
                    for insn in Inst::load_constant(tmp, abs_offset).into_iter() {
                        insn.emit(sink, emit_info, state);
                    }
                    let add = Inst::AluRRR {
                        alu_op,
                        rd,
                        rn: reg,
                        rm: tmp.to_reg(),
                    };
                    add.emit(sink, emit_info, state);
                }
            }
            &Inst::VirtualSPOffsetAdj { offset } => {
                log::trace!(
                    "virtual sp offset adjusted by {} -> {}",
                    offset,
                    state.virtual_sp_offset + offset,
                );
                state.virtual_sp_offset += offset;
            }
            &Inst::EmitIsland { needed_space } => {
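                // Ask for `needed_space + 4` bytes: if an island is emitted here,
                // we must also emit a 4-byte jump around it.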
                if sink.island_needed(needed_space + 4) {
                    let jump_around_label = sink.get_label();
                    let jmp = Inst::Jump {
                        dest: BranchTarget::Label(jump_around_label),
                    };
                    jmp.emit(sink, emit_info, state);
                    sink.emit_island();
                    sink.bind_label(jump_around_label);
                }
            }

            &Inst::ElfTlsGetAddr { ref symbol } => {
                // This is the instruction sequence that GCC emits for ELF
                // general-dynamic (GD) TLS relocations on AArch64.
                // See: https://gcc.godbolt.org/z/KhMh5Gvra

                // adrp x0, <label>
                sink.add_reloc(state.cur_srcloc(), Reloc::Aarch64TlsGdAdrPage21, symbol, 0);
                sink.put4(0x90000000);

                // add x0, x0, <label>
                sink.add_reloc(state.cur_srcloc(), Reloc::Aarch64TlsGdAddLo12Nc, symbol, 0);
                sink.put4(0x91000000);

                // bl __tls_get_addr
                sink.add_reloc(
                    state.cur_srcloc(),
                    Reloc::Arm64Call,
                    &ExternalName::LibCall(LibCall::ElfTlsGetAddr),
                    0,
                );
                sink.put4(0x94000000);

                // nop
                sink.put4(0xd503201f);
            }

            &Inst::ValueLabelMarker { .. } => {
                // Nothing; this is only used to compute debug info.
            }

            &Inst::Unwind { ref inst } => {
                sink.add_unwind(inst.clone());
            }
        }

        let end_off = sink.cur_offset();
        debug_assert!((end_off - start_off) <= Inst::worst_case_size());

        state.clear_post_insn();
    }

    fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String {
        self.print_with_state(mb_rru, state)
    }
}